From: Michael Vrable Date: Sun, 15 Apr 2007 20:48:09 +0000 (-0700) Subject: Begin work on an alternate object store mechanism using the TAR format. X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=commitdiff_plain;h=ae63026708d2e0e112f7ff79780c60439a990a85 Begin work on an alternate object store mechanism using the TAR format. Each segment is placed in a separate file, and each object is a file stored within that TAR archive. libtar is used to write (and perhaps later read) tar files. --- diff --git a/Makefile b/Makefile index 141ce9c..eb83a3d 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ PACKAGES=uuid CXXFLAGS=-O -Wall -D_FILE_OFFSET_BITS=64 -g -pg \ `pkg-config --cflags $(PACKAGES)` -LDFLAGS=-g -pg `pkg-config --libs $(PACKAGES)` +LDFLAGS=-g -pg -ltar `pkg-config --libs $(PACKAGES)` -SRCS=scandir.cc sha1.cc store.cc +SRCS=scandir.cc sha1.cc store.cc tarstore.cc OBJS=$(SRCS:.cc=.o) scandir : $(OBJS) diff --git a/scandir.cc b/scandir.cc index e253527..3a3637c 100644 --- a/scandir.cc +++ b/scandir.cc @@ -13,12 +13,16 @@ #include #include #include +#include +#include #include "store.h" +#include "tarstore.h" #include "sha1.h" using std::string; using std::vector; +using std::ostream; static SegmentStore *segment_store; static OutputStream *info_dump = NULL; @@ -29,7 +33,7 @@ static SegmentPartitioner *index_segment, *data_segment; static const int LBS_BLOCK_SIZE = 1024 * 1024; static char *block_buf; -void scandir(const string& path); +void scandir(const string& path, std::ostream& metadata); /* Converts time to microseconds since the epoch. 
*/ int64_t encode_time(time_t time) @@ -110,7 +114,7 @@ void dumpfile(int fd, dictionary &file_info) file_info["data"] = encode_objref(segment_uuid, object_id); } -void scanfile(const string& path) +void scanfile(const string& path, ostream &metadata) { int fd; long flags; @@ -127,6 +131,14 @@ void scanfile(const string& path) printf("%s\n", path.c_str()); + metadata << "name: " << path << "\n"; + metadata << "mode: " << (stat_buf.st_mode & 07777) << "\n"; + metadata << "atime: " << stat_buf.st_atime << "\n"; + metadata << "ctime: " << stat_buf.st_ctime << "\n"; + metadata << "mtime: " << stat_buf.st_mtime << "\n"; + metadata << "user: " << stat_buf.st_uid << "\n"; + metadata << "group: " << stat_buf.st_gid << "\n"; + file_info["mode"] = encode_u16(stat_buf.st_mode & 07777); file_info["atime"] = encode_u64(encode_time(stat_buf.st_atime)); file_info["ctime"] = encode_u64(encode_time(stat_buf.st_ctime)); @@ -207,17 +219,20 @@ void scanfile(const string& path) } file_info["type"] = string(1, inode_type); + metadata << "type: " << inode_type << "\n"; info_dump->write_string(path); info_dump->write_dictionary(file_info); + metadata << "\n"; + // If we hit a directory, now that we've written the directory itself, // recursively scan the directory. 
if (recurse) - scandir(path); + scandir(path, metadata); } -void scandir(const string& path) +void scandir(const string& path, ostream &metadata) { DIR *dir = opendir(path.c_str()); @@ -240,7 +255,7 @@ void scandir(const string& path) for (vector<string>::iterator i = contents.begin(); i != contents.end(); ++i) { const string& filename = *i; - scanfile(path + "/" + filename); + scanfile(path + "/" + filename, metadata); } closedir(dir); @@ -260,12 +275,25 @@ int main(int argc, char *argv[]) string uuid = SegmentWriter::format_uuid(sw->get_uuid()); printf("Backup UUID: %s\n", uuid.c_str()); + std::ostringstream metadata; + try { - scanfile("."); + scanfile(".", metadata); } catch (IOException e) { fprintf(stderr, "IOException: %s\n", e.getError().c_str()); } + Tarfile *t = new Tarfile("tarstore.tar", uuid); + const char testdata[] = "Test string."; + t->write_object(0, testdata, strlen(testdata)); + t->write_object(1, testdata, strlen(testdata)); + t->write_object(2, testdata, strlen(testdata)); + + const string md = metadata.str(); + t->write_object(3, md.data(), md.size()); + + delete t; + delete index_segment; delete data_segment; delete sw; diff --git a/tarstore.cc b/tarstore.cc new file mode 100644 index 0000000..1fad845 --- /dev/null +++ b/tarstore.cc @@ -0,0 +1,78 @@ +/* LBS: An LFS-inspired filesystem backup system + * Copyright (C) 2006 Michael Vrable + * + * Backup data is stored in a collection of objects, which are grouped together + * into segments for storage purposes. This implementation of the object store + * is built on top of libtar, and represents segments as TAR files and objects + * as files within them. 
*/ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "tarstore.h" + +using std::string; + +Tarfile::Tarfile(const string &path, const string &segment) + : segment_name(segment) +{ + if (tar_open(&t, (char *)path.c_str(), NULL, O_WRONLY | O_CREAT, 0600, + TAR_VERBOSE | TAR_GNU) == -1) + throw IOException("Error opening Tarfile"); +} + +Tarfile::~Tarfile() +{ + tar_append_eof(t); + + if (tar_close(t) != 0) + throw IOException("Error closing Tarfile"); +} + +void Tarfile::write_object(int id, const char *data, size_t len) +{ + memset(&t->th_buf, 0, sizeof(struct tar_header)); + + char buf[64]; + sprintf(buf, "%08x", id); + string path = segment_name + "/" + buf; + printf("path: %s\n", path.c_str()); + + th_set_type(t, S_IFREG | 0600); + th_set_user(t, 0); + th_set_group(t, 0); + th_set_mode(t, 0600); + th_set_size(t, len); + th_set_mtime(t, time(NULL)); + th_set_path(t, const_cast<char *>(path.c_str())); + th_finish(t); + + if (th_write(t) != 0) + throw IOException("Error writing tar header"); + + th_print(t); + + if (len == 0) + return; + + size_t blocks = (len + T_BLOCKSIZE - 1) / T_BLOCKSIZE; + size_t padding = blocks * T_BLOCKSIZE - len; + + for (size_t i = 0; i < blocks - 1; i++) { + if (tar_block_write(t, &data[i * T_BLOCKSIZE]) == -1) + throw IOException("Error writing tar block"); + } + + char block[T_BLOCKSIZE]; + memset(block, 0, sizeof(block)); + memcpy(block, &data[T_BLOCKSIZE * (blocks - 1)], T_BLOCKSIZE - padding); + if (tar_block_write(t, block) == -1) + throw IOException("Error writing final tar block"); +} diff --git a/tarstore.h b/tarstore.h new file mode 100644 index 0000000..923c75a --- /dev/null +++ b/tarstore.h @@ -0,0 +1,31 @@ +/* LBS: An LFS-inspired filesystem backup system + * Copyright (C) 2006 Michael Vrable + * + * Backup data is stored in a collection of objects, which are grouped together + * into segments for storage purposes. 
This implementation of the object store + * is built on top of libtar, and represents segments as TAR files and objects + * as files within them. */ + +#ifndef _LBS_TARSTORE_H +#define _LBS_TARSTORE_H + +#include +#include + +#include + +#include "store.h" + +class Tarfile { +public: + Tarfile(const std::string &path, const std::string &segment); + virtual ~Tarfile(); + + void write_object(int id, const char *data, size_t len); + +private: + std::string segment_name; + TAR *t; +}; + +#endif // _LBS_TARSTORE_H