From 5c10647d667fc4408b73402db9247181a54a57ad Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Fri, 11 May 2007 12:21:50 -0700 Subject: [PATCH] Allow metadata to be written incrementally. --- ref.cc | 4 +++ ref.h | 1 + scandir.cc | 99 +++++++++++++++++++++++++++++++++++++++--------------- store.cc | 21 ++++++++---- store.h | 20 ++++++----- 5 files changed, 102 insertions(+), 43 deletions(-) diff --git a/ref.cc b/ref.cc index ff710d2..ce16cb8 100644 --- a/ref.cc +++ b/ref.cc @@ -28,6 +28,10 @@ string generate_uuid() return string(buf); } +ObjectReference::ObjectReference() + : segment(""), object("") +{ +} ObjectReference::ObjectReference(const std::string& segment, int sequence) : segment(segment) diff --git a/ref.h b/ref.h index 13eba94..cc21a5a 100644 --- a/ref.h +++ b/ref.h @@ -64,6 +64,7 @@ std::string generate_uuid(); * and converted to and from the text representation. */ class ObjectReference { public: + ObjectReference(); ObjectReference(const std::string& segment, int sequence); ObjectReference(const std::string& segment, const std::string& sequence); diff --git a/scandir.cc b/scandir.cc index c11c561..c8a4180 100644 --- a/scandir.cc +++ b/scandir.cc @@ -16,6 +16,7 @@ #include #include #include +#include #include "format.h" #include "store.h" @@ -32,12 +33,40 @@ static TarSegmentStore *tss = NULL; static const int LBS_BLOCK_SIZE = 1024 * 1024; static char *block_buf; -void scandir(const string& path, std::ostream& metadata); +/* Contents of the root object. This will contain a set of indirect links to + * the metadata objects. */ +std::ostringstream metadata_root; -/* Converts time to microseconds since the epoch. */ -int64_t encode_time(time_t time) +/* Buffer for building up metadata. */ +std::ostringstream metadata; + +/* Keep track of all segments which are needed to reconstruct the snapshot. */ +std::set segment_list; + +void scandir(const string& path); + +/* Ensure contents of metadata are flushed to an object. */ +void metadata_flush() { - return (int64_t)time * 1000000; + string m = metadata.str(); + if (m.size() == 0) + return; + + /* Write current metadata information to a new object. */ + LbsObject *meta = new LbsObject; + meta->set_group("root"); + meta->set_data(m.data(), m.size()); + meta->write(tss); + meta->checksum(); + + /* Write a reference to this block in the root. */ + ObjectReference ref = meta->get_ref(); + metadata_root << "@" << ref.to_string() << "\n"; + segment_list.insert(ref.get_segment()); + + delete meta; + + metadata.str(""); } /* Read data from a file descriptor and return the amount of data read. A @@ -67,12 +96,12 @@ size_t file_read(int fd, char *buf, size_t maxlen) /* Read the contents of a file (specified by an open file descriptor) and copy * the data to the store. */ -void dumpfile(int fd, dictionary &file_info, ostream &metadata) +void dumpfile(int fd, dictionary &file_info) { struct stat stat_buf; fstat(fd, &stat_buf); int64_t size = 0; - list segment_list; + list object_list; if ((stat_buf.st_mode & S_IFMT) != S_IFREG) { printf("file is no longer a regular file!\n"); @@ -96,7 +125,8 @@ void dumpfile(int fd, dictionary &file_info, ostream &metadata) o->set_group("data"); o->set_data(block_buf, bytes); o->write(tss); - segment_list.push_back(o->get_name()); + object_list.push_back(o->get_name()); + segment_list.insert(o->get_ref().get_segment()); delete o; size += bytes; @@ -107,19 +137,19 @@ void dumpfile(int fd, dictionary &file_info, ostream &metadata) /* For files that only need to be broken apart into a few objects, store * the list of objects directly. For larger files, store the data * out-of-line and provide a pointer to the indrect object. */ - if (segment_list.size() < 8) { + if (object_list.size() < 8) { string blocklist = ""; - for (list::iterator i = segment_list.begin(); - i != segment_list.end(); ++i) { - if (i != segment_list.begin()) + for (list::iterator i = object_list.begin(); + i != object_list.end(); ++i) { + if (i != object_list.begin()) blocklist += " "; blocklist += *i; } file_info["data"] = blocklist; } else { string blocklist = ""; - for (list::iterator i = segment_list.begin(); - i != segment_list.end(); ++i) { + for (list::iterator i = object_list.begin(); + i != object_list.end(); ++i) { blocklist += *i + "\n"; } @@ -128,11 +158,12 @@ void dumpfile(int fd, dictionary &file_info, ostream &metadata) i->set_data(blocklist.data(), blocklist.size()); i->write(tss); file_info["data"] = "@" + i->get_name(); + segment_list.insert(i->get_ref().get_segment()); delete i; } } -void scanfile(const string& path, ostream &metadata) +void scanfile(const string& path) { int fd; long flags; @@ -217,7 +248,7 @@ void scanfile(const string& path, ostream &metadata) fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); file_info["size"] = encode_int(stat_buf.st_size); - dumpfile(fd, file_info, metadata); + dumpfile(fd, file_info); close(fd); break; @@ -237,13 +268,17 @@ void scanfile(const string& path, ostream &metadata) dict_output(metadata, file_info); metadata << "\n"; + // Break apart metadata listing if it becomes too large. + if (metadata.str().size() > 4096) + metadata_flush(); + // If we hit a directory, now that we've written the directory itself, // recursively scan the directory. if (recurse) - scandir(path, metadata); + scandir(path); } -void scandir(const string& path, ostream &metadata) +void scandir(const string& path) { DIR *dir = opendir(path.c_str()); @@ -266,7 +301,7 @@ void scandir(const string& path, ostream &metadata) for (vector::iterator i = contents.begin(); i != contents.end(); ++i) { const string& filename = *i; - scanfile(path + "/" + filename, metadata); + scanfile(path + "/" + filename); } closedir(dir); @@ -282,21 +317,31 @@ int main(int argc, char *argv[]) tss = new TarSegmentStore("."); } - std::ostringstream metadata; - try { - scanfile(".", metadata); + scanfile("."); } catch (IOException e) { fprintf(stderr, "IOException: %s\n", e.getError().c_str()); } - const string md = metadata.str(); + metadata_flush(); + const string md = metadata_root.str(); - LbsObject *r = new LbsObject; - r->set_group("root"); - r->set_data(md.data(), md.size()); - r->write(tss); - delete r; + LbsObject *root = new LbsObject; + root->set_group("root"); + root->set_data(md.data(), md.size()); + root->write(tss); + root->checksum(); + + segment_list.insert(root->get_ref().get_segment()); + string r = root->get_ref().to_string(); + printf("root: %s\n\n", r.c_str()); + delete root; + + printf("segments:\n"); + for (std::set::iterator i = segment_list.begin(); + i != segment_list.end(); ++i) { + printf(" %s\n", i->c_str()); + } tss->sync(); delete tss; diff --git a/store.cc b/store.cc index ce91e55..ce54e87 100644 --- a/store.cc +++ b/store.cc @@ -93,8 +93,8 @@ void Tarfile::internal_write_object(const string &path, static const size_t SEGMENT_SIZE = 4 * 1024 * 1024; -string TarSegmentStore::write_object(const char *data, size_t len, const - std::string &group) +ObjectReference TarSegmentStore::write_object(const char *data, size_t len, + const std::string &group) { struct segment_info *segment; @@ -122,14 +122,14 @@ string TarSegmentStore::write_object(const char *data, size_t len, const segment->file->write_object(id, data, len); segment->count++; - string full_name = segment->name + "/" + id_buf; + ObjectReference ref(segment->name, id_buf); // If this segment meets or exceeds the size target, close it so that // future objects will go into a new segment. if (segment->file->size_estimate() >= SEGMENT_SIZE) close_segment(group); - return full_name; + return ref; } void TarSegmentStore::sync() @@ -168,8 +168,15 @@ void LbsObject::write(TarSegmentStore *store) assert(data != NULL); assert(!written); - name = store->write_object(data, data_len, group); - + ref = store->write_object(data, data_len, group); written = true; - data = NULL; +} + +void LbsObject::checksum() +{ + assert(written); + + SHA1Checksum hash; + hash.process(data, data_len); + ref.set_checksum(hash.checksum_str()); } diff --git a/store.h b/store.h index a9d6365..1ef5db4 100644 --- a/store.h +++ b/store.h @@ -20,6 +20,7 @@ #include #include "sha1.h" +#include "ref.h" class LbsObject; @@ -72,8 +73,8 @@ public: // (segment/object) to refer to it. The optional parameter group can be // used to control object placement; objects with different group // parameters are kept in separate segments. - std::string write_object(const char *data, size_t len, - const std::string &group = ""); + ObjectReference write_object(const char *data, size_t len, + const std::string &group = ""); // Ensure all segments have been fully written. void sync(); @@ -82,7 +83,6 @@ private: struct segment_info { Tarfile *file; std::string name; // UUID - std::set refs; // Other segments this one refers to int count; // Objects written to this segment }; @@ -114,17 +114,21 @@ public: // incrementally. Data can be an arbitrary block of binary data of any // size. The pointer to the data need only remain valid until write() is // called. - //const char *get_data() const { return data; } - //size_t get_data_len() const { return data_len; } void set_data(const char *d, size_t len) { data = d; data_len = len; } // Write an object to a segment, thus making it permanent. This function // can be called at most once. void write(TarSegmentStore *store); + // Compute the checksum of an object, and include it in the object + // reference. This should be called after write(), and the data specified + // by set_data() must remain valid through the call to checksum(). + void checksum(); + // An object is assigned a permanent name once it has been written to a // segment. Until that time, its name cannot be determined. - std::string get_name() const { return name; } + std::string get_name() const { return ref.to_string(); } + ObjectReference get_ref() { return ref; } private: std::string group; @@ -132,9 +136,7 @@ private: size_t data_len; bool written; - std::string name; - - std::set refs; + ObjectReference ref; }; #endif // _LBS_STORE_H -- 2.20.1