X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=scandir.cc;h=d50867ae4bf0f5a76b1e6bbe20266a6a279d2783;hb=908cfc1af3e761d3243d31a68420fb8994840721;hp=83607630755e3972f6197f7f776ad8f9f382c1fd;hpb=0dfc70e01ddb7d2bce0db03d5364c0bd3a2bb308;p=cumulus.git

diff --git a/scandir.cc b/scandir.cc
index 8360763..d50867a 100644
--- a/scandir.cc
+++ b/scandir.cc
@@ -50,6 +50,7 @@
 #include "remote.h"
 #include "store.h"
 #include "sha1.h"
+#include "subfile.h"
 #include "util.h"
 
 using std::list;
@@ -139,17 +140,23 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
 
     /* Look up this file in the old stat cache, if we can.  If the stat
      * information indicates that the file has not changed, do not bother
-     * re-reading the entire contents. */
+     * re-reading the entire contents.  Even if the information has been
+     * changed, we can use the list of old blocks in the search for a sub-block
+     * incremental representation. */
     bool cached = false;
+    list<ObjectReference> old_blocks;
 
-    if (metawriter->find(path) && metawriter->is_unchanged(&stat_buf)) {
+    bool found = metawriter->find(path);
+    if (found)
+        old_blocks = metawriter->get_blocks();
+
+    if (found && metawriter->is_unchanged(&stat_buf)) {
         cached = true;
-        list<ObjectReference> blocks = metawriter->get_blocks();
 
         /* If any of the blocks in the object have been expired, then we should
          * fall back to fully reading in the file. */
-        for (list<ObjectReference>::const_iterator i = blocks.begin();
-             i != blocks.end(); ++i) {
+        for (list<ObjectReference>::const_iterator i = old_blocks.begin();
+             i != old_blocks.end(); ++i) {
             const ObjectReference &ref = *i;
             if (!db->IsAvailable(ref)) {
                 cached = false;
@@ -161,8 +168,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
     /* If everything looks okay, use the cached information */
     if (cached) {
         file_info["checksum"] = metawriter->get_checksum();
-        for (list<ObjectReference>::const_iterator i = blocks.begin();
-             i != blocks.end(); ++i) {
+        for (list<ObjectReference>::const_iterator i = old_blocks.begin();
+             i != old_blocks.end(); ++i) {
             const ObjectReference &ref = *i;
             object_list.push_back(ref.to_string());
             if (ref.is_normal())
@@ -177,6 +184,9 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
      * time. */
     if (!cached) {
         SHA1Checksum hash;
+        Subfile subfile(db);
+        subfile.load_old_blocks(old_blocks);
+
         while (true) {
             ssize_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE);
             if (bytes == 0)
@@ -215,6 +225,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
                 ref = db->FindObject(block_csum, bytes);
             }
 
+            list<ObjectReference> refs;
+
             // Store a copy of the object if one does not yet exist
             if (ref.is_null()) {
                 LbsObject *o = new LbsObject;
@@ -247,18 +259,19 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
                     status = "new";
                 }
 
-                o->set_data(block_buf, bytes);
-                o->write(tss);
-                ref = o->get_ref();
-                db->StoreObject(ref, block_csum, bytes, block_age);
-                ref.set_range(0, bytes);
-                delete o;
+                subfile.analyze_new_block(block_buf, bytes);
+                refs = subfile.create_incremental(tss, o, block_age);
+            } else {
+                refs.push_back(ref);
             }
 
-            object_list.push_back(ref.to_string());
-            if (ref.is_normal())
-                add_segment(ref.get_segment());
-            db->UseObject(ref);
+            while (!refs.empty()) {
+                ref = refs.front(); refs.pop_front();
+                object_list.push_back(ref.to_string());
+                if (ref.is_normal())
+                    add_segment(ref.get_segment());
+                db->UseObject(ref);
+            }
             size += bytes;
 
             if (status == NULL)
@@ -415,7 +428,6 @@ void scanfile(const string& path, bool include)
     for (list<string>::iterator i = includes.begin();
          i != includes.end(); ++i) {
         if (path == *i) {
-            printf("Including %s\n", path.c_str());
             include = true;
         }
     }
@@ -423,7 +435,6 @@ void scanfile(const string& path, bool include)
     for (list<string>::iterator i = excludes.begin();
          i != excludes.end(); ++i) {
         if (path == *i) {
-            printf("Excluding %s\n", path.c_str());
             include = false;
         }
     }
@@ -431,7 +442,6 @@ void scanfile(const string& path, bool include)
     for (list<string>::iterator i = searches.begin();
          i != searches.end(); ++i) {
         if (path == *i) {
-            printf("Scanning %s\n", path.c_str());
             scan_only = true;
         }
     }
@@ -532,7 +542,6 @@ void scanfile(const string& path, bool include)
  * themselves are excluded from being backed up. */
 void add_include(const char *path)
 {
-    printf("Add: %s\n", path);
     /* Was an absolute path specified?  If so, we'll need to start scanning
      * from the root directory.  Make sure that the user was consistent in
      * providing either all relative paths or all absolute paths. */
@@ -717,28 +726,6 @@ int main(int argc, char *argv[])
         return 1;
     }
 
-    // Dump paths for debugging/informational purposes
-    {
-        list<string>::const_iterator i;
-
-        printf("LBS Version: %s\n", lbs_version);
-
-        printf("--dest=%s\n--localdb=%s\n--upload-script=%s\n",
-               backup_dest.c_str(), localdb_dir.c_str(), backup_script.c_str());
-
-        printf("Includes:\n");
-        for (i = includes.begin(); i != includes.end(); ++i)
-            printf("    %s\n", i->c_str());
-
-        printf("Excludes:\n");
-        for (i = excludes.begin(); i != excludes.end(); ++i)
-            printf("    %s\n", i->c_str());
-
-        printf("Searching:\n");
-        for (i = searches.begin(); i != searches.end(); ++i)
-            printf("    %s\n", i->c_str());
-    }
-
     block_buf = new char[LBS_BLOCK_SIZE];
 
     /* Initialize the remote storage layer.  If using an upload script, create
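
Note: the hunks above replace dumpfile()'s one-object-per-block store with the sub-file incremental path: the file's old block references are loaded into a Subfile object, each new block is handed to analyze_new_block(), and create_incremental() returns a list of ObjectReference values that together describe the block, which the new while loop records one by one (evidently so that parts of a block can be expressed as ranges of already-stored data instead of fresh uploads). The stand-alone sketch below only illustrates that general idea under those assumptions; it is not the cumulus Subfile implementation, and BlockRef, make_incremental, and the common-prefix comparison are invented for the example.

// Simplified, hypothetical illustration of a sub-block incremental result.
// A changed block is described by a list of references: each one either
// reuses a byte range of an already-stored object or carries new data.
#include <cstddef>
#include <iostream>
#include <list>
#include <string>

struct BlockRef {
    std::string object;   // name of an existing stored object; "" means new data
    std::size_t offset;   // byte range within that object
    std::size_t length;
    std::string literal;  // the new bytes, used only when object is ""
};

// Toy analysis: reuse the common prefix of the old block, store the rest new.
// (The real Subfile class works on ObjectReference ranges and the local
// database; this only shows the shape of the output.)
std::list<BlockRef> make_incremental(const std::string &old_block,
                                     const std::string &new_block)
{
    std::size_t common = 0;
    while (common < old_block.size() && common < new_block.size()
           && old_block[common] == new_block[common])
        ++common;

    std::list<BlockRef> refs;
    if (common > 0)
        refs.push_back(BlockRef{"old-object/0", 0, common, ""});
    if (common < new_block.size())
        refs.push_back(BlockRef{"", 0, new_block.size() - common,
                                new_block.substr(common)});
    return refs;
}

int main()
{
    std::list<BlockRef> refs = make_incremental("hello world", "hello there");

    // Mirrors the consumer loop added to dumpfile(): drain the list and
    // record every reference that, taken together, describes the block.
    while (!refs.empty()) {
        BlockRef r = refs.front();
        refs.pop_front();
        if (!r.object.empty())
            std::cout << "reuse " << r.object << " bytes [" << r.offset
                      << ", " << r.offset + r.length << ")\n";
        else
            std::cout << "store " << r.length << " new bytes\n";
    }
    return 0;
}

The point of the sketch is the shape of the result: a single input block may expand into several references, some of which merely name a byte range of an existing object, which is why the diff switches from pushing a single ref to draining a list.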