X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=scandir.cc;h=d50867ae4bf0f5a76b1e6bbe20266a6a279d2783;hb=908cfc1af3e761d3243d31a68420fb8994840721;hp=1daf352afc9f90fd1bd4b65e0c8028534664056f;hpb=09533a1615813c343e1244275531f933d9b48ddf;p=cumulus.git diff --git a/scandir.cc b/scandir.cc index 1daf352..d50867a 100644 --- a/scandir.cc +++ b/scandir.cc @@ -1,4 +1,25 @@ -/* Recursively descend the filesystem and visit each file. */ +/* Cumulus: Smart Filesystem Backup to Dumb Servers + * + * Copyright (C) 2006-2008 The Regents of the University of California + * Written by Michael Vrable + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Main entry point for LBS. Contains logic for traversing the filesystem and + * constructing a backup. */ #include #include @@ -29,6 +50,7 @@ #include "remote.h" #include "store.h" #include "sha1.h" +#include "subfile.h" #include "util.h" using std::list; @@ -118,17 +140,23 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, /* Look up this file in the old stat cache, if we can. If the stat * information indicates that the file has not changed, do not bother - * re-reading the entire contents. */ + * re-reading the entire contents. Even if the information has been + * changed, we can use the list of old blocks in the search for a sub-block + * incremental representation. */ bool cached = false; + list old_blocks; - if (metawriter->find(path) && metawriter->is_unchanged(&stat_buf)) { + bool found = metawriter->find(path); + if (found) + old_blocks = metawriter->get_blocks(); + + if (found && metawriter->is_unchanged(&stat_buf)) { cached = true; - list blocks = metawriter->get_blocks(); /* If any of the blocks in the object have been expired, then we should * fall back to fully reading in the file. */ - for (list::const_iterator i = blocks.begin(); - i != blocks.end(); ++i) { + for (list::const_iterator i = old_blocks.begin(); + i != old_blocks.end(); ++i) { const ObjectReference &ref = *i; if (!db->IsAvailable(ref)) { cached = false; @@ -140,8 +168,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, /* If everything looks okay, use the cached information */ if (cached) { file_info["checksum"] = metawriter->get_checksum(); - for (list::const_iterator i = blocks.begin(); - i != blocks.end(); ++i) { + for (list::const_iterator i = old_blocks.begin(); + i != old_blocks.end(); ++i) { const ObjectReference &ref = *i; object_list.push_back(ref.to_string()); if (ref.is_normal()) @@ -156,6 +184,9 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, * time. */ if (!cached) { SHA1Checksum hash; + Subfile subfile(db); + subfile.load_old_blocks(old_blocks); + while (true) { ssize_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE); if (bytes == 0) @@ -194,6 +225,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, ref = db->FindObject(block_csum, bytes); } + list refs; + // Store a copy of the object if one does not yet exist if (ref.is_null()) { LbsObject *o = new LbsObject; @@ -226,18 +259,19 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, status = "new"; } - o->set_data(block_buf, bytes); - o->write(tss); - ref = o->get_ref(); - db->StoreObject(ref, block_csum, bytes, block_age); - ref.set_range(0, bytes); - delete o; + subfile.analyze_new_block(block_buf, bytes); + refs = subfile.create_incremental(tss, o, block_age); + } else { + refs.push_back(ref); } - object_list.push_back(ref.to_string()); - if (ref.is_normal()) - add_segment(ref.get_segment()); - db->UseObject(ref); + while (!refs.empty()) { + ref = refs.front(); refs.pop_front(); + object_list.push_back(ref.to_string()); + if (ref.is_normal()) + add_segment(ref.get_segment()); + db->UseObject(ref); + } size += bytes; if (status == NULL) @@ -394,7 +428,6 @@ void scanfile(const string& path, bool include) for (list::iterator i = includes.begin(); i != includes.end(); ++i) { if (path == *i) { - printf("Including %s\n", path.c_str()); include = true; } } @@ -402,7 +435,6 @@ void scanfile(const string& path, bool include) for (list::iterator i = excludes.begin(); i != excludes.end(); ++i) { if (path == *i) { - printf("Excluding %s\n", path.c_str()); include = false; } } @@ -410,7 +442,6 @@ void scanfile(const string& path, bool include) for (list::iterator i = searches.begin(); i != searches.end(); ++i) { if (path == *i) { - printf("Scanning %s\n", path.c_str()); scan_only = true; } } @@ -511,7 +542,6 @@ void scanfile(const string& path, bool include) * themselves are excluded from being backed up. */ void add_include(const char *path) { - printf("Add: %s\n", path); /* Was an absolute path specified? If so, we'll need to start scanning * from the root directory. Make sure that the user was consistent in * providing either all relative paths or all absolute paths. */ @@ -696,28 +726,6 @@ int main(int argc, char *argv[]) return 1; } - // Dump paths for debugging/informational purposes - { - list::const_iterator i; - - printf("LBS Version: %s\n", lbs_version); - - printf("--dest=%s\n--localdb=%s\n--upload-script=%s\n", - backup_dest.c_str(), localdb_dir.c_str(), backup_script.c_str()); - - printf("Includes:\n"); - for (i = includes.begin(); i != includes.end(); ++i) - printf(" %s\n", i->c_str()); - - printf("Excludes:\n"); - for (i = excludes.begin(); i != excludes.end(); ++i) - printf(" %s\n", i->c_str()); - - printf("Searching:\n"); - for (i = searches.begin(); i != searches.end(); ++i) - printf(" %s\n", i->c_str()); - } - block_buf = new char[LBS_BLOCK_SIZE]; /* Initialize the remote storage layer. If using an upload script, create @@ -782,7 +790,8 @@ int main(int argc, char *argv[]) if (backup_scheme.size() > 0) checksum_filename += backup_scheme + "-"; checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums"; - RemoteFile *checksum_file = remote->alloc_file(checksum_filename); + RemoteFile *checksum_file = remote->alloc_file(checksum_filename, + "checksums"); FILE *checksums = fdopen(checksum_file->get_fd(), "w"); for (std::set::iterator i = segment_list.begin(); @@ -824,7 +833,8 @@ int main(int argc, char *argv[]) desc_filename += backup_scheme + "-"; desc_filename = desc_filename + desc_buf + ".lbs"; - RemoteFile *descriptor_file = remote->alloc_file(desc_filename); + RemoteFile *descriptor_file = remote->alloc_file(desc_filename, + "snapshots"); int descriptor_fd = descriptor_file->get_fd(); if (descriptor_fd < 0) { fprintf(stderr, "Unable to open descriptor output file: %m\n");