X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=scandir.cc;h=71fba9dc22fcbc597303e44ceeac16a54048da4e;hb=248b2455853ed082bf3b032fea4cb6b557a145ae;hp=64a0dbf63b3f83168b69be257939a86e8a22ebab;hpb=6e7d9688683887ba53ea2858771a13fdafd16edf;p=cumulus.git diff --git a/scandir.cc b/scandir.cc index 64a0dbf..71fba9d 100644 --- a/scandir.cc +++ b/scandir.cc @@ -26,6 +26,7 @@ #include "localdb.h" #include "store.h" #include "sha1.h" +#include "statcache.h" using std::list; using std::string; @@ -44,6 +45,10 @@ static const size_t LBS_METADATA_BLOCK_SIZE = 65536; * invocations to help in creating incremental snapshots. */ LocalDb *db; +/* Stat cache, which stores data locally to speed the backup process by quickly + * skipping files which have not changed. */ +StatCache *statcache; + /* Contents of the root object. This will contain a set of indirect links to * the metadata objects. */ std::ostringstream metadata_root; @@ -111,7 +116,7 @@ size_t file_read(int fd, char *buf, size_t maxlen) /* Read the contents of a file (specified by an open file descriptor) and copy * the data to the store. Returns the size of the file (number of bytes * dumped), or -1 on error. */ -int64_t dumpfile(int fd, dictionary &file_info) +int64_t dumpfile(int fd, dictionary &file_info, const string &path) { struct stat stat_buf; fstat(fd, &stat_buf); @@ -137,6 +142,7 @@ int64_t dumpfile(int fd, dictionary &file_info) // Either find a copy of this block in an already-existing segment, or // index it so it can be re-used in the future + double block_age = 0.0; SHA1Checksum block_hash; block_hash.process(block_buf, bytes); string block_csum = block_hash.checksum_str(); @@ -153,8 +159,10 @@ int64_t dumpfile(int fd, dictionary &file_info) * put it in a separate group, so that old objects get grouped * together. The hope is that these old objects will continue to * be used in the future, and we obtain segments which will - * continue to be well-utilized. 
*/ - if (db->IsOldObject(block_csum, bytes)) + * continue to be well-utilized. Additionally, keep track of the + * age of the data by looking up the age of the block which was + * expired and using that instead of the current time. */ + if (db->IsOldObject(block_csum, bytes, &block_age)) o->set_group("compacted"); else o->set_group("data"); @@ -162,7 +170,7 @@ int64_t dumpfile(int fd, dictionary &file_info) o->set_data(block_buf, bytes); o->write(tss); ref = o->get_ref(); - db->StoreObject(ref, block_csum, bytes); + db->StoreObject(ref, block_csum, bytes, block_age); delete o; } @@ -174,6 +182,8 @@ int64_t dumpfile(int fd, dictionary &file_info) file_info["checksum"] = hash.checksum_str(); + statcache->Save(path, &stat_buf, file_info["checksum"], object_list); + /* For files that only need to be broken apart into a few objects, store * the list of objects directly. For larger files, store the data * out-of-line and provide a pointer to the indrect object. */ @@ -312,7 +322,7 @@ void scanfile(const string& path) flags = fcntl(fd, F_GETFL); fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); - file_size = dumpfile(fd, file_info); + file_size = dumpfile(fd, file_info, path); file_info["size"] = encode_int(file_size); close(fd); @@ -403,8 +413,10 @@ int main(int argc, char *argv[]) while (1) { static struct option long_options[] = { - {"localdb", 1, 0, 0}, // 0 - {"exclude", 1, 0, 0}, // 1 + {"localdb", 1, 0, 0}, // 0 + {"exclude", 1, 0, 0}, // 1 + {"filter", 1, 0, 0}, // 2 + {"filter-extension", 1, 0, 0}, // 3 {NULL, 0, 0, 0}, }; @@ -422,6 +434,12 @@ int main(int argc, char *argv[]) case 1: // --exclude excludes.push_back(optarg); break; + case 2: // --filter + filter_program = optarg; + break; + case 3: // --filter-extension + filter_extension = optarg; + break; default: fprintf(stderr, "Unhandled long option!\n"); return 1; @@ -444,31 +462,32 @@ int main(int argc, char *argv[]) localdb_dir = backup_dest; } - printf("Source: %s, Dest: %s\n", - backup_source.c_str(), 
backup_dest.c_str()); + printf("Source: %s\nDest: %s\nDatabase: %s\n\n", + backup_source.c_str(), backup_dest.c_str(), localdb_dir.c_str()); tss = new TarSegmentStore(backup_dest); block_buf = new char[LBS_BLOCK_SIZE]; - /* Write a backup descriptor file, which says which segments are needed and - * where to start to restore this snapshot. The filename is based on the - * current time. */ + /* Store the time when the backup started, so it can be included in the + * snapshot name. */ time_t now; struct tm time_buf; char desc_buf[256]; time(&now); localtime_r(&now, &time_buf); strftime(desc_buf, sizeof(desc_buf), "%Y%m%dT%H%M%S", &time_buf); - string desc_filename = backup_dest + "/" + desc_buf + ".lbs"; - std::ofstream descriptor(desc_filename.c_str()); /* Open the local database which tracks all objects that are stored * remotely, for efficient incrementals. Provide it with the name of this * snapshot. */ - string database_path = backup_dest + "/localdb.sqlite"; + string database_path = localdb_dir + "/localdb.sqlite"; db = new LocalDb; db->Open(database_path.c_str(), desc_buf); + /* Initialize the stat cache, for skipping over unchanged files. */ + statcache = new StatCache; + statcache->Open(localdb_dir.c_str(), desc_buf); + try { scanfile("."); } catch (IOException e) { @@ -483,11 +502,18 @@ int main(int argc, char *argv[]) root->set_data(md.data(), md.size()); root->write(tss); root->checksum(); - segment_list.insert(root->get_ref().get_segment()); - descriptor << "Root: " << root->get_ref().to_string() << "\n"; + + /* Write a backup descriptor file, which says which segments are needed and + * where to start to restore this snapshot. The filename is based on the + * current time. 
*/ + string desc_filename = backup_dest + "/snapshot-" + desc_buf + ".lbs"; + std::ofstream descriptor(desc_filename.c_str()); + + descriptor << "Format: LBS Snapshot v0.1\n"; strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z", &time_buf); descriptor << "Date: " << desc_buf << "\n"; + descriptor << "Root: " << root->get_ref().to_string() << "\n"; delete root; @@ -499,6 +525,9 @@ int main(int argc, char *argv[]) db->Close(); + statcache->Close(); + delete statcache; + tss->sync(); delete tss;