X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=blobdiff_plain;f=main.cc;h=4c8f2bbb40093ab28acc698c7c0ee7629dd51e4d;hp=297e805367601f072e552231d32306886bab1f12;hb=7efae40a865fce46b74538745b17901785062e5f;hpb=35dd99aa3d47805b661fe3126a951710fa7dee11

diff --git a/main.cc b/main.cc
index 297e805..4c8f2bb 100644
--- a/main.cc
+++ b/main.cc
@@ -1,8 +1,6 @@
-/* Cumulus: Smart Filesystem Backup to Dumb Servers
- *
- * Copyright (C) 2006-2009 The Regents of the University of California
- * Copyright (C) 2012 Google Inc.
- * Written by Michael Vrable
+/* Cumulus: Efficient Filesystem Backup to the Cloud
+ * Copyright (C) 2006-2009, 2012 The Cumulus Developers
+ * See the AUTHORS file for a list of contributors.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -49,6 +47,7 @@
 #include 
 
 #include "exclude.h"
+#include "hash.h"
 #include "localdb.h"
 #include "metadata.h"
 #include "remote.h"
@@ -83,14 +82,6 @@ static char *block_buf;
  * invocations to help in creating incremental snapshots. */
 LocalDb *db;
 
-/* Keep track of all segments which are needed to reconstruct the snapshot. */
-std::set<string> segment_list;
-
-/* Snapshot intent: 1=daily, 7=weekly, etc. This is not used directly, but is
- * stored in the local database and can help guide segment cleaning and
- * snapshot expiration policies. */
-double snapshot_intent = 1.0;
-
 /* Selection of files to include/exclude in the snapshot. */
 PathFilterList filter_rules;
 
@@ -99,13 +90,6 @@ bool flag_rebuild_statcache = false;
 /* Whether verbose output is enabled. */
 bool verbose = false;
 
-/* Ensure that the given segment is listed as a dependency of the current
- * snapshot. */
-void add_segment(const string& segment)
-{
-    segment_list.insert(segment);
-}
-
 /* Attempts to open a regular file read-only, but with safety checks for files
  * that might not be fully trusted. */
 int safe_open(const string& path, struct stat *stat_buf)
@@ -233,8 +217,6 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
              i != old_blocks.end(); ++i) {
             const ObjectReference &ref = *i;
             object_list.push_back(ref.to_string());
-            if (ref.is_normal())
-                add_segment(ref.get_segment());
             db->UseObject(ref);
         }
         size = stat_buf.st_size;
@@ -244,7 +226,7 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
     /* If the file is new or changed, we must read in the contents a block at a
      * time. */
     if (!cached) {
-        SHA1Checksum hash;
+        Hash *hash = Hash::New();
         Subfile subfile(db);
         subfile.load_old_blocks(old_blocks);
 
@@ -258,7 +240,7 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
                 break;
             }
 
-            hash.process(block_buf, bytes);
+            hash->update(block_buf, bytes);
 
             // Sparse file processing: if we read a block of all zeroes, encode
             // that explicitly.
@@ -275,9 +257,10 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
             double block_age = 0.0;
             ObjectReference ref;
 
-            SHA1Checksum block_hash;
-            block_hash.process(block_buf, bytes);
-            string block_csum = block_hash.checksum_str();
+            Hash *hash = Hash::New();
+            hash->update(block_buf, bytes);
+            string block_csum = hash->digest_str();
+            delete hash;
 
             if (all_zero) {
                 ref = ObjectReference(ObjectReference::REF_ZERO);
@@ -309,9 +292,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
                 if (object_group == 0) {
                     o->set_group("data");
                 } else {
-                    char group[32];
-                    sprintf(group, "compacted-%d", object_group);
-                    o->set_group(group);
+                    o->set_group(string_printf("compacted-%d",
+                                               object_group));
                 }
                 if (status == NULL)
                     status = "partial";
@@ -333,8 +315,6 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
             while (!refs.empty()) {
                 ref = refs.front(); refs.pop_front();
                 object_list.push_back(ref.to_string());
-                if (ref.is_normal())
-                    add_segment(ref.get_segment());
                 db->UseObject(ref);
             }
             size += bytes;
@@ -343,7 +323,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
             status = "old";
         }
 
-        file_info["checksum"] = hash.checksum_str();
+        file_info["checksum"] = hash->digest_str();
+        delete hash;
     }
 
     // Sanity check: if we are rebuilding the statcache, but the file looks
@@ -670,8 +651,7 @@ void usage(const char *program)
            "  --signature-filter=COMMAND\n"
            "                       program though which to filter descriptor\n"
            "  --scheme=NAME        optional name for this snapshot\n"
-           "  --intent=FLOAT       intended backup type: 1=daily, 7=weekly, ...\n"
-           "                           (defaults to \"1\")\n"
+           "  --intent=FLOAT       DEPRECATED: ignored, and will be removed soon\n"
            "  --full-metadata      do not re-use metadata from previous backups\n"
            "  --rebuild-statcache  re-read all file data to verify statcache\n"
            "  -v --verbose         list files as they are backed up\n"
@@ -683,6 +663,8 @@ void usage(const char *program)
 
 int main(int argc, char *argv[])
 {
+    hash_init();
+
     string backup_dest = "", backup_script = "";
     string localdb_dir = "";
     string backup_scheme = "";
@@ -700,7 +682,7 @@ int main(int argc, char *argv[])
             {"dest", 1, 0, 0},              // 3
             {"scheme", 1, 0, 0},            // 4
             {"signature-filter", 1, 0, 0},  // 5
-            {"intent", 1, 0, 0},            // 6
+            {"intent", 1, 0, 0},            // 6, DEPRECATED
             {"full-metadata", 0, 0, 0},     // 7
             {"tmpdir", 1, 0, 0},            // 8
             {"upload-script", 1, 0, 0},     // 9
@@ -740,9 +722,9 @@ int main(int argc, char *argv[])
             signature_filter = optarg;
             break;
         case 6:     // --intent
-            snapshot_intent = atof(optarg);
-            if (snapshot_intent <= 0)
-                snapshot_intent = 1;
+            fprintf(stderr,
+                    "Warning: The --intent= option is deprecated and will "
+                    "be removed in the future.\n");
             break;
         case 7:     // --full-metadata
             flag_full_metadata = true;
@@ -817,7 +799,7 @@ int main(int argc, char *argv[])
      * a temporary directory for staging files. Otherwise, write backups
      * directly to the destination directory. */
     if (backup_script != "") {
-        tmp_dir = tmp_dir + "/lbs." + generate_uuid();
+        tmp_dir = tmp_dir + "/cumulus." + generate_uuid();
         if (mkdir(tmp_dir.c_str(), 0700) < 0) {
             fprintf(stderr, "Cannot create temporary directory %s: %m\n",
                     tmp_dir.c_str());
@@ -831,25 +813,21 @@ int main(int argc, char *argv[])
     /* Store the time when the backup started, so it can be included in the
      * snapshot name. */
     time_t now;
-    struct tm time_buf_local, time_buf_utc;
-    char desc_buf[256];
     time(&now);
-    localtime_r(&now, &time_buf_local);
-    gmtime_r(&now, &time_buf_utc);
-    strftime(desc_buf, sizeof(desc_buf), "%Y%m%dT%H%M%S", &time_buf_utc);
+    string timestamp
+        = TimeFormat::format(now, TimeFormat::FORMAT_FILENAME, true);
 
     /* Open the local database which tracks all objects that are stored
      * remotely, for efficient incrementals. Provide it with the name of this
      * snapshot. */
     string database_path = localdb_dir + "/localdb.sqlite";
     db = new LocalDb;
-    db->Open(database_path.c_str(), desc_buf, backup_scheme.c_str(),
-             snapshot_intent);
+    db->Open(database_path.c_str(), timestamp.c_str(), backup_scheme.c_str());
 
     tss = new TarSegmentStore(remote, db);
 
     /* Initialize the stat cache, for skipping over unchanged files. */
-    metawriter = new MetadataWriter(tss, localdb_dir.c_str(), desc_buf,
+    metawriter = new MetadataWriter(tss, localdb_dir.c_str(), timestamp.c_str(),
                                     backup_scheme.c_str());
 
     for (int i = optind; i < argc; i++) {
@@ -857,7 +835,6 @@ int main(int argc, char *argv[])
     }
 
     ObjectReference root_ref = metawriter->close();
-    add_segment(root_ref.get_segment());
     string backup_root = root_ref.to_string();
 
     delete metawriter;
@@ -873,15 +850,21 @@ int main(int argc, char *argv[])
     string checksum_filename = "snapshot-";
     if (backup_scheme.size() > 0)
         checksum_filename += backup_scheme + "-";
-    checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums";
+    checksum_filename
+        = checksum_filename + timestamp + "." + csum_type + "sums";
     RemoteFile *checksum_file = remote->alloc_file(checksum_filename,
-                                                   "checksums");
+                                                   "meta");
     FILE *checksums = fdopen(checksum_file->get_fd(), "w");
 
+    std::set<string> segment_list = db->GetUsedSegments();
     for (std::set<string>::iterator i = segment_list.begin();
          i != segment_list.end(); ++i) {
-        string seg_path, seg_csum;
-        if (db->GetSegmentChecksum(*i, &seg_path, &seg_csum)) {
+        map<string, string> segment_metadata = db->GetSegmentMetadata(*i);
+        if (segment_metadata.count("path")
+            && segment_metadata.count("checksum"))
+        {
+            string seg_path = segment_metadata["path"];
+            string seg_csum = segment_metadata["checksum"];
             const char *raw_checksum = NULL;
             if (strncmp(seg_csum.c_str(), csum_type,
                         strlen(csum_type)) == 0) {
@@ -908,6 +891,41 @@ int main(int argc, char *argv[])
 
     checksum_file->send();
 
+    /* Write out a summary file with metadata for all the segments in this
+     * snapshot (can be used to reconstruct database contents if needed). */
+    string dbmeta_filename = "snapshot-";
+    if (backup_scheme.size() > 0)
+        dbmeta_filename += backup_scheme + "-";
+    dbmeta_filename += timestamp + ".meta" + filter_extension;
+    RemoteFile *dbmeta_file = remote->alloc_file(dbmeta_filename, "meta");
+    FileFilter *dbmeta_filter = FileFilter::New(dbmeta_file->get_fd(),
+                                                filter_program);
+    if (dbmeta_filter == NULL) {
+        fprintf(stderr, "Unable to open descriptor output file: %m\n");
+        return 1;
+    }
+    FILE *dbmeta = fdopen(dbmeta_filter->get_wrapped_fd(), "w");
+
+    for (std::set<string>::iterator i = segment_list.begin();
+         i != segment_list.end(); ++i) {
+        map<string, string> segment_metadata = db->GetSegmentMetadata(*i);
+        if (segment_metadata.size() > 0) {
+            map<string, string>::const_iterator j;
+            for (j = segment_metadata.begin();
+                 j != segment_metadata.end(); ++j)
+            {
+                fprintf(dbmeta, "%s: %s\n",
+                        j->first.c_str(), j->second.c_str());
+            }
+            fprintf(dbmeta, "\n");
+        }
+    }
+    fclose(dbmeta);
+
+    string dbmeta_csum
+        = Hash::hash_file(dbmeta_file->get_local_path().c_str());
+    dbmeta_file->send();
+
     db->Close();
 
     /* All other files should be flushed to remote storage before writing the
@@ -923,34 +941,31 @@ int main(int argc, char *argv[])
     string desc_filename = "snapshot-";
     if (backup_scheme.size() > 0)
         desc_filename += backup_scheme + "-";
-    desc_filename = desc_filename + desc_buf + ".lbs";
+    desc_filename = desc_filename + timestamp + ".cumulus";
 
     RemoteFile *descriptor_file = remote->alloc_file(desc_filename,
                                                      "snapshots");
-    int descriptor_fd = descriptor_file->get_fd();
-    if (descriptor_fd < 0) {
+    FileFilter *descriptor_filter = FileFilter::New(descriptor_file->get_fd(),
+                                                    signature_filter.c_str());
+    if (descriptor_filter == NULL) {
         fprintf(stderr, "Unable to open descriptor output file: %m\n");
         return 1;
     }
-    pid_t signature_pid = 0;
-    if (signature_filter.size() > 0) {
-        int new_fd = spawn_filter(descriptor_fd, signature_filter.c_str(),
-                                  &signature_pid);
-        close(descriptor_fd);
-        descriptor_fd = new_fd;
-    }
-    FILE *descriptor = fdopen(descriptor_fd, "w");
+    FILE *descriptor = fdopen(descriptor_filter->get_wrapped_fd(), "w");
 
     fprintf(descriptor, "Format: Cumulus Snapshot v0.11\n");
     fprintf(descriptor, "Producer: Cumulus %s\n", cumulus_version);
-    strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z",
-             &time_buf_local);
-    fprintf(descriptor, "Date: %s\n", desc_buf);
+    string timestamp_local
+        = TimeFormat::format(now, TimeFormat::FORMAT_LOCALTIME, false);
+    fprintf(descriptor, "Date: %s\n", timestamp_local.c_str());
     if (backup_scheme.size() > 0)
        fprintf(descriptor, "Scheme: %s\n", backup_scheme.c_str());
-    fprintf(descriptor, "Backup-Intent: %g\n", snapshot_intent);
     fprintf(descriptor, "Root: %s\n", backup_root.c_str());
 
+    if (dbmeta_csum.size() > 0) {
+        fprintf(descriptor, "Database-state: %s\n", dbmeta_csum.c_str());
+    }
+
     if (csum.size() > 0) {
         fprintf(descriptor, "Checksums: %s\n", csum.c_str());
     }
@@ -962,14 +977,8 @@ int main(int argc, char *argv[])
     }
 
     fclose(descriptor);
-
-    if (signature_pid) {
-        int status;
-        waitpid(signature_pid, &status, 0);
-
-        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
-            fatal("Signature filter process error");
-        }
+    if (descriptor_filter->wait() < 0) {
+        fatal("Signature filter process error");
     }
 
     descriptor_file->send();
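
The hunks above in dumpfile() and main() replace the fixed SHA1Checksum class with the Hash interface from hash.h: hash_init() is called once at startup, Hash::New() allocates a hash object, update() feeds it data, digest_str() returns the printable checksum, and Hash::hash_file() checksums a whole file. A minimal sketch of that usage pattern, restricted to the calls visible in this diff (the checksum_buffer helper and the exact digest text format are illustrative assumptions, not part of the patch):

    // Sketch only: mirrors the Hash calls used in the patch above.
    #include <cstdio>
    #include <string>
    #include "hash.h"

    // Hypothetical helper: checksum an in-memory buffer the way dumpfile()
    // checksums each block it reads.
    static std::string checksum_buffer(const char *buf, size_t len)
    {
        Hash *hash = Hash::New();              // allocate the default algorithm
        hash->update(buf, len);                // may be called repeatedly
        std::string csum = hash->digest_str(); // printable digest string
        delete hash;                           // caller owns the Hash object
        return csum;
    }

    int main()
    {
        hash_init();                           // required once, as in main() above
        std::printf("%s\n", checksum_buffer("example", 7).c_str());
        // Whole-file hashing, as used for the database-state summary file:
        //     std::string file_csum = Hash::hash_file("some-file");
        return 0;
    }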
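Similarly, the descriptor-writing hunks drop the hand-rolled spawn_filter()/waitpid() logic in favour of FileFilter::New(), which returns NULL on failure, exposes the (possibly filtered) writable descriptor through get_wrapped_fd(), and is reaped with wait(). A hedged sketch of that pattern, again limited to the calls shown in the diff (the header that declares FileFilter and the write_through_filter helper are assumptions for illustration):

    // Sketch only: follows the FileFilter usage introduced in this patch.
    // FileFilter is a Cumulus-internal class; the header that declares it is
    // assumed here and may differ in the actual tree.
    #include <cstdio>
    #include "util.h"   // assumed location of the FileFilter declaration

    static bool write_through_filter(int out_fd, const char *filter_program)
    {
        // Wrap out_fd in the filter process (e.g. a signature or compression
        // command); NULL indicates the output could not be set up.
        FileFilter *filter = FileFilter::New(out_fd, filter_program);
        if (filter == NULL) {
            std::fprintf(stderr, "Unable to open output file: %m\n");
            return false;
        }

        FILE *out = fdopen(filter->get_wrapped_fd(), "w");
        std::fprintf(out, "example payload\n");
        std::fclose(out);

        // wait() reaps the filter process; a negative result signals failure,
        // matching the descriptor_filter->wait() check in the patch.
        return filter->wait() >= 0;
    }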