X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=blobdiff_plain;f=main.cc;h=4c8f2bbb40093ab28acc698c7c0ee7629dd51e4d;hp=8af3175b64c3b6007a5497f4c4ac8d81e5d9ff7c;hb=7efae40a865fce46b74538745b17901785062e5f;hpb=636adeac84baeaab72dde713b6e96807ca93de76 diff --git a/main.cc b/main.cc index 8af3175..4c8f2bb 100644 --- a/main.cc +++ b/main.cc @@ -1,7 +1,6 @@ -/* Cumulus: Smart Filesystem Backup to Dumb Servers - * - * Copyright (C) 2006-2008 The Regents of the University of California - * Written by Michael Vrable +/* Cumulus: Efficient Filesystem Backup to the Cloud + * Copyright (C) 2006-2009, 2012 The Cumulus Developers + * See the AUTHORS file for a list of contributors. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,8 +17,8 @@ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -/* Main entry point for LBS. Contains logic for traversing the filesystem and - * constructing a backup. */ +/* Main entry point for Cumulus. Contains logic for traversing the filesystem + * and constructing a backup. */ #include #include @@ -41,20 +40,24 @@ #include #include #include +#include #include #include #include #include +#include "exclude.h" +#include "hash.h" #include "localdb.h" #include "metadata.h" #include "remote.h" #include "store.h" -#include "sha1.h" #include "subfile.h" #include "util.h" +#include "third_party/sha1.h" using std::list; +using std::map; using std::string; using std::vector; using std::ostream; @@ -79,34 +82,67 @@ static char *block_buf; * invocations to help in creating incremental snapshots. */ LocalDb *db; -/* Keep track of all segments which are needed to reconstruct the snapshot. */ -std::set segment_list; - -/* Snapshot intent: 1=daily, 7=weekly, etc. This is not used directly, but is - * stored in the local database and can help guide segment cleaning and - * snapshot expiration policies. */ -double snapshot_intent = 1.0; - /* Selection of files to include/exclude in the snapshot. */ -std::list includes; // Paths in which files should be saved -std::list excludes; // Paths which will not be saved -std::list excluded_names; // Directories which will not be saved -std::list searches; // Directories we don't want to save, but - // do want to descend searching for data - // in included paths - -bool relative_paths = true; +PathFilterList filter_rules; bool flag_rebuild_statcache = false; /* Whether verbose output is enabled. */ bool verbose = false; -/* Ensure that the given segment is listed as a dependency of the current - * snapshot. */ -void add_segment(const string& segment) +/* Attempts to open a regular file read-only, but with safety checks for files + * that might not be fully trusted. */ +int safe_open(const string& path, struct stat *stat_buf) { - segment_list.insert(segment); + int fd; + + /* Be paranoid when opening the file. We have no guarantee that the + * file was not replaced between the stat() call above and the open() + * call below, so we might not even be opening a regular file. We + * supply flags to open to to guard against various conditions before + * we can perform an lstat to check that the file is still a regular + * file: + * - O_NOFOLLOW: in the event the file was replaced by a symlink + * - O_NONBLOCK: prevents open() from blocking if the file was + * replaced by a fifo + * We also add in O_NOATIME, since this may reduce disk writes (for + * inode updates). However, O_NOATIME may result in EPERM, so if the + * initial open fails, try again without O_NOATIME. */ + fd = open(path.c_str(), O_RDONLY|O_NOATIME|O_NOFOLLOW|O_NONBLOCK); + if (fd < 0) { + fd = open(path.c_str(), O_RDONLY|O_NOFOLLOW|O_NONBLOCK); + } + if (fd < 0) { + fprintf(stderr, "Unable to open file %s: %m\n", path.c_str()); + return -1; + } + + /* Drop the use of the O_NONBLOCK flag; we only wanted that for file + * open. */ + long flags = fcntl(fd, F_GETFL); + fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); + + /* Re-check file attributes, storing them into stat_buf if that is + * non-NULL. */ + struct stat internal_stat_buf; + if (stat_buf == NULL) + stat_buf = &internal_stat_buf; + + /* Perform the stat call again, and check that we still have a regular + * file. */ + if (fstat(fd, stat_buf) < 0) { + fprintf(stderr, "fstat: %m\n"); + close(fd); + return -1; + } + + if ((stat_buf->st_mode & S_IFMT) != S_IFREG) { + fprintf(stderr, "file is no longer a regular file!\n"); + close(fd); + return -1; + } + + return fd; } /* Read data from a file descriptor and return the amount of data read. A @@ -181,8 +217,6 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, i != old_blocks.end(); ++i) { const ObjectReference &ref = *i; object_list.push_back(ref.to_string()); - if (ref.is_normal()) - add_segment(ref.get_segment()); db->UseObject(ref); } size = stat_buf.st_size; @@ -192,7 +226,7 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, /* If the file is new or changed, we must read in the contents a block at a * time. */ if (!cached) { - SHA1Checksum hash; + Hash *hash = Hash::New(); Subfile subfile(db); subfile.load_old_blocks(old_blocks); @@ -206,7 +240,7 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, break; } - hash.process(block_buf, bytes); + hash->update(block_buf, bytes); // Sparse file processing: if we read a block of all zeroes, encode // that explicitly. @@ -223,9 +257,10 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, double block_age = 0.0; ObjectReference ref; - SHA1Checksum block_hash; - block_hash.process(block_buf, bytes); - string block_csum = block_hash.checksum_str(); + Hash *hash = Hash::New(); + hash->update(block_buf, bytes); + string block_csum = hash->digest_str(); + delete hash; if (all_zero) { ref = ObjectReference(ObjectReference::REF_ZERO); @@ -257,9 +292,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, if (object_group == 0) { o->set_group("data"); } else { - char group[32]; - sprintf(group, "compacted-%d", object_group); - o->set_group(group); + o->set_group(string_printf("compacted-%d", + object_group)); } if (status == NULL) status = "partial"; @@ -281,8 +315,6 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, while (!refs.empty()) { ref = refs.front(); refs.pop_front(); object_list.push_back(ref.to_string()); - if (ref.is_normal()) - add_segment(ref.get_segment()); db->UseObject(ref); } size += bytes; @@ -291,7 +323,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, status = "old"; } - file_info["checksum"] = hash.checksum_str(); + file_info["checksum"] = hash->digest_str(); + delete hash; } // Sanity check: if we are rebuilding the statcache, but the file looks @@ -327,6 +360,39 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, return size; } +/* Look up a user/group and convert it to string form (either strictly numeric + * or numeric plus symbolic). Caches the results of the call to + * getpwuid/getgrgid. */ +string user_to_string(uid_t uid) { + static map user_cache; + map::const_iterator i = user_cache.find(uid); + if (i != user_cache.end()) + return i->second; + + string result = encode_int(uid); + struct passwd *pwd = getpwuid(uid); + if (pwd != NULL && pwd->pw_name != NULL) { + result += " (" + uri_encode(pwd->pw_name) + ")"; + } + user_cache[uid] = result; + return result; +} + +string group_to_string(gid_t gid) { + static map group_cache; + map::const_iterator i = group_cache.find(gid); + if (i != group_cache.end()) + return i->second; + + string result = encode_int(gid); + struct group *grp = getgrgid(gid); + if (grp != NULL && grp->gr_name != NULL) { + result += " (" + uri_encode(grp->gr_name) + ")"; + } + group_cache[gid] = result; + return result; +} + /* Dump a specified filesystem object (file, directory, etc.) based on its * inode information. If the object is a regular file, an open filehandle is * provided. */ @@ -348,24 +414,14 @@ void dump_inode(const string& path, // Path within snapshot file_info["mode"] = encode_int(stat_buf.st_mode & 07777, 8); file_info["ctime"] = encode_int(stat_buf.st_ctime); file_info["mtime"] = encode_int(stat_buf.st_mtime); - file_info["user"] = encode_int(stat_buf.st_uid); - file_info["group"] = encode_int(stat_buf.st_gid); + file_info["user"] = user_to_string(stat_buf.st_uid); + file_info["group"] = group_to_string(stat_buf.st_gid); time_t now = time(NULL); if (now - stat_buf.st_ctime < 30 || now - stat_buf.st_mtime < 30) if ((stat_buf.st_mode & S_IFMT) != S_IFDIR) file_info["volatile"] = "1"; - struct passwd *pwd = getpwuid(stat_buf.st_uid); - if (pwd != NULL && pwd->pw_name != NULL) { - file_info["user"] += " (" + uri_encode(pwd->pw_name) + ")"; - } - - struct group *grp = getgrgid(stat_buf.st_gid); - if (grp != NULL && grp->gr_name != NULL) { - file_info["group"] += " (" + uri_encode(grp->gr_name) + ")"; - } - if (stat_buf.st_nlink > 1 && (stat_buf.st_mode & S_IFMT) != S_IFDIR) { file_info["links"] = encode_int(stat_buf.st_nlink); } @@ -438,123 +494,81 @@ void dump_inode(const string& path, // Path within snapshot metawriter->add(file_info); } -void scanfile(const string& path, bool include) +/* Converts a path to the normalized form used in the metadata log. Paths are + * written as relative (without any leading slashes). The root directory is + * referred to as ".". */ +string metafile_path(const string& path) { - int fd = -1; - long flags; - struct stat stat_buf; - list refs; - - string true_path; - if (relative_paths) - true_path = path; - else - true_path = "/" + path; - - // Set to true if we should scan through the contents of this directory, - // but not actually back files up - bool scan_only = false; - - // Check this file against the include/exclude list to see if it should be - // considered - for (list::iterator i = includes.begin(); - i != includes.end(); ++i) { - if (path == *i) { - include = true; - } - } - - for (list::iterator i = excludes.begin(); - i != excludes.end(); ++i) { - if (path == *i) { - include = false; - } - } + const char *newpath = path.c_str(); + if (*newpath == '/') + newpath++; + if (*newpath == '\0') + newpath = "."; + return newpath; +} - if (excluded_names.size() > 0) { - std::string name = path; - std::string::size_type last_slash = name.rfind('/'); - if (last_slash != std::string::npos) { - name.replace(0, last_slash + 1, ""); - } +void try_merge_filter(const string& path, const string& basedir) +{ + struct stat stat_buf; + if (lstat(path.c_str(), &stat_buf) < 0) + return; + if ((stat_buf.st_mode & S_IFMT) != S_IFREG) + return; + int fd = safe_open(path, NULL); + if (fd < 0) + return; - for (list::iterator i = excluded_names.begin(); - i != excluded_names.end(); ++i) { - if (name == *i) { - include = false; - } - } + /* As a very crude limit on the complexity of merge rules, only read up to + * one block (1 MB) worth of data. If the file doesn't seems like it might + * be larger than that, don't parse the rules in it. */ + ssize_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE); + close(fd); + if (bytes < 0 || bytes >= static_cast(LBS_BLOCK_SIZE - 1)) { + /* TODO: Add more strict resource limits on merge files? */ + fprintf(stderr, + "Unable to read filter merge file (possibly size too large\n"); + return; } + filter_rules.merge_patterns(metafile_path(path), basedir, + string(block_buf, bytes)); +} - for (list::iterator i = searches.begin(); - i != searches.end(); ++i) { - if (path == *i) { - scan_only = true; - } - } +void scanfile(const string& path) +{ + int fd = -1; + struct stat stat_buf; + list refs; - if (!include && !scan_only) - return; + string output_path = metafile_path(path); - if (lstat(true_path.c_str(), &stat_buf) < 0) { + if (lstat(path.c_str(), &stat_buf) < 0) { fprintf(stderr, "lstat(%s): %m\n", path.c_str()); return; } - if ((stat_buf.st_mode & S_IFMT) == S_IFREG) { - /* Be paranoid when opening the file. We have no guarantee that the - * file was not replaced between the stat() call above and the open() - * call below, so we might not even be opening a regular file. We - * supply flags to open to to guard against various conditions before - * we can perform an lstat to check that the file is still a regular - * file: - * - O_NOFOLLOW: in the event the file was replaced by a symlink - * - O_NONBLOCK: prevents open() from blocking if the file was - * replaced by a fifo - * We also add in O_NOATIME, since this may reduce disk writes (for - * inode updates). However, O_NOATIME may result in EPERM, so if the - * initial open fails, try again without O_NOATIME. */ - fd = open(true_path.c_str(), O_RDONLY|O_NOATIME|O_NOFOLLOW|O_NONBLOCK); - if (fd < 0) { - fd = open(true_path.c_str(), O_RDONLY|O_NOFOLLOW|O_NONBLOCK); - } - if (fd < 0) { - fprintf(stderr, "Unable to open file %s: %m\n", path.c_str()); - return; - } - - /* Drop the use of the O_NONBLOCK flag; we only wanted that for file - * open. */ - flags = fcntl(fd, F_GETFL); - fcntl(fd, F_SETFL, flags & ~O_NONBLOCK); - - /* Perform the stat call again, and check that we still have a regular - * file. */ - if (fstat(fd, &stat_buf) < 0) { - fprintf(stderr, "fstat: %m\n"); - close(fd); - return; - } + bool is_directory = ((stat_buf.st_mode & S_IFMT) == S_IFDIR); + if (!filter_rules.is_included(output_path, is_directory)) + return; - if ((stat_buf.st_mode & S_IFMT) != S_IFREG) { - fprintf(stderr, "file is no longer a regular file!\n"); - close(fd); + if ((stat_buf.st_mode & S_IFMT) == S_IFREG) { + fd = safe_open(path, &stat_buf); + if (fd < 0) return; - } } - dump_inode(path, true_path, stat_buf, fd); + dump_inode(output_path, path, stat_buf, fd); if (fd >= 0) close(fd); - // If we hit a directory, now that we've written the directory itself, - // recursively scan the directory. - if ((stat_buf.st_mode & S_IFMT) == S_IFDIR) { - DIR *dir = opendir(true_path.c_str()); + /* If we hit a directory, now that we've written the directory itself, + * recursively scan the directory. */ + if (is_directory) { + DIR *dir = opendir(path.c_str()); if (dir == NULL) { - fprintf(stderr, "Error: %m\n"); + fprintf(stderr, "Error reading directory %s: %m\n", + path.c_str()); return; } @@ -571,55 +585,42 @@ void scanfile(const string& path, bool include) sort(contents.begin(), contents.end()); + filter_rules.save(); + + /* First pass through the directory items: look for any filter rules to + * merge and do so. */ for (vector::iterator i = contents.begin(); i != contents.end(); ++i) { - const string& filename = *i; + string filename; if (path == ".") - scanfile(filename, include); + filename = *i; + else if (path == "/") + filename = "/" + *i; else - scanfile(path + "/" + filename, include); + filename = path + "/" + *i; + if (filter_rules.is_mergefile(metafile_path(filename))) { + if (verbose) { + printf("Merging directory filter rules %s\n", + filename.c_str()); + } + try_merge_filter(filename, output_path); + } } - } -} -/* Include the specified file path in the backups. Append the path to the - * includes list, and to ensure that we actually see the path when scanning the - * directory tree, add all the parent directories to the search list, which - * means we will scan through the directory listing even if the files - * themselves are excluded from being backed up. */ -void add_include(const char *path) -{ - /* Was an absolute path specified? If so, we'll need to start scanning - * from the root directory. Make sure that the user was consistent in - * providing either all relative paths or all absolute paths. */ - if (path[0] == '/') { - if (includes.size() > 0 && relative_paths == true) { - fprintf(stderr, - "Error: Cannot mix relative and absolute paths!\n"); - exit(1); + /* Second pass: recursively scan all items in the directory for backup; + * scanfile() will check if the item should be included or not. */ + for (vector::iterator i = contents.begin(); + i != contents.end(); ++i) { + const string& filename = *i; + if (path == ".") + scanfile(filename); + else if (path == "/") + scanfile("/" + filename); + else + scanfile(path + "/" + filename); } - relative_paths = false; - - // Skip over leading '/' - path++; - } else if (relative_paths == false && path[0] != '/') { - fprintf(stderr, "Error: Cannot mix relative and absolute paths!\n"); - exit(1); - } - - includes.push_back(path); - - /* Split the specified path into directory components, and ensure that we - * descend into all the directories along the path. */ - const char *slash = path; - - if (path[0] == '\0') - return; - - while ((slash = strchr(slash + 1, '/')) != NULL) { - string component(path, slash - path); - searches.push_back(component); + filter_rules.restore(); } } @@ -635,8 +636,10 @@ void usage(const char *program) " --dest=PATH path where backup is to be written\n" " --upload-script=COMMAND\n" " program to invoke for each backup file generated\n" - " --exclude=PATH exclude files in PATH from snapshot\n" - " --exclude-name=NAME exclude files called NAME from snapshot\n" + " --exclude=PATTERN exclude files matching PATTERN from snapshot\n" + " --include=PATTERN include files matching PATTERN in snapshot\n" + " --dir-merge=PATTERN parse files matching PATTERN to read additional\n" + " subtree-specific include/exclude rules during backup\n" " --localdb=PATH local backup metadata is stored in PATH\n" " --tmpdir=PATH path for temporarily storing backup files\n" " (defaults to TMPDIR environment variable or /tmp)\n" @@ -648,8 +651,7 @@ void usage(const char *program) " --signature-filter=COMMAND\n" " program though which to filter descriptor\n" " --scheme=NAME optional name for this snapshot\n" - " --intent=FLOAT intended backup type: 1=daily, 7=weekly, ...\n" - " (defaults to \"1\")\n" + " --intent=FLOAT DEPRECATED: ignored, and will be removed soon\n" " --full-metadata do not re-use metadata from previous backups\n" " --rebuild-statcache re-read all file data to verify statcache\n" " -v --verbose list files as they are backed up\n" @@ -661,6 +663,8 @@ void usage(const char *program) int main(int argc, char *argv[]) { + hash_init(); + string backup_dest = "", backup_script = ""; string localdb_dir = ""; string backup_scheme = ""; @@ -673,18 +677,19 @@ int main(int argc, char *argv[]) while (1) { static struct option long_options[] = { {"localdb", 1, 0, 0}, // 0 - {"exclude", 1, 0, 0}, // 1 - {"filter", 1, 0, 0}, // 2 - {"filter-extension", 1, 0, 0}, // 3 - {"dest", 1, 0, 0}, // 4 - {"scheme", 1, 0, 0}, // 5 - {"signature-filter", 1, 0, 0}, // 6 - {"intent", 1, 0, 0}, // 7 - {"full-metadata", 0, 0, 0}, // 8 - {"tmpdir", 1, 0, 0}, // 9 - {"upload-script", 1, 0, 0}, // 10 - {"rebuild-statcache", 0, 0, 0}, // 11 - {"exclude-name", 1, 0, 0}, // 12 + {"filter", 1, 0, 0}, // 1 + {"filter-extension", 1, 0, 0}, // 2 + {"dest", 1, 0, 0}, // 3 + {"scheme", 1, 0, 0}, // 4 + {"signature-filter", 1, 0, 0}, // 5 + {"intent", 1, 0, 0}, // 6, DEPRECATED + {"full-metadata", 0, 0, 0}, // 7 + {"tmpdir", 1, 0, 0}, // 8 + {"upload-script", 1, 0, 0}, // 9 + {"rebuild-statcache", 0, 0, 0}, // 10 + {"include", 1, 0, 0}, // 11 + {"exclude", 1, 0, 0}, // 12 + {"dir-merge", 1, 0, 0}, // 13 // Aliases for short options {"verbose", 0, 0, 'v'}, {NULL, 0, 0, 0}, @@ -701,46 +706,46 @@ int main(int argc, char *argv[]) case 0: // --localdb localdb_dir = optarg; break; - case 1: // --exclude - if (optarg[0] != '/') - excludes.push_back(optarg); - else - excludes.push_back(optarg + 1); - break; - case 2: // --filter + case 1: // --filter filter_program = optarg; break; - case 3: // --filter-extension + case 2: // --filter-extension filter_extension = optarg; break; - case 4: // --dest + case 3: // --dest backup_dest = optarg; break; - case 5: // --scheme + case 4: // --scheme backup_scheme = optarg; break; - case 6: // --signature-filter + case 5: // --signature-filter signature_filter = optarg; break; - case 7: // --intent - snapshot_intent = atof(optarg); - if (snapshot_intent <= 0) - snapshot_intent = 1; + case 6: // --intent + fprintf(stderr, + "Warning: The --intent= option is deprecated and will " + "be removed in the future.\n"); break; - case 8: // --full-metadata + case 7: // --full-metadata flag_full_metadata = true; break; - case 9: // --tmpdir + case 8: // --tmpdir tmp_dir = optarg; break; - case 10: // --upload-script + case 9: // --upload-script backup_script = optarg; break; - case 11: // --rebuild-statcache + case 10: // --rebuild-statcache flag_rebuild_statcache = true; break; - case 12: // --exclude-name - excluded_names.push_back(optarg); + case 11: // --include + filter_rules.add_pattern(PathFilterList::INCLUDE, optarg, ""); + break; + case 12: // --exclude + filter_rules.add_pattern(PathFilterList::EXCLUDE, optarg, ""); + break; + case 13: // --dir-merge + filter_rules.add_pattern(PathFilterList::DIRMERGE, optarg, ""); break; default: fprintf(stderr, "Unhandled long option!\n"); @@ -763,10 +768,6 @@ int main(int argc, char *argv[]) return 1; } - searches.push_back("."); - for (int i = optind; i < argc; i++) - add_include(argv[i]); - if (backup_dest == "" && backup_script == "") { fprintf(stderr, "Error: Backup destination must be specified using --dest= or --upload-script=\n"); @@ -798,7 +799,7 @@ int main(int argc, char *argv[]) * a temporary directory for staging files. Otherwise, write backups * directly to the destination directory. */ if (backup_script != "") { - tmp_dir = tmp_dir + "/lbs." + generate_uuid(); + tmp_dir = tmp_dir + "/cumulus." + generate_uuid(); if (mkdir(tmp_dir.c_str(), 0700) < 0) { fprintf(stderr, "Cannot create temporary directory %s: %m\n", tmp_dir.c_str()); @@ -812,30 +813,28 @@ int main(int argc, char *argv[]) /* Store the time when the backup started, so it can be included in the * snapshot name. */ time_t now; - struct tm time_buf; - char desc_buf[256]; time(&now); - localtime_r(&now, &time_buf); - strftime(desc_buf, sizeof(desc_buf), "%Y%m%dT%H%M%S", &time_buf); + string timestamp + = TimeFormat::format(now, TimeFormat::FORMAT_FILENAME, true); /* Open the local database which tracks all objects that are stored * remotely, for efficient incrementals. Provide it with the name of this * snapshot. */ string database_path = localdb_dir + "/localdb.sqlite"; db = new LocalDb; - db->Open(database_path.c_str(), desc_buf, backup_scheme.c_str(), - snapshot_intent); + db->Open(database_path.c_str(), timestamp.c_str(), backup_scheme.c_str()); tss = new TarSegmentStore(remote, db); /* Initialize the stat cache, for skipping over unchanged files. */ - metawriter = new MetadataWriter(tss, localdb_dir.c_str(), desc_buf, + metawriter = new MetadataWriter(tss, localdb_dir.c_str(), timestamp.c_str(), backup_scheme.c_str()); - scanfile(".", false); + for (int i = optind; i < argc; i++) { + scanfile(argv[i]); + } ObjectReference root_ref = metawriter->close(); - add_segment(root_ref.get_segment()); string backup_root = root_ref.to_string(); delete metawriter; @@ -851,15 +850,21 @@ int main(int argc, char *argv[]) string checksum_filename = "snapshot-"; if (backup_scheme.size() > 0) checksum_filename += backup_scheme + "-"; - checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums"; + checksum_filename + = checksum_filename + timestamp + "." + csum_type + "sums"; RemoteFile *checksum_file = remote->alloc_file(checksum_filename, - "checksums"); + "meta"); FILE *checksums = fdopen(checksum_file->get_fd(), "w"); + std::set segment_list = db->GetUsedSegments(); for (std::set::iterator i = segment_list.begin(); i != segment_list.end(); ++i) { - string seg_path, seg_csum; - if (db->GetSegmentChecksum(*i, &seg_path, &seg_csum)) { + map segment_metadata = db->GetSegmentMetadata(*i); + if (segment_metadata.count("path") + && segment_metadata.count("checksum")) + { + string seg_path = segment_metadata["path"]; + string seg_csum = segment_metadata["checksum"]; const char *raw_checksum = NULL; if (strncmp(seg_csum.c_str(), csum_type, strlen(csum_type)) == 0) { @@ -886,6 +891,41 @@ int main(int argc, char *argv[]) checksum_file->send(); + /* Write out a summary file with metadata for all the segments in this + * snapshot (can be used to reconstruct database contents if needed). */ + string dbmeta_filename = "snapshot-"; + if (backup_scheme.size() > 0) + dbmeta_filename += backup_scheme + "-"; + dbmeta_filename += timestamp + ".meta" + filter_extension; + RemoteFile *dbmeta_file = remote->alloc_file(dbmeta_filename, "meta"); + FileFilter *dbmeta_filter = FileFilter::New(dbmeta_file->get_fd(), + filter_program); + if (dbmeta_filter == NULL) { + fprintf(stderr, "Unable to open descriptor output file: %m\n"); + return 1; + } + FILE *dbmeta = fdopen(dbmeta_filter->get_wrapped_fd(), "w"); + + for (std::set::iterator i = segment_list.begin(); + i != segment_list.end(); ++i) { + map segment_metadata = db->GetSegmentMetadata(*i); + if (segment_metadata.size() > 0) { + map::const_iterator j; + for (j = segment_metadata.begin(); + j != segment_metadata.end(); ++j) + { + fprintf(dbmeta, "%s: %s\n", + j->first.c_str(), j->second.c_str()); + } + fprintf(dbmeta, "\n"); + } + } + fclose(dbmeta); + + string dbmeta_csum + = Hash::hash_file(dbmeta_file->get_local_path().c_str()); + dbmeta_file->send(); + db->Close(); /* All other files should be flushed to remote storage before writing the @@ -901,33 +941,31 @@ int main(int argc, char *argv[]) string desc_filename = "snapshot-"; if (backup_scheme.size() > 0) desc_filename += backup_scheme + "-"; - desc_filename = desc_filename + desc_buf + ".lbs"; + desc_filename = desc_filename + timestamp + ".cumulus"; RemoteFile *descriptor_file = remote->alloc_file(desc_filename, "snapshots"); - int descriptor_fd = descriptor_file->get_fd(); - if (descriptor_fd < 0) { + FileFilter *descriptor_filter = FileFilter::New(descriptor_file->get_fd(), + signature_filter.c_str()); + if (descriptor_filter == NULL) { fprintf(stderr, "Unable to open descriptor output file: %m\n"); return 1; } - pid_t signature_pid = 0; - if (signature_filter.size() > 0) { - int new_fd = spawn_filter(descriptor_fd, signature_filter.c_str(), - &signature_pid); - close(descriptor_fd); - descriptor_fd = new_fd; - } - FILE *descriptor = fdopen(descriptor_fd, "w"); + FILE *descriptor = fdopen(descriptor_filter->get_wrapped_fd(), "w"); - fprintf(descriptor, "Format: LBS Snapshot v0.8\n"); + fprintf(descriptor, "Format: Cumulus Snapshot v0.11\n"); fprintf(descriptor, "Producer: Cumulus %s\n", cumulus_version); - strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z", &time_buf); - fprintf(descriptor, "Date: %s\n", desc_buf); + string timestamp_local + = TimeFormat::format(now, TimeFormat::FORMAT_LOCALTIME, false); + fprintf(descriptor, "Date: %s\n", timestamp_local.c_str()); if (backup_scheme.size() > 0) fprintf(descriptor, "Scheme: %s\n", backup_scheme.c_str()); - fprintf(descriptor, "Backup-Intent: %g\n", snapshot_intent); fprintf(descriptor, "Root: %s\n", backup_root.c_str()); + if (dbmeta_csum.size() > 0) { + fprintf(descriptor, "Database-state: %s\n", dbmeta_csum.c_str()); + } + if (csum.size() > 0) { fprintf(descriptor, "Checksums: %s\n", csum.c_str()); } @@ -939,14 +977,8 @@ int main(int argc, char *argv[]) } fclose(descriptor); - - if (signature_pid) { - int status; - waitpid(signature_pid, &status, 0); - - if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) { - fatal("Signature filter process error"); - } + if (descriptor_filter->wait() < 0) { + fatal("Signature filter process error"); } descriptor_file->send();