-/* Cumulus: Smart Filesystem Backup to Dumb Servers
- *
- * Copyright (C) 2006-2009 The Regents of the University of California
- * Copyright (C) 2012 Google Inc.
- * Written by Michael Vrable <mvrable@cs.ucsd.edu>
+/* Cumulus: Efficient Filesystem Backup to the Cloud
+ * Copyright (C) 2006-2009, 2012 The Cumulus Developers
+ * See the AUTHORS file for a list of contributors.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
#include <string>
#include <vector>
+#include "cumulus.h"
#include "exclude.h"
+#include "hash.h"
#include "localdb.h"
#include "metadata.h"
#include "remote.h"
* invocations to help in creating incremental snapshots. */
LocalDb *db;
-/* Keep track of all segments which are needed to reconstruct the snapshot. */
-std::set<string> segment_list;
-
-/* Snapshot intent: 1=daily, 7=weekly, etc. This is not used directly, but is
- * stored in the local database and can help guide segment cleaning and
- * snapshot expiration policies. */
-double snapshot_intent = 1.0;
-
/* Selection of files to include/exclude in the snapshot. */
PathFilterList filter_rules;
/* Whether verbose output is enabled. */
bool verbose = false;
-/* Ensure that the given segment is listed as a dependency of the current
- * snapshot. */
-void add_segment(const string& segment)
-{
- segment_list.insert(segment);
-}
-
/* Attempts to open a regular file read-only, but with safety checks for files
* that might not be fully trusted. */
int safe_open(const string& path, struct stat *stat_buf)
i != old_blocks.end(); ++i) {
const ObjectReference &ref = *i;
object_list.push_back(ref.to_string());
- if (ref.is_normal())
- add_segment(ref.get_segment());
db->UseObject(ref);
}
size = stat_buf.st_size;
/* If the file is new or changed, we must read in the contents a block at a
* time. */
if (!cached) {
- SHA1Checksum hash;
+ scoped_ptr<Hash> file_hash(Hash::New());
Subfile subfile(db);
subfile.load_old_blocks(old_blocks);
break;
}
- hash.process(block_buf, bytes);
+ file_hash->update(block_buf, bytes);
// Sparse file processing: if we read a block of all zeroes, encode
// that explicitly.
double block_age = 0.0;
ObjectReference ref;
- SHA1Checksum block_hash;
- block_hash.process(block_buf, bytes);
- string block_csum = block_hash.checksum_str();
+ scoped_ptr<Hash> block_hash(Hash::New());
+ block_hash->update(block_buf, bytes);
+ string block_csum = block_hash->digest_str();
if (all_zero) {
ref = ObjectReference(ObjectReference::REF_ZERO);
if (object_group == 0) {
o->set_group("data");
} else {
- char group[32];
- sprintf(group, "compacted-%d", object_group);
- o->set_group(group);
+ o->set_group(string_printf("compacted-%d",
+ object_group));
}
if (status == NULL)
status = "partial";
while (!refs.empty()) {
ref = refs.front(); refs.pop_front();
object_list.push_back(ref.to_string());
- if (ref.is_normal())
- add_segment(ref.get_segment());
db->UseObject(ref);
}
size += bytes;
status = "old";
}
- file_info["checksum"] = hash.checksum_str();
+ file_info["checksum"] = file_hash->digest_str();
}
// Sanity check: if we are rebuilding the statcache, but the file looks
" --signature-filter=COMMAND\n"
" program though which to filter descriptor\n"
" --scheme=NAME optional name for this snapshot\n"
- " --intent=FLOAT intended backup type: 1=daily, 7=weekly, ...\n"
- " (defaults to \"1\")\n"
+ " --intent=FLOAT DEPRECATED: ignored, and will be removed soon\n"
" --full-metadata do not re-use metadata from previous backups\n"
" --rebuild-statcache re-read all file data to verify statcache\n"
" -v --verbose list files as they are backed up\n"
int main(int argc, char *argv[])
{
+ hash_init();
+
string backup_dest = "", backup_script = "";
string localdb_dir = "";
string backup_scheme = "";
{"dest", 1, 0, 0}, // 3
{"scheme", 1, 0, 0}, // 4
{"signature-filter", 1, 0, 0}, // 5
- {"intent", 1, 0, 0}, // 6
+ {"intent", 1, 0, 0}, // 6, DEPRECATED
{"full-metadata", 0, 0, 0}, // 7
{"tmpdir", 1, 0, 0}, // 8
{"upload-script", 1, 0, 0}, // 9
signature_filter = optarg;
break;
case 6: // --intent
- snapshot_intent = atof(optarg);
- if (snapshot_intent <= 0)
- snapshot_intent = 1;
+ fprintf(stderr,
+ "Warning: The --intent= option is deprecated and will "
+ "be removed in the future.\n");
break;
case 7: // --full-metadata
flag_full_metadata = true;
* a temporary directory for staging files. Otherwise, write backups
* directly to the destination directory. */
if (backup_script != "") {
- tmp_dir = tmp_dir + "/lbs." + generate_uuid();
+ tmp_dir = tmp_dir + "/cumulus." + generate_uuid();
if (mkdir(tmp_dir.c_str(), 0700) < 0) {
fprintf(stderr, "Cannot create temporary directory %s: %m\n",
tmp_dir.c_str());
/* Store the time when the backup started, so it can be included in the
* snapshot name. */
time_t now;
- struct tm time_buf_local, time_buf_utc;
- char desc_buf[256];
time(&now);
- localtime_r(&now, &time_buf_local);
- gmtime_r(&now, &time_buf_utc);
- strftime(desc_buf, sizeof(desc_buf), "%Y%m%dT%H%M%S", &time_buf_utc);
+ string timestamp
+ = TimeFormat::format(now, TimeFormat::FORMAT_FILENAME, true);
/* Open the local database which tracks all objects that are stored
* remotely, for efficient incrementals. Provide it with the name of this
* snapshot. */
string database_path = localdb_dir + "/localdb.sqlite";
db = new LocalDb;
- db->Open(database_path.c_str(), desc_buf, backup_scheme.c_str(),
- snapshot_intent);
+ db->Open(database_path.c_str(), timestamp.c_str(), backup_scheme.c_str());
tss = new TarSegmentStore(remote, db);
/* Initialize the stat cache, for skipping over unchanged files. */
- metawriter = new MetadataWriter(tss, localdb_dir.c_str(), desc_buf,
+ metawriter = new MetadataWriter(tss, localdb_dir.c_str(), timestamp.c_str(),
backup_scheme.c_str());
for (int i = optind; i < argc; i++) {
}
ObjectReference root_ref = metawriter->close();
- add_segment(root_ref.get_segment());
string backup_root = root_ref.to_string();
delete metawriter;
tss->dump_stats();
delete tss;
- /* Write out a checksums file which lists the checksums for all the
- * segments included in this snapshot. The format is designed so that it
- * may be easily verified using the sha1sums command. */
- const char csum_type[] = "sha1";
- string checksum_filename = "snapshot-";
+ /* Write out a summary file that holds metadata for all the segments in this
+ * snapshot; it can be used to reconstruct database contents if needed, and
+ * contains hash values for the segments for quick integrity checks. */
+ string dbmeta_filename = "snapshot-";
if (backup_scheme.size() > 0)
- checksum_filename += backup_scheme + "-";
- checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums";
- RemoteFile *checksum_file = remote->alloc_file(checksum_filename,
- "checksums");
- FILE *checksums = fdopen(checksum_file->get_fd(), "w");
+ dbmeta_filename += backup_scheme + "-";
+ dbmeta_filename += timestamp + ".meta" + filter_extension;
+ RemoteFile *dbmeta_file = remote->alloc_file(dbmeta_filename, "meta");
+ scoped_ptr<FileFilter> dbmeta_filter(FileFilter::New(dbmeta_file->get_fd(),
+ filter_program));
+ if (dbmeta_filter == NULL) {
+ fprintf(stderr, "Unable to open descriptor output file: %m\n");
+ return 1;
+ }
+ FILE *dbmeta = fdopen(dbmeta_filter->get_wrapped_fd(), "w");
+ std::set<string> segment_list = db->GetUsedSegments();
for (std::set<string>::iterator i = segment_list.begin();
i != segment_list.end(); ++i) {
- string seg_path, seg_csum;
- if (db->GetSegmentChecksum(*i, &seg_path, &seg_csum)) {
- const char *raw_checksum = NULL;
- if (strncmp(seg_csum.c_str(), csum_type,
- strlen(csum_type)) == 0) {
- raw_checksum = seg_csum.c_str() + strlen(csum_type);
- if (*raw_checksum == '=')
- raw_checksum++;
- else
- raw_checksum = NULL;
+ map<string, string> segment_metadata = db->GetSegmentMetadata(*i);
+ if (segment_metadata.size() > 0) {
+ map<string, string>::const_iterator j;
+ for (j = segment_metadata.begin();
+ j != segment_metadata.end(); ++j)
+ {
+ fprintf(dbmeta, "%s: %s\n",
+ j->first.c_str(), j->second.c_str());
}
-
- if (raw_checksum != NULL)
- fprintf(checksums, "%s *%s\n",
- raw_checksum, seg_path.c_str());
+ fprintf(dbmeta, "\n");
}
}
- fclose(checksums);
-
- SHA1Checksum checksum_csum;
- string csum;
- checksum_filename = checksum_file->get_local_path();
- if (checksum_csum.process_file(checksum_filename.c_str())) {
- csum = checksum_csum.checksum_str();
- }
+ fclose(dbmeta);
+ dbmeta_filter->wait();
- checksum_file->send();
+ string dbmeta_csum
+ = Hash::hash_file(dbmeta_file->get_local_path().c_str());
+ dbmeta_file->send();
db->Close();
string desc_filename = "snapshot-";
if (backup_scheme.size() > 0)
desc_filename += backup_scheme + "-";
- desc_filename = desc_filename + desc_buf + ".lbs";
+ desc_filename = desc_filename + timestamp + ".cumulus";
RemoteFile *descriptor_file = remote->alloc_file(desc_filename,
"snapshots");
- int descriptor_fd = descriptor_file->get_fd();
- if (descriptor_fd < 0) {
+ scoped_ptr<FileFilter> descriptor_filter(
+ FileFilter::New(descriptor_file->get_fd(), signature_filter.c_str()));
+ if (descriptor_filter == NULL) {
fprintf(stderr, "Unable to open descriptor output file: %m\n");
return 1;
}
- pid_t signature_pid = 0;
- if (signature_filter.size() > 0) {
- int new_fd = spawn_filter(descriptor_fd, signature_filter.c_str(),
- &signature_pid);
- close(descriptor_fd);
- descriptor_fd = new_fd;
- }
- FILE *descriptor = fdopen(descriptor_fd, "w");
+ FILE *descriptor = fdopen(descriptor_filter->get_wrapped_fd(), "w");
fprintf(descriptor, "Format: Cumulus Snapshot v0.11\n");
fprintf(descriptor, "Producer: Cumulus %s\n", cumulus_version);
- strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z",
- &time_buf_local);
- fprintf(descriptor, "Date: %s\n", desc_buf);
+ string timestamp_local
+ = TimeFormat::format(now, TimeFormat::FORMAT_LOCALTIME, false);
+ fprintf(descriptor, "Date: %s\n", timestamp_local.c_str());
if (backup_scheme.size() > 0)
fprintf(descriptor, "Scheme: %s\n", backup_scheme.c_str());
- fprintf(descriptor, "Backup-Intent: %g\n", snapshot_intent);
fprintf(descriptor, "Root: %s\n", backup_root.c_str());
- if (csum.size() > 0) {
- fprintf(descriptor, "Checksums: %s\n", csum.c_str());
+ if (dbmeta_csum.size() > 0) {
+ fprintf(descriptor, "Segment-metadata: %s\n", dbmeta_csum.c_str());
}
fprintf(descriptor, "Segments:\n");
}
fclose(descriptor);
-
- if (signature_pid) {
- int status;
- waitpid(signature_pid, &status, 0);
-
- if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
- fatal("Signature filter process error");
- }
+ if (descriptor_filter->wait() < 0) {
+ fatal("Signature filter process error");
}
descriptor_file->send();