projects
/
cumulus.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Improve database rebuilding.
[cumulus.git]
/
main.cc
diff --git
a/main.cc
b/main.cc
index
15ddfc6
..
cd31189
100644
(file)
--- a/
main.cc
+++ b/
main.cc
@@
-1,8
+1,6
@@
-/* Cumulus: Smart Filesystem Backup to Dumb Servers
- *
- * Copyright (C) 2006-2009 The Regents of the University of California
- * Copyright (C) 2012 Google Inc.
- * Written by Michael Vrable <mvrable@cs.ucsd.edu>
+/* Cumulus: Efficient Filesystem Backup to the Cloud
+ * Copyright (C) 2006-2009, 2012 The Cumulus Developers
+ * See the AUTHORS file for a list of contributors.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@
-49,13
+47,14
@@
#include <vector>
#include "exclude.h"
#include <vector>
#include "exclude.h"
+#include "hash.h"
#include "localdb.h"
#include "metadata.h"
#include "remote.h"
#include "store.h"
#include "localdb.h"
#include "metadata.h"
#include "remote.h"
#include "store.h"
-#include "sha1.h"
#include "subfile.h"
#include "util.h"
#include "subfile.h"
#include "util.h"
+#include "third_party/sha1.h"
using std::list;
using std::map;
using std::list;
using std::map;
@@
-83,14
+82,6
@@
static char *block_buf;
* invocations to help in creating incremental snapshots. */
LocalDb *db;
* invocations to help in creating incremental snapshots. */
LocalDb *db;
-/* Keep track of all segments which are needed to reconstruct the snapshot. */
-std::set<string> segment_list;
-
-/* Snapshot intent: 1=daily, 7=weekly, etc. This is not used directly, but is
- * stored in the local database and can help guide segment cleaning and
- * snapshot expiration policies. */
-double snapshot_intent = 1.0;
-
/* Selection of files to include/exclude in the snapshot. */
PathFilterList filter_rules;
/* Selection of files to include/exclude in the snapshot. */
PathFilterList filter_rules;
@@
-99,13
+90,6
@@
bool flag_rebuild_statcache = false;
/* Whether verbose output is enabled. */
bool verbose = false;
/* Whether verbose output is enabled. */
bool verbose = false;
-/* Ensure that the given segment is listed as a dependency of the current
- * snapshot. */
-void add_segment(const string& segment)
-{
- segment_list.insert(segment);
-}
-
/* Attempts to open a regular file read-only, but with safety checks for files
* that might not be fully trusted. */
int safe_open(const string& path, struct stat *stat_buf)
/* Attempts to open a regular file read-only, but with safety checks for files
* that might not be fully trusted. */
int safe_open(const string& path, struct stat *stat_buf)
@@
-233,8
+217,6
@@
int64_t dumpfile(int fd, dictionary &file_info, const string &path,
i != old_blocks.end(); ++i) {
const ObjectReference &ref = *i;
object_list.push_back(ref.to_string());
i != old_blocks.end(); ++i) {
const ObjectReference &ref = *i;
object_list.push_back(ref.to_string());
- if (ref.is_normal())
- add_segment(ref.get_segment());
db->UseObject(ref);
}
size = stat_buf.st_size;
db->UseObject(ref);
}
size = stat_buf.st_size;
@@
-244,7
+226,7
@@
int64_t dumpfile(int fd, dictionary &file_info, const string &path,
/* If the file is new or changed, we must read in the contents a block at a
* time. */
if (!cached) {
/* If the file is new or changed, we must read in the contents a block at a
* time. */
if (!cached) {
- SHA1Checksum hash;
+ Hash *hash = Hash::New();
Subfile subfile(db);
subfile.load_old_blocks(old_blocks);
Subfile subfile(db);
subfile.load_old_blocks(old_blocks);
@@
-258,7
+240,7
@@
int64_t dumpfile(int fd, dictionary &file_info, const string &path,
break;
}
break;
}
- hash.process(block_buf, bytes);
+ hash->update(block_buf, bytes);
// Sparse file processing: if we read a block of all zeroes, encode
// that explicitly.
// Sparse file processing: if we read a block of all zeroes, encode
// that explicitly.
@@
-275,9
+257,10
@@
int64_t dumpfile(int fd, dictionary &file_info, const string &path,
double block_age = 0.0;
ObjectReference ref;
double block_age = 0.0;
ObjectReference ref;
- SHA1Checksum block_hash;
- block_hash.process(block_buf, bytes);
- string block_csum = block_hash.checksum_str();
+ Hash *hash = Hash::New();
+ hash->update(block_buf, bytes);
+ string block_csum = hash->digest_str();
+ delete hash;
if (all_zero) {
ref = ObjectReference(ObjectReference::REF_ZERO);
if (all_zero) {
ref = ObjectReference(ObjectReference::REF_ZERO);
@@
-333,8
+316,6
@@
int64_t dumpfile(int fd, dictionary &file_info, const string &path,
while (!refs.empty()) {
ref = refs.front(); refs.pop_front();
object_list.push_back(ref.to_string());
while (!refs.empty()) {
ref = refs.front(); refs.pop_front();
object_list.push_back(ref.to_string());
- if (ref.is_normal())
- add_segment(ref.get_segment());
db->UseObject(ref);
}
size += bytes;
db->UseObject(ref);
}
size += bytes;
@@
-343,7
+324,8
@@
int64_t dumpfile(int fd, dictionary &file_info, const string &path,
status = "old";
}
status = "old";
}
- file_info["checksum"] = hash.checksum_str();
+ file_info["checksum"] = hash->digest_str();
+ delete hash;
}
// Sanity check: if we are rebuilding the statcache, but the file looks
}
// Sanity check: if we are rebuilding the statcache, but the file looks
@@
-541,6
+523,7
@@
void try_merge_filter(const string& path, const string& basedir)
* one block (1 MB) worth of data. If the file doesn't seems like it might
* be larger than that, don't parse the rules in it. */
ssize_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE);
* one block (1 MB) worth of data. If the file doesn't seems like it might
* be larger than that, don't parse the rules in it. */
ssize_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE);
+ close(fd);
if (bytes < 0 || bytes >= static_cast<ssize_t>(LBS_BLOCK_SIZE - 1)) {
/* TODO: Add more strict resource limits on merge files? */
fprintf(stderr,
if (bytes < 0 || bytes >= static_cast<ssize_t>(LBS_BLOCK_SIZE - 1)) {
/* TODO: Add more strict resource limits on merge files? */
fprintf(stderr,
@@
-585,7
+568,8
@@
void scanfile(const string& path)
DIR *dir = opendir(path.c_str());
if (dir == NULL) {
DIR *dir = opendir(path.c_str());
if (dir == NULL) {
- fprintf(stderr, "Error: %m\n");
+ fprintf(stderr, "Error reading directory %s: %m\n",
+ path.c_str());
return;
}
return;
}
@@
-653,8
+637,10
@@
void usage(const char *program)
" --dest=PATH path where backup is to be written\n"
" --upload-script=COMMAND\n"
" program to invoke for each backup file generated\n"
" --dest=PATH path where backup is to be written\n"
" --upload-script=COMMAND\n"
" program to invoke for each backup file generated\n"
- " --exclude=PATH exclude files in PATH from snapshot\n"
- " --exclude-name=NAME exclude files called NAME from snapshot\n"
+ " --exclude=PATTERN exclude files matching PATTERN from snapshot\n"
+ " --include=PATTERN include files matching PATTERN in snapshot\n"
+ " --dir-merge=PATTERN parse files matching PATTERN to read additional\n"
+ " subtree-specific include/exclude rules during backup\n"
" --localdb=PATH local backup metadata is stored in PATH\n"
" --tmpdir=PATH path for temporarily storing backup files\n"
" (defaults to TMPDIR environment variable or /tmp)\n"
" --localdb=PATH local backup metadata is stored in PATH\n"
" --tmpdir=PATH path for temporarily storing backup files\n"
" (defaults to TMPDIR environment variable or /tmp)\n"
@@
-666,8
+652,7
@@
void usage(const char *program)
" --signature-filter=COMMAND\n"
" program though which to filter descriptor\n"
" --scheme=NAME optional name for this snapshot\n"
" --signature-filter=COMMAND\n"
" program though which to filter descriptor\n"
" --scheme=NAME optional name for this snapshot\n"
- " --intent=FLOAT intended backup type: 1=daily, 7=weekly, ...\n"
- " (defaults to \"1\")\n"
+ " --intent=FLOAT DEPRECATED: ignored, and will be removed soon\n"
" --full-metadata do not re-use metadata from previous backups\n"
" --rebuild-statcache re-read all file data to verify statcache\n"
" -v --verbose list files as they are backed up\n"
" --full-metadata do not re-use metadata from previous backups\n"
" --rebuild-statcache re-read all file data to verify statcache\n"
" -v --verbose list files as they are backed up\n"
@@
-679,6
+664,8
@@
void usage(const char *program)
int main(int argc, char *argv[])
{
int main(int argc, char *argv[])
{
+ hash_init();
+
string backup_dest = "", backup_script = "";
string localdb_dir = "";
string backup_scheme = "";
string backup_dest = "", backup_script = "";
string localdb_dir = "";
string backup_scheme = "";
@@
-696,7
+683,7
@@
int main(int argc, char *argv[])
{"dest", 1, 0, 0}, // 3
{"scheme", 1, 0, 0}, // 4
{"signature-filter", 1, 0, 0}, // 5
{"dest", 1, 0, 0}, // 3
{"scheme", 1, 0, 0}, // 4
{"signature-filter", 1, 0, 0}, // 5
- {"intent", 1, 0, 0}, // 6
+ {"intent", 1, 0, 0}, // 6, DEPRECATED
{"full-metadata", 0, 0, 0}, // 7
{"tmpdir", 1, 0, 0}, // 8
{"upload-script", 1, 0, 0}, // 9
{"full-metadata", 0, 0, 0}, // 7
{"tmpdir", 1, 0, 0}, // 8
{"upload-script", 1, 0, 0}, // 9
@@
-736,9
+723,9
@@
int main(int argc, char *argv[])
signature_filter = optarg;
break;
case 6: // --intent
signature_filter = optarg;
break;
case 6: // --intent
- snapshot_intent = atof(optarg);
- if (snapshot_intent <= 0)
- snapshot_intent = 1;
+ fprintf(stderr,
+ "Warning: The --intent= option is deprecated and will "
+ "be removed in the future.\n");
break;
case 7: // --full-metadata
flag_full_metadata = true;
break;
case 7: // --full-metadata
flag_full_metadata = true;
@@
-813,7
+800,7
@@
int main(int argc, char *argv[])
* a temporary directory for staging files. Otherwise, write backups
* directly to the destination directory. */
if (backup_script != "") {
* a temporary directory for staging files. Otherwise, write backups
* directly to the destination directory. */
if (backup_script != "") {
- tmp_dir = tmp_dir + "/lbs." + generate_uuid();
+ tmp_dir = tmp_dir + "/cumulus." + generate_uuid();
if (mkdir(tmp_dir.c_str(), 0700) < 0) {
fprintf(stderr, "Cannot create temporary directory %s: %m\n",
tmp_dir.c_str());
if (mkdir(tmp_dir.c_str(), 0700) < 0) {
fprintf(stderr, "Cannot create temporary directory %s: %m\n",
tmp_dir.c_str());
@@
-839,8
+826,7
@@
int main(int argc, char *argv[])
* snapshot. */
string database_path = localdb_dir + "/localdb.sqlite";
db = new LocalDb;
* snapshot. */
string database_path = localdb_dir + "/localdb.sqlite";
db = new LocalDb;
- db->Open(database_path.c_str(), desc_buf, backup_scheme.c_str(),
- snapshot_intent);
+ db->Open(database_path.c_str(), desc_buf, backup_scheme.c_str());
tss = new TarSegmentStore(remote, db);
tss = new TarSegmentStore(remote, db);
@@
-853,7
+839,6
@@
int main(int argc, char *argv[])
}
ObjectReference root_ref = metawriter->close();
}
ObjectReference root_ref = metawriter->close();
- add_segment(root_ref.get_segment());
string backup_root = root_ref.to_string();
delete metawriter;
string backup_root = root_ref.to_string();
delete metawriter;
@@
-871,13
+856,14
@@
int main(int argc, char *argv[])
checksum_filename += backup_scheme + "-";
checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums";
RemoteFile *checksum_file = remote->alloc_file(checksum_filename,
checksum_filename += backup_scheme + "-";
checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums";
RemoteFile *checksum_file = remote->alloc_file(checksum_filename,
- "checksums");
+ "meta");
FILE *checksums = fdopen(checksum_file->get_fd(), "w");
FILE *checksums = fdopen(checksum_file->get_fd(), "w");
+ std::set<string> segment_list = db->GetUsedSegments();
for (std::set<string>::iterator i = segment_list.begin();
i != segment_list.end(); ++i) {
string seg_path, seg_csum;
for (std::set<string>::iterator i = segment_list.begin();
i != segment_list.end(); ++i) {
string seg_path, seg_csum;
- if (db->GetSegmentChecksum(*i, &seg_path, &seg_csum)) {
+ if (db->GetSegmentMetadata(*i, &seg_path, &seg_csum)) {
const char *raw_checksum = NULL;
if (strncmp(seg_csum.c_str(), csum_type,
strlen(csum_type)) == 0) {
const char *raw_checksum = NULL;
if (strncmp(seg_csum.c_str(), csum_type,
strlen(csum_type)) == 0) {
@@
-919,7
+905,7
@@
int main(int argc, char *argv[])
string desc_filename = "snapshot-";
if (backup_scheme.size() > 0)
desc_filename += backup_scheme + "-";
string desc_filename = "snapshot-";
if (backup_scheme.size() > 0)
desc_filename += backup_scheme + "-";
- desc_filename = desc_filename + desc_buf + ".lbs";
+ desc_filename = desc_filename + desc_buf + ".cumulus";
RemoteFile *descriptor_file = remote->alloc_file(desc_filename,
"snapshots");
RemoteFile *descriptor_file = remote->alloc_file(desc_filename,
"snapshots");
@@
-944,7
+930,6
@@
int main(int argc, char *argv[])
fprintf(descriptor, "Date: %s\n", desc_buf);
if (backup_scheme.size() > 0)
fprintf(descriptor, "Scheme: %s\n", backup_scheme.c_str());
fprintf(descriptor, "Date: %s\n", desc_buf);
if (backup_scheme.size() > 0)
fprintf(descriptor, "Scheme: %s\n", backup_scheme.c_str());
- fprintf(descriptor, "Backup-Intent: %g\n", snapshot_intent);
fprintf(descriptor, "Root: %s\n", backup_root.c_str());
if (csum.size() > 0) {
fprintf(descriptor, "Root: %s\n", backup_root.c_str());
if (csum.size() > 0) {