#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <unistd.h>
#include <algorithm>
#include <vector>
#include "localdb.h"
+#include "metadata.h"
#include "store.h"
#include "sha1.h"
#include "statcache.h"
static const char lbs_version[] = LBS_STRINGIFY(LBS_VERSION);
static TarSegmentStore *tss = NULL;
+static MetadataWriter *metawriter = NULL;
/* Buffer for holding a single block of data read from a file. */
static const size_t LBS_BLOCK_SIZE = 1024 * 1024;
static char *block_buf;
-static const size_t LBS_METADATA_BLOCK_SIZE = 65536;
-
/* Local database, which tracks objects written in this and previous
* invocations to help in creating incremental snapshots. */
LocalDb *db;
* skipping files which have not changed. */
StatCache *statcache;
-/* Contents of the root object. This will contain a set of indirect links to
- * the metadata objects. */
-std::ostringstream metadata_root;
-
-/* Buffer for building up metadata. */
-std::ostringstream metadata;
-
/* Keep track of all segments which are needed to reconstruct the snapshot. */
std::set<string> segment_list;
bool relative_paths = true;
-/* Ensure contents of metadata are flushed to an object. */
-void metadata_flush()
+/* Ensure that the given segment is listed as a dependency of the current
+ * snapshot. The name is inserted into segment_list, which is a std::set, so
+ * naming the same segment more than once is harmless. */
+void add_segment(const string& segment)
{
- string m = metadata.str();
- if (m.size() == 0)
- return;
-
- /* Write current metadata information to a new object. */
- LbsObject *meta = new LbsObject;
- meta->set_group("metadata");
- meta->set_data(m.data(), m.size());
- meta->write(tss);
- meta->checksum();
-
- /* Write a reference to this block in the root. */
- ObjectReference ref = meta->get_ref();
- metadata_root << "@" << ref.to_string() << "\n";
- segment_list.insert(ref.get_segment());
-
- delete meta;
-
- metadata.str("");
+ segment_list.insert(segment);
}
/* Read data from a file descriptor and return the amount of data read. A
// Store a copy of the object if one does not yet exist
if (ref.get_segment().size() == 0) {
LbsObject *o = new LbsObject;
+ int object_group;
/* We might still have seen this checksum before, if the object
* was stored at some time in the past, but we have decided to
* Additionally, keep track of the age of the data by looking
* up the age of the block which was expired and using that
* instead of the current time. */
- if (db->IsOldObject(block_csum, bytes, &block_age)) {
- o->set_group("compacted");
+ if (db->IsOldObject(block_csum, bytes,
+ &block_age, &object_group)) {
+ if (object_group == 0) {
+ o->set_group("data");
+ } else {
+ char group[32];
+ sprintf(group, "compacted-%d", object_group);
+ o->set_group(group);
+ }
if (status == NULL)
status = "partial";
} else {
statcache->Save(path, &stat_buf, file_info["checksum"], object_list);
- /* For files that only need to be broken apart into a few objects, store
- * the list of objects directly. For larger files, store the data
- * out-of-line and provide a pointer to the indrect object. */
- if (object_list.size() < 8) {
- string blocklist = "";
- for (list<string>::iterator i = object_list.begin();
- i != object_list.end(); ++i) {
- if (i != object_list.begin())
- blocklist += " ";
- blocklist += *i;
- }
- file_info["data"] = blocklist;
- } else {
- string blocklist = "";
- for (list<string>::iterator i = object_list.begin();
- i != object_list.end(); ++i) {
- blocklist += *i + "\n";
- }
-
- LbsObject *i = new LbsObject;
- i->set_group("metadata");
- i->set_data(blocklist.data(), blocklist.size());
- i->write(tss);
- file_info["data"] = "@" + i->get_name();
- segment_list.insert(i->get_ref().get_segment());
- delete i;
+ string blocklist = "";
+ for (list<string>::iterator i = object_list.begin();
+ i != object_list.end(); ++i) {
+ if (i != object_list.begin())
+ blocklist += "\n ";
+ blocklist += *i;
}
+ file_info["data"] = blocklist;
return size;
}
printf("%s\n", path.c_str());
file_info["mode"] = encode_int(stat_buf.st_mode & 07777, 8);
+ file_info["ctime"] = encode_int(stat_buf.st_ctime);
file_info["mtime"] = encode_int(stat_buf.st_mtime);
file_info["user"] = encode_int(stat_buf.st_uid);
file_info["group"] = encode_int(stat_buf.st_gid);
if (stat_buf.st_nlink > 1 && (stat_buf.st_mode & S_IFMT) != S_IFDIR) {
file_info["links"] = encode_int(stat_buf.st_nlink);
- file_info["inode"] = encode_int(major(stat_buf.st_dev))
- + "/" + encode_int(minor(stat_buf.st_dev))
- + "/" + encode_int(stat_buf.st_ino);
}
+ file_info["inode"] = encode_int(major(stat_buf.st_dev))
+ + "/" + encode_int(minor(stat_buf.st_dev))
+ + "/" + encode_int(stat_buf.st_ino);
+
char inode_type;
switch (stat_buf.st_mode & S_IFMT) {
fprintf(stderr, "error reading symlink: %m\n");
} else if (len <= stat_buf.st_size) {
buf[len] = '\0';
- file_info["contents"] = uri_encode(buf);
+ file_info["target"] = uri_encode(buf);
} else if (len > stat_buf.st_size) {
fprintf(stderr, "error reading symlink: name truncated\n");
}
delete[] buf;
break;
case S_IFREG:
- inode_type = '-';
+ inode_type = 'f';
file_size = dumpfile(fd, file_info, path, stat_buf);
file_info["size"] = encode_int(file_size);
file_info["type"] = string(1, inode_type);
- metadata << "name: " << uri_encode(path) << "\n";
- dict_output(metadata, file_info);
- metadata << "\n";
-
- // Break apart metadata listing if it becomes too large.
- if (metadata.str().size() > LBS_METADATA_BLOCK_SIZE)
- metadata_flush();
+ metawriter->add(path, file_info);
}
void scanfile(const string& path, bool include)
{
+ /* Print the version banner followed by the option summary to stderr. */
fprintf(
stderr,
+ "LBS %s\n\n"
"Usage: %s [OPTION]... --dest=DEST PATHS...\n"
"Produce backup snapshot of files in SOURCE and store to DEST.\n"
"\n"
" --filter-extension=EXT\n"
" string to append to segment files\n"
" (defaults to \".bz2\")\n"
+ " --signature-filter=COMMAND\n"
+ " program through which to filter descriptor\n"
" --scheme=NAME optional name for this snapshot\n",
- program
+ lbs_version, program
);
}
string backup_dest = "";
string localdb_dir = "";
string backup_scheme = "";
+ string signature_filter = "";
while (1) {
static struct option long_options[] = {
{"filter-extension", 1, 0, 0}, // 3
{"dest", 1, 0, 0}, // 4
{"scheme", 1, 0, 0}, // 5
+ {"signature-filter", 1, 0, 0}, // 6
{NULL, 0, 0, 0},
};
case 5: // --scheme
backup_scheme = optarg;
break;
+ case 6: // --signature-filter
+ signature_filter = optarg;
+ break;
default:
fprintf(stderr, "Unhandled long option!\n");
return 1;
printf(" %s\n", i->c_str());
}
- tss = new TarSegmentStore(backup_dest);
block_buf = new char[LBS_BLOCK_SIZE];
/* Store the time when the backup started, so it can be included in the
db->Open(database_path.c_str(), desc_buf,
backup_scheme.size() ? backup_scheme.c_str() : NULL);
+ tss = new TarSegmentStore(backup_dest, db);
+
/* Initialize the stat cache, for skipping over unchanged files. */
statcache = new StatCache;
statcache->Open(localdb_dir.c_str(), desc_buf,
backup_scheme.size() ? backup_scheme.c_str() : NULL);
- scanfile(".", false);
+ metawriter = new MetadataWriter(tss, localdb_dir.c_str(), desc_buf,
+ backup_scheme.size()
+ ? backup_scheme.c_str()
+ : NULL);
- metadata_flush();
- const string md = metadata_root.str();
-
- LbsObject *root = new LbsObject;
- root->set_group("metadata");
- root->set_data(md.data(), md.size());
- root->write(tss);
- root->checksum();
- segment_list.insert(root->get_ref().get_segment());
+ scanfile(".", false);
- string backup_root = root->get_ref().to_string();
- delete root;
-
- db->Close();
+ ObjectReference root_ref = metawriter->close();
+ add_segment(root_ref.get_segment());
+ string backup_root = root_ref.to_string();
statcache->Close();
delete statcache;
+ delete metawriter;
+
tss->sync();
tss->dump_stats();
delete tss;
+ /* Write out a checksums file which lists the checksums for all the
+ * segments included in this snapshot. The format is designed so that it
+ * may be easily verified using the sha1sum command. */
+ const char csum_type[] = "sha1";
+ string checksum_filename = backup_dest + "/snapshot-";
+ if (backup_scheme.size() > 0)
+ checksum_filename += backup_scheme + "-";
+ checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums";
+ FILE *checksums = fopen(checksum_filename.c_str(), "w");
+ if (checksums != NULL) {
+ for (std::set<string>::iterator i = segment_list.begin();
+ i != segment_list.end(); ++i) {
+ string seg_path, seg_csum;
+ if (db->GetSegmentChecksum(*i, &seg_path, &seg_csum)) {
+ const char *raw_checksum = NULL;
+ if (strncmp(seg_csum.c_str(), csum_type,
+ strlen(csum_type)) == 0) {
+ raw_checksum = seg_csum.c_str() + strlen(csum_type);
+ if (*raw_checksum == '=')
+ raw_checksum++;
+ else
+ raw_checksum = NULL;
+ }
+
+ if (raw_checksum != NULL)
+ fprintf(checksums, "%s *%s\n",
+ raw_checksum, seg_path.c_str());
+ }
+ }
+ fclose(checksums);
+ } else {
+ fprintf(stderr, "ERROR: Unable to write checksums file: %m\n");
+ }
+
+ db->Close();
+
/* Write a backup descriptor file, which says which segments are needed and
* where to start to restore this snapshot. The filename is based on the
- * current time. */
+ * current time. If a signature filter program was specified, filter the
+ * data through that to give a chance to sign the descriptor contents.
+ *
+ * The file is opened with O_TRUNC to keep the truncate-on-open behaviour of
+ * the std::ofstream this code replaces; without it, rewriting an existing
+ * descriptor could leave stale bytes after the new contents. */
string desc_filename = backup_dest + "/snapshot-";
if (backup_scheme.size() > 0)
desc_filename += backup_scheme + "-";
desc_filename = desc_filename + desc_buf + ".lbs";
- std::ofstream descriptor(desc_filename.c_str());
- descriptor << "Format: LBS Snapshot v0.2\n";
- descriptor << "Producer: LBS " << lbs_version << "\n";
+ int descriptor_fd = open(desc_filename.c_str(),
+                          O_WRONLY | O_CREAT | O_TRUNC, 0666);
+ if (descriptor_fd < 0) {
+     fprintf(stderr, "Unable to open descriptor output file: %m\n");
+     return 1;
+ }
+ pid_t signature_pid = 0;
+ if (signature_filter.size() > 0) {
+     int new_fd = spawn_filter(descriptor_fd, signature_filter.c_str(),
+                               &signature_pid);
+     close(descriptor_fd);
+     descriptor_fd = new_fd;
+ }
+
+ /* fdopen() returns NULL for an invalid descriptor, so this check also
+  * covers the case where spawn_filter handed back an unusable one.
+  * NOTE(review): spawn_filter's failure convention is not visible here --
+  * confirm whether it returns a negative value or throws. */
+ FILE *descriptor = fdopen(descriptor_fd, "w");
+ if (descriptor == NULL) {
+     fprintf(stderr, "Unable to open descriptor output stream: %m\n");
+     if (descriptor_fd >= 0)
+         close(descriptor_fd);
+     return 1;
+ }
+
+ fprintf(descriptor, "Format: LBS Snapshot v0.6\n");
+ fprintf(descriptor, "Producer: LBS %s\n", lbs_version);
strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z", &time_buf);
- descriptor << "Date: " << desc_buf << "\n";
+ fprintf(descriptor, "Date: %s\n", desc_buf);
if (backup_scheme.size() > 0)
- descriptor << "Scheme: " << backup_scheme << "\n";
- descriptor << "Root: " << backup_root << "\n";
+ fprintf(descriptor, "Scheme: %s\n", backup_scheme.c_str());
+ fprintf(descriptor, "Root: %s\n", backup_root.c_str());
+
+ /* Record the checksum of the checksums file written earlier, if it can
+  * be read back. */
+ SHA1Checksum checksum_csum;
+ if (checksum_csum.process_file(checksum_filename.c_str())) {
+     string csum = checksum_csum.checksum_str();
+     fprintf(descriptor, "Checksums: %s\n", csum.c_str());
+ }
- descriptor << "Segments:\n";
+ fprintf(descriptor, "Segments:\n");
for (std::set<string>::iterator i = segment_list.begin();
i != segment_list.end(); ++i) {
- descriptor << " " << *i << "\n";
+ fprintf(descriptor, " %s\n", i->c_str());
+ }
+
+ /* Output is buffered, so a write failure may only surface in the stream
+  * error flag or when fclose() flushes; check both. */
+ bool descriptor_ok = (ferror(descriptor) == 0);
+ if (fclose(descriptor) != 0)
+     descriptor_ok = false;
+ if (!descriptor_ok)
+     fprintf(stderr, "Error writing descriptor file %s\n",
+             desc_filename.c_str());
+
+ if (signature_pid) {
+     /* Test waitpid()'s own result first: if it fails, status is never
+      * written and must not be inspected. */
+     int status;
+     if (waitpid(signature_pid, &status, 0) < 0
+         || !WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+         throw IOException("Signature filter process error");
+     }
+ }
+
+ if (!descriptor_ok) {
+     return 1;
}
return 0;