#include <sys/stat.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <unistd.h>
#include <algorithm>
/* Keep track of all segments which are needed to reconstruct the snapshot. */
std::set<string> segment_list;
-void scandir(const string& path, bool include);
-
/* Selection of files to include/exclude in the snapshot. */
std::list<string> includes; // Paths in which files should be saved
std::list<string> excludes; // Paths which will not be saved
/* Read data from a file descriptor and return the amount of data read. A
* short read (less than the requested size) will only occur if end-of-file is
* hit. */
-size_t file_read(int fd, char *buf, size_t maxlen)
+ssize_t file_read(int fd, char *buf, size_t maxlen)
{
size_t bytes_read = 0;
if (res < 0) {
if (errno == EINTR)
continue;
- throw IOException("file_read: error reading");
+ fprintf(stderr, "error reading file: %m\n");
+ return -1;
} else if (res == 0) {
break;
} else {
/* Read the contents of a file (specified by an open file descriptor) and copy
* the data to the store. Returns the size of the file (number of bytes
* dumped), or -1 on error. */
-int64_t dumpfile(int fd, dictionary &file_info, const string &path)
+int64_t dumpfile(int fd, dictionary &file_info, const string &path,
+ struct stat& stat_buf)
{
- struct stat stat_buf;
- fstat(fd, &stat_buf);
int64_t size = 0;
list<string> object_list;
-
- if ((stat_buf.st_mode & S_IFMT) != S_IFREG) {
- fprintf(stderr, "file is no longer a regular file!\n");
- return -1;
- }
+ const char *status = NULL; /* Status indicator printed out */
/* Look up this file in the old stat cache, if we can. If the stat
* information indicates that the file has not changed, do not bother
const ObjectReference &ref = *i;
if (!db->IsAvailable(ref)) {
cached = false;
+ status = "repack";
break;
}
}
/* If the file is new or changed, we must read in the contents a block at a
* time. */
if (!cached) {
- printf(" [new]\n");
-
SHA1Checksum hash;
while (true) {
- size_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE);
+ ssize_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE);
if (bytes == 0)
break;
+ if (bytes < 0) {
+ fprintf(stderr, "Backup contents for %s may be incorrect\n",
+ path.c_str());
+ break;
+ }
hash.process(block_buf, bytes);
// Store a copy of the object if one does not yet exist
if (ref.get_segment().size() == 0) {
LbsObject *o = new LbsObject;
+ int object_group;
/* We might still have seen this checksum before, if the object
* was stored at some time in the past, but we have decided to
* Additionally, keep track of the age of the data by looking
* up the age of the block which was expired and using that
* instead of the current time. */
- if (db->IsOldObject(block_csum, bytes, &block_age))
- o->set_group("compacted");
- else
+ if (db->IsOldObject(block_csum, bytes,
+ &block_age, &object_group)) {
+ if (object_group == 0) {
+ o->set_group("data");
+ } else {
+ char group[32];
+ sprintf(group, "compacted-%d", object_group);
+ o->set_group(group);
+ }
+ if (status == NULL)
+ status = "partial";
+ } else {
o->set_group("data");
+ status = "new";
+ }
o->set_data(block_buf, bytes);
o->write(tss);
segment_list.insert(ref.get_segment());
db->UseObject(ref);
size += bytes;
+
+ if (status == NULL)
+ status = "old";
}
file_info["checksum"] = hash.checksum_str();
}
+ if (status != NULL)
+ printf(" [%s]\n", status);
+
statcache->Save(path, &stat_buf, file_info["checksum"], object_list);
/* For files that only need to be broken apart into a few objects, store
return size;
}
-void scanfile(const string& path, bool include)
+/* Dump a specified filesystem object (file, directory, etc.) based on its
+ * inode information. If the object is a regular file, an open filehandle is
+ * provided. */
+void dump_inode(const string& path, // Path within snapshot
+ const string& fullpath, // Path to object in filesystem
+ struct stat& stat_buf, // Results of stat() call
+ int fd) // Open filehandle if regular file
{
- int fd;
- long flags;
- struct stat stat_buf;
char *buf;
- ssize_t len;
- int64_t file_size;
- list<string> refs;
-
- string true_path;
- if (relative_paths)
- true_path = path;
- else
- true_path = "/" + path;
-
- // Set to true if the item is a directory and we should recursively scan
- bool recurse = false;
-
- // Set to true if we should scan through the contents of this directory,
- // but not actually back files up
- bool scan_only = false;
-
- // Check this file against the include/exclude list to see if it should be
- // considered
- for (list<string>::iterator i = includes.begin();
- i != includes.end(); ++i) {
- if (path == *i) {
- printf("Including %s\n", path.c_str());
- include = true;
- }
- }
-
- for (list<string>::iterator i = excludes.begin();
- i != excludes.end(); ++i) {
- if (path == *i) {
- printf("Excluding %s\n", path.c_str());
- include = false;
- }
- }
-
- for (list<string>::iterator i = searches.begin();
- i != searches.end(); ++i) {
- if (path == *i) {
- printf("Scanning %s\n", path.c_str());
- scan_only = true;
- }
- }
-
- if (!include && !scan_only)
- return;
-
dictionary file_info;
-
- lstat(true_path.c_str(), &stat_buf);
+ int64_t file_size;
+ ssize_t len;
printf("%s\n", path.c_str());
- file_info["mode"] = encode_int(stat_buf.st_mode & 07777);
+ file_info["mode"] = encode_int(stat_buf.st_mode & 07777, 8);
file_info["mtime"] = encode_int(stat_buf.st_mtime);
file_info["user"] = encode_int(stat_buf.st_uid);
file_info["group"] = encode_int(stat_buf.st_gid);
file_info["group"] += " (" + uri_encode(grp->gr_name) + ")";
}
+ if (stat_buf.st_nlink > 1 && (stat_buf.st_mode & S_IFMT) != S_IFDIR) {
+ file_info["links"] = encode_int(stat_buf.st_nlink);
+ file_info["inode"] = encode_int(major(stat_buf.st_dev))
+ + "/" + encode_int(minor(stat_buf.st_dev))
+ + "/" + encode_int(stat_buf.st_ino);
+ }
+
char inode_type;
switch (stat_buf.st_mode & S_IFMT) {
* the symlink. Allocate slightly more space, so that we ask for more
* bytes than we expect and so check for truncation. */
buf = new char[stat_buf.st_size + 2];
- len = readlink(true_path.c_str(), buf, stat_buf.st_size + 1);
+ len = readlink(fullpath.c_str(), buf, stat_buf.st_size + 1);
if (len < 0) {
fprintf(stderr, "error reading symlink: %m\n");
} else if (len <= stat_buf.st_size) {
case S_IFREG:
inode_type = '-';
- /* Be paranoid when opening the file. We have no guarantee that the
- * file was not replaced between the stat() call above and the open()
- * call below, so we might not even be opening a regular file. That
- * the file descriptor refers to a regular file is checked in
- * dumpfile(). But we also supply flags to open to to guard against
- * various conditions before we can perform that verification:
- * - O_NOFOLLOW: in the event the file was replaced by a symlink
- * - O_NONBLOCK: prevents open() from blocking if the file was
- * replaced by a fifo
- * We also add in O_NOATIME, since this may reduce disk writes (for
- * inode updates). However, O_NOATIME may result in EPERM, so if the
- * initial open fails, try again without O_NOATIME. */
- fd = open(true_path.c_str(), O_RDONLY|O_NOATIME|O_NOFOLLOW|O_NONBLOCK);
- if (fd < 0) {
- fd = open(true_path.c_str(), O_RDONLY|O_NOFOLLOW|O_NONBLOCK);
- }
- if (fd < 0) {
- fprintf(stderr, "Unable to open file %s: %m\n", path.c_str());
- return;
- }
-
- /* Drop the use of the O_NONBLOCK flag; we only wanted that for file
- * open. */
- flags = fcntl(fd, F_GETFL);
- fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
-
- file_size = dumpfile(fd, file_info, path);
+ file_size = dumpfile(fd, file_info, path, stat_buf);
file_info["size"] = encode_int(file_size);
- close(fd);
if (file_size < 0)
return; // error occurred; do not dump file
break;
case S_IFDIR:
inode_type = 'd';
- recurse = true;
break;
default:
// Break apart metadata listing if it becomes too large.
if (metadata.str().size() > LBS_METADATA_BLOCK_SIZE)
metadata_flush();
-
- // If we hit a directory, now that we've written the directory itself,
- // recursively scan the directory.
- if (recurse)
- scandir(path, include);
}
-void scandir(const string& path, bool include)
+void scanfile(const string& path, bool include)
{
+ int fd = -1;
+ long flags;
+ struct stat stat_buf;
+ list<string> refs;
+
string true_path;
if (relative_paths)
true_path = path;
else
true_path = "/" + path;
- DIR *dir = opendir(true_path.c_str());
+ // Set to true if we should scan through the contents of this directory,
+ // but not actually back files up
+ bool scan_only = false;
- if (dir == NULL) {
- fprintf(stderr, "Error: %m\n");
- return;
+ // Check this file against the include/exclude list to see if it should be
+ // considered
+ for (list<string>::iterator i = includes.begin();
+ i != includes.end(); ++i) {
+ if (path == *i) {
+ printf("Including %s\n", path.c_str());
+ include = true;
+ }
+ }
+
+ for (list<string>::iterator i = excludes.begin();
+ i != excludes.end(); ++i) {
+ if (path == *i) {
+ printf("Excluding %s\n", path.c_str());
+ include = false;
+ }
+ }
+
+ for (list<string>::iterator i = searches.begin();
+ i != searches.end(); ++i) {
+ if (path == *i) {
+ printf("Scanning %s\n", path.c_str());
+ scan_only = true;
+ }
}
- struct dirent *ent;
- vector<string> contents;
- while ((ent = readdir(dir)) != NULL) {
- string filename(ent->d_name);
- if (filename == "." || filename == "..")
- continue;
- contents.push_back(filename);
+ if (!include && !scan_only)
+ return;
+
+ if (lstat(true_path.c_str(), &stat_buf) < 0) {
+ fprintf(stderr, "lstat(%s): %m\n", path.c_str());
+ return;
}
- sort(contents.begin(), contents.end());
+ if ((stat_buf.st_mode & S_IFMT) == S_IFREG) {
+ /* Be paranoid when opening the file. We have no guarantee that the
+ * file was not replaced between the stat() call above and the open()
+ * call below, so we might not even be opening a regular file. We
+ * supply flags to open() to guard against various conditions before
+ * we can perform an fstat to check that the file is still a regular
+ * file:
+ * - O_NOFOLLOW: in the event the file was replaced by a symlink
+ * - O_NONBLOCK: prevents open() from blocking if the file was
+ * replaced by a fifo
+ * We also add in O_NOATIME, since this may reduce disk writes (for
+ * inode updates). However, O_NOATIME may result in EPERM, so if the
+ * initial open fails, try again without O_NOATIME. */
+ fd = open(true_path.c_str(), O_RDONLY|O_NOATIME|O_NOFOLLOW|O_NONBLOCK);
+ if (fd < 0) {
+ fd = open(true_path.c_str(), O_RDONLY|O_NOFOLLOW|O_NONBLOCK);
+ }
+ if (fd < 0) {
+ fprintf(stderr, "Unable to open file %s: %m\n", path.c_str());
+ return;
+ }
+
+ /* Drop the use of the O_NONBLOCK flag; we only wanted that for file
+ * open. */
+ flags = fcntl(fd, F_GETFL);
+ fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
+
+ /* Perform the stat call again, and check that we still have a regular
+ * file. */
+ if (fstat(fd, &stat_buf) < 0) {
+ fprintf(stderr, "fstat: %m\n");
+ close(fd);
+ return;
+ }
- for (vector<string>::iterator i = contents.begin();
- i != contents.end(); ++i) {
- const string& filename = *i;
- if (path == ".")
- scanfile(filename, include);
- else
- scanfile(path + "/" + filename, include);
+ if ((stat_buf.st_mode & S_IFMT) != S_IFREG) {
+ fprintf(stderr, "file is no longer a regular file!\n");
+ close(fd);
+ return;
+ }
}
- closedir(dir);
+ dump_inode(path, true_path, stat_buf, fd);
+
+ if (fd >= 0)
+ close(fd);
+
+ // If we hit a directory, now that we've written the directory itself,
+ // recursively scan the directory.
+ if ((stat_buf.st_mode & S_IFMT) == S_IFDIR) {
+ DIR *dir = opendir(true_path.c_str());
+
+ if (dir == NULL) {
+ fprintf(stderr, "Error: %m\n");
+ return;
+ }
+
+ struct dirent *ent;
+ vector<string> contents;
+ while ((ent = readdir(dir)) != NULL) {
+ string filename(ent->d_name);
+ if (filename == "." || filename == "..")
+ continue;
+ contents.push_back(filename);
+ }
+
+ closedir(dir);
+
+ sort(contents.begin(), contents.end());
+
+ for (vector<string>::iterator i = contents.begin();
+ i != contents.end(); ++i) {
+ const string& filename = *i;
+ if (path == ".")
+ scanfile(filename, include);
+ else
+ scanfile(path + "/" + filename, include);
+ }
+ }
}
/* Include the specified file path in the backups. Append the path to the
{
fprintf(
stderr,
+ "LBS %s\n\n"
"Usage: %s [OPTION]... --dest=DEST PATHS...\n"
"Produce backup snapshot of files in SOURCE and store to DEST.\n"
"\n"
" (defaults to \"bzip2 -c\")\n"
" --filter-extension=EXT\n"
" string to append to segment files\n"
- " (defaults to \".bz2\")\n",
- program
+ " (defaults to \".bz2\")\n"
+ " --signature-filter=COMMAND\n"
+ " program though which to filter descriptor\n"
+ " --scheme=NAME optional name for this snapshot\n",
+ lbs_version, program
);
}
int main(int argc, char *argv[])
{
- string backup_source = ".";
string backup_dest = "";
string localdb_dir = "";
+ string backup_scheme = "";
+ string signature_filter = "";
while (1) {
static struct option long_options[] = {
{"filter", 1, 0, 0}, // 2
{"filter-extension", 1, 0, 0}, // 3
{"dest", 1, 0, 0}, // 4
+ {"scheme", 1, 0, 0}, // 5
+ {"signature-filter", 1, 0, 0}, // 6
{NULL, 0, 0, 0},
};
case 4: // --dest
backup_dest = optarg;
break;
+ case 5: // --scheme
+ backup_scheme = optarg;
+ break;
+ case 6: // --signature-filter
+ signature_filter = optarg;
+ break;
default:
fprintf(stderr, "Unhandled long option!\n");
return 1;
}
}
- if (argc < optind + 2) {
+ if (optind == argc) {
usage(argv[0]);
return 1;
}
searches.push_back(".");
- if (optind == argc) {
- add_include(".");
- } else {
- for (int i = optind; i < argc; i++)
- add_include(argv[i]);
- }
-
- backup_source = argv[optind];
+ for (int i = optind; i < argc; i++)
+ add_include(argv[i]);
if (backup_dest == "") {
fprintf(stderr,
printf(" %s\n", i->c_str());
}
- tss = new TarSegmentStore(backup_dest);
block_buf = new char[LBS_BLOCK_SIZE];
/* Store the time when the backup started, so it can be included in the
* snapshot. */
string database_path = localdb_dir + "/localdb.sqlite";
db = new LocalDb;
- db->Open(database_path.c_str(), desc_buf);
+ db->Open(database_path.c_str(), desc_buf,
+ backup_scheme.size() ? backup_scheme.c_str() : NULL);
+
+ tss = new TarSegmentStore(backup_dest, db);
/* Initialize the stat cache, for skipping over unchanged files. */
statcache = new StatCache;
- statcache->Open(localdb_dir.c_str(), desc_buf);
+ statcache->Open(localdb_dir.c_str(), desc_buf,
+ backup_scheme.size() ? backup_scheme.c_str() : NULL);
scanfile(".", false);
string backup_root = root->get_ref().to_string();
delete root;
- db->Close();
-
statcache->Close();
delete statcache;
tss->dump_stats();
delete tss;
+ /* Write out a checksums file which lists the checksums for all the
+ * segments included in this snapshot. The format is designed so that it
+ * may be easily verified using the sha1sum command. */
+ const char csum_type[] = "sha1";
+ string checksum_filename = backup_dest + "/snapshot-";
+ if (backup_scheme.size() > 0)
+ checksum_filename += backup_scheme + "-";
+ checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums";
+ FILE *checksums = fopen(checksum_filename.c_str(), "w");
+ if (checksums != NULL) {
+ for (std::set<string>::iterator i = segment_list.begin();
+ i != segment_list.end(); ++i) {
+ string seg_path, seg_csum;
+ if (db->GetSegmentChecksum(*i, &seg_path, &seg_csum)) {
+ const char *raw_checksum = NULL;
+ if (strncmp(seg_csum.c_str(), csum_type,
+ strlen(csum_type)) == 0) {
+ raw_checksum = seg_csum.c_str() + strlen(csum_type);
+ if (*raw_checksum == '=')
+ raw_checksum++;
+ else
+ raw_checksum = NULL;
+ }
+
+ if (raw_checksum != NULL)
+ fprintf(checksums, "%s *%s\n",
+ raw_checksum, seg_path.c_str());
+ }
+ }
+ fclose(checksums);
+ } else {
+ fprintf(stderr, "ERROR: Unable to write checksums file: %m\n");
+ }
+
+ db->Close();
+
/* Write a backup descriptor file, which says which segments are needed and
* where to start to restore this snapshot. The filename is based on the
- * current time. */
- string desc_filename = backup_dest + "/snapshot-" + desc_buf + ".lbs";
- std::ofstream descriptor(desc_filename.c_str());
+ * current time. If a signature filter program was specified, filter the
+ * data through that to give a chance to sign the descriptor contents. */
+ string desc_filename = backup_dest + "/snapshot-";
+ if (backup_scheme.size() > 0)
+ desc_filename += backup_scheme + "-";
+ desc_filename = desc_filename + desc_buf + ".lbs";
+
+ int descriptor_fd = open(desc_filename.c_str(), O_WRONLY | O_CREAT, 0666);
+ if (descriptor_fd < 0) {
+ fprintf(stderr, "Unable to open descriptor output file: %m\n");
+ return 1;
+ }
+ pid_t signature_pid = 0;
+ if (signature_filter.size() > 0) {
+ int new_fd = spawn_filter(descriptor_fd, signature_filter.c_str(),
+ &signature_pid);
+ close(descriptor_fd);
+ descriptor_fd = new_fd;
+ }
+ FILE *descriptor = fdopen(descriptor_fd, "w");
- descriptor << "Format: LBS Snapshot v0.1\n";
- descriptor << "Producer: " << lbs_version << "\n";
+ fprintf(descriptor, "Format: LBS Snapshot v0.2\n");
+ fprintf(descriptor, "Producer: LBS %s\n", lbs_version);
strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z", &time_buf);
- descriptor << "Date: " << desc_buf << "\n";
- descriptor << "Root: " << backup_root << "\n";
+ fprintf(descriptor, "Date: %s\n", desc_buf);
+ if (backup_scheme.size() > 0)
+ fprintf(descriptor, "Scheme: %s\n", backup_scheme.c_str());
+ fprintf(descriptor, "Root: %s\n", backup_root.c_str());
+
+ SHA1Checksum checksum_csum;
+ if (checksum_csum.process_file(checksum_filename.c_str())) {
+ string csum = checksum_csum.checksum_str();
+ fprintf(descriptor, "Checksums: %s\n", csum.c_str());
+ }
- descriptor << "Segments:\n";
+ fprintf(descriptor, "Segments:\n");
for (std::set<string>::iterator i = segment_list.begin();
i != segment_list.end(); ++i) {
- descriptor << " " << *i << "\n";
+ fprintf(descriptor, " %s\n", i->c_str());
+ }
+
+ fclose(descriptor);
+
+ if (signature_pid) {
+ int status;
+ waitpid(signature_pid, &status, 0);
+
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ throw IOException("Signature filter process error");
+ }
}
return 0;