`pkg-config --cflags $(PACKAGES)`
LDFLAGS=$(DEBUG) -ltar `pkg-config --libs $(PACKAGES)`
-SRCS=format.cc localdb.cc ref.cc scandir.cc sha1.cc store.cc
+SRCS=format.cc localdb.cc ref.cc scandir.cc sha1.cc statcache.cc store.cc
OBJS=$(SRCS:.cc=.o)
scandir : $(OBJS)
#include "localdb.h"
#include "store.h"
#include "sha1.h"
+#include "statcache.h"
using std::list;
using std::string;
* invocations to help in creating incremental snapshots. */
LocalDb *db;
+/* Stat cache, which stores data locally to speed the backup process by quickly
+ * skipping files which have not changed. */
+StatCache *statcache;
+
/* Contents of the root object. This will contain a set of indirect links to
* the metadata objects. */
std::ostringstream metadata_root;
/* Read the contents of a file (specified by an open file descriptor) and copy
* the data to the store. Returns the size of the file (number of bytes
* dumped), or -1 on error. */
-int64_t dumpfile(int fd, dictionary &file_info)
+int64_t dumpfile(int fd, dictionary &file_info, const string &path)
{
struct stat stat_buf;
fstat(fd, &stat_buf);
file_info["checksum"] = hash.checksum_str();
+ statcache->Save(path, &stat_buf, file_info["checksum"], object_list);
+
/* For files that only need to be broken apart into a few objects, store
* the list of objects directly. For larger files, store the data
* out-of-line and provide a pointer to the indrect object. */
flags = fcntl(fd, F_GETFL);
fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
- file_size = dumpfile(fd, file_info);
+ file_size = dumpfile(fd, file_info, path);
file_info["size"] = encode_int(file_size);
close(fd);
char desc_buf[256];
time(&now);
localtime_r(&now, &time_buf);
+ strftime(desc_buf, sizeof(desc_buf), "%Y%m%dT%H%M%S", &time_buf);
/* Open the local database which tracks all objects that are stored
* remotely, for efficient incrementals. Provide it with the name of this
db = new LocalDb;
db->Open(database_path.c_str(), desc_buf);
+ /* Initialize the stat cache, for skipping over unchanged files. */
+ statcache = new StatCache;
+ statcache->Open(localdb_dir.c_str(), desc_buf);
+
try {
scanfile(".");
} catch (IOException e) {
/* Write a backup descriptor file, which says which segments are needed and
* where to start to restore this snapshot. The filename is based on the
* current time. */
- strftime(desc_buf, sizeof(desc_buf), "%Y%m%dT%H%M%S", &time_buf);
string desc_filename = backup_dest + "/snapshot-" + desc_buf + ".lbs";
std::ofstream descriptor(desc_filename.c_str());
db->Close();
+ statcache->Close();
+ delete statcache;
+
tss->sync();
delete tss;
--- /dev/null
+/* LBS: An LFS-inspired filesystem backup system Copyright (C) 2007 Michael
+ * Vrable
+ *
+ * To speed backups, we maintain a "stat cache" containing selected information
+ * about all regular files, including modification times and the list of blocks
+ * that comprised the file in the last backup. If the file has not changed
+ * according to a stat() call, we may re-use the information contained in the
+ * stat cache instead of re-reading the entire file. It is always safe to
+ * discard information from the stat cache; this will only cause a file to be
+ * re-read to determine that it contains the same data as before.
+ *
+ * The stat cache is stored in a file called "statcache" in the local backup
+ * directory. During a backup, a new statcache file is written out with a
+ * suffix based on the current time; at the end of a successful backup this
+ * file is renamed over the original statcache file.
+ *
+ * The information in the statcache file is stored in sorted order as we
+ * traverse the filesystem, so that we can read and write it in a purely
+ * streaming manner. (This is why we don't include the information in the
+ * SQLite local database; doing so is likely less efficient.)
+ */
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <fstream>
+#include <iostream>
+#include <string>
+
+#include "format.h"
+#include "statcache.h"
+
+using std::list;
+using std::string;
+using std::ifstream;
+using std::ofstream;
+
+/* Begin writing a new stat cache for this backup run.
+ *
+ * path is the directory holding the cache; the existing cache is named
+ * "<path>/statcache".  snapshot_name is appended after a "." to form the name
+ * of the new file that receives this run's entries; Close() later renames
+ * that file over the existing one.
+ *
+ * The previous cache is not opened for reading here: oldcache is only reset
+ * to NULL. */
+void StatCache::Open(const char *path, const char *snapshot_name)
+{
+    oldpath = path;
+    oldpath += "/statcache";
+    newpath = oldpath + "." + snapshot_name;
+
+    oldcache = NULL;
+    newcache = new ofstream(newpath.c_str());
+
+    /* A stream that failed to open silently discards everything written to
+     * it, so report the problem now instead of only when the rename in
+     * Close() fails. */
+    if (!newcache->is_open()) {
+        fprintf(stderr, "Error opening statcache %s for writing\n",
+                newpath.c_str());
+    }
+}
+
+/* Finish the stat cache: release both streams and, if the new cache file was
+ * written successfully, rename it over the old one.
+ *
+ * If the new file could not be opened or any write to it failed, the old
+ * statcache is left untouched and an error is printed; discarding stat cache
+ * data is always safe, whereas installing a truncated file is not.  Both
+ * stream pointers are reset to NULL so that a repeated Close() is harmless. */
+void StatCache::Close()
+{
+    /* Deleting a NULL pointer is a no-op, so no guard is needed. */
+    delete oldcache;
+    oldcache = NULL;
+
+    if (newcache == NULL)
+        return;
+
+    /* close() flushes buffered data and sets failbit if that fails (or if the
+     * file was never open), so fail() afterwards covers every write error. */
+    newcache->close();
+    bool write_failed = newcache->fail();
+    delete newcache;
+    newcache = NULL;
+
+    if (write_failed) {
+        fprintf(stderr, "Error writing statcache %s; not replacing %s\n",
+                newpath.c_str(), oldpath.c_str());
+        return;
+    }
+
+    if (rename(newpath.c_str(), oldpath.c_str()) < 0) {
+        fprintf(stderr, "Error renaming statcache from %s to %s: %m\n",
+                newpath.c_str(), oldpath.c_str());
+    }
+}
+
+/* Save stat information about a regular file for future invocations.
+ *
+ * One stanza is appended to the new cache file:
+ *
+ *     <uri-encoded path>
+ *     mtime: <encoded st_mtime>
+ *     ctime: <encoded st_ctime>
+ *     inode: <encoded st_ino>
+ *     checksum: <checksum>
+ *     blocks: <first block>
+ *      <second block>
+ *      ...
+ *     <blank line>
+ *
+ * Each block after the first goes on its own line with a leading space.  The
+ * stanza always ends with a blank line, including when blocks is empty. */
+void StatCache::Save(const string &path, struct stat *stat_buf,
+                     const string &checksum, const list<string> &blocks)
+{
+    *newcache << uri_encode(path) << "\n";
+    *newcache << "mtime: " << encode_int(stat_buf->st_mtime) << "\n"
+              << "ctime: " << encode_int(stat_buf->st_ctime) << "\n"
+              << "inode: " << encode_int(stat_buf->st_ino) << "\n"
+              << "checksum: " << checksum << "\n";
+
+    *newcache << "blocks:";
+    /* Each block written below ends its own line.  With no blocks, nothing
+     * would terminate the "blocks:" line, and the newline after the loop
+     * would be consumed doing so, leaving the stanza without its trailing
+     * blank line.  End the line explicitly in that case. */
+    if (blocks.empty())
+        *newcache << "\n";
+    for (list<string>::const_iterator i = blocks.begin();
+         i != blocks.end(); ++i) {
+        *newcache << " " << *i << "\n";
+    }
+
+    /* Blank line separating this stanza from the next. */
+    *newcache << "\n";
+}
--- /dev/null
+/* LBS: An LFS-inspired filesystem backup system Copyright (C) 2007 Michael
+ * Vrable
+ *
+ * To speed backups, we maintain a "stat cache" containing selected information
+ * about all regular files, including modification times and the list of blocks
+ * that comprised the file in the last backup. If the file has not changed
+ * according to a stat() call, we may re-use the information contained in the
+ * stat cache instead of re-reading the entire file. It is always safe to
+ * discard information from the stat cache; this will only cause a file to be
+ * re-read to determine that it contains the same data as before.
+ *
+ * The stat cache is stored in a file called "statcache" in the local backup
+ * directory. During a backup, a new statcache file is written out with a
+ * suffix based on the current time; at the end of a successful backup this
+ * file is renamed over the original statcache file.
+ *
+ * The information in the statcache file is stored in sorted order as we
+ * traverse the filesystem, so that we can read and write it in a purely
+ * streaming manner. (This is why we don't include the information in the
+ * SQLite local database; doing so is likely less efficient.)
+ */
+
+#ifndef _LBS_STATCACHE_H
+#define _LBS_STATCACHE_H
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <list>
+#include <string>
+
+/* Writer for the on-disk stat cache.
+ *
+ * Usage: Open() once at the start of a backup, Save() once per regular file,
+ * then Close() to install the newly written cache.
+ *
+ * The object holds raw stream pointers that Close() deletes, so instances
+ * should not be copied. */
+class StatCache {
+public:
+    /* Start with no streams, so that Close() on an object that was never
+     * opened does not delete uninitialized pointers. */
+    StatCache() : oldcache(NULL), newcache(NULL) { }
+
+    /* Start a new cache file under the directory path, using snapshot_name as
+     * the suffix of the temporary file name. */
+    void Open(const char *path, const char *snapshot_name);
+
+    /* Release the streams and rename the new cache file over the old one. */
+    void Close();
+
+    /* Append a record for one file: its path, selected fields of stat_buf,
+     * its checksum, and the list of blocks holding its data. */
+    void Save(const std::string &path, struct stat *stat_buf,
+              const std::string &checksum,
+              const std::list<std::string> &blocks);
+
+private:
+    /* Names of the existing cache file and of the file being written. */
+    std::string oldpath, newpath;
+
+    /* Stream for the previous cache; presumably intended for reading it back
+     * (TODO confirm against the rest of the project). */
+    std::ifstream *oldcache;
+
+    /* Stream receiving the records written by Save(). */
+    std::ofstream *newcache;
+};
+
+#endif // _LBS_STATCACHE_H