#include "metadata.h"
#include "ref.h"
#include "store.h"
-#include "statcache.h"
#include "util.h"
using std::list;
throw IOException("Error opening statcache");
}
+ found_match = false;
old_metadata_eof = false;
this->store = store;
while (!old_metadata_eof) {
string old_path = uri_decode(old_metadata["path"]);
int cmp = pathcmp(old_path.c_str(), path_str);
- if (cmp == 0)
+ if (cmp == 0) {
+ found_match = true;
return true;
- else if (cmp > 0)
+ } else if (cmp > 0) {
+ found_match = false;
return false;
- else
+ } else {
read_statcache();
+ }
}
+ found_match = false;
return false;
}
+/* Decide whether a file looks unchanged since the previous backup by
+ * comparing the stat information in stat_buf against the fields stored in
+ * old_metadata.  Returns true only if the ctime, mtime, size, and inode fields
+ * are all present in old_metadata and every one of them agrees with stat_buf;
+ * a missing field or any mismatch yields false.
+ *
+ * TODO: Notice files that were modified as they were being backed up the last
+ * time. */
+bool MetadataWriter::is_unchanged(const struct stat *stat_buf)
+{
+    /* Each numeric field must exist in the old record and equal the value in
+     * stat_buf.  The presence test comes first so that a missing field is
+     * never parsed. */
+    if (old_metadata.find("ctime") == old_metadata.end()
+        || stat_buf->st_ctime != parse_int(old_metadata["ctime"]))
+        return false;
+
+    if (old_metadata.find("mtime") == old_metadata.end()
+        || stat_buf->st_mtime != parse_int(old_metadata["mtime"]))
+        return false;
+
+    if (old_metadata.find("size") == old_metadata.end()
+        || stat_buf->st_size != parse_int(old_metadata["size"]))
+        return false;
+
+    if (old_metadata.find("inode") == old_metadata.end())
+        return false;
+
+    /* The inode field is compared as text of the form
+     * "<device major>/<device minor>/<inode number>". */
+    string current_inode = encode_int(major(stat_buf->st_dev));
+    current_inode += "/";
+    current_inode += encode_int(minor(stat_buf->st_dev));
+    current_inode += "/";
+    current_inode += encode_int(stat_buf->st_ino);
+
+    return current_inode == old_metadata["inode"];
+}
+
+/* Return the object references listed in the "data" field of old_metadata.
+ * The field is treated as a whitespace-separated sequence of reference
+ * strings; each token is passed to ObjectReference::parse(), and tokens for
+ * which parse() returns NULL are skipped.  If old_metadata has no "data" field
+ * the returned list is empty. */
+list<ObjectReference> MetadataWriter::get_blocks()
+{
+    list<ObjectReference> blocks;
+
+    /* Test for the field before indexing, as is_unchanged() does: assuming
+     * old_metadata behaves like std::map, operator[] on a missing key would
+     * insert an empty "data" entry as a side effect. */
+    if (old_metadata.find("data") == old_metadata.end())
+        return blocks;
+
+    /* Parse the list of blocks. */
+    const char *s = old_metadata["data"].c_str();
+    while (*s != '\0') {
+        /* isspace() is only defined for values representable as unsigned
+         * char (or EOF), so convert first; a plain char may be negative. */
+        if (isspace(static_cast<unsigned char>(*s))) {
+            s++;
+            continue;
+        }
+
+        /* Advance to the end of the token, then copy it out in one step. */
+        const char *token = s;
+        while (*s != '\0' && !isspace(static_cast<unsigned char>(*s)))
+            s++;
+        string ref(token, s - token);
+
+        /* parse() hands back a heap-allocated object (or NULL on failure);
+         * keep a copy in the list and release the original. */
+        ObjectReference *r = ObjectReference::parse(ref);
+        if (r != NULL) {
+            blocks.push_back(*r);
+            delete r;
+        }
+    }
+
+    return blocks;
+}
+
/* Ensure contents of metadata are flushed to an object. */
void MetadataWriter::metadata_flush()
{
#include "metadata.h"
#include "store.h"
#include "sha1.h"
-#include "statcache.h"
#include "util.h"
using std::list;
* invocations to help in creating incremental snapshots. */
LocalDb *db;
-/* Stat cache, which stored data locally to speed the backup process by quickly
- * skipping files which have not changed. */
-StatCache *statcache;
-
/* Keep track of all segments which are needed to reconstruct the snapshot. */
std::set<string> segment_list;
* re-reading the entire contents. */
bool cached = false;
- if (statcache->Find(path, &stat_buf)) {
+ if (metawriter->matched() && metawriter->is_unchanged(&stat_buf)) {
cached = true;
- const list<ObjectReference> &blocks = statcache->get_blocks();
+ list<ObjectReference> blocks = metawriter->get_blocks();
/* If any of the blocks in the object have been expired, then we should
* fall back to fully reading in the file. */
/* If everything looks okay, use the cached information */
if (cached) {
- file_info["checksum"] = statcache->get_checksum();
+ file_info["checksum"] = metawriter->get_checksum();
for (list<ObjectReference>::const_iterator i = blocks.begin();
i != blocks.end(); ++i) {
const ObjectReference &ref = *i;
if (status != NULL)
printf(" [%s]\n", status);
- statcache->Save(path, &stat_buf, file_info["checksum"], object_list);
-
string blocklist = "";
for (list<string>::iterator i = object_list.begin();
i != object_list.end(); ++i) {
printf("%s\n", path.c_str());
- if (metawriter->find(path)) {
- ObjectReference *r = metawriter->old_ref();
- if (r != NULL) {
- string s = r->to_string();
- printf(" cached at %s\n", s.c_str());
- delete r;
- }
- }
+ metawriter->find(path);
file_info["path"] = uri_encode(path);
file_info["mode"] = encode_int(stat_buf.st_mode & 07777, 8);
tss = new TarSegmentStore(backup_dest, db);
/* Initialize the stat cache, for skipping over unchanged files. */
- statcache = new StatCache;
- statcache->Open(localdb_dir.c_str(), desc_buf,
- backup_scheme.size() ? backup_scheme.c_str() : NULL);
-
metawriter = new MetadataWriter(tss, localdb_dir.c_str(), desc_buf,
backup_scheme.size()
? backup_scheme.c_str()
add_segment(root_ref.get_segment());
string backup_root = root_ref.to_string();
- statcache->Close();
- delete statcache;
-
delete metawriter;
tss->sync();
+++ /dev/null
-/* LBS: An LFS-inspired filesystem backup system
- * Copyright (C) 2007 Michael Vrable
- *
- * To speed backups, we maintain a "stat cache" containing selected information
- * about all regular files, including modification times and the list of blocks
- * that comprised the file in the last backup. If the file has not changed
- * according to a stat() call, we may re-use the information contained in the
- * stat cache instead of re-reading the entire file. It is always safe to
- * discard information from the stat cache; this will only cause a file to be
- * re-read to determine that it contains the same data as before.
- *
- * The stat cache is stored in a file called "statcache" in the local backup
- * directory. During a backup, a new statcache file is written out with a
- * suffix based on the current time; at the end of a successful backup this
- * file is renamed over the original statcache file.
- *
- * The information in the statcache file is stored in sorted order as we
- * traverse the filesystem, so that we can read and write it in a purely
- * streaming manner. (This is why we don't include the information in the
- * SQLite local database; doing so is likely less efficient.)
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-#include <ctype.h>
-
-#include <fstream>
-#include <iostream>
-#include <map>
-#include <string>
-
-#include "ref.h"
-#include "statcache.h"
-#include "util.h"
-
-using std::list;
-using std::map;
-using std::string;
-using std::getline;
-using std::ifstream;
-using std::ofstream;
-
-/* Like strcmp, but sorts in the order that files will be visited in the
- * filesystem. That is, we break paths apart at slashes, and compare path
- * components separately. */
-static int pathcmp(const char *path1, const char *path2)
-{
- /* Find the first component in each path. */
- const char *slash1 = strchr(path1, '/');
- const char *slash2 = strchr(path2, '/');
-
- {
- string comp1, comp2;
- if (slash1 == NULL)
- comp1 = path1;
- else
- comp1 = string(path1, slash1 - path1);
-
- if (slash2 == NULL)
- comp2 = path2;
- else
- comp2 = string(path2, slash2 - path2);
-
- /* Directly compare the two components first. */
- if (comp1 < comp2)
- return -1;
- if (comp1 > comp2)
- return 1;
- }
-
- if (slash1 == NULL && slash2 == NULL)
- return 0;
- if (slash1 == NULL)
- return -1;
- if (slash2 == NULL)
- return 1;
-
- return pathcmp(slash1 + 1, slash2 + 1);
-}
-
-void StatCache::Open(const char *path, const char *snapshot_name,
- const char *snapshot_scheme)
-{
- oldpath = path;
- oldpath += "/statcache";
- if (snapshot_scheme != NULL)
- oldpath = oldpath + "-" + snapshot_scheme;
- newpath = oldpath + "." + snapshot_name;
-
- oldcache = new ifstream(oldpath.c_str());
- newcache = new ofstream(newpath.c_str());
-
- /* Read the first entry from the old stat cache into memory before we
- * start. */
- ReadNext();
-}
-
-void StatCache::Close()
-{
- if (oldcache != NULL)
- delete oldcache;
-
- delete newcache;
-
- if (rename(newpath.c_str(), oldpath.c_str()) < 0) {
- fprintf(stderr, "Error renaming statcache from %s to %s: %m\n",
- newpath.c_str(), oldpath.c_str());
- }
-}
-
-/* Read the next entry from the old statcache file and cache it in memory. */
-void StatCache::ReadNext()
-{
- if (oldcache == NULL) {
- end_of_cache = true;
- return;
- }
-
- std::istream &cache = *oldcache;
- map<string, string> fields;
-
- old_is_validated = false;
- old_mtime = -1;
- old_ctime = -1;
- old_inode = -1;
- old_size = -1;
- old_checksum = "";
- old_contents.clear();
-
- /* First, read in the filename. */
- getline(cache, old_name);
- if (!cache) {
- end_of_cache = true;
- return;
- }
- old_name = uri_decode(old_name);
-
- /* Start reading in the fields which follow the filename. */
- string field = "";
- while (!cache.eof()) {
- string line;
- getline(cache, line);
- const char *s = line.c_str();
-
- /* Is the line blank? If so, we have reached the end of this entry. */
- if (s[0] == '\0' || s[0] == '\n')
- break;
-
- /* Is this a continuation line? (Does it start with whitespace?) */
- if (isspace(s[0]) && field != "") {
- fields[field] += line;
- continue;
- }
-
- /* For lines of the form "Key: Value" look for ':' and split the line
- * apart. */
- const char *value = strchr(s, ':');
- if (value == NULL)
- continue;
- field = string(s, value - s);
-
- value++;
- while (isspace(*value))
- value++;
-
- fields[field] = value;
- }
-
- /* Parse the easy fields: mtime, ctime, inode, checksum, ... */
- if (fields.count("validated"))
- old_is_validated = true;
- if (fields.count("mtime"))
- old_mtime = parse_int(fields["mtime"]);
- if (fields.count("ctime"))
- old_ctime = parse_int(fields["ctime"]);
- if (fields.count("inode"))
- old_inode = parse_int(fields["inode"]);
- if (fields.count("size"))
- old_size = parse_int(fields["size"]);
-
- old_checksum = fields["checksum"];
-
- /* Parse the list of blocks. */
- const char *s = fields["blocks"].c_str();
- while (*s != '\0') {
- if (isspace(*s)) {
- s++;
- continue;
- }
-
- string ref = "";
- while (*s != '\0' && !isspace(*s)) {
- char buf[2];
- buf[0] = *s;
- buf[1] = '\0';
- ref += buf;
- s++;
- }
-
- ObjectReference *r = ObjectReference::parse(ref);
- if (r != NULL) {
- old_contents.push_back(*r);
- delete r;
- }
- }
-
- end_of_cache = false;
-}
-
-/* Find information about the given filename in the old stat cache, if it
- * exists. */
-bool StatCache::Find(const string &path, const struct stat *stat_buf)
-{
- while (!end_of_cache && pathcmp(old_name.c_str(), path.c_str()) < 0)
- ReadNext();
-
- /* Could the file be found at all? */
- if (end_of_cache)
- return false;
- if (old_name != path)
- return false;
-
- /* Do we trust cached stat information? */
- if (!old_is_validated)
- return false;
-
- /* Check to see if the file is unchanged. */
- if (stat_buf->st_mtime != old_mtime)
- return false;
- if (stat_buf->st_ctime != old_ctime)
- return false;
- if ((long long)stat_buf->st_ino != old_inode)
- return false;
- if (stat_buf->st_size != old_size)
- return false;
-
- /* File looks to be unchanged. */
- return true;
-}
-
-/* Save stat information about a regular file for future invocations. */
-void StatCache::Save(const string &path, struct stat *stat_buf,
- const string &checksum, const list<string> &blocks)
-{
- /* Was this file in the old stat cache, and is the information unchanged?
- * If so, mark the information "validated", which means we are confident
- * that we can use it to accurately detect changes. (Stat information may
- * not be updated if, for example, there are two writes within a single
- * second and we happen to make the first stat call between them. However,
- * if two stat calls separated in time agree, then we will trust the
- * values.) */
- bool validated = false;
- if (!end_of_cache && path == old_name) {
- if (stat_buf->st_mtime == old_mtime
- && stat_buf->st_ctime == old_ctime
- && (long long)stat_buf->st_ino == old_inode
- && old_checksum == checksum)
- validated = true;
- }
-
- *newcache << uri_encode(path) << "\n";
- *newcache << "mtime: " << encode_int(stat_buf->st_mtime) << "\n"
- << "ctime: " << encode_int(stat_buf->st_ctime) << "\n"
- << "inode: " << encode_int(stat_buf->st_ino) << "\n"
- << "size: " << encode_int(stat_buf->st_size) << "\n"
- << "checksum: " << checksum << "\n";
-
- *newcache << "blocks:";
- if (blocks.size() == 0)
- *newcache << "\n";
- for (list<string>::const_iterator i = blocks.begin();
- i != blocks.end(); ++i) {
- *newcache << " " << *i << "\n";
- }
-
- if (validated)
- *newcache << "validated: true\n";
-
- *newcache << "\n";
-}
+++ /dev/null
-/* LBS: An LFS-inspired filesystem backup system Copyright (C) 2007 Michael
- * Vrable
- *
- * To speed backups, we maintain a "stat cache" containing selected information
- * about all regular files, including modification times and the list of blocks
- * that comprised the file in the last backup. If the file has not changed
- * according to a stat() call, we may re-use the information contained in the
- * stat cache instead of re-reading the entire file. It is always safe to
- * discard information from the stat cache; this will only cause a file to be
- * re-read to determine that it contains the same data as before.
- *
- * The stat cache is stored in a file called "statcache" in the local backup
- * directory. During a backup, a new statcache file is written out with a
- * suffix based on the current time; at the end of a successful backup this
- * file is renamed over the original statcache file.
- *
- * The information in the statcache file is stored in sorted order as we
- * traverse the filesystem, so that we can read and write it in a purely
- * streaming manner. (This is why we don't include the information in the
- * SQLite local database; doing so is likely less efficient.)
- */
-
-#ifndef _LBS_STATCACHE_H
-#define _LBS_STATCACHE_H
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <iostream>
-#include <list>
-#include <string>
-
-#include "ref.h"
-
-class StatCache {
-public:
- void Open(const char *path, const char *snapshot_name,
- const char *snapshot_scheme);
- void Close();
- bool Find(const std::string &path, const struct stat *stat_buf);
- void Save(const std::string &path, struct stat *stat_buf,
- const std::string &checksum,
- const std::list<std::string> &blocks);
-
- std::string get_checksum() const { return old_checksum; }
- const std::list<ObjectReference> &get_blocks() const
- { return old_contents; }
-
-private:
- void ReadNext();
-
- std::string oldpath, newpath;
- std::ifstream *oldcache;
- std::ofstream *newcache;
-
- /* Information about one file read from the old cache. */
- bool end_of_cache;
- bool old_is_validated;
- int64_t old_mtime, old_ctime, old_inode, old_size;
- std::string old_name, old_checksum;
- std::list<ObjectReference> old_contents;
-};
-
-#endif // _LBS_STATCACHE_H