#include <assert.h>
#include <stdio.h>
#include <string.h>
+#include <ctype.h>
#include <fstream>
#include <iostream>
+#include <map>
#include <string>
-#include "format.h"
+#include "ref.h"
#include "statcache.h"
+#include "util.h"
using std::list;
+using std::map;
using std::string;
+using std::getline;
using std::ifstream;
using std::ofstream;
-void StatCache::Open(const char *path, const char *snapshot_name)
+/* Compare two paths one component at a time, splitting at slashes, so that
+ * the ordering matches the order in which files are visited in the
+ * filesystem rather than plain strcmp order (for example "a/b" sorts before
+ * "a.b", because the first components are "a" and "a.b").
+ *
+ * Returns -1, 0 or 1 when path1 sorts before, equal to, or after path2. */
+static int pathcmp(const char *path1, const char *path2)
+{
+    const char *left = path1;
+    const char *right = path2;
+
+    for (;;) {
+        /* A component ends at the next slash, or at the end of the path. */
+        const char *left_end = strchr(left, '/');
+        const char *right_end = strchr(right, '/');
+
+        const string left_comp = (left_end == NULL)
+            ? string(left) : string(left, left_end - left);
+        const string right_comp = (right_end == NULL)
+            ? string(right) : string(right, right_end - right);
+
+        /* Differing components decide the result immediately. */
+        const int order = left_comp.compare(right_comp);
+        if (order < 0)
+            return -1;
+        if (order > 0)
+            return 1;
+
+        /* The components match: a path which ends here sorts first. */
+        if (left_end == NULL)
+            return (right_end == NULL) ? 0 : -1;
+        if (right_end == NULL)
+            return 1;
+
+        /* Both paths continue; move on to the next component. */
+        left = left_end + 1;
+        right = right_end + 1;
+    }
+}
+
+void StatCache::Open(const char *path, const char *snapshot_name,
+ const char *snapshot_scheme)
{
oldpath = path;
oldpath += "/statcache";
+ if (snapshot_scheme != NULL) /* a scheme adds a "-<scheme>" suffix */
+ oldpath = oldpath + "-" + snapshot_scheme;
newpath = oldpath + "." + snapshot_name;
- oldcache = NULL;
+ oldcache = new ifstream(oldpath.c_str()); /* previous cache, read side */
newcache = new ofstream(newpath.c_str());
+
+ /* Read the first entry from the old stat cache into memory before we
+ * start. If the old cache file could not be opened, the stream is in a
+ * failed state and ReadNext() reports end_of_cache on its first read. */
+ ReadNext();
}
void StatCache::Close()
}
}
+/* Read the next entry from the old statcache file and cache it in memory.
+ *
+ * An entry consists of the URI-encoded filename on a line of its own,
+ * followed by "Key: Value" lines and terminated by a blank line or the end
+ * of the file. A line which starts with whitespace continues the value of
+ * the preceding key. The parsed values are stored in the old_* members;
+ * end_of_cache is set when no further entry is available and cleared when
+ * an entry was read. */
+void StatCache::ReadNext()
+{
+    if (oldcache == NULL) {
+        end_of_cache = true;
+        return;
+    }
+
+    std::istream &cache = *oldcache;
+    map<string, string> fields;
+
+    /* Reset the cached entry, so that fields missing from the file keep
+     * these defaults instead of values left over from the previous entry. */
+    old_is_validated = false;
+    old_mtime = -1;
+    old_ctime = -1;
+    old_inode = -1;
+    old_size = -1;
+    old_checksum = "";
+    old_contents.clear();
+
+    /* First, read in the filename. */
+    getline(cache, old_name);
+    if (!cache) {
+        end_of_cache = true;
+        return;
+    }
+    old_name = uri_decode(old_name);
+
+    /* Read in the fields which follow the filename. Testing the result of
+     * getline() directly stops the loop at end of file and on read errors
+     * alike. */
+    string field;
+    string line;
+    while (getline(cache, line)) {
+        const char *s = line.c_str();
+
+        /* Is the line blank? If so, we have reached the end of this entry.
+         * (getline() has already removed the trailing newline.) */
+        if (s[0] == '\0')
+            break;
+
+        /* Is this a continuation line? (Does it start with whitespace?)
+         * The cast to unsigned char keeps isspace() well-defined for bytes
+         * outside the ASCII range. */
+        if (isspace(static_cast<unsigned char>(s[0])) && !field.empty()) {
+            fields[field] += line;
+            continue;
+        }
+
+        /* For lines of the form "Key: Value" look for ':' and split the line
+         * apart. Lines without a ':' are ignored. */
+        const char *value = strchr(s, ':');
+        if (value == NULL)
+            continue;
+        field = string(s, value - s);
+
+        /* Skip the ':' and any whitespace in front of the value. */
+        value++;
+        while (isspace(static_cast<unsigned char>(*value)))
+            value++;
+
+        fields[field] = value;
+    }
+
+    /* Parse the easy fields: mtime, ctime, inode, checksum, ... */
+    if (fields.count("validated"))
+        old_is_validated = true;
+    if (fields.count("mtime"))
+        old_mtime = parse_int(fields["mtime"]);
+    if (fields.count("ctime"))
+        old_ctime = parse_int(fields["ctime"]);
+    if (fields.count("inode"))
+        old_inode = parse_int(fields["inode"]);
+    if (fields.count("size"))
+        old_size = parse_int(fields["size"]);
+
+    old_checksum = fields["checksum"];
+
+    /* Parse the list of blocks: object references separated by whitespace. */
+    const char *s = fields["blocks"].c_str();
+    while (*s != '\0') {
+        if (isspace(static_cast<unsigned char>(*s))) {
+            s++;
+            continue;
+        }
+
+        /* Find the end of this token and copy it out in one step. */
+        const char *end = s;
+        while (*end != '\0' && !isspace(static_cast<unsigned char>(*end)))
+            end++;
+        string ref(s, end - s);
+        s = end;
+
+        /* Tokens which do not parse as a reference are dropped. */
+        ObjectReference *r = ObjectReference::parse(ref);
+        if (r != NULL) {
+            old_contents.push_back(*r);
+            delete r;
+        }
+    }
+
+    end_of_cache = false;
+}
+
+/* Look up the given filename in the old stat cache. Returns true only when
+ * an entry for exactly this path exists, the entry is marked validated, and
+ * its recorded mtime, ctime, inode and size all agree with stat_buf.
+ *
+ * The old cache is only ever read forwards: entries which sort before path
+ * (in pathcmp order) are skipped and not revisited. */
+bool StatCache::Find(const string &path, const struct stat *stat_buf)
+{
+    /* Advance past every old entry which sorts before the requested path. */
+    for (;;) {
+        /* Could the file be found at all? */
+        if (end_of_cache)
+            return false;
+        if (pathcmp(old_name.c_str(), path.c_str()) >= 0)
+            break;
+        ReadNext();
+    }
+
+    /* The current entry is the first one not before path; it must match
+     * exactly. */
+    if (old_name != path)
+        return false;
+
+    /* Do we trust cached stat information? */
+    if (!old_is_validated)
+        return false;
+
+    /* The file looks unchanged only if every recorded stat value agrees. */
+    const bool unchanged = stat_buf->st_mtime == old_mtime
+        && stat_buf->st_ctime == old_ctime
+        && (long long)stat_buf->st_ino == old_inode
+        && stat_buf->st_size == old_size;
+    return unchanged;
+}
+
/* Save stat information about a regular file for future invocations. */
void StatCache::Save(const string &path, struct stat *stat_buf,
const string &checksum, const list<string> &blocks)
{
+ /* Was this file in the old stat cache, and is the information unchanged?
+ * If so, mark the information "validated", which means we are confident
+ * that we can use it to accurately detect changes. (Stat information may
+ * not be updated if, for example, there are two writes within a single
+ * second and we happen to make the first stat call between them. However,
+ * if two stat calls separated in time agree, then we will trust the
+ * values.)
+ *
+ * Only the old entry currently held in memory is compared; the old cache
+ * is not searched here, so this presumably relies on the caller having
+ * advanced it to this path beforehand (e.g. through Find) -- TODO confirm.
+ * NOTE(review): st_size is written out below and compared by Find(), but
+ * it is not part of this test; presumably the checksum comparison covers
+ * content changes -- confirm. */
+ bool validated = false;
+ if (!end_of_cache && path == old_name) {
+ if (stat_buf->st_mtime == old_mtime
+ && stat_buf->st_ctime == old_ctime
+ && (long long)stat_buf->st_ino == old_inode
+ && old_checksum == checksum)
+ validated = true;
+ }
+
*newcache << uri_encode(path) << "\n";
*newcache << "mtime: " << encode_int(stat_buf->st_mtime) << "\n"
<< "ctime: " << encode_int(stat_buf->st_ctime) << "\n"
<< "inode: " << encode_int(stat_buf->st_ino) << "\n"
+ << "size: " << encode_int(stat_buf->st_size) << "\n"
<< "checksum: " << checksum << "\n";
*newcache << "blocks:";
*newcache << " " << *i << "\n";
}
+ if (validated)
+ *newcache << "validated: true\n";
+
*newcache << "\n";
}