1 /* LBS: An LFS-inspired filesystem backup system Copyright (C) 2007 Michael
4 * To speed backups, we maintain a "stat cache" containing selected information
5 * about all regular files, including modification times and the list of blocks
6 * that comprised the file in the last backup. If the file has not changed
7 * according to a stat() call, we may re-use the information contained in the
8 * stat cache instead of re-reading the entire file. It is always safe to
9 * discard information from the stat cache; this will only cause a file to be
10 * re-read to determine that it contains the same data as before.
12 * The stat cache is stored in a file called "statcache" in the local backup
13 * directory. During a backup, a new statcache file is written out with a
14 * suffix based on the current time; at the end of a successful backup this
15 * file is renamed over the original statcache file.
17 * The information in the statcache file is stored in sorted order as we
18 * traverse the filesystem, so that we can read and write it in a purely
19 * streaming manner. (This is why we don't include the information in the
20 * SQLite local database; doing so is likely less efficient.)
34 #include "statcache.h"
44 /* Like strcmp, but sorts in the order that files will be visited in the
45 * filesystem. That is, we break paths apart at slashes, and compare path
46 * components separately. */
47 static int pathcmp(const char *path1, const char *path2)
49 /* Find the first component in each path. */
50 const char *slash1 = strchr(path1, '/');
51 const char *slash2 = strchr(path2, '/');
58 comp1 = string(path1, slash1 - path1);
63 comp2 = string(path2, slash2 - path2);
65 /* Directly compare the two components first. */
72 if (slash1 == NULL && slash2 == NULL)
79 return pathcmp(slash1 + 1, slash2 + 1);
82 void StatCache::Open(const char *path, const char *snapshot_name,
83 const char *snapshot_scheme)
86 oldpath += "/statcache";
87 if (snapshot_scheme != NULL)
88 oldpath = oldpath + "-" + snapshot_scheme;
89 newpath = oldpath + "." + snapshot_name;
91 oldcache = new ifstream(oldpath.c_str());
92 newcache = new ofstream(newpath.c_str());
94 /* Read the first entry from the old stat cache into memory before we
99 void StatCache::Close()
101 if (oldcache != NULL)
106 if (rename(newpath.c_str(), oldpath.c_str()) < 0) {
107 fprintf(stderr, "Error renaming statcache from %s to %s: %m\n",
108 newpath.c_str(), oldpath.c_str());
112 /* Read the next entry from the old statcache file and cache it in memory. */
113 void StatCache::ReadNext()
115 if (oldcache == NULL) {
120 std::istream &cache = *oldcache;
121 map<string, string> fields;
123 old_is_validated = false;
129 old_contents.clear();
131 /* First, read in the filename. */
132 getline(cache, old_name);
137 old_name = uri_decode(old_name);
139 /* Start reading in the fields which follow the filename. */
141 while (!cache.eof()) {
143 getline(cache, line);
144 const char *s = line.c_str();
146 /* Is the line blank? If so, we have reached the end of this entry. */
147 if (s[0] == '\0' || s[0] == '\n')
150 /* Is this a continuation line? (Does it start with whitespace?) */
151 if (isspace(s[0]) && field != "") {
152 fields[field] += line;
156 /* For lines of the form "Key: Value" look for ':' and split the line
158 const char *value = strchr(s, ':');
161 field = string(s, value - s);
164 while (isspace(*value))
167 fields[field] = value;
170 /* Parse the easy fields: mtime, ctime, inode, checksum, ... */
171 if (fields.count("validated"))
172 old_is_validated = true;
173 if (fields.count("mtime"))
174 old_mtime = parse_int(fields["mtime"]);
175 if (fields.count("ctime"))
176 old_ctime = parse_int(fields["ctime"]);
177 if (fields.count("inode"))
178 old_inode = parse_int(fields["inode"]);
179 if (fields.count("size"))
180 old_size = parse_int(fields["size"]);
182 old_checksum = fields["checksum"];
184 /* Parse the list of blocks. */
185 const char *s = fields["blocks"].c_str();
193 while (*s != '\0' && !isspace(*s)) {
201 ObjectReference *r = ObjectReference::parse(ref);
203 old_contents.push_back(*r);
208 end_of_cache = false;
211 /* Find information about the given filename in the old stat cache, if it
213 bool StatCache::Find(const string &path, const struct stat *stat_buf)
215 while (!end_of_cache && pathcmp(old_name.c_str(), path.c_str()) < 0)
218 /* Could the file be found at all? */
221 if (old_name != path)
224 /* Do we trust cached stat information? */
225 if (!old_is_validated)
228 /* Check to see if the file is unchanged. */
229 if (stat_buf->st_mtime != old_mtime)
231 if (stat_buf->st_ctime != old_ctime)
233 if ((long long)stat_buf->st_ino != old_inode)
235 if (stat_buf->st_size != old_size)
238 /* File looks to be unchanged. */
242 /* Save stat information about a regular file for future invocations. */
243 void StatCache::Save(const string &path, struct stat *stat_buf,
244 const string &checksum, const list<string> &blocks)
246 /* Was this file in the old stat cache, and is the information unchanged?
247 * If so, mark the information "validated", which means we are confident
248 * that we can use it to accurately detect changes. (Stat information may
249 * not be updated if, for example, there are two writes within a single
250 * second and we happen to make the first stat call between them. However,
251 * if two stat calls separated in time agree, then we will trust the
253 bool validated = false;
254 if (!end_of_cache && path == old_name) {
255 if (stat_buf->st_mtime == old_mtime
256 && stat_buf->st_ctime == old_ctime
257 && (long long)stat_buf->st_ino == old_inode
258 && old_checksum == checksum)
262 *newcache << uri_encode(path) << "\n";
263 *newcache << "mtime: " << encode_int(stat_buf->st_mtime) << "\n"
264 << "ctime: " << encode_int(stat_buf->st_ctime) << "\n"
265 << "inode: " << encode_int(stat_buf->st_ino) << "\n"
266 << "size: " << encode_int(stat_buf->st_size) << "\n"
267 << "checksum: " << checksum << "\n";
269 *newcache << "blocks:";
270 if (blocks.size() == 0)
272 for (list<string>::const_iterator i = blocks.begin();
273 i != blocks.end(); ++i) {
274 *newcache << " " << *i << "\n";
278 *newcache << "validated: true\n";