1 /* Cumulus: Efficient Filesystem Backup to the Cloud
2 * Copyright (C) 2007-2008 The Cumulus Developers
3 * See the AUTHORS file for a list of contributors.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
20 /* Handling of metadata written to backup snapshots. This manages the writing
21 * of file metadata into new backup snapshots, including breaking the metadata
22 * log apart across separate objects. Eventually this should include unified
23 * handling of the statcache, and re-use of metadata between snapshots.
42 using std::ostringstream;
// Size threshold, in bytes: add() compares the accumulated size of queued
// item text (chunk_size) against this value after queuing each item.
44 static const size_t LBS_METADATA_BLOCK_SIZE = 65536;
46 // If true, forces a full write of metadata: will not include pointers to
47 // metadata in old snapshots.
// Consulted in add(): when set, a new item is never matched against
// old_metadata, even if the two dictionaries are equal.
48 bool flag_full_metadata = false;
50 /* TODO: Move to header file */
/* Like strcmp, but sorts in the order that files will be visited in the
 * filesystem.  That is, we break paths apart at slashes, and compare path
 * components separately.
 *
 * Returns -1, 0 or 1 when path1 sorts before, equal to, or after path2.
 * Components are compared as whole strings, so "a/b" sorts before "a.b"
 * (component "a" < component "a.b") even though strcmp orders them the other
 * way round.  When all components of one path match the leading components of
 * the other (e.g. "a" and "a/b"), the shorter path sorts first. */
static int pathcmp(const char *path1, const char *path2)
{
    /* Walk both paths one component at a time.  A loop is used rather than
     * recursion so that stack use does not grow with path depth. */
    for (;;) {
        /* Find the first component in each path. */
        const char *slash1 = strchr(path1, '/');
        const char *slash2 = strchr(path2, '/');

        const std::string comp1 = (slash1 == NULL)
            ? std::string(path1)
            : std::string(path1, slash1 - path1);
        const std::string comp2 = (slash2 == NULL)
            ? std::string(path2)
            : std::string(path2, slash2 - path2);

        /* Directly compare the two components first. */
        if (comp1 < comp2)
            return -1;
        if (comp2 < comp1)
            return 1;

        /* Components are equal: decide based on which path (if any) has
         * further components. */
        if (slash1 == NULL && slash2 == NULL)
            return 0;
        if (slash1 == NULL)
            return -1;
        if (slash2 == NULL)
            return 1;

        /* Both paths continue; compare the remainders. */
        path1 = slash1 + 1;
        path2 = slash2 + 1;
    }
}
/* Encode a dictionary of string key/value pairs into a sequence of lines of
 * the form "key: value".  If it exists, the key "name" is treated specially
 * and will be listed first; all other keys follow in the map's iteration
 * order.  Keys and values are written verbatim (no escaping is applied
 * here).  Returns the empty string for an empty dictionary. */
static std::string encode_dict(const std::map<std::string, std::string>& dict)
{
    std::string result;

    /* Look "name" up once; the iterator is reused below to skip the entry. */
    const std::map<std::string, std::string>::const_iterator name
        = dict.find("name");
    if (name != dict.end()) {
        result += "name: " + name->second + "\n";
    }

    for (std::map<std::string, std::string>::const_iterator i = dict.begin();
         i != dict.end(); ++i) {
        if (i == name)
            continue;           /* Already emitted as the first line. */
        result += i->first + ": " + i->second + "\n";
    }

    return result;
}
// Constructor: derives the statcache file names, opens the previous statcache
// for reading and a temporary new statcache for writing, and clears the
// old-metadata end-of-file flag.
// NOTE(review): `path` is used below but is not declared in the visible
// signature lines; presumably it is a constructor parameter -- confirm.
112 MetadataWriter::MetadataWriter(TarSegmentStore *store,
114 const char *snapshot_name,
115 const char *snapshot_scheme)
// Statcache file name: "<path>/statcache2", with "-<snapshot_scheme>"
// appended when a non-empty scheme is given.
117 statcache_path = path;
118 statcache_path += "/statcache2";
119 if (snapshot_scheme != NULL && strlen(snapshot_scheme) > 0)
120 statcache_path = statcache_path + "-" + snapshot_scheme;
// The new statcache is written to "<statcache_path>.<snapshot_name>".
121 statcache_tmp_path = statcache_path + "." + snapshot_name;
// The result of this fopen is not checked in the lines visible here;
// read_statcache() treats a NULL statcache_in as "no old metadata".
123 statcache_in = fopen(statcache_path.c_str(), "r");
// Failing to open the output file is reported on stderr and passed to
// fatal().  "%m" is the glibc printf extension that prints strerror(errno).
125 statcache_out = fopen(statcache_tmp_path.c_str(), "w");
126 if (statcache_out == NULL) {
127 fprintf(stderr, "Error opening statcache %s: %m\n",
128 statcache_tmp_path.c_str());
129 fatal("Error opening statcache");
132 old_metadata_eof = false;
138 /* Read the next entry from the old statcache file, loading it into
 * old_metadata and old_metadata_loc.  Sets old_metadata_eof when no further
 * entry can be read. */
// Loads one entry of the old statcache: the "@@" location line goes into
// old_metadata_loc and the following "Key: Value" lines into old_metadata.
// old_metadata_eof is set when there is no input file, when no "@@" line can
// be read, or when end-of-file is reached with no fields loaded.
140 void MetadataWriter::read_statcache()
// No old statcache was opened, so there is nothing to read.
142 if (statcache_in == NULL) {
143 old_metadata_eof = true;
// Discard the fields of the previously loaded entry.
147 old_metadata.clear();
151 string field = ""; // Last field to be read in
153 /* Look for a first line starting with "@@", which tells where the metadata
154 * can be found in the metadata log of an old snapshot. */
// A failed getline or a line that does not begin with "@@" is treated as the
// end of the old metadata.
155 if (getline(&buf, &n, statcache_in) < 0
156 || buf == NULL || buf[0] != '@' || buf[1] != '@') {
157 old_metadata_eof = true;
// Strip the trailing newline; the location is the text after the "@@" marker.
161 if (strchr(buf, '\n') != NULL)
162 *strchr(buf, '\n') = '\0';
163 old_metadata_loc = buf + 2;
165 /* After the initial line follows the metadata, as key-value pairs. */
166 while (!feof(statcache_in)) {
167 if (getline(&buf, &n, statcache_in) < 0)
170 char *eol = strchr(buf, '\n');
174 /* Is the line blank? If so, we have reached the end of this entry. */
178 /* Is this a continuation line? (Does it start with whitespace?) */
// A continuation line is appended to the value of the most recent field,
// separated by "\n"; buf is appended as-is, leading whitespace included.
// NOTE(review): isspace() is called on a plain char here and below; a byte
// with the high bit set would be passed as a negative value -- consider
// casting to unsigned char.
179 if (isspace(buf[0]) && field != "") {
180 old_metadata[field] += string("\n") + buf;
184 /* For lines of the form "Key: Value" look for ':' and split the line
// The value is the text after the first ':' with leading whitespace skipped;
// it is stored in old_metadata under `field`.  How `field` is assigned from
// the key part is not shown in the lines visible here.
186 char *value = strchr(buf, ':');
193 while (isspace(*value))
196 old_metadata[field] = value;
// Hitting end-of-file without loading any field means there was no entry.
199 if (feof(statcache_in) && old_metadata.size() == 0) {
200 old_metadata_eof = true;
// Looks for `path` among the entries of the old statcache.  While old
// entries remain, the URI-decoded "name" field of the currently loaded entry
// is compared against `path` with pathcmp() (component-wise path ordering).
// NOTE(review): how the comparison result is acted upon, and the return
// values, are not shown in the lines visible here; presumably the loop
// advances with read_statcache() while the old entry sorts before `path` --
// confirm.
206 bool MetadataWriter::find(const string& path)
208 const char *path_str = path.c_str();
209 while (!old_metadata_eof) {
// operator[] inserts an empty "name" value if the loaded entry has none.
210 string old_path = uri_decode(old_metadata["name"]);
211 int cmp = pathcmp(old_path.c_str(), path_str);
223 /* Does a file appear to be unchanged from the previous time it was backed up,
224 * based on stat information? */
// Compares *stat_buf against the currently loaded old_metadata entry.  The
// checks, in order: a non-zero "volatile" field, then "ctime", "mtime",
// "size" and "inode"; for each of the latter four both the presence of the
// field and equality with the stat value are tested.
// NOTE(review): the statement executed when each condition holds is not
// shown in the lines visible here; presumably each one returns false and the
// function returns true only if every check passes -- confirm.
225 bool MetadataWriter::is_unchanged(const struct stat *stat_buf)
227 if (old_metadata.find("volatile") != old_metadata.end()
228 && parse_int(old_metadata["volatile"]) != 0)
231 if (old_metadata.find("ctime") == old_metadata.end())
233 if (stat_buf->st_ctime != parse_int(old_metadata["ctime"]))
236 if (old_metadata.find("mtime") == old_metadata.end())
238 if (stat_buf->st_mtime != parse_int(old_metadata["mtime"]))
241 if (old_metadata.find("size") == old_metadata.end())
243 if (stat_buf->st_size != parse_int(old_metadata["size"]))
246 if (old_metadata.find("inode") == old_metadata.end())
// The inode identity is compared as the string
// "<major(st_dev)>/<minor(st_dev)>/<st_ino>", each part run through
// encode_int().
248 string inode = encode_int(major(stat_buf->st_dev))
249 + "/" + encode_int(minor(stat_buf->st_dev))
250 + "/" + encode_int(stat_buf->st_ino);
251 if (inode != old_metadata["inode"])
// Builds a list of object references from the "data" field of the currently
// loaded old_metadata entry.
// NOTE(review): most of the tokenising code is not shown in the lines visible
// here, including the declaration of `ref` and the code that fills `blocks`;
// the field appears to be scanned as whitespace-separated tokens -- confirm.
257 list<ObjectReference> MetadataWriter::get_blocks()
259 list<ObjectReference> blocks;
261 /* Parse the list of blocks. */
// operator[] inserts an empty "data" value if the entry has none.
262 const char *s = old_metadata["data"].c_str();
// Advance over one run of non-whitespace characters.
270 while (*s != '\0' && !isspace(*s)) {
278 ObjectReference r = ObjectReference::parse(ref);
286 /* Ensure contents of metadata are flushed to an object. */
// Serialises the queued `items` into one metadata object, appends a reference
// to that object to metadata_root, and writes every item to the new statcache
// together with the location of its text.
// NOTE(review): several lines of this function are not shown here (for
// example where item text is appended to `metadata`, where `offset` is
// initialised, and how `meta` is written and released); the comments below
// describe only the visible statements.
287 void MetadataWriter::metadata_flush()
// `metadata` accumulates the object body; `indirect` holds a pending
// reference that is merged with following item references where possible.
291 ostringstream metadata;
292 ObjectReference indirect;
293 for (list<MetadataItem>::iterator i = items.begin();
294 i != items.end(); ++i) {
295 // If indirectly referencing any other metadata logs, be sure those
296 // segments are properly referenced.
298 db->UseObject(i->ref);
301 // Write out an indirect reference to any previous objects which could
// The pending reference is emitted when the current item is not reused, or
// when its reference cannot be merged into the pending one.
303 if (!i->reused || !indirect.merge(i->ref)) {
304 if (!indirect.is_null()) {
305 string refstr = indirect.to_string();
306 metadata << "@" << refstr << "\n";
// "+ 2" accounts for the "@" prefix and the trailing newline just written.
307 offset += refstr.size() + 2;
316 indirect = ObjectReference();
322 offset += i->text.size();
// Emit any reference still pending after the last item.
325 if (!indirect.is_null()) {
326 string refstr = indirect.to_string();
327 metadata << "@" << refstr << "\n";
328 offset += refstr.size() + 2;
329 indirect = ObjectReference();
332 string m = metadata.str();
336 /* Write current metadata information to a new object. */
// NOTE(review): no matching delete for `meta` appears in the visible lines;
// confirm the object is released once it has been written.
337 LbsObject *meta = new LbsObject;
338 meta->set_group("metadata");
339 meta->set_data(m.data(), m.size(), NULL);
342 /* Write a reference to this block in the root. */
343 ObjectReference ref = meta->get_ref();
344 metadata_root << "@" << ref.to_string() << "\n";
349 /* Write these files out to the statcache, and include a reference to where
350 * the metadata lives (so we can re-use it if it has not changed). */
351 for (list<MetadataItem>::const_iterator i = items.begin();
352 i != items.end(); ++i) {
// Narrow the object reference to this item's byte range within the object.
353 ObjectReference r = ref;
354 r.set_range(i->offset, i->text.size());
// Statcache entry layout: "@@<reference>" on one line, then the item text.
// read_statcache() parses this same layout.
359 string refstr = r.to_string();
360 fprintf(statcache_out, "@@%s\n%s", refstr.c_str(), i->text.c_str());
// Queues the metadata for one file.  The item text is the encode_dict()
// encoding of `info` followed by an extra "\n", i.e. a blank line after the
// "key: value" lines.
// NOTE(review): the declaration of `item` and the bodies of both conditionals
// are not shown in the lines visible here.  Presumably a non-null old
// reference marks the item as reused, and exceeding the block size triggers
// metadata_flush() -- confirm.
// NOTE(review): `info` is taken by value, so the dictionary is copied on
// every call.
367 void MetadataWriter::add(dictionary info)
372 item.text += encode_dict(info) + "\n";
// Only when the new metadata equals the loaded old entry, and a full metadata
// write has not been forced, is the old entry's location parsed.
374 if (info == old_metadata && !flag_full_metadata) {
375 ObjectReference ref = ObjectReference::parse(old_metadata_loc);
376 if (!ref.is_null()) {
// Queue the item and track the total size of queued text.
382 items.push_back(item);
383 chunk_size += item.text.size();
385 if (chunk_size > LBS_METADATA_BLOCK_SIZE)
389 ObjectReference MetadataWriter::close()
392 const string root_data = metadata_root.str();
394 LbsObject *root = new LbsObject;
395 root->set_group("metadata");
396 root->set_data(root_data.data(), root_data.size(), NULL);
398 db->UseObject(root->get_ref());
400 ObjectReference ref = root->get_ref();
403 fclose(statcache_out);
404 if (rename(statcache_tmp_path.c_str(), statcache_path.c_str()) < 0) {
405 fprintf(stderr, "Error renaming statcache from %s to %s: %m\n",
406 statcache_tmp_path.c_str(), statcache_path.c_str());