Replace boost::scoped_ptr with std::unique_ptr.
[cumulus.git] / main.cc
diff --git a/main.cc b/main.cc
index e10a04a..1335bea 100644 (file)
--- a/main.cc
+++ b/main.cc
@@ -41,6 +41,7 @@
 #include <iostream>
 #include <list>
 #include <map>
+#include <memory>
 #include <set>
 #include <sstream>
 #include <string>
@@ -61,6 +62,7 @@ using std::map;
 using std::string;
 using std::vector;
 using std::ostream;
+using std::unique_ptr;
 
 /* Version information.  This will be filled in by the Makefile. */
 #ifndef CUMULUS_VERSION
@@ -82,11 +84,6 @@ static char *block_buf;
  * invocations to help in creating incremental snapshots. */
 LocalDb *db;
 
-/* Snapshot intent: 1=daily, 7=weekly, etc.  This is not used directly, but is
- * stored in the local database and can help guide segment cleaning and
- * snapshot expiration policies. */
-double snapshot_intent = 1.0;
-
 /* Selection of files to include/exclude in the snapshot. */
 PathFilterList filter_rules;
 
@@ -231,7 +228,7 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
     /* If the file is new or changed, we must read in the contents a block at a
      * time. */
     if (!cached) {
-        Hash *hash = Hash::New();
+        unique_ptr<Hash> file_hash(Hash::New());
         Subfile subfile(db);
         subfile.load_old_blocks(old_blocks);
 
@@ -245,7 +242,7 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
                 break;
             }
 
-            hash->update(block_buf, bytes);
+            file_hash->update(block_buf, bytes);
 
             // Sparse file processing: if we read a block of all zeroes, encode
             // that explicitly.
@@ -262,10 +259,9 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
             double block_age = 0.0;
             ObjectReference ref;
 
-            Hash *hash = Hash::New();
-            hash->update(block_buf, bytes);
-            string block_csum = hash->digest_str();
-            delete hash;
+            unique_ptr<Hash> block_hash(Hash::New());
+            block_hash->update(block_buf, bytes);
+            string block_csum = block_hash->digest_str();
 
             if (all_zero) {
                 ref = ObjectReference(ObjectReference::REF_ZERO);
@@ -297,9 +293,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
                     if (object_group == 0) {
                         o->set_group("data");
                     } else {
-                        char group[32];
-                        sprintf(group, "compacted-%d", object_group);
-                        o->set_group(group);
+                        o->set_group(string_printf("compacted-%d",
+                                                   object_group));
                     }
                     if (status == NULL)
                         status = "partial";
@@ -320,6 +315,12 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
 
             while (!refs.empty()) {
                 ref = refs.front(); refs.pop_front();
+
+                // The file-level checksum guarantees integrity of the data.
+                // To reduce the metadata log size, do not include checksums on
+                // individual objects.
+                ref.clear_checksum();
+
                 object_list.push_back(ref.to_string());
                 db->UseObject(ref);
             }
@@ -329,26 +330,22 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path,
                 status = "old";
         }
 
-        file_info["checksum"] = hash->digest_str();
-        delete hash;
+        file_info["checksum"] = file_hash->digest_str();
     }
 
-    // Sanity check: if we are rebuilding the statcache, but the file looks
-    // like it hasn't changed, then the newly-computed checksum should match
-    // the checksum in the statcache.  If not, we have possible disk corruption
-    // and report a warning.
-    if (flag_rebuild_statcache) {
-        if (found
-            && metawriter->is_unchanged(&stat_buf)
-            && file_info["checksum"] != metawriter->get_checksum()) {
-            fprintf(stderr,
-                    "Warning: Checksum for %s does not match expected value\n"
-                    "    expected: %s\n"
-                    "    actual:   %s\n",
-                    path.c_str(),
-                    metawriter->get_checksum().c_str(),
-                    file_info["checksum"].c_str());
-        }
+    // Sanity check: if the file looks like it hasn't changed, then the
+    // newly-computed checksum should match the checksum in the statcache.  If
+    // not, we have possible disk corruption and report a warning.
+    if (found
+        && metawriter->is_unchanged(&stat_buf)
+        && file_info["checksum"] != metawriter->get_checksum()) {
+        fprintf(stderr,
+                "Warning: Checksum for %s does not match expected value\n"
+                "    expected: %s\n"
+                "    actual:   %s\n",
+                path.c_str(),
+                metawriter->get_checksum().c_str(),
+                file_info["checksum"].c_str());
     }
 
     if (verbose && status != NULL)
@@ -657,8 +654,7 @@ void usage(const char *program)
         "  --signature-filter=COMMAND\n"
         "                       program though which to filter descriptor\n"
         "  --scheme=NAME        optional name for this snapshot\n"
-        "  --intent=FLOAT       intended backup type: 1=daily, 7=weekly, ...\n"
-        "                           (defaults to \"1\")\n"
+        "  --intent=FLOAT       DEPRECATED: ignored, and will be removed soon\n"
         "  --full-metadata      do not re-use metadata from previous backups\n"
         "  --rebuild-statcache  re-read all file data to verify statcache\n"
         "  -v --verbose         list files as they are backed up\n"
@@ -689,7 +685,7 @@ int main(int argc, char *argv[])
             {"dest", 1, 0, 0},              // 3
             {"scheme", 1, 0, 0},            // 4
             {"signature-filter", 1, 0, 0},  // 5
-            {"intent", 1, 0, 0},            // 6
+            {"intent", 1, 0, 0},            // 6, DEPRECATED
             {"full-metadata", 0, 0, 0},     // 7
             {"tmpdir", 1, 0, 0},            // 8
             {"upload-script", 1, 0, 0},     // 9
@@ -729,9 +725,9 @@ int main(int argc, char *argv[])
                 signature_filter = optarg;
                 break;
             case 6:     // --intent
-                snapshot_intent = atof(optarg);
-                if (snapshot_intent <= 0)
-                    snapshot_intent = 1;
+                fprintf(stderr,
+                        "Warning: The --intent= option is deprecated and will "
+                        "be removed in the future.\n");
                 break;
             case 7:     // --full-metadata
                 flag_full_metadata = true;
@@ -806,7 +802,7 @@ int main(int argc, char *argv[])
      * a temporary directory for staging files.  Otherwise, write backups
      * directly to the destination directory. */
     if (backup_script != "") {
-        tmp_dir = tmp_dir + "/lbs." + generate_uuid();
+        tmp_dir = tmp_dir + "/cumulus." + generate_uuid();
         if (mkdir(tmp_dir.c_str(), 0700) < 0) {
             fprintf(stderr, "Cannot create temporary directory %s: %m\n",
                     tmp_dir.c_str());
@@ -820,25 +816,21 @@ int main(int argc, char *argv[])
     /* Store the time when the backup started, so it can be included in the
      * snapshot name. */
     time_t now;
-    struct tm time_buf_local, time_buf_utc;
-    char desc_buf[256];
     time(&now);
-    localtime_r(&now, &time_buf_local);
-    gmtime_r(&now, &time_buf_utc);
-    strftime(desc_buf, sizeof(desc_buf), "%Y%m%dT%H%M%S", &time_buf_utc);
+    string timestamp
+        = TimeFormat::format(now, TimeFormat::FORMAT_FILENAME, true);
 
     /* Open the local database which tracks all objects that are stored
      * remotely, for efficient incrementals.  Provide it with the name of this
      * snapshot. */
     string database_path = localdb_dir + "/localdb.sqlite";
     db = new LocalDb;
-    db->Open(database_path.c_str(), desc_buf, backup_scheme.c_str(),
-             snapshot_intent);
+    db->Open(database_path.c_str(), timestamp.c_str(), backup_scheme.c_str());
 
     tss = new TarSegmentStore(remote, db);
 
     /* Initialize the stat cache, for skipping over unchanged files. */
-    metawriter = new MetadataWriter(tss, localdb_dir.c_str(), desc_buf,
+    metawriter = new MetadataWriter(tss, localdb_dir.c_str(), timestamp.c_str(),
                                     backup_scheme.c_str());
 
     for (int i = optind; i < argc; i++) {
@@ -854,48 +846,43 @@ int main(int argc, char *argv[])
     tss->dump_stats();
     delete tss;
 
-    /* Write out a checksums file which lists the checksums for all the
-     * segments included in this snapshot.  The format is designed so that it
-     * may be easily verified using the sha1sums command. */
-    const char csum_type[] = "sha1";
-    string checksum_filename = "snapshot-";
+    /* Write out a summary file with metadata for all the segments in this
+     * snapshot.  It can be used to reconstruct database contents if needed,
+     * and contains hash values for the segments for quick integrity checks. */
+    string dbmeta_filename = "snapshot-";
     if (backup_scheme.size() > 0)
-        checksum_filename += backup_scheme + "-";
-    checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums";
-    RemoteFile *checksum_file = remote->alloc_file(checksum_filename,
-                                                   "checksums");
-    FILE *checksums = fdopen(checksum_file->get_fd(), "w");
+        dbmeta_filename += backup_scheme + "-";
+    dbmeta_filename += timestamp + ".meta" + filter_extension;
+    RemoteFile *dbmeta_file = remote->alloc_file(dbmeta_filename, "meta");
+    unique_ptr<FileFilter> dbmeta_filter(FileFilter::New(dbmeta_file->get_fd(),
+                                                         filter_program));
+    if (dbmeta_filter == NULL) {
+        fprintf(stderr, "Unable to open descriptor output file: %m\n");
+        return 1;
+    }
+    FILE *dbmeta = fdopen(dbmeta_filter->get_wrapped_fd(), "w");
 
     std::set<string> segment_list = db->GetUsedSegments();
     for (std::set<string>::iterator i = segment_list.begin();
          i != segment_list.end(); ++i) {
-        string seg_path, seg_csum;
-        if (db->GetSegmentChecksum(*i, &seg_path, &seg_csum)) {
-            const char *raw_checksum = NULL;
-            if (strncmp(seg_csum.c_str(), csum_type,
-                        strlen(csum_type)) == 0) {
-                raw_checksum = seg_csum.c_str() + strlen(csum_type);
-                if (*raw_checksum == '=')
-                    raw_checksum++;
-                else
-                    raw_checksum = NULL;
+        map<string, string> segment_metadata = db->GetSegmentMetadata(*i);
+        if (segment_metadata.size() > 0) {
+            map<string, string>::const_iterator j;
+            for (j = segment_metadata.begin();
+                 j != segment_metadata.end(); ++j)
+            {
+                fprintf(dbmeta, "%s: %s\n",
+                        j->first.c_str(), j->second.c_str());
             }
-
-            if (raw_checksum != NULL)
-                fprintf(checksums, "%s *%s\n",
-                        raw_checksum, seg_path.c_str());
+            fprintf(dbmeta, "\n");
         }
     }
-    fclose(checksums);
-
-    SHA1Checksum checksum_csum;
-    string csum;
-    checksum_filename = checksum_file->get_local_path();
-    if (checksum_csum.process_file(checksum_filename.c_str())) {
-        csum = checksum_csum.checksum_str();
-    }
+    fclose(dbmeta);
+    dbmeta_filter->wait();
 
-    checksum_file->send();
+    string dbmeta_csum
+        = Hash::hash_file(dbmeta_file->get_local_path().c_str());
+    dbmeta_file->send();
 
     db->Close();
 
@@ -912,36 +899,29 @@ int main(int argc, char *argv[])
     string desc_filename = "snapshot-";
     if (backup_scheme.size() > 0)
         desc_filename += backup_scheme + "-";
-    desc_filename = desc_filename + desc_buf + ".lbs";
+    desc_filename = desc_filename + timestamp + ".cumulus";
 
     RemoteFile *descriptor_file = remote->alloc_file(desc_filename,
                                                      "snapshots");
-    int descriptor_fd = descriptor_file->get_fd();
-    if (descriptor_fd < 0) {
+    unique_ptr<FileFilter> descriptor_filter(
+        FileFilter::New(descriptor_file->get_fd(), signature_filter.c_str()));
+    if (descriptor_filter == NULL) {
         fprintf(stderr, "Unable to open descriptor output file: %m\n");
         return 1;
     }
-    pid_t signature_pid = 0;
-    if (signature_filter.size() > 0) {
-        int new_fd = spawn_filter(descriptor_fd, signature_filter.c_str(),
-                                  &signature_pid);
-        close(descriptor_fd);
-        descriptor_fd = new_fd;
-    }
-    FILE *descriptor = fdopen(descriptor_fd, "w");
+    FILE *descriptor = fdopen(descriptor_filter->get_wrapped_fd(), "w");
 
     fprintf(descriptor, "Format: Cumulus Snapshot v0.11\n");
     fprintf(descriptor, "Producer: Cumulus %s\n", cumulus_version);
-    strftime(desc_buf, sizeof(desc_buf), "%Y-%m-%d %H:%M:%S %z",
-             &time_buf_local);
-    fprintf(descriptor, "Date: %s\n", desc_buf);
+    string timestamp_local
+        = TimeFormat::format(now, TimeFormat::FORMAT_LOCALTIME, false);
+    fprintf(descriptor, "Date: %s\n", timestamp_local.c_str());
     if (backup_scheme.size() > 0)
         fprintf(descriptor, "Scheme: %s\n", backup_scheme.c_str());
-    fprintf(descriptor, "Backup-Intent: %g\n", snapshot_intent);
     fprintf(descriptor, "Root: %s\n", backup_root.c_str());
 
-    if (csum.size() > 0) {
-        fprintf(descriptor, "Checksums: %s\n", csum.c_str());
+    if (dbmeta_csum.size() > 0) {
+        fprintf(descriptor, "Segment-metadata: %s\n", dbmeta_csum.c_str());
     }
 
     fprintf(descriptor, "Segments:\n");
@@ -951,14 +931,8 @@ int main(int argc, char *argv[])
     }
 
     fclose(descriptor);
-
-    if (signature_pid) {
-        int status;
-        waitpid(signature_pid, &status, 0);
-
-        if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
-            fatal("Signature filter process error");
-        }
+    if (descriptor_filter->wait() < 0) {
+        fatal("Signature filter process error");
     }
 
     descriptor_file->send();