From: Michael Vrable <mvrable@cs.ucsd.edu>
Date: Fri, 10 Aug 2007 02:23:20 +0000 (-0700)
Subject: Compute checksums of segments and store them in the local database.
X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=a4cf5f4d8df46fa00992a210d587cd824cedcb08;p=cumulus.git

Compute checksums of segments and store them in the local database.

When a segment is fully written out, compute a checksum of the file
actually written (post-filtering).  Store this in the local database so
that it will be possible to write out, at the end of a backup, a file
containing the checksums of all segments used for the snapshot (including
old ones not written out in this execution).
---

diff --git a/localdb.cc b/localdb.cc
index 9ec0c96..5c1f7e5 100644
--- a/localdb.cc
+++ b/localdb.cc
@@ -285,3 +285,26 @@ void LocalDb::UseObject(const ObjectReference& ref)
 
     sqlite3_finalize(stmt);
 }
+
+/* Record the path and checksum of a segment in the row of the segments table
+ * identified by SegmentToId(segment).  A failed update is only reported on
+ * stderr; nothing is returned to the caller. */
+void LocalDb::SetSegmentChecksum(const std::string &segment,
+                                 const std::string &path,
+                                 const std::string &checksum)
+{
+    sqlite3_stmt *query = Prepare("update segments set path = ?, checksum = ? "
+                                  "where segmentid = ?");
+
+    /* Parameters are numbered in the order the ?s appear in the SQL text. */
+    sqlite3_bind_text(query, 1, path.c_str(), path.size(), SQLITE_TRANSIENT);
+    sqlite3_bind_text(query, 2, checksum.c_str(), checksum.size(),
+                      SQLITE_TRANSIENT);
+    sqlite3_bind_int64(query, 3, SegmentToId(segment));
+
+    /* Any result other than SQLITE_DONE is treated as a failed update. */
+    if (sqlite3_step(query) != SQLITE_DONE)
+        fprintf(stderr, "Could not update segment checksum in database!\n");
+
+    sqlite3_finalize(query);
+}
diff --git a/localdb.h b/localdb.h
index 0fce30d..45bb132 100644
--- a/localdb.h
+++ b/localdb.h
@@ -30,6 +30,9 @@ public:
     bool IsOldObject(const std::string &checksum, int64_t size, double *age);
     bool IsAvailable(const ObjectReference &ref);
     void UseObject(const ObjectReference& ref);
+
+    void SetSegmentChecksum(const std::string &segment, const std::string &path,
+                            const std::string &checksum);
 private:
     sqlite3 *db;
     int64_t snapshotid;
diff --git a/scandir.cc b/scandir.cc
index 1db5ec3..82160a1 100644
--- a/scandir.cc
+++ b/scandir.cc
@@ -675,7 +675,6 @@ int main(int argc, char *argv[])
             printf("    %s\n", i->c_str());
     }
 
-    tss = new TarSegmentStore(backup_dest);
     block_buf = new char[LBS_BLOCK_SIZE];
 
     /* Store the time when the backup started, so it can be included in the
@@ -695,6 +694,8 @@ int main(int argc, char *argv[])
     db->Open(database_path.c_str(), desc_buf,
              backup_scheme.size() ? backup_scheme.c_str() : NULL);
 
+    tss = new TarSegmentStore(backup_dest, db);
+
     /* Initialize the stat cache, for skipping over unchanged files. */
     statcache = new StatCache;
     statcache->Open(localdb_dir.c_str(), desc_buf,
@@ -715,8 +716,6 @@ int main(int argc, char *argv[])
     string backup_root = root->get_ref().to_string();
     delete root;
 
-    db->Close();
-
     statcache->Close();
     delete statcache;
 
@@ -724,6 +723,8 @@ int main(int argc, char *argv[])
     tss->dump_stats();
     delete tss;
 
+    db->Close();
+
     /* Write a backup descriptor file, which says which segments are needed and
      * where to start to restore this snapshot.  The filename is based on the
      * current time. */
diff --git a/schema.sql b/schema.sql
index 6d89b4b..a37b501 100644
--- a/schema.sql
+++ b/schema.sql
@@ -14,7 +14,9 @@ create table snapshots (
 -- List of segments which have been created.
 create table segments (
     segmentid integer primary key,
-    segment text unique not null
+    segment text unique not null,
+    path text,              -- file name of the stored segment, without directory
+    checksum text           -- checksum of the segment file as written to disk
 );
 
 -- Index of all blocks which have been stored in a snapshot, by checksum.
diff --git a/sha1.cc b/sha1.cc
index c7b7112..3218075 100644
--- a/sha1.cc
+++ b/sha1.cc
@@ -30,6 +30,7 @@
 #include "sha1.h"
 
 #include <stddef.h>
+#include <stdio.h>
 #include <string.h>
 #include <arpa/inet.h>
 
@@ -348,6 +349,28 @@ void SHA1Checksum::process(const void *data, size_t len)
     sha1_process_bytes(data, len, &ctx);
 }
 
+/* Read the named file in 4 kB chunks and pass everything read to process().
+ * Returns false if the file cannot be opened or a read error is detected,
+ * and true once the end of the file has been reached. */
+bool SHA1Checksum::process_file(const char *filename)
+{
+    FILE *in = fopen(filename, "rb");
+    if (in == NULL)
+        return false;
+
+    char chunk[4096];
+    bool ok = true;
+    while (ok && !feof(in)) {
+        size_t count = fread(chunk, 1, sizeof(chunk), in);
+        ok = !ferror(in);
+        if (ok)
+            process(chunk, count);
+    }
+
+    fclose(in);
+    return ok;
+}
+
 const uint8_t *SHA1Checksum::checksum()
 {
     sha1_finish_ctx(&ctx, resbuf);
diff --git a/sha1.h b/sha1.h
index 1ede8b3..2d7e161 100644
--- a/sha1.h
+++ b/sha1.h
@@ -88,6 +88,7 @@ public:
     ~SHA1Checksum();
 
     void process(const void *data, size_t len);
+    bool process_file(const char *filename);
     const uint8_t *checksum();
     size_t checksum_size() const { return 20; }
     std::string checksum_str();
diff --git a/store.cc b/store.cc
index 9d4d1bd..930794d 100644
--- a/store.cc
+++ b/store.cc
@@ -230,11 +230,10 @@ ObjectReference TarSegmentStore::write_object(const char *data, size_t len,
         segment = new segment_info;
 
         segment->name = generate_uuid();
-
-        string filename = path + "/" + segment->name + ".tar";
-        filename += filter_extension;
-        segment->file = new Tarfile(filename, segment->name);
-
+        segment->basename = segment->name + ".tar";
+        segment->basename += filter_extension;
+        segment->fullname = path + "/" + segment->basename;
+        segment->file = new Tarfile(segment->fullname, segment->name);
         segment->count = 0;
 
         segments[group] = segment;
@@ -281,6 +280,15 @@ void TarSegmentStore::close_segment(const string &group)
     struct segment_info *segment = segments[group];
 
     delete segment->file;
+
+    if (db != NULL) {
+        SHA1Checksum segment_checksum;
+        if (segment_checksum.process_file(segment->fullname.c_str())) {
+            string checksum = segment_checksum.checksum_str();
+            db->SetSegmentChecksum(segment->name, segment->basename, checksum);
+        }
+    }
+
     segments.erase(segments.find(group));
     delete segment;
 }
diff --git a/store.h b/store.h
index 75c2008..a959cce 100644
--- a/store.h
+++ b/store.h
@@ -17,6 +17,7 @@
 #include <iostream>
 #include <sstream>
 
+#include "localdb.h"
 #include "sha1.h"
 #include "ref.h"
 
@@ -95,7 +96,9 @@ private:
 class TarSegmentStore {
 public:
     // New segments will be stored in the given directory.
-    TarSegmentStore(const std::string &path) { this->path = path; }
+    TarSegmentStore(const std::string &path,
+                    LocalDb *db = NULL)     // NULL db: no checksums recorded
+        : path(path), db(db) { }
     ~TarSegmentStore() { sync(); }
 
     // Writes an object to segment in the store, and returns the name
@@ -116,10 +119,13 @@ private:
         Tarfile *file;
         std::string name;           // UUID
         int count;                  // Objects written to this segment
+        std::string basename;       // Name of segment without directory
+        std::string fullname;       // Full path to stored segment
     };
 
     std::string path;
     std::map<std::string, struct segment_info *> segments;
+    LocalDb *db;
 
     // Ensure that all segments in the given group have been fully written.
     void close_segment(const std::string &group);