Initial cut at re-using objects from old segments when contents match.
author Michael Vrable <mvrable@cs.ucsd.edu>
Tue, 15 May 2007 05:30:05 +0000 (22:30 -0700)
committer Michael Vrable <mvrable@turin.ucsd.edu>
Tue, 15 May 2007 05:30:05 +0000 (22:30 -0700)
localdb.cc
localdb.h
scandir.cc

index 63a9f83..4ab8283 100644 (file)
@@ -81,3 +81,39 @@ void LocalDb::StoreObject(const ObjectReference& ref,
 
     sqlite3_finalize(stmt);
 }
+
+ObjectReference LocalDb::FindObject(const string &checksum, int64_t size)
+{
+    int rc;
+    sqlite3_stmt *stmt;
+    static const char s[] =
+        "select segment, object from block_index "
+        "where checksum = ? and size = ?";
+    const char *tail;
+
+    ObjectReference ref;
+
+    rc = sqlite3_prepare_v2(db, s, strlen(s), &stmt, &tail);
+    if (rc != SQLITE_OK) {
+        return ref;
+    }
+
+    sqlite3_bind_text(stmt, 1, checksum.c_str(), checksum.size(),
+                      SQLITE_TRANSIENT);
+    sqlite3_bind_int64(stmt, 2, size);
+
+    rc = sqlite3_step(stmt);
+    if (rc == SQLITE_DONE) {
+    } else if (rc == SQLITE_ROW) {
+        printf("Can re-use block: %s/%s\n",
+               sqlite3_column_text(stmt, 0), sqlite3_column_text(stmt, 1));
+        ref = ObjectReference((const char *)sqlite3_column_text(stmt, 0),
+                              (const char *)sqlite3_column_text(stmt, 1));
+    } else {
+        fprintf(stderr, "Could not execute SELECT statement!\n");
+    }
+
+    sqlite3_finalize(stmt);
+
+    return ref;
+}
index 8a45e0e..6035e5d 100644 (file)
--- a/localdb.h
+++ b/localdb.h
@@ -25,6 +25,7 @@ public:
     void Close();
     void StoreObject(const ObjectReference& ref,
                      const std::string &checksum, int64_t size);
+    ObjectReference FindObject(const std::string &checksum, int64_t size);
 private:
     sqlite3 *db;
 };
index cfa29ca..0dfdbdb 100644 (file)
@@ -131,22 +131,26 @@ int64_t dumpfile(int fd, dictionary &file_info)
 
         hash.process(block_buf, bytes);
 
-        // tarstore processing
-        LbsObject *o = new LbsObject;
-        o->set_group("data");
-        o->set_data(block_buf, bytes);
-        o->write(tss);
-        object_list.push_back(o->get_name());
-        segment_list.insert(o->get_ref().get_segment());
-
-        // Index this block so it can be used by future snapshots
+        // Either find a copy of this block in an already-existing segment, or
+        // index it so it can be re-used in the future
         SHA1Checksum block_hash;
         block_hash.process(block_buf, bytes);
-        db->StoreObject(o->get_ref(), block_hash.checksum_str(), bytes);
-
+        string block_csum = block_hash.checksum_str();
+        ObjectReference ref = db->FindObject(block_csum, bytes);
+
+        // Store a copy of the object if one does not yet exist
+        if (ref.get_segment().size() == 0) {
+            LbsObject *o = new LbsObject;
+            o->set_group("data");
+            o->set_data(block_buf, bytes);
+            o->write(tss);
+            ref = o->get_ref();
+            db->StoreObject(ref, block_csum, bytes);
+            delete o;
+        }
+        object_list.push_back(ref.to_string());
+        segment_list.insert(ref.get_segment());
         size += bytes;
-
-        delete o;
     }
 
     file_info["checksum"] = hash.checksum_str();