From: Michael Vrable
Date: Tue, 15 May 2007 05:30:05 +0000 (-0700)
Subject: Initial cut at re-using objects from old segments when contents match.
X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=commitdiff_plain;h=0a76f30b578a2ace26090a4b9bbd74b04124a20e

Initial cut at re-using objects from old segments when contents match.
---

diff --git a/localdb.cc b/localdb.cc
index 63a9f83..4ab8283 100644
--- a/localdb.cc
+++ b/localdb.cc
@@ -81,3 +81,51 @@ void LocalDb::StoreObject(const ObjectReference& ref,
 
     sqlite3_finalize(stmt);
 }
+
+// Look up a previously stored block by its checksum and size.
+//
+// Returns a reference to the first matching row of block_index.  If no row
+// matches, or the query cannot be prepared or executed, the returned
+// ObjectReference is left default-constructed; the caller in scandir.cc
+// tests for that case with get_segment().size() == 0.
+ObjectReference LocalDb::FindObject(const string &checksum, int64_t size)
+{
+    static const char s[] =
+        "select segment, object from block_index "
+        "where checksum = ? and size = ?";
+
+    ObjectReference ref;
+    sqlite3_stmt *stmt = NULL;
+    const char *tail;
+
+    int rc = sqlite3_prepare_v2(db, s, strlen(s), &stmt, &tail);
+    if (rc != SQLITE_OK) {
+        // Report the failure rather than silently treating it as "no match"
+        fprintf(stderr, "Could not prepare SELECT statement!\n");
+        return ref;
+    }
+
+    sqlite3_bind_text(stmt, 1, checksum.c_str(), checksum.size(),
+                      SQLITE_TRANSIENT);
+    sqlite3_bind_int64(stmt, 2, size);
+
+    rc = sqlite3_step(stmt);
+    if (rc == SQLITE_ROW) {
+        const char *segment = (const char *)sqlite3_column_text(stmt, 0);
+        const char *object = (const char *)sqlite3_column_text(stmt, 1);
+
+        // sqlite3_column_text() yields NULL for an SQL NULL value (or on
+        // allocation failure); such a row is treated as "no match" instead
+        // of handing a null pointer to printf("%s") or ObjectReference.
+        if (segment != NULL && object != NULL) {
+            printf("Can re-use block: %s/%s\n", segment, object);
+            ref = ObjectReference(segment, object);
+        }
+    } else if (rc != SQLITE_DONE) {
+        fprintf(stderr, "Could not execute SELECT statement!\n");
+    }
+
+    sqlite3_finalize(stmt);
+
+    return ref;
+}
diff --git a/localdb.h b/localdb.h
index 8a45e0e..6035e5d 100644
--- a/localdb.h
+++ b/localdb.h
@@ -25,6 +25,7 @@ public:
     void Close();
     void StoreObject(const ObjectReference& ref,
                      const std::string &checksum, int64_t size);
+    ObjectReference FindObject(const std::string &checksum, int64_t size);
 private:
     sqlite3 *db;
 };
diff --git a/scandir.cc b/scandir.cc
index cfa29ca..0dfdbdb 100644
--- a/scandir.cc
+++ b/scandir.cc
@@ -131,22 +131,26 @@ int64_t dumpfile(int fd, dictionary &file_info)
 
         hash.process(block_buf, bytes);
 
-        // tarstore processing
-        LbsObject *o = new LbsObject;
-        o->set_group("data");
-        o->set_data(block_buf, bytes);
-        o->write(tss);
-        object_list.push_back(o->get_name());
-        segment_list.insert(o->get_ref().get_segment());
-
-        // Index this block so it can be used by future snapshots
+        // Either find a copy of this block in an already-existing segment, or
+        // index it so it can be re-used in the future
         SHA1Checksum block_hash;
         block_hash.process(block_buf, bytes);
-        db->StoreObject(o->get_ref(), block_hash.checksum_str(), bytes);
-
+        string block_csum = block_hash.checksum_str();
+        ObjectReference ref = db->FindObject(block_csum, bytes);
+
+        // Store a copy of the object if one does not yet exist
+        if (ref.get_segment().size() == 0) {
+            LbsObject *o = new LbsObject;
+            o->set_group("data");
+            o->set_data(block_buf, bytes);
+            o->write(tss);
+            ref = o->get_ref();
+            db->StoreObject(ref, block_csum, bytes);
+            delete o;
+        }
+        object_list.push_back(ref.to_string());
+        segment_list.insert(ref.get_segment());
         size += bytes;
-
-        delete o;
     }
 
     file_info["checksum"] = hash.checksum_str();