From: Michael Vrable Date: Thu, 31 May 2007 05:44:11 +0000 (-0700) Subject: Differentiate between never-before-seen objects and seen-but-expired. X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=commitdiff_plain;h=cac644d993d130efd8d29539de6557b18c9b737e Differentiate between never-before-seen objects and seen-but-expired. We group seen-but-expired objects into different segments, since the fact that the content has been seen before is an indicator that the data is long-lived, and grouping by (expected future) age should help increase segment utilization. --- diff --git a/localdb.cc b/localdb.cc index c58549b..4683f4d 100644 --- a/localdb.cc +++ b/localdb.cc @@ -221,6 +221,40 @@ ObjectReference LocalDb::FindObject(const string &checksum, int64_t size) return ref; } +bool LocalDb::IsOldObject(const string &checksum, int64_t size) +{ + int rc; + sqlite3_stmt *stmt; + static const char s[] = + "select segmentid, object from block_index " + "where checksum = ? and size = ?"; + const char *tail; + + bool found = false; + + rc = sqlite3_prepare_v2(db, s, strlen(s), &stmt, &tail); + if (rc != SQLITE_OK) { + return false; + } + + sqlite3_bind_text(stmt, 1, checksum.c_str(), checksum.size(), + SQLITE_TRANSIENT); + sqlite3_bind_int64(stmt, 2, size); + + rc = sqlite3_step(stmt); + if (rc == SQLITE_DONE) { + found = false; + } else if (rc == SQLITE_ROW) { + found = true; + } else { + fprintf(stderr, "Could not execute SELECT statement!\n"); + } + + sqlite3_finalize(stmt); + + return found; +} + void LocalDb::UseObject(const ObjectReference& ref) { int rc; diff --git a/localdb.h b/localdb.h index eb0955d..cebf019 100644 --- a/localdb.h +++ b/localdb.h @@ -26,6 +26,7 @@ public: void StoreObject(const ObjectReference& ref, const std::string &checksum, int64_t size); ObjectReference FindObject(const std::string &checksum, int64_t size); + bool IsOldObject(const std::string &checksum, int64_t size); void UseObject(const ObjectReference& ref); private: sqlite3 *db; diff --git a/scandir.cc b/scandir.cc index 26b83ed..aa7ce53 100644 --- a/scandir.cc +++ b/scandir.cc @@ -145,7 +145,20 @@ int64_t dumpfile(int fd, dictionary &file_info) // Store a copy of the object if one does not yet exist if (ref.get_segment().size() == 0) { LbsObject *o = new LbsObject; - o->set_group("data"); + + /* We might still have seen this checksum before, if the object was + * stored at some time in the past, but we have decided to clean + * the segment the object was originally stored in (FindObject will + * not return such objects). When rewriting the object contents, + * put it in a separate group, so that old objects get grouped + * together. The hope is that these old objects will continue to + * be used in the future, and we obtain segments which will + * continue to be well-utilized. */ + if (db->IsOldObject(block_csum, bytes)) + o->set_group("compacted"); + else + o->set_group("data"); + o->set_data(block_buf, bytes); o->write(tss); ref = o->get_ref();