From 0347920b621a8feaf16d5f48f33bbe1b238fc896 Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Wed, 5 Dec 2007 20:14:08 -0800 Subject: [PATCH] Modifications to the local database: create a summary segments_used table. Make the local database more compact by only storing, for each snapshot, a listing of the segments it uses and the fraction of each which is used, instead of listing all objects referenced individually. This commit only adds the new table; it doesn't yet delete the old table (snapshot_contents). --- localdb.cc | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++- schema.sql | 30 +++++++++----------------- 2 files changed, 72 insertions(+), 21 deletions(-) diff --git a/localdb.cc b/localdb.cc index 251148b..98b2b09 100644 --- a/localdb.cc +++ b/localdb.cc @@ -32,6 +32,7 @@ sqlite3_stmt *LocalDb::Prepare(const char *sql) rc = sqlite3_prepare_v2(db, sql, strlen(sql), &stmt, &tail); if (rc != SQLITE_OK) { + ReportError(rc); throw IOException(string("Error preparing statement: ") + sql); } @@ -92,11 +93,54 @@ void LocalDb::Open(const char *path, const char *snapshot_name, sqlite3_close(db); throw IOException("Find snapshot id"); } + + /* Create a temporary table which will be used to keep track of the objects + * used by this snapshot. When the database is closed, we will summarize + * the results of this table into segments_used. 
*/ + rc = sqlite3_exec(db, + "create temporary table snapshot_refs (" + " segmentid integer not null," + " object text not null," + " size integer not null" + ")", NULL, NULL, NULL); + if (rc != SQLITE_OK) { + ReportError(rc); + sqlite3_close(db); + throw IOException("Database initialization"); + } + rc = sqlite3_exec(db, + "create unique index snapshot_refs_index " + "on snapshot_refs(segmentid, object)", + NULL, NULL, NULL); + if (rc != SQLITE_OK) { + ReportError(rc); + sqlite3_close(db); + throw IOException("Database initialization"); + } } void LocalDb::Close() { int rc; + + /* Summarize the snapshot_refs table into segments_used. */ + sqlite3_stmt *stmt = Prepare("insert into segments_used " + "select ? as snapshotid, segmentid, " + "cast(used as real) / size as utilization " + "from " + "(select segmentid, sum(size) as used " + "from snapshot_refs group by segmentid) " + "join segments using (segmentid)"); + sqlite3_bind_int64(stmt, 1, snapshotid); + rc = sqlite3_step(stmt); + if (rc != SQLITE_OK && rc != SQLITE_DONE) { + ReportError(rc); + sqlite3_close(db); + fprintf(stderr, "DATABASE ERROR: Unable to create segment summary!\n"); + } + sqlite3_finalize(stmt); + + /* Commit changes to the database and close. */ rc = sqlite3_exec(db, "commit", NULL, NULL, NULL); if (rc != SQLITE_OK) { fprintf(stderr, "DATABASE ERROR: Can't commit database!\n"); @@ -302,6 +346,20 @@ void LocalDb::UseObject(const ObjectReference& ref) } sqlite3_finalize(stmt); + + stmt = Prepare("insert or ignore into snapshot_refs " + "select segmentid, object, size from block_index " + "where segmentid = ? 
and object = ?"); + sqlite3_bind_int64(stmt, 1, SegmentToId(ref.get_segment())); + sqlite3_bind_text(stmt, 2, obj.c_str(), obj.size(), SQLITE_TRANSIENT); + + rc = sqlite3_step(stmt); + if (rc != SQLITE_DONE) { + fprintf(stderr, "Could not execute INSERT statement!\n"); + ReportError(rc); + } + + sqlite3_finalize(stmt); } void LocalDb::SetSegmentChecksum(const std::string &segment, @@ -311,13 +369,16 @@ void LocalDb::SetSegmentChecksum(const std::string &segment, int rc; sqlite3_stmt *stmt; - stmt = Prepare("update segments set path = ?, checksum = ? " + stmt = Prepare("update segments set path = ?, checksum = ?, " + "size = (select sum(size) from block_index " + " where segmentid = ?) " "where segmentid = ?"); sqlite3_bind_text(stmt, 1, path.c_str(), path.size(), SQLITE_TRANSIENT); sqlite3_bind_text(stmt, 2, checksum.c_str(), checksum.size(), SQLITE_TRANSIENT); sqlite3_bind_int64(stmt, 3, SegmentToId(segment)); + sqlite3_bind_int64(stmt, 4, SegmentToId(segment)); rc = sqlite3_step(stmt); if (rc != SQLITE_DONE) { diff --git a/schema.sql b/schema.sql index 2cca99a..1c6e84e 100644 --- a/schema.sql +++ b/schema.sql @@ -16,10 +16,11 @@ create table segments ( segmentid integer primary key, segment text unique not null, path text, - checksum text + checksum text, + size integer ); --- Index of all blocks which have been stored in a snapshot, by checksum. +-- Index of all blocks which have been stored, by checksum. create table block_index ( blockid integer primary key, segmentid integer not null, @@ -32,6 +33,13 @@ create table block_index ( create index block_content_index on block_index(checksum); create unique index block_name_index on block_index(segmentid, object); +-- Summary of segment utilization for each snapshot. +create table segments_used ( + snapshotid integer not null, + segmentid integer not null, + utilization real +); + -- Index tracking which blocks are used by which snapshots. 
create table snapshot_contents ( blockid integer, @@ -39,21 +47,3 @@ create table snapshot_contents ( ); create unique index snapshot_contents_unique on snapshot_contents(blockid, snapshotid); - --- Summary statistics for each segment. -create view segment_info as select * from - (select segmentid, max(timestamp) as mtime, - sum(size) as size, count(*) as objects - from block_index join segments using (segmentid) group by segmentid) -natural join - (select segmentid, sum(size) as used, count(*) as objects_used - from block_index where blockid in - (select blockid from snapshot_contents) group by segmentid); - --- Ranking of segments to be cleaned, using a benefit function of --- (fraction free space)*(age of youngest object). -create view cleaning_order as select *, (1-u)*age/(u+0.1) as benefit from - (select segmentid, - cast(used as real) / size as u, julianday('now') - mtime as age - from segment_info) -where benefit > 0; -- 2.20.1