From 197d9dca9416c09c7e79e56b88b9eb53c26d30d7 Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Mon, 29 Apr 2013 16:45:43 -0700 Subject: [PATCH] Clean up database, and timestamp handling in particular. --- localdb.cc | 5 +++-- schema.sql | 31 ++++++++++++++++++++++--------- 2 files changed, 25 insertions(+), 11 deletions(-) diff --git a/localdb.cc b/localdb.cc index 0c9dac4..15b58e6 100644 --- a/localdb.cc +++ b/localdb.cc @@ -319,7 +319,7 @@ bool LocalDb::IsOldObject(const string &checksum, int64_t size, double *age, sqlite3_stmt *stmt; bool found = false; - stmt = Prepare("select segmentid, object, timestamp, expired " + stmt = Prepare("select segmentid, object, julianday(timestamp), expired " "from block_index where checksum = ? and size = ?"); sqlite3_bind_text(stmt, 1, checksum.c_str(), checksum.size(), SQLITE_TRANSIENT); @@ -494,7 +494,8 @@ void LocalDb::SetSegmentMetadata(const std::string &segment, stmt = Prepare("update segments set path = ?, checksum = ?, " "type = ?, data_size = ?, disk_size = ?, " - "mtime = coalesce(mtime, julianday('now')) " + "timestamp = coalesce(julianday(timestamp), " + " julianday('now')) " "where segmentid = ?"); sqlite3_bind_text(stmt, 1, path.c_str(), path.size(), SQLITE_TRANSIENT); diff --git a/schema.sql b/schema.sql index 465dde2..fdd8884 100644 --- a/schema.sql +++ b/schema.sql @@ -3,6 +3,19 @@ -- -- The index is stored in an SQLite3 database. This is its schema. +-- A note on date formats: values which represent timestamps are generally +-- stored in SQLite's julianday format (Julian day stored as a 64-bit float). +-- However, application code should allow any value accepted by SQLite's +-- date/time functions: Julian day or ISO8601 text string. Code reading from +-- the database should select julianday(timestamp) or datetime(timestamp), +-- depending on the desired format for parsing. +-- +-- Because the julianday format uses floating point, code should not rely on +-- exact equality tests between timestamps, or precise round-tripping of +-- timestamp values. +-- +-- Timestamps are always in UTC. + -- Versioning information, describing the revision for which the table schema -- was set up. create table schema_version( @@ -20,21 +33,21 @@ create table snapshots ( snapshotid integer primary key, name text not null, scheme text not null, - timestamp real, - intent real -- TODO: deprecated, should be removed + timestamp datetime -- should match the timestamp of the snapshot ); -- List of segments which have been created. create table segments ( segmentid integer primary key, segment text unique not null, - mtime real, -- timestamp when segment was created + timestamp datetime, -- when was the segment written? path text, checksum text, data_size integer, -- sum of bytes in all objects in the segment disk_size integer, -- size of segment on disk, after compression type text ); +create unique index segment_name_index on segments(segment); -- Index of all data blocks in stored segments. This is indexed by content -- hash to allow for coarse block-level data deduplication. @@ -44,7 +57,7 @@ create table block_index ( object text not null, checksum text, size integer, - timestamp real, + timestamp datetime, -- when a block with this data was first stored expired integer ); create index block_content_index on block_index(checksum); @@ -58,13 +71,13 @@ create unique index block_name_index on block_index(segmentid, object); -- Each block stored in block_index may have an entry in the -- subblock_signatures table. The signatures field is a binary blob consisting -- of a packed sequence of (chunk length [16-bit unsigned, big-endian], --- checksum [20 bytes if SHA-1]) tuples that should cover the entire block. +-- checksum [28 bytes if SHA-224]) tuples that should cover the entire block. -- -- algorithm specifies the method used for computing break points as well as -- the hash function used, so that signatures can be discarded if the algorithm --- changes. The current algorithm used is 'lbfs-4096/sha1', which specifies a --- target 4 kB block size with parameters set to match LBFS, and SHA-1 as the --- hash algorithm. +-- changes. The current algorithm used is 'lbfs-4096/sha224', which specifies +-- a target 4 kB block size with parameters set to match LBFS, and SHA-224 as +-- the hash algorithm. create table subblock_signatures ( blockid integer primary key, algorithm text not null, @@ -78,7 +91,7 @@ create table segment_utilization ( -- Estimate for the number of live bytes in data objects: this is capped at -- segments.data_size if all data in the segment is referenced. - bytes_referenced integer + bytes_referenced integer not null ); create unique index segment_utilization_index on segment_utilization(snapshotid, segmentid); -- 2.20.1