Clean up database, and timestamp handling in particular.

author Michael Vrable <vrable@cs.hmc.edu>

Mon, 29 Apr 2013 23:45:43 +0000 (16:45 -0700)

committer Michael Vrable <vrable@cs.hmc.edu>

Sun, 26 Jan 2014 20:28:09 +0000 (12:28 -0800)
author Michael Vrable <vrable@cs.hmc.edu>
Mon, 29 Apr 2013 23:45:43 +0000 (16:45 -0700)
committer Michael Vrable <vrable@cs.hmc.edu>
Sun, 26 Jan 2014 20:28:09 +0000 (12:28 -0800)
diff --git a/localdb.cc b/localdb.cc

index 0c9dac4..15b58e6 100644 (file)
--- a/localdb.cc
+++ b/localdb.cc
@@ -319,7 +319,7 @@ bool LocalDb::IsOldObject(const string &checksum, int64_t size, double *age,
      sqlite3_stmt *stmt;
      bool found = false;
  
-    stmt = Prepare("select segmentid, object, timestamp, expired "
+    stmt = Prepare("select segmentid, object, julianday(timestamp), expired "
                     "from block_index where checksum = ? and size = ?");
      sqlite3_bind_text(stmt, 1, checksum.c_str(), checksum.size(),
                        SQLITE_TRANSIENT);
@@ -494,7 +494,8 @@ void LocalDb::SetSegmentMetadata(const std::string &segment,
  
      stmt = Prepare("update segments set path = ?, checksum = ?, "
                     "type = ?, data_size = ?, disk_size = ?, "
-                   "mtime = coalesce(mtime, julianday('now')) "
+                   "timestamp = coalesce(julianday(timestamp), "
+                   "                     julianday('now')) "
                     "where segmentid = ?");
      sqlite3_bind_text(stmt, 1, path.c_str(), path.size(),
                        SQLITE_TRANSIENT);
diff --git a/schema.sql b/schema.sql

index 465dde2..fdd8884 100644 (file)
--- a/schema.sql
+++ b/schema.sql
@@ -3,6 +3,19 @@
  --
  -- The index is stored in an SQLite3 database.  This is its schema.
  
+-- A note on date formats: values which represent timestamps are generally
+-- stored in SQLite's julianday format (Julian day stored as a 64-bit float).
+-- However, application code should allow any value accepted by SQLite's
+-- date/time functions: Julian day or ISO8601 text string.  Code reading from
+-- the database should select julianday(timestamp) or datetime(timestamp),
+-- depending on the desired format for parsing.
+--
+-- Because the julianday format uses floating point, code should not rely on
+-- exact equality tests between timestamps, or precise round-tripping of
+-- timestamp values.
+--
+-- Timestamps are always in UTC.
+
  -- Versioning information, describing the revision for which the table schema
  -- was set up.
  create table schema_version(
@@ -20,21 +33,21 @@ create table snapshots (
      snapshotid integer primary key,
      name text not null,
      scheme text not null,
-    timestamp real,
-    intent real                 -- TODO: deprecated, should be removed
+    timestamp datetime          -- should match the timestamp of the snapshot
  );
  
  -- List of segments which have been created.
  create table segments (
      segmentid integer primary key,
      segment text unique not null,
-    mtime real,                 -- timestamp when segment was created
+    timestamp datetime,         -- when was the segment written?
      path text,
      checksum text,
      data_size integer,          -- sum of bytes in all objects in the segment
      disk_size integer,          -- size of segment on disk, after compression
      type text
  );
+create unique index segment_name_index on segments(segment);
  
  -- Index of all data blocks in stored segments.  This is indexed by content
  -- hash to allow for coarse block-level data deduplication.
@@ -44,7 +57,7 @@ create table block_index (
      object text not null,
      checksum text,
      size integer,
-    timestamp real,
+    timestamp datetime,         -- when a block with this data was first stored
      expired integer
  );
  create index block_content_index on block_index(checksum);
@@ -58,13 +71,13 @@ create unique index block_name_index on block_index(segmentid, object);
  -- Each block stored in block_index may have an entry in the
  -- subblock_signatures table.  The signatures field is a binary blob consisting
  -- of a packed sequence of (chunk length [16-bit unsigned, big-endian],
--- checksum [20 bytes if SHA-1]) tuples that should cover the entire block.
+-- checksum [28 bytes if SHA-224]) tuples that should cover the entire block.
  --
  -- algorithm specifies the method used for computing break points as well as
  -- the hash function used, so that signatures can be discarded if the algorithm
--- changes.  The current algorithm used is 'lbfs-4096/sha1', which specifies a
--- target 4 kB block size with parameters set to match LBFS, and SHA-1 as the
--- hash algorithm.
+-- changes.  The current algorithm used is 'lbfs-4096/sha224', which specifies
+-- a target 4 kB block size with parameters set to match LBFS, and SHA-224 as
+-- the hash algorithm.
  create table subblock_signatures (
      blockid integer primary key,
      algorithm text not null,
@@ -78,7 +91,7 @@ create table segment_utilization (
  
      -- Estimate for the number of live bytes in data objects: this is capped at
      -- segments.data_size if all data in the segment is referenced.
-    bytes_referenced integer
+    bytes_referenced integer not null
  );
  create unique index segment_utilization_index
      on segment_utilization(snapshotid, segmentid);
author	Michael Vrable <vrable@cs.hmc.edu>
	Mon, 29 Apr 2013 23:45:43 +0000 (16:45 -0700)
committer	Michael Vrable <vrable@cs.hmc.edu>
	Sun, 26 Jan 2014 20:28:09 +0000 (12:28 -0800)
localdb.cc		patch | blob | history
schema.sql		patch | blob | history