X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=blobdiff_plain;f=schema.sql;h=35b2c9d0c2cf16b96579e78d2720eb300ecb4d8c;hp=8529e32725977f50c1ab0722574cc97d3d6144db;hb=f38dd9bcb0caffd3fc9126b05788c936690e8288;hpb=0546d239071daa82a8b85348a7f66fba10d85aff

diff --git a/schema.sql b/schema.sql
index 8529e32..35b2c9d 100644
--- a/schema.sql
+++ b/schema.sql
@@ -7,7 +7,7 @@
 create table snapshots (
     snapshotid integer primary key,
     name text not null,
-    scheme text,
+    scheme text not null,
     timestamp real,
     intent real
 );
@@ -19,7 +19,8 @@ create table segments (
     path text,
     checksum text,
     mtime real,
-    size integer
+    size integer,
+    expire_time integer         -- snapshotid of latest snapshot when expired
 );
 
 -- Index of all blocks which have been stored, by checksum.
@@ -35,6 +36,27 @@ create table block_index (
 create index block_content_index on block_index(checksum);
 create unique index block_name_index on block_index(segmentid, object);
 
+-- Checksums for the decomposition of blocks into even smaller chunks
+-- (variable-sized, but generally ~4 kB, and maximum 64 kB).  Chunk boundaries
+-- are determined based on the contents using Rabin fingerprints.  These
+-- checksums can be used for computing sub-file incrementals.
+--
+-- Each block stored in block_index may have an entry in the
+-- subblock_signatures table.  The signatures field is a binary blob consisting
+-- of a packed sequence of (chunk length [16-bit unsigned, big-endian],
+-- checksum [20 bytes if SHA-1]) tuples that should cover the entire block.
+--
+-- algorithm specifies the method used for computing break points as well as
+-- the hash function used, so that signatures can be discarded if the algorithm
+-- changes.  The current algorithm used is 'lbfs-4096/sha1', which specifies a
+-- target 4 kB chunk size with parameters set to match LBFS, and SHA-1 as the
+-- hash algorithm.
+create table subblock_signatures (
+    blockid integer primary key,    -- presumably identifies the block_index entry covered; TODO confirm
+    algorithm text not null,        -- break-point method and hash function, e.g. 'lbfs-4096/sha1'
+    signatures blob not null        -- packed (16-bit big-endian chunk length, checksum) tuples
+);
+
 -- Summary of segment utilization for each snapshots.
 create table segments_used (
     snapshotid integer not null,
@@ -46,8 +68,8 @@ create unique index segments_used_index
 
 -- Overall estimate of segment utilization, for all snapshots combined.
 create view segment_info as
-select segmentid, mtime, size, cast(size * utilization as integer) as used,
-       utilization
+select segmentid, mtime, size, expire_time,
+       cast(size * utilization as integer) as used, utilization
 from segments join
      (select segmentid, max(utilization) as utilization
       from segments_used group by segmentid)