Fix a bug in signature loading for sub-file incrementals.

[cumulus.git] / schema.sql
diff --git a/schema.sql b/schema.sql

index e0f16a6..2851f0f 100644 (file)
--- a/schema.sql
+++ b/schema.sql
@@ -8,7 +8,8 @@ create table snapshots (
      snapshotid integer primary key,
      name text not null,
      scheme text,
-    timestamp real
+    timestamp real,
+    intent real
  );
  
  -- List of segments which have been created.
@@ -18,7 +19,8 @@ create table segments (
      path text,
      checksum text,
      mtime real,
-    size integer
+    size integer,
+    expire_time integer         -- snapshotid of latest snapshot when expired
  );
  
  -- Index of all blocks which have been stored, by checksum.
@@ -34,17 +36,34 @@ create table block_index (
  create index block_content_index on block_index(checksum);
  create unique index block_name_index on block_index(segmentid, object);
  
+-- Checksums for the decomposition of blocks into even smaller chunks
+-- (variable-sized, but generally ~8 kB, and maximum 64 kB).  Chunk boundaries
+-- are determined based on the contents using Rabin fingerprints.  These
+-- checksums can be used for computing sub-file incrementals.
+--
+-- Each block stored in block_index may have an entry in the
+-- subblock_signatures table.  The signatures field is a binary blob consisting
+-- of a packed sequence of (chunk length [16-bit unsigned, big-endian],
+-- checksum [20 bytes for SHA-1]) tuples that should cover the entire block.
+create table subblock_signatures (
+    blockid integer primary key,    -- presumably block_index row id; confirm
+    algorithm text not null,        -- presumably names chunk/hash scheme; confirm
+    signatures blob not null        -- packed (chunk length, checksum) tuples
+);
+
  -- Summary of segment utilization for each snapshots.
  create table segments_used (
      snapshotid integer not null,
      segmentid integer not null,
      utilization real
  );
+create unique index segments_used_index
+    on segments_used(snapshotid, segmentid);
  
  -- Overall estimate of segment utilization, for all snapshots combined.
  create view segment_info as
-select segmentid, mtime, size, cast(size * utilization as integer) as used,
-       utilization
+select segmentid, mtime, size, expire_time,
+       cast(size * utilization as integer) as used, utilization
  from segments join
       (select segmentid, max(utilization) as utilization
        from segments_used group by segmentid)