schema.sql

   1 -- We maintain a local index of data blocks that have been previously stored
   2 -- for constructing incremental snapshots.
   3 --
   4 -- The index is stored in an SQLite3 database.  This is its schema.
   5
   6 -- List of snapshots which have been created.
   7 create table snapshots (
   8     snapshotid integer primary key,
   9     name text not null,
  10     scheme text not null,
  11     timestamp real,
  12     intent real
  13 );
  14
  15 -- List of segments which have been created.
  16 create table segments (
  17     segmentid integer primary key,
  18     segment text unique not null,
  19     path text,
  20     checksum text,
  21     mtime real,
  22     size integer,
  23     expire_time integer         -- snapshotid of latest snapshot when expired
  24 );
  25
  26 -- Index of all blocks which have been stored, by checksum.
  27 create table block_index (
  28     blockid integer primary key,
  29     segmentid integer not null,
  30     object text not null,
  31     checksum text,
  32     size integer,
  33     timestamp real,
  34     expired integer
  35 );
  36 create index block_content_index on block_index(checksum);
  37 create unique index block_name_index on block_index(segmentid, object);
  38
  39 -- Checksums for the decomposition of blocks into even smaller chunks
  40 -- (variable-sized, but generally ~8 kB, and maximum 64 kB).  Chunk boundaries
  41 -- are determined based on the contents using Rabin fingerprints.  These
  42 -- checksums can be used for computing sub-file incrementals.
  43 --
  44 -- Each block stored in block_index may have an entry in the
-- subblock_signatures table.  The signatures field is a binary blob consisting
  46 -- of a packed sequence of (chunk length [16-bit unsigned, big-endian],
  47 -- checksum [20 bytes for SHA-1]) tuples that should cover the entire block.
  48 create table subblock_signatures (
  49     blockid integer primary key,
  50     algorithm text not null,
  51     signatures blob not null
  52 );
  53
-- Summary of segment utilization for each snapshot.
  55 create table segments_used (
  56     snapshotid integer not null,
  57     segmentid integer not null,
  58     utilization real
  59 );
  60 create unique index segments_used_index
  61     on segments_used(snapshotid, segmentid);
  62
  63 -- Overall estimate of segment utilization, for all snapshots combined.
  64 create view segment_info as
  65 select segmentid, mtime, size, expire_time,
  66        cast(size * utilization as integer) as used, utilization
  67 from segments join
  68      (select segmentid, max(utilization) as utilization
  69       from segments_used group by segmentid)
  70 using (segmentid);