-- We maintain a local index of data blocks that have been previously stored
-- for constructing incremental snapshots.
--
-- The index is stored in an SQLite3 database.  This is its schema.
-- Versioning information, describing the revision for which the table schema
-- was set up.
create table schema_version(
    version text,               -- Program version, dotted decimal string
    major integer,              -- Major version number
    minor integer               -- Minor version number
);
insert into schema_version values ('0.11', 0, 11);
-- List of snapshots which have been created and which we are still tracking.
-- There may be more snapshots than this actually stored at the remote server,
-- but the reverse should not ever be true: Cumulus may depend on data stored
-- in these snapshots when writing a new snapshot.
create table snapshots (
    snapshotid integer primary key,
    -- NOTE(review): descriptive columns (e.g. snapshot name/timestamp) appear
    -- to have been lost from this copy of the schema -- confirm against the
    -- upstream revision before relying on this table definition.
    intent real                 -- TODO: deprecated, should be removed
);
-- List of segments which have been created.
create table segments (
    segmentid integer primary key,
    segment text unique not null,
    data_size integer,          -- sum of bytes in all objects in the segment
    disk_size integer           -- size of segment on disk, after compression
    -- TODO: group? metadata vs. non-metadata?
);
-- Index of all data blocks in stored segments.  This is indexed by content
-- hash to allow for coarse block-level data deduplication.
create table block_index (
    blockid integer primary key,
    segmentid integer not null,
    -- NOTE(review): the two columns below were reconstructed because the
    -- indexes following this table require them; further columns may have
    -- been lost from this copy -- confirm against the upstream revision.
    object text not null,       -- presumably the object name within the segment
    checksum text               -- content hash, used for deduplication lookups
);
create index block_content_index on block_index(checksum);
create unique index block_name_index on block_index(segmentid, object);
-- Checksums for the decomposition of blocks into even smaller chunks
-- (variable-sized, but generally ~4 kB, and maximum 64 kB).  Chunk boundaries
-- are determined based on the contents using Rabin fingerprints.  These
-- checksums can be used for computing sub-file incrementals.
--
-- Each block stored in block_index may have an entry in the
-- subblock_signatures table.  The signatures field is a binary blob consisting
-- of a packed sequence of (chunk length [16-bit unsigned, big-endian],
-- checksum [20 bytes if SHA-1]) tuples that should cover the entire block.
--
-- algorithm specifies the method used for computing break points as well as
-- the hash function used, so that signatures can be discarded if the algorithm
-- changes.  The current algorithm used is 'lbfs-4096/sha1', which specifies a
-- target 4 kB block size with parameters set to match LBFS, and SHA-1 as the
-- hash algorithm.
create table subblock_signatures (
    blockid integer primary key,
    algorithm text not null,
    signatures blob not null
);
-- Summary of segment utilization for each snapshot.
create table segment_utilization (
    snapshotid integer not null,
    segmentid integer not null,

    -- Estimate for the number of live bytes in data objects: this is capped at
    -- segments.data_size if all data in the segment is referenced.
    bytes_referenced integer
);
create unique index segment_utilization_index
    on segment_utilization(snapshotid, segmentid);