-- We maintain a local index of data blocks that have been previously stored
-- for constructing incremental snapshots.
--
-- The index is stored in an SQLite3 database.  This is its schema.
-- Versioning information, describing the revision for which the table schema
-- was set up.
create table schema_version(
    version text,               -- Program version, dotted decimal string
    major integer,              -- Major version number
    minor integer               -- Minor version number
);
insert into schema_version values ('0.11', 0, 11);
-- List of snapshots which have been created and which we are still tracking.
-- There may be more snapshots than this actually stored at the remote server,
-- but the reverse should not ever be true: Cumulus may depend on data stored
-- in these snapshots when writing a new snapshot.
create table snapshots (
    snapshotid integer primary key,
    -- NOTE(review): descriptive columns (e.g. snapshot name/timestamp) appear
    -- to have been lost from this copy of the schema -- confirm against the
    -- upstream revision before relying on this table definition.
    intent real                 -- TODO: deprecated, should be removed
);
-- List of segments which have been created.
create table segments (
    segmentid integer primary key,
    segment text unique not null,
    data_size integer,          -- sum of bytes in all objects in the segment
    disk_size integer           -- size of segment on disk, after compression
    -- TODO: group? metadata vs. non-metadata?
);
-- Index of all data blocks in stored segments.  This is indexed by content
-- hash to allow for coarse block-level data deduplication.
create table block_index (
    blockid integer primary key,
    segmentid integer not null,
    -- NOTE(review): the two columns below were reconstructed because the
    -- indexes following this table require them; further columns may have
    -- been lost from this copy -- confirm against the upstream revision.
    object text not null,       -- presumably the object name within the segment
    checksum text               -- content hash, used for deduplication lookups
);
create index block_content_index on block_index(checksum);
create unique index block_name_index on block_index(segmentid, object);
-- Checksums for the decomposition of blocks into even smaller chunks
-- (variable-sized, but generally ~4 kB, and maximum 64 kB).  Chunk boundaries
-- are determined based on the contents using Rabin fingerprints.  These
-- checksums can be used for computing sub-file incrementals.
--
-- Each block stored in block_index may have an entry in the
-- subblock_signatures table.  The signatures field is a binary blob consisting
-- of a packed sequence of (chunk length [16-bit unsigned, big-endian],
-- checksum [20 bytes if SHA-1]) tuples that should cover the entire block.
--
-- algorithm specifies the method used for computing break points as well as
-- the hash function used, so that signatures can be discarded if the algorithm
-- changes.  The current algorithm used is 'lbfs-4096/sha1', which specifies a
-- target 4 kB block size with parameters set to match LBFS, and SHA-1 as the
-- hash algorithm.
create table subblock_signatures (
    blockid integer primary key,
    algorithm text not null,
    signatures blob not null
);
-- Summary of segment utilization for each snapshot.
create table segment_utilization (
    snapshotid integer not null,
    segmentid integer not null,

    -- Estimate for the number of live bytes in data objects: this is capped at
    -- segments.data_size if all data in the segment is referenced.
    bytes_referenced integer
);
create unique index segment_utilization_index
    on segment_utilization(snapshotid, segmentid);