create table snapshots (
snapshotid integer primary key,
name text not null,
- timestamp real
+ scheme text not null,
+ timestamp real,
+ intent real
);
-- List of segments which have been created.
create table segments (
segmentid integer primary key,
- segment text unique not null
+ segment text unique not null,
+ path text,
+ checksum text,
+ mtime real,
+ size integer,
+ expire_time integer -- snapshotid of latest snapshot when expired
);
--- Index of all blocks which have been stored in a snapshot, by checksum.
+-- Index of all blocks which have been stored, by checksum.
create table block_index (
blockid integer primary key,
segmentid integer not null,
create index block_content_index on block_index(checksum);
create unique index block_name_index on block_index(segmentid, object);
--- Index tracking which blocks are used by which snapshots.
-create table snapshot_contents (
- blockid integer,
- snapshotid integer
+-- Checksums for the decomposition of blocks into even smaller chunks
+-- (variable-sized, but generally ~8 kB, and maximum 64 kB). Chunk boundaries
+-- are determined based on the contents using Rabin fingerprints. These
+-- checksums can be used for computing sub-file incrementals.
+--
+-- Each block stored in block_index may have an entry in the
+-- subblock_signatures table. The signatures field is a binary blob consisting
+-- of a packed sequence of (chunk length [16-bit unsigned, big-endian],
+-- checksum [20 bytes for SHA-1]) tuples that should cover the entire block.
+create table subblock_signatures (
+ blockid integer primary key,
+ algorithm text not null,
+ signatures blob not null
);
-create unique index snapshot_contents_unique
- on snapshot_contents(blockid, snapshotid);
--- Summary statistics for each segment.
-create view segment_info as select * from
- (select segmentid, max(timestamp) as mtime,
- sum(size) as size, count(*) as objects
- from block_index natural join segments group by segmentid)
-natural join
- (select segmentid, sum(size) as used, count(*) as objects_used
- from block_index where blockid in
- (select blockid from snapshot_contents) group by segmentid);
+-- Summary of segment utilization for each snapshot.
+create table segments_used (
+ snapshotid integer not null,
+ segmentid integer not null,
+ utilization real
+);
+create unique index segments_used_index
+ on segments_used(snapshotid, segmentid);
--- Ranking of segments to be cleaned, using a benefit function of
--- (fraction free space)*(age of youngest object).
-create view cleaning_order as select *, (1-u)*age as benefit from
- (select segmentid,
- cast(used as real) / size as u, julianday('now') - mtime as age
- from segment_info)
-where benefit > 0;
+-- Overall estimate of segment utilization, for all snapshots combined.
+create view segment_info as
+select segmentid, mtime, size, expire_time,
+ cast(size * utilization as integer) as used, utilization
+from segments join
+ (select segmentid, max(utilization) as utilization
+ from segments_used group by segmentid)
+using (segmentid);