Update the sample restore script and the Python code to support the new
snapshot format and the new local database schema. While updating segment
cleaning, also slightly rearrange the database schema to better support
segment cleaning.
# Restore the specified file. How to do so depends upon the file type, so
# dispatch based on that.
my $dest = "$DEST_DIR/$filename";
# Restore the specified file. How to do so depends upon the file type, so
# dispatch based on that.
my $dest = "$DEST_DIR/$filename";
+ if ($type eq '-' || $type eq 'f') {
# Regular file
unpack_file($filename, %info);
} elsif ($type eq 'd') {
# Regular file
unpack_file($filename, %info);
} elsif ($type eq 'd') {
}
} elsif ($type eq 'l') {
# Symlink
}
} elsif ($type eq 'l') {
# Symlink
- if (!defined($info{contents})) {
+ my $target = $info{target} || $info{contents};
+ if (!defined($target)) {
die "Symlink $filename has no value specified";
}
die "Symlink $filename has no value specified";
}
- my $contents = uri_decode($info{contents});
- symlink $contents, $dest
+ $target = uri_decode($target);
+ symlink $target, $dest
or die "Cannot create symlink $filename: $!";
# TODO: We can't properly restore all metadata for symbolic links
or die "Cannot create symlink $filename: $!";
# TODO: We can't properly restore all metadata for symbolic links
-- This script should be loaded after connecting to the database to be
-- upgraded.
--- Database schema changes: the size column was added to the segments table,
--- and the segments_used table was added. Rather than upgrade the segments
--- table in-place, we create a new table and then rename it over the old
--- segments table.
+-- Database schema changes: the size and mtime columns were added to the
+-- segments table, and the segments_used table was added. Rather than upgrade
+-- the segments table in-place, we create a new table and then rename it over
+-- the old segments table.
create table segments_new (
segmentid integer primary key,
segment text unique not null,
path text,
checksum text,
create table segments_new (
segmentid integer primary key,
segment text unique not null,
path text,
checksum text,
-- Compute the size of each of the segments, if possible, based on our
-- knowledge of the objects stored in them.
insert into segments_new
-- Compute the size of each of the segments, if possible, based on our
-- knowledge of the objects stored in them.
insert into segments_new
-select segmentid, segment, path, checksum, size
+select segmentid, segment, path, checksum, mtime, size
from
(select segmentid, segment, path, checksum from segments)
left join
from
(select segmentid, segment, path, checksum from segments)
left join
- (select segmentid, sum(size) as size from block_index group by segmentid)
+ (select segmentid, sum(size) as size, max(timestamp) as mtime
+ from block_index group by segmentid)
using (segmentid);
drop table segments;
using (segmentid);
drop table segments;
-- The snapshot_contents table is obsolete.
drop table snapshot_contents;
-- The snapshot_contents table is obsolete.
drop table snapshot_contents;
+
+-- Upgrade database views.
+drop view cleaning_order;
+drop view segment_info;
+
+create view segment_info as
+select segmentid, mtime, size, cast(size * utilization as integer) as used,
+ utilization
+from segments join
+ (select segmentid, max(utilization) as utilization
+ from segments_used group by segmentid)
+using (segmentid);
from optparse import OptionParser
import lbs
from optparse import OptionParser
import lbs
-# We support up to "LBS Snapshot v0.2" formats, but are also limited by the lbs
+# We support up to "LBS Snapshot v0.6" formats, but are also limited by the lbs
-FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 2))
+FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 6))
def check_version(format):
ver = lbs.parse_metadata_version(format)
def check_version(format):
ver = lbs.parse_metadata_version(format)
print "## Root:", d['Root']
metadata = lbs.iterate_metadata(store, d['Root'])
for m in metadata:
print "## Root:", d['Root']
metadata = lbs.iterate_metadata(store, d['Root'])
for m in metadata:
- if m.fields['type'] != '-': continue
+ if m.fields['type'] not in ('-', 'f'): continue
print "%s [%d bytes]" % (m.fields['name'], int(m.fields['size']))
verifier = lbs.ChecksumVerifier(m.fields['checksum'])
size = 0
print "%s [%d bytes]" % (m.fields['name'], int(m.fields['size']))
verifier = lbs.ChecksumVerifier(m.fields['checksum'])
size = 0
if not os.path.isdir(path):
os.makedirs(path)
if not os.path.isdir(path):
os.makedirs(path)
- if m.items.type == '-':
+ if m.items.type in ('-', 'f'):
file = open(destpath, 'wb')
verifier = lbs.ChecksumVerifier(m.items.checksum)
size = 0
file = open(destpath, 'wb')
verifier = lbs.ChecksumVerifier(m.items.checksum)
size = 0
if filename != '.':
os.mkdir(destpath)
elif m.items.type == 'l':
if filename != '.':
os.mkdir(destpath)
elif m.items.type == 'l':
- os.symlink(m.items.contents, destpath)
+ try:
+ target = m.items.target
+ except:
+ # Old (v0.2 format) name for 'target'
+ target = m.items.contents
+ os.symlink(target, destpath)
elif m.items.type == 'p':
os.mkfifo(destpath)
elif m.items.type in ('c', 'b'):
elif m.items.type == 'p':
os.mkfifo(destpath)
elif m.items.type in ('c', 'b'):
from pysqlite2 import dbapi2 as sqlite3
# The largest supported snapshot format that can be understood.
from pysqlite2 import dbapi2 as sqlite3
# The largest supported snapshot format that can be understood.
-FORMAT_VERSION = (0, 2) # LBS Snapshot v0.2
+FORMAT_VERSION = (0, 6) # LBS Snapshot v0.6
# Maximum number of nested indirect references allowed in a snapshot.
MAX_RECURSION_DEPTH = 3
# Maximum number of nested indirect references allowed in a snapshot.
MAX_RECURSION_DEPTH = 3
'device': MetadataItem.decode_device,
'user': MetadataItem.decode_user,
'group': MetadataItem.decode_user,
'device': MetadataItem.decode_device,
'user': MetadataItem.decode_user,
'group': MetadataItem.decode_user,
+ 'ctime': MetadataItem.decode_int,
'mtime': MetadataItem.decode_int,
'links': MetadataItem.decode_int,
'inode': MetadataItem.raw_str,
'checksum': MetadataItem.decode_str,
'size': MetadataItem.decode_int,
'contents': MetadataItem.decode_str,
'mtime': MetadataItem.decode_int,
'links': MetadataItem.decode_int,
'inode': MetadataItem.raw_str,
'checksum': MetadataItem.decode_str,
'size': MetadataItem.decode_int,
'contents': MetadataItem.decode_str,
+ 'target': MetadataItem.decode_str,
}
def iterate_metadata(object_store, root):
}
def iterate_metadata(object_store, root):
where snapshotid < (select max(snapshotid)
from snapshots)""")
where snapshotid < (select max(snapshotid)
from snapshots)""")
- # Delete entries in the snapshot_contents table which are for
- # non-existent snapshots.
- cur.execute("""delete from snapshot_contents
+ # Delete entries in the segments_used table which are for non-existent
+ # snapshots.
+ cur.execute("""delete from segments_used
where snapshotid not in
(select snapshotid from snapshots)""")
# Find segments which contain no objects used by any current snapshots,
# and delete them from the segment table.
cur.execute("""delete from segments where segmentid not in
where snapshotid not in
(select snapshotid from snapshots)""")
# Find segments which contain no objects used by any current snapshots,
# and delete them from the segment table.
cur.execute("""delete from segments where segmentid not in
- (select distinct segmentid from snapshot_contents
- natural join block_index)""")
+ (select segmentid from segments_used)""")
# Finally, delete objects contained in non-existent segments. We can't
# simply delete unused objects, since we use the set of unused objects
# Finally, delete objects contained in non-existent segments. We can't
# simply delete unused objects, since we use the set of unused objects
- # First step: Mark all unused-and-expired objects with expired = -1,
- # which will cause us to mostly ignore these objects when rebalancing.
- # At the end, we will set these objects to be in group expired = 0.
- # Mark expired objects which still seem to be in use with expired = 0;
- # these objects will later have values set to indicate groupings of
- # objects when repacking.
- cur.execute("""update block_index set expired = -1
- where expired is not null""")
-
+ # Mark all expired objects with expired = 0; these objects will later
+ # have values set to indicate groupings of objects when repacking.
cur.execute("""update block_index set expired = 0
cur.execute("""update block_index set expired = 0
- where expired is not null and blockid in
- (select blockid from snapshot_contents)""")
+ where expired is not null""")
# We will want to aim for at least one full segment for each bucket
# that we eventually create, but don't know how many bytes that should
# We will want to aim for at least one full segment for each bucket
# that we eventually create, but don't know how many bytes that should
# segments, but for now don't worry too much about that.) If we can't
# compute an average, it's probably because there are no expired
# segments, so we have no more work to do.
# segments, but for now don't worry too much about that.) If we can't
# compute an average, it's probably because there are no expired
# segments, so we have no more work to do.
- cur.execute("""select avg(size) from segment_info
+ cur.execute("""select avg(size) from segments
where segmentid in
(select distinct segmentid from block_index
where expired is not null)""")
where segmentid in
(select distinct segmentid from block_index
where expired is not null)""")
cutoffs.reverse()
for i in range(len(cutoffs)):
cur.execute("""update block_index set expired = ?
cutoffs.reverse()
for i in range(len(cutoffs)):
cur.execute("""update block_index set expired = ?
- where round(? - timestamp) > ? and expired >= 0""",
+ where round(? - timestamp) > ?""",
- cur.execute("update block_index set expired = 0 where expired = -1")
segment text unique not null,
path text,
checksum text,
segment text unique not null,
path text,
checksum text,
segmentid integer not null,
utilization real
);
segmentid integer not null,
utilization real
);
+
+-- Overall estimate of segment utilization, for all snapshots combined.
+create view segment_info as
+select segmentid, mtime, size, cast(size * utilization as integer) as used,
+ utilization
+from segments join
+ (select segmentid, max(utilization) as utilization
+ from segments_used group by segmentid)
+using (segmentid);