# Restore the specified file. How to do so depends upon the file type, so
# dispatch based on that.
my $dest = "$DEST_DIR/$filename";
- if ($type eq '-') {
+ if ($type eq '-' || $type eq 'f') {
# Regular file
unpack_file($filename, %info);
} elsif ($type eq 'd') {
}
} elsif ($type eq 'l') {
# Symlink
- if (!defined($info{contents})) {
+ my $target = $info{target} || $info{contents};
+ if (!defined($target)) {
die "Symlink $filename has no value specified";
}
- my $contents = uri_decode($info{contents});
- symlink $contents, $dest
+ $target = uri_decode($target);
+ symlink $target, $dest
or die "Cannot create symlink $filename: $!";
# TODO: We can't properly restore all metadata for symbolic links
-- This script should be loaded after connecting to the database to be
-- upgraded.
--- Database schema changes: the size column was added to the segments table,
--- and the segments_used table was added. Rather than upgrade the segments
--- table in-place, we create a new table and then rename it over the old
--- segments table.
+-- Database schema changes: the size and mtime columns were added to the
+-- segments table, and the segments_used table was added. Rather than upgrade
+-- the segments table in-place, we create a new table and then rename it over
+-- the old segments table.
create table segments_new (
segmentid integer primary key,
segment text unique not null,
path text,
checksum text,
+ mtime real,
size integer
);
-- Compute the size and modification time of each of the segments, if
-- possible, based on our knowledge of the objects stored in them.
insert into segments_new
-select segmentid, segment, path, checksum, size
+select segmentid, segment, path, checksum, mtime, size
from
(select segmentid, segment, path, checksum from segments)
left join
- (select segmentid, sum(size) as size from block_index group by segmentid)
+ (select segmentid, sum(size) as size, max(timestamp) as mtime
+ from block_index group by segmentid)
using (segmentid);
drop table segments;
-- The snapshot_contents table is obsolete.
drop table snapshot_contents;
+
+-- Upgrade database views.
+drop view cleaning_order;
+drop view segment_info;
+
+create view segment_info as
+select segmentid, mtime, size, cast(size * utilization as integer) as used,
+ utilization
+from segments join
+ (select segmentid, max(utilization) as utilization
+ from segments_used group by segmentid)
+using (segmentid);
from optparse import OptionParser
import lbs
-# We support up to "LBS Snapshot v0.2" formats, but are also limited by the lbs
+# We support up to "LBS Snapshot v0.6" formats, but are also limited by the lbs
# module.
-FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 2))
+FORMAT_VERSION = min(lbs.FORMAT_VERSION, (0, 6))
def check_version(format):
ver = lbs.parse_metadata_version(format)
print "## Root:", d['Root']
metadata = lbs.iterate_metadata(store, d['Root'])
for m in metadata:
- if m.fields['type'] != '-': continue
+ if m.fields['type'] not in ('-', 'f'): continue
print "%s [%d bytes]" % (m.fields['name'], int(m.fields['size']))
verifier = lbs.ChecksumVerifier(m.fields['checksum'])
size = 0
if not os.path.isdir(path):
os.makedirs(path)
- if m.items.type == '-':
+ if m.items.type in ('-', 'f'):
file = open(destpath, 'wb')
verifier = lbs.ChecksumVerifier(m.items.checksum)
size = 0
if filename != '.':
os.mkdir(destpath)
elif m.items.type == 'l':
- os.symlink(m.items.contents, destpath)
+ try:
+ target = m.items.target
+ except:
+ # Old (v0.2 format) name for 'target'
+ target = m.items.contents
+ os.symlink(target, destpath)
elif m.items.type == 'p':
os.mkfifo(destpath)
elif m.items.type in ('c', 'b'):
from pysqlite2 import dbapi2 as sqlite3
# The largest snapshot format version that can be understood.
-FORMAT_VERSION = (0, 2) # LBS Snapshot v0.2
+FORMAT_VERSION = (0, 6) # LBS Snapshot v0.6
# Maximum number of nested indirect references allowed in a snapshot.
MAX_RECURSION_DEPTH = 3
'device': MetadataItem.decode_device,
'user': MetadataItem.decode_user,
'group': MetadataItem.decode_user,
+ 'ctime': MetadataItem.decode_int,
'mtime': MetadataItem.decode_int,
'links': MetadataItem.decode_int,
'inode': MetadataItem.raw_str,
'checksum': MetadataItem.decode_str,
'size': MetadataItem.decode_int,
'contents': MetadataItem.decode_str,
+ 'target': MetadataItem.decode_str,
}
def iterate_metadata(object_store, root):
where snapshotid < (select max(snapshotid)
from snapshots)""")
- # Delete entries in the snapshot_contents table which are for
- # non-existent snapshots.
- cur.execute("""delete from snapshot_contents
+ # Delete entries in the segments_used table which are for non-existent
+ # snapshots.
+ cur.execute("""delete from segments_used
where snapshotid not in
(select snapshotid from snapshots)""")
# Find segments which are not used by any current snapshots, and
# delete them from the segments table.
cur.execute("""delete from segments where segmentid not in
- (select distinct segmentid from snapshot_contents
- natural join block_index)""")
+ (select segmentid from segments_used)""")
# Finally, delete objects contained in non-existent segments. We can't
# simply delete unused objects, since we use the set of unused objects
cur = self.cursor()
- # First step: Mark all unused-and-expired objects with expired = -1,
- # which will cause us to mostly ignore these objects when rebalancing.
- # At the end, we will set these objects to be in group expired = 0.
- # Mark expired objects which still seem to be in use with expired = 0;
- # these objects will later have values set to indicate groupings of
- # objects when repacking.
- cur.execute("""update block_index set expired = -1
- where expired is not null""")
-
+ # Mark all expired objects with expired = 0; these objects will later
+ # have values set to indicate groupings of objects when repacking.
cur.execute("""update block_index set expired = 0
- where expired is not null and blockid in
- (select blockid from snapshot_contents)""")
+ where expired is not null""")
# We will want to aim for at least one full segment for each bucket
# that we eventually create, but don't know how many bytes that should
# segments, but for now don't worry too much about that.) If we can't
# compute an average, it's probably because there are no expired
# segments, so we have no more work to do.
- cur.execute("""select avg(size) from segment_info
+ cur.execute("""select avg(size) from segments
where segmentid in
(select distinct segmentid from block_index
where expired is not null)""")
cutoffs.reverse()
for i in range(len(cutoffs)):
cur.execute("""update block_index set expired = ?
- where round(? - timestamp) > ? and expired >= 0""",
+ where round(? - timestamp) > ?""",
(i, now, cutoffs[i]))
- cur.execute("update block_index set expired = 0 where expired = -1")
segment text unique not null,
path text,
checksum text,
+ mtime real,
size integer
);
segmentid integer not null,
utilization real
);
+
+-- Overall estimate of segment utilization, for all snapshots combined.
+create view segment_info as
+select segmentid, mtime, size, cast(size * utilization as integer) as used,
+ utilization
+from segments join
+ (select segmentid, max(utilization) as utilization
+ from segments_used group by segmentid)
+using (segmentid);