from pysqlite2 import dbapi2 as sqlite3
# The largest supported snapshot format that can be understood.
-FORMAT_VERSION = (0, 6) # LBS Snapshot v0.6
+FORMAT_VERSION = (0, 8) # LBS Snapshot v0.8
# Maximum number of nested indirect references allowed in a snapshot.
MAX_RECURSION_DEPTH = 3
if m:
return ("zero", None, None, (0, int(m.group(1))))
- m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[((\d+)\+)?(\d+)\])?$", refstr)
+ m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(((\d+)\+)?(\d+)|=(\d+))\])?$", refstr)
if not m: return
segment = m.group(1)
checksum = checksum.lstrip("(").rstrip(")")
if slice is not None:
- if m.group(5) is None:
+ if m.group(9) is not None:
+ # Size-assertion slice
+ slice = (0, int(m.group(9)), True)
+ elif m.group(6) is None:
# Abbreviated slice
- slice = (0, int(m.group(7)))
+ slice = (0, int(m.group(8)), False)
else:
- slice = (int(m.group(6)), int(m.group(7)))
+ slice = (int(m.group(7)), int(m.group(8)), False)
return (segment, object, checksum, slice)
raise ValueError
if slice is not None:
- (start, length) = slice
+ (start, length, exact) = slice
+ if exact and len(data) != length: raise ValueError
data = data[start:start+length]
if len(data) != length: raise IndexError
last_key = None
continue
- m = re.match(r"^(\w+):\s*(.*)$", l)
+ m = re.match(r"^([-\w]+):\s*(.*)$", l)
if m:
dict[m.group(1)] = m.group(2)
last_key = m.group(1)
cur = self.cursor()
+ # Find the id of the most recently created snapshot. It serves as a
+ # coarse logical clock: we record this value in each segment we expire
+ # on this run, so that a future run can tell whether any backups have
+ # been made in the meantime.
+ cur.execute("select max(snapshotid) from snapshots")
+ last_snapshotid = cur.fetchone()[0]
+
# Get the list of old snapshots for this scheme. Delete all the old
# ones. Rules for what to keep:
# - Always keep the most recent snapshot.
cur.execute("""delete from segments where segmentid not in
(select segmentid from segments_used)""")
- # Finally, delete objects contained in non-existent segments. We can't
- # simply delete unused objects, since we use the set of unused objects
- # to determine the used/free ratio of segments.
+ # Delete unused objects from the block_index table. "Unused" means any
+ # object stored in a segment that has since been deleted, as well as
+ # any object in a segment that was marked for cleaning and has already
+ # been cleaned (its expire_time is less than the current largest
+ # snapshot id).
cur.execute("""delete from block_index
- where segmentid not in
- (select segmentid from segments)""")
+ where segmentid not in (select segmentid from segments)
+ or segmentid in (select segmentid from segments
+ where expire_time < ?)""",
+ (last_snapshotid,))
+
+ # Remove sub-block signatures for deleted objects.
+ cur.execute("""delete from subblock_signatures
+ where blockid not in
+ (select blockid from block_index)""")
# Segment cleaning.
class SegmentInfo(Struct): pass
cur = self.cursor()
segments = []
cur.execute("""select segmentid, used, size, mtime,
- julianday('now') - mtime as age from segment_info""")
+ julianday('now') - mtime as age from segment_info
+ where expire_time is null""")
for row in cur:
info = self.SegmentInfo()
info.id = row[0]
info.mtime = row[3]
info.age_days = row[4]
- # If age is not available for whatever reason, treat it as 0.0.
+ # If data is not available for whatever reason, treat it as 0.0.
if info.age_days is None:
info.age_days = 0.0
+ if info.used_bytes is None:
+ info.used_bytes = 0.0
# Benefit calculation: u is the estimated fraction of each segment
# which is utilized (bytes belonging to objects still in use
raise TypeError("Invalid segment: %s, must be of type int or SegmentInfo, not %s" % (segment, type(segment)))
cur = self.cursor()
- cur.execute("update block_index set expired = 1 where segmentid = ?",
+ cur.execute("select max(snapshotid) from snapshots")
+ last_snapshotid = cur.fetchone()[0]
+ cur.execute("update segments set expire_time = ? where segmentid = ?",
+ (last_snapshotid, id))
+ cur.execute("update block_index set expired = 0 where segmentid = ?",
(id,))
def balance_expired_objects(self):
cutoffs.reverse()
for i in range(len(cutoffs)):
cur.execute("""update block_index set expired = ?
- where round(? - timestamp) > ?""",
+ where round(? - timestamp) > ?
+ and expired is not null""",
(i, now, cutoffs[i]))