X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=lbs.py;h=b8de982e8652d45c0e5e52de987ee59def5fd456;hb=297553b6abd759306875c4c6f47fa1835e32fa05;hp=4d2641f22e5cdb072b2d5135a3de4a430d9c0fd2;hpb=4749293dc3e518f5de8a50c9da7384b1c7794ce2;p=cumulus.git diff --git a/lbs.py b/lbs.py index 4d2641f..b8de982 100644 --- a/lbs.py +++ b/lbs.py @@ -13,7 +13,7 @@ import os, re, sha, tarfile, tempfile, thread from pysqlite2 import dbapi2 as sqlite3 # The largest supported snapshot format that can be understood. -FORMAT_VERSION = (0, 6) # LBS Snapshot v0.6 +FORMAT_VERSION = (0, 8) # LBS Snapshot v0.8 # Maximum number of nested indirect references allowed in a snapshot. MAX_RECURSION_DEPTH = 3 @@ -143,7 +143,7 @@ class ObjectStore: if m: return ("zero", None, None, (0, int(m.group(1)))) - m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[((\d+)\+)?(\d+)\])?$", refstr) + m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(((\d+)\+)?(\d+)|=(\d+))\])?$", refstr) if not m: return segment = m.group(1) @@ -155,11 +155,14 @@ class ObjectStore: checksum = checksum.lstrip("(").rstrip(")") if slice is not None: - if m.group(5) is None: + if m.group(9) is not None: + # Size-assertion slice + slice = (0, int(m.group(9)), True) + elif m.group(6) is None: # Abbreviated slice - slice = (0, int(m.group(7))) + slice = (0, int(m.group(8)), False) else: - slice = (int(m.group(6)), int(m.group(7))) + slice = (int(m.group(7)), int(m.group(8)), False) return (segment, object, checksum, slice) @@ -231,7 +234,8 @@ class ObjectStore: raise ValueError if slice is not None: - (start, length) = slice + (start, length, exact) = slice + if exact and len(data) != length: raise ValueError data = data[start:start+length] if len(data) != length: raise IndexError @@ -263,7 +267,7 @@ def parse(lines, terminate=None): last_key = None continue - m = re.match(r"^(\w+):\s*(.*)$", l) + m = re.match(r"^([-\w]+):\s*(.*)$", l) if m: dict[m.group(1)] = m.group(2) last_key = m.group(1) @@ -483,6 +487,13 @@ class LocalDatabase: cur = self.cursor() + # Find the id of the last snapshot to be created. This is used for + # measuring time in a way: we record this value in each segment we + # expire on this run, and then on a future run can tell if there have + # been intervening backups made. + cur.execute("select max(snapshotid) from snapshots") + last_snapshotid = cur.fetchone()[0] + # Get the list of old snapshots for this scheme. Delete all the old # ones. Rules for what to keep: # - Always keep the most recent snapshot. @@ -523,12 +534,21 @@ class LocalDatabase: cur.execute("""delete from segments where segmentid not in (select segmentid from segments_used)""") - # Finally, delete objects contained in non-existent segments. We can't - # simply delete unused objects, since we use the set of unused objects - # to determine the used/free ratio of segments. + # Delete unused objects in the block_index table. By "unused", we mean + # any object which was stored in a segment which has been deleted, and + # any object in a segment which was marked for cleaning and has had + # cleaning performed already (the expired time is less than the current + # largest snapshot id). cur.execute("""delete from block_index - where segmentid not in - (select segmentid from segments)""") + where segmentid not in (select segmentid from segments) + or segmentid in (select segmentid from segments + where expire_time < ?)""", + (last_snapshotid,)) + + # Remove sub-block signatures for deleted objects. + cur.execute("""delete from subblock_signatures + where blockid not in + (select blockid from block_index)""") # Segment cleaning. class SegmentInfo(Struct): pass @@ -553,7 +573,8 @@ class LocalDatabase: cur = self.cursor() segments = [] cur.execute("""select segmentid, used, size, mtime, - julianday('now') - mtime as age from segment_info""") + julianday('now') - mtime as age from segment_info + where expire_time is null""") for row in cur: info = self.SegmentInfo() info.id = row[0] @@ -562,9 +583,11 @@ class LocalDatabase: info.mtime = row[3] info.age_days = row[4] - # If age is not available for whatever reason, treat it as 0.0. + # If data is not available for whatever reason, treat it as 0.0. if info.age_days is None: info.age_days = 0.0 + if info.used_bytes is None: + info.used_bytes = 0.0 # Benefit calculation: u is the estimated fraction of each segment # which is utilized (bytes belonging to objects still in use @@ -604,7 +627,11 @@ class LocalDatabase: raise TypeError("Invalid segment: %s, must be of type int or SegmentInfo, not %s" % (segment, type(segment))) cur = self.cursor() - cur.execute("update block_index set expired = 1 where segmentid = ?", + cur.execute("select max(snapshotid) from snapshots") + last_snapshotid = cur.fetchone()[0] + cur.execute("update segments set expire_time = ? where segmentid = ?", + (last_snapshotid, id)) + cur.execute("update block_index set expired = 0 where segmentid = ?", (id,)) def balance_expired_objects(self): @@ -733,5 +760,6 @@ class LocalDatabase: cutoffs.reverse() for i in range(len(cutoffs)): cur.execute("""update block_index set expired = ? - where round(? - timestamp) > ?""", + where round(? - timestamp) > ? + and expired is not null""", (i, now, cutoffs[i]))