From: Michael Vrable Date: Wed, 13 Feb 2008 01:05:33 +0000 (-0800) Subject: Slight tweaks to the local database to improve cleaning procedures. X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=030ddd80e04aa65738bf21557dd541bec757de5b;p=cumulus.git Slight tweaks to the local database to improve cleaning procedures. In addition to marking objects in cleaned segments, mark the segment itself as cleaned. --- diff --git a/contrib/upgrade0.6-localdb.sql b/contrib/upgrade0.6-localdb.sql index 7256f9f..ce8797c 100644 --- a/contrib/upgrade0.6-localdb.sql +++ b/contrib/upgrade0.6-localdb.sql @@ -15,7 +15,8 @@ create table segments_new ( path text, checksum text, mtime real, - size integer + size integer, + expire_time integer ); create table segments_used ( @@ -35,7 +36,7 @@ update snapshots set intent = 1; -- Compute the size of each of the segments, if possible, based on our -- knowledge of the objects stored in them. insert into segments_new -select segmentid, segment, path, checksum, mtime, size +select segmentid, segment, path, checksum, mtime, size, null as expire_time from (select segmentid, segment, path, checksum from segments) left join diff --git a/lbs.py b/lbs.py index 5213b89..4224cf9 100644 --- a/lbs.py +++ b/lbs.py @@ -483,6 +483,13 @@ class LocalDatabase: cur = self.cursor() + # Find the id of the last snapshot to be created. This is used for + # measuring time in a way: we record this value in each segment we + # expire on this run, and then on a future run can tell if there have + # been intervening backups made. + cur.execute("select max(snapshotid) from snapshots") + last_snapshotid = cur.fetchone()[0] + # Get the list of old snapshots for this scheme. Delete all the old # ones. Rules for what to keep: # - Always keep the most recent snapshot. @@ -523,12 +530,16 @@ class LocalDatabase: cur.execute("""delete from segments where segmentid not in (select segmentid from segments_used)""") - # Finally, delete objects contained in non-existent segments. We can't - # simply delete unused objects, since we use the set of unused objects - # to determine the used/free ratio of segments. + # Delete unused objects in the block_index table. By "unused", we mean + # any object which was stored in a segment which has been deleted, and + # any object in a segment which was marked for cleaning and has had + # cleaning performed already (the expired time is less than the current + # largest snapshot id). cur.execute("""delete from block_index - where segmentid not in - (select segmentid from segments)""") + where segmentid not in (select segmentid from segments) + or segmentid in (select segmentid from segments + where expire_time < ?)""", + (last_snapshotid,)) # Segment cleaning. class SegmentInfo(Struct): pass @@ -604,7 +615,11 @@ class LocalDatabase: raise TypeError("Invalid segment: %s, must be of type int or SegmentInfo, not %s" % (segment, type(segment))) cur = self.cursor() - cur.execute("update block_index set expired = 1 where segmentid = ?", + cur.execute("select max(snapshotid) from snapshots") + last_snapshotid = cur.fetchone()[0] + cur.execute("update segments set expire_time = ? where segmentid = ?", + (last_snapshotid, id)) + cur.execute("update block_index set expired = 0 where segmentid = ?", (id,)) def balance_expired_objects(self): diff --git a/schema.sql b/schema.sql index 8529e32..f406d91 100644 --- a/schema.sql +++ b/schema.sql @@ -19,7 +19,8 @@ create table segments ( path text, checksum text, mtime real, - size integer + size integer, + expire_time integer -- snapshotid of latest snapshot when expired ); -- Index of all blocks which have been stored, by checksum.