From: Michael Vrable Date: Tue, 25 Dec 2007 04:00:42 +0000 (-0800) Subject: Add intent-based cleaning to lbs-util. X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=commitdiff_plain;h=4749293dc3e518f5de8a50c9da7384b1c7794ce2 Add intent-based cleaning to lbs-util. Allow the level of segment cleaning performed to be adjusted by specifying the next type of backup to be performed. If the next backup is to be longer-lived, then clean more aggressively. --- diff --git a/lbs-util b/lbs-util index 6f2dd07..da89060 100755 --- a/lbs-util +++ b/lbs-util @@ -22,6 +22,8 @@ parser.add_option("--store", dest="store", help="specify path to backup data store") parser.add_option("--localdb", dest="localdb", help="specify path to local database") +parser.add_option("--intent", dest="intent", default=1.0, + help="give expected next snapshot type when cleaning") (options, args) = parser.parse_args(sys.argv[1:]) # Read a passphrase from the user and store it in the LBS_GPG_PASSPHRASE @@ -38,8 +40,8 @@ def cmd_prune_db(): db = lbs.LocalDatabase(options.localdb) # Delete old snapshots from the local database. - db.garbage_collect() - db.commit() + #db.garbage_collect() + #db.commit() # Run the segment cleaner. # Syntax: $0 --localdb=LOCALDB clean @@ -47,7 +49,9 @@ def cmd_clean(clean_threshold=7.0): db = lbs.LocalDatabase(options.localdb) # Delete old snapshots from the local database. - db.garbage_collect() + intent = float(options.intent) + for s in db.list_schemes(): + db.garbage_collect(s, intent) # Expire segments which are poorly-utilized. for s in db.get_segment_cleaning_list(): diff --git a/lbs.py b/lbs.py index 80240e7..4d2641f 100644 --- a/lbs.py +++ b/lbs.py @@ -460,15 +460,57 @@ class LocalDatabase: "Return a DB-API cursor for directly accessing the local database." return self.db_connection.cursor() - def garbage_collect(self): - """Delete entries from old snapshots from the database.""" + def list_schemes(self): + """Return the list of snapshots found in the local database. 
+ + The returned value is a sorted list of the distinct scheme names. + """ + + cur = self.cursor() + cur.execute("select distinct scheme from snapshots") + schemes = [row[0] for row in cur.fetchall()] + schemes.sort() + return schemes + + def garbage_collect(self, scheme, intent=1.0): + """Delete entries from old snapshots from the database. + + Only snapshots with the specified scheme name will be deleted. If + intent is given, it gives the intended next snapshot type, to determine + how aggressively to clean (for example, intent=7 could be used if the + next snapshot will be a weekly snapshot). + """ cur = self.cursor() - # Delete old snapshots. - cur.execute("""delete from snapshots - where snapshotid < (select max(snapshotid) - from snapshots)""") + # Get the list of old snapshots for this scheme. Delete all the old + # ones. Rules for what to keep: + # - Always keep the most recent snapshot. + # - If snapshot X is younger than Y, and X has higher intent, then Y + # can be deleted. + cur.execute("""select snapshotid, name, intent, + julianday('now') - timestamp as age + from snapshots where scheme = ? + order by age""", (scheme,)) + + first = True + max_intent = intent + for (id, name, snap_intent, snap_age) in cur.fetchall(): + can_delete = False + if snap_intent < max_intent: + # Delete small-intent snapshots if there is a more recent + # large-intent snapshot. + can_delete = True + elif snap_intent == intent: + # Delete previous snapshots with the specified intent level. + can_delete = True + + if can_delete and not first: + print "Delete snapshot %d (%s)" % (id, name) + cur.execute("delete from snapshots where snapshotid = ?", + (id,)) + first = False + max_intent = max(max_intent, snap_intent) # Delete entries in the segments_used table which are for non-existent # snapshots. @@ -520,6 +562,10 @@ class LocalDatabase: info.mtime = row[3] info.age_days = row[4] + # If age is not available for whatever reason, treat it as 0.0. 
+ if info.age_days is None: + info.age_days = 0.0 + # Benefit calculation: u is the estimated fraction of each segment # which is utilized (bytes belonging to objects still in use # divided by total size; this doesn't take compression or storage