Extend cleaner with a simple policy for choosing segments to clean.
author Michael Vrable <mvrable@cs.ucsd.edu>
Thu, 9 Sep 2010 05:07:17 +0000 (22:07 -0700)
committer Michael Vrable <mvrable@cs.ucsd.edu>
Thu, 9 Sep 2010 05:07:17 +0000 (22:07 -0700)
cleaner/cleaner

index e249ed2..42aef01 100755 (executable)
@@ -234,6 +234,7 @@ class InodeMap:
         util = UtilizationTracker(backend)
         util.add_item(checkpoint_record)
         inodes = {}
+        self.obsolete_segments = set()
 
         print "Inode map:"
         for i in range(len(checkpoint_record.data) // 16):
@@ -282,11 +283,11 @@ class InodeMap:
 
             # Case 1: No inodes in this range of the old inode map have
             # changed.  Simply emit a new pointer to the same inode map block.
-            # TODO: Add the ability to rewrite the inode map block if we choose
-            # to do so for cleaning, even if no inodes have changed.
             if len(updated_inodes) == 0 or updated_inodes[-1] > end:
-                new_checkpoint.links.append(self.checkpoint_record.links[i])
-                continue
+                old_location = self.checkpoint_record.links[i][1][0:2]
+                if old_location not in self.obsolete_segments:
+                    new_checkpoint.links.append(self.checkpoint_record.links[i])
+                    continue
 
             # Case 2: Some inodes have been updated.  Create a new inode map
             # block, write it out, and point the new checkpoint at it.
@@ -323,6 +324,31 @@ def rewrite_inode(backend, inode_map, inum, log):
     log.write(inode, 1)
     inode_map.mark_updated(inum)
 
+def run_cleaner(backend, inode_map, log):
+    """Pick poorly-utilized segments and rewrite the inodes found in them.
+
+    Each segment selected for cleaning is recorded in
+    inode_map.obsolete_segments as a pair of ints parsed from its
+    "log-N-N" name.  Every segment in that set is then read back through
+    the backend, and each inode owning a DATA or INODE item there is
+    rewritten with rewrite_inode().
+
+    Arguments:
+      backend: storage backend; only backend.read(filename) is used here.
+      inode_map: supplies util.segments and the obsolete_segments set.
+      log: passed through unchanged to rewrite_inode().
+    """
+    # Determine which segments are poorly utilized and should be cleaned.  We
+    # need better heuristics here.
+    # NOTE(review): u is presumably a (total size, live size) pair -- confirm
+    # against UtilizationTracker.
+    for (s, u) in sorted(inode_map.util.segments.items()):
+        # Test u[1] > 0 first so the division is skipped for segments with
+        # no live data; otherwise a segment whose recorded size is zero
+        # would raise ZeroDivisionError instead of simply being ignored.
+        if u[1] > 0 and float(u[1]) / u[0] < 0.99:
+            print "Should clean segment", s
+            m = re.match(r"^log-(\d+)-(\d+)$", s)
+            # Segments whose names do not match the pattern are reported
+            # above but not queued for cleaning.
+            if m:
+                inode_map.obsolete_segments.add(
+                    (int(m.group(1)), int(m.group(2))))
+
+    # Given that list of segments to clean, scan through those segments and
+    # mark every inode that has a data or inode item there as needing to be
+    # rewritten.  No liveness check is made on the individual items, so an
+    # inode is rewritten even if its items in the segment are stale.
+    dirty_inodes = set()
+    for s in inode_map.obsolete_segments:
+        filename = "log-%08d-%08d" % s
+        print "Scanning", filename, "for live data"
+        for item in parse_log(backend.read(filename), filename):
+            if item.type in (ITEM_TYPE.DATA, ITEM_TYPE.INODE):
+                # Items with inum 0 are never queued for rewriting.
+                if item.inum != 0:
+                    dirty_inodes.add(item.inum)
+
+    print "Inodes to rewrite:", dirty_inodes
+    for i in sorted(dirty_inodes):
+        rewrite_inode(backend, inode_map, i, log)
+
 if __name__ == '__main__':
     backend = FileBackend(".")
     chkpt = load_checkpoint_record(backend)
@@ -330,9 +356,7 @@ if __name__ == '__main__':
     imap.build(backend, chkpt)
     print chkpt
 
-    print repr(chkpt.serialize())
-
     log_dir = LogDirectory(backend, 1, 0)
-    rewrite_inode(backend, imap, 147, log_dir)
+    run_cleaner(backend, imap, log_dir)
     imap.write(backend, log_dir)
     log_dir.close_all()