Dump some statistics when the cleaner runs
author Michael Vrable <mvrable@cs.ucsd.edu>
Fri, 18 Mar 2011 04:18:12 +0000 (21:18 -0700)
committer Michael Vrable <mvrable@cs.ucsd.edu>
Fri, 18 Mar 2011 04:18:12 +0000 (21:18 -0700)
cleaner/cleaner

index 4cda7c7..93feef4 100755 (executable)
@@ -37,6 +37,8 @@ class Backend:
         m = re.match(r"^log-(\d+)-(\d+)$", name)
         if m: return (int(m.group(1)), int(m.group(2)))
 
+    def dump_stats(self):
+        pass
 
 class FileBackend(Backend):
     """An interface to BlueSky where the log segments are on local disk.
@@ -95,6 +97,8 @@ class S3Backend(Backend):
         self.cachedir = cachedir
         self.cache = {}
         self.connect()
+        self.stats_get = [0, 0]
+        self.stats_put = [0, 0]
 
     def connect(self):
         self.conn = boto.connect_s3(is_secure=False)
@@ -125,6 +129,8 @@ class S3Backend(Backend):
             fp.write(data)
             fp.close()
             self.cache[filename] = True
+            self.stats_get[0] += 1
+            self.stats_get[1] += len(data)
             if offset > 0:
                 data = data[offset:]
             if length is not None:
@@ -136,6 +142,8 @@ class S3Backend(Backend):
         k = Key(self.bucket)
         k.key = self.path + filename
         k.set_contents_from_string(data)
+        self.stats_put[0] += 1
+        self.stats_put[1] += len(data)
         if filename in self.cache:
             del self.cache[filename]
 
@@ -147,6 +155,11 @@ class S3Backend(Backend):
         if filename in self.cache:
             del self.cache[filename]
 
+    def dump_stats(self):
+        print "S3 statistics:"
+        print "GET: %d ops / %d bytes" % tuple(self.stats_get)
+        print "PUT: %d ops / %d bytes" % tuple(self.stats_put)
+
 class SimpleBackend(Backend):
     """An interface to the simple BlueSky test network server."""
 
@@ -446,17 +459,24 @@ class InodeMap:
 
         print
         print "Segment utilizations:"
+        total_data = [0, 0]
+        deletions = [0, 0]
         for (s, u) in sorted(util.segments.items()):
+            for i in range(2): total_data[i] += u[i]
             print "%s: %s %s" % (s, u, float(u[1]) / u[0])
             if u[1] == 0:
-                # print "Deleting..."
-                # backend.delete(s)
-                pass
+                print "Would delete..."
+                #backend.delete(s)
+                deletions[0] += 1
+                deletions[1] += u[0]
 
         self.inodes = inodes
         self.util = util
         self.updated_inodes = set()
 
+        print "%d bytes total / %d bytes used" % tuple(total_data)
+        print "would delete %d segments (%d bytes)" % tuple(deletions)
+
     def mark_updated(self, inum):
         self.updated_inodes.add(inum)
 
@@ -565,7 +585,8 @@ def run_cleaner(backend, inode_map, log, repack_inodes=False):
         rewrite_inode(backend, inode_map, i, log, i in dirty_inode_data)
 
 if __name__ == '__main__':
-    backend = S3Backend("mvrable-bluesky", cachedir=".")
+    start_time = time.time()
+    backend = S3Backend("mvrable-bluesky-west", cachedir="/export/cache")
     #backend = FileBackend(".")
     chkpt = load_checkpoint_record(backend)
     print backend.list()
@@ -578,3 +599,6 @@ if __name__ == '__main__':
     print "Version vector:", imap.version_vector
     imap.write(backend, log_dir)
     log_dir.close_all()
+    end_time = time.time()
+    print "Cleaner running time:", end_time - start_time
+    backend.dump_stats()