Merge ssh://vrable1.sysnet.ucsd.edu/scratch/bluesky
author Michael Vrable <vrable@cs.hmc.edu>
Mon, 26 Sep 2011 08:23:16 +0000 (01:23 -0700)
committer Michael Vrable <vrable@cs.hmc.edu>
Mon, 26 Sep 2011 08:23:16 +0000 (01:23 -0700)
cleaner/cleaner
results/20110815/synwrite-2048M-summary.data [new file with mode: 0644]
results/20110815/synwrite-256M-summary.data [new file with mode: 0644]
results/20110925/cleaner.data [new file with mode: 0644]
results/20110925/cleaner.log [new file with mode: 0644]
results/20110925/cleaner.txt [new file with mode: 0644]
results/20110925/cleaner2.log [new file with mode: 0644]
results/20110925/extract.sh [new file with mode: 0755]
results/figures/cleaner.gnuplot [new file with mode: 0644]
results/figures/syn-write.gnuplot
results/kcompile.txt

diff --git a/cleaner/cleaner b/cleaner/cleaner
index 93feef4..f2dcfcb 100755 (executable)
@@ -21,6 +21,15 @@ HEADER_SIZE = struct.calcsize(HEADER_FORMAT)
 
 CHECKPOINT_MAGIC = struct.pack('<Q', 0x7ad7dafb42a498b4)
 
+# Log file to write benchmark data to
+benchlog = None
+def benchlog_write(msg, *args):
+    m = msg % args
+    print "LOG:", m
+    if benchlog is not None:
+        benchlog.write(m)
+        benchlog.write("\n")
+
 class ITEM_TYPE:
     DATA = '1'
     INODE = '2'
@@ -83,6 +92,7 @@ def retry_wrap(method):
                 return method(self, *args, **kwargs)
             except:
                 print >>sys.stderr, "S3 operation failed, retrying..."
+                print >>sys.stderr, "  %s %s %s" % (method, args, kwargs)
                 self.connect()
                 time.sleep(1.0)
         return method(self, *args, **kwargs)
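The retry_wrap decorator shown in context above (extended here to log the failing call and its arguments) follows a standard retry pattern: catch the failure, re-establish the S3 connection, back off briefly, and try again, with one final unguarded call so a persistent error still propagates to the caller. A minimal standalone sketch of that pattern, with hypothetical names:

    import time

    def retry_wrap(method, attempts=3, delay=1.0):
        # Wrap a bound method so transient failures trigger a
        # reconnect-and-retry loop; the final attempt sits outside the
        # loop so a persistent error surfaces to the caller.
        def wrapped(self, *args, **kwargs):
            for _ in range(attempts):
                try:
                    return method(self, *args, **kwargs)
                except Exception:
                    self.connect()  # assumed reconnect hook on the backend
                    time.sleep(delay)
            return method(self, *args, **kwargs)
        return wrapped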
@@ -96,6 +106,9 @@ class S3Backend(Backend):
         self.path = path
         self.cachedir = cachedir
         self.cache = {}
+        for f in os.listdir(cachedir):
+            self.cache[f] = True
+        #print "Initial cache contents:", list(self.cache.keys())
         self.connect()
         self.stats_get = [0, 0]
         self.stats_put = [0, 0]
@@ -159,6 +172,8 @@ class S3Backend(Backend):
         print "S3 statistics:"
         print "GET: %d ops / %d bytes" % tuple(self.stats_get)
         print "PUT: %d ops / %d bytes" % tuple(self.stats_put)
+        benchlog_write("s3_get: %d", self.stats_get[1])
+        benchlog_write("s3_put: %d", self.stats_put[1])
 
 class SimpleBackend(Backend):
     """An interface to the simple BlueSky test network server."""
@@ -281,7 +296,7 @@ class LogDirectory:
         self.dir_num = dir
         self.seq_num = 0
         for logname in backend.list(dir):
-            print "Old log file:", logname
+            #print "Old log file:", logname
             loc = backend.name_to_loc(logname[0])
             if loc is not None and loc[0] == dir:
                 self.seq_num = max(self.seq_num, loc[1] + 1)
@@ -440,12 +455,12 @@ class InodeMap:
 
         data = data[self.vvsize:]
 
-        print "Inode map:"
+        #print "Inode map:"
         for i in range(len(data) // 16):
             (start, end) = struct.unpack_from("<QQ", data, 16*i)
             imap = load_item(backend, checkpoint_record.links[i][1])
             util.add_item(imap)
-            print "[%d, %d]: %s" % (start, end, imap)
+            #print "[%d, %d]: %s" % (start, end, imap)
             for j in range(len(imap.data) // 8):
                 (inum,) = struct.unpack_from("<Q", imap.data, 8*j)
                 inode = load_item(backend, imap.links[j][1])
@@ -454,10 +469,11 @@ class InodeMap:
                 util.add_item(inode)
                 for i in inode.links:
                     util.add_item(i[1])
-                    data_segments.add(i[1][0:2])
-                print "  %d: %s (%d data segments)" % (inum, inode, len(data_segments))
+                    if i[1] is not None:
+                        data_segments.add(i[1][0:2])
+                #print "  %d: %s (%d data segments)" % (inum, inode, len(data_segments))
 
-        print
+        #print
         print "Segment utilizations:"
         total_data = [0, 0]
         deletions = [0, 0]
@@ -466,9 +482,16 @@ class InodeMap:
             print "%s: %s %s" % (s, u, float(u[1]) / u[0])
             if u[1] == 0:
                 print "Would delete..."
-                #backend.delete(s)
-                deletions[0] += 1
-                deletions[1] += u[0]
+                (d, n) = backend.name_to_loc(s)
+                try:
+                    if n < self.version_vector[d]:
+                        backend.delete(s)
+                        deletions[0] += 1
+                        deletions[1] += u[0]
+                    else:
+                        print "Not deleting log file newer than checkpoint!"
+                except:
+                    print "Error determining age of log segment, keeping"
 
         self.inodes = inodes
         self.util = util
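This hunk replaces the commented-out unconditional delete with a guarded one: a segment whose utilization has dropped to zero is reclaimed only if its sequence number is strictly older than the checkpoint's version vector entry for its log directory, since a newer segment may hold data written after the checkpoint that merely looks unreferenced. A standalone sketch of the age check, with hypothetical names:

    def safe_to_delete(version_vector, segment_loc):
        # segment_loc is a (directory, sequence) pair; version_vector
        # maps each log directory to the sequence number covered by the
        # current checkpoint. Segments at or past that point may still
        # be live, so keep them.
        directory, sequence = segment_loc
        try:
            return sequence < version_vector[directory]
        except (KeyError, TypeError):
            return False  # age unknown: err on the side of keeping it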
@@ -476,6 +499,9 @@ class InodeMap:
 
         print "%d bytes total / %d bytes used" % tuple(total_data)
         print "would delete %d segments (%d bytes)" % tuple(deletions)
+        benchlog_write("bytes_used: %d", total_data[1])
+        benchlog_write("bytes_wasted: %d", total_data[0] - total_data[1])
+        benchlog_write("bytes_freed: %d", deletions[1])
 
     def mark_updated(self, inum):
         self.updated_inodes.add(inum)
@@ -535,12 +561,15 @@ class InodeMap:
 def rewrite_inode(backend, inode_map, inum, log, copy_data=True):
     inode = inode_map.inodes[inum]
     if copy_data:
-        blocks = []
+        newlinks = []
         for l in inode.links:
-            data = load_item(backend, l[1])
-            blocks.append(data)
-            log.write(data, 0)
-        inode.links = [(b.id, b.location) for b in blocks]
+            if l[1] is not None:
+                data = load_item(backend, l[1])
+                log.write(data, 0)
+                newlinks.append((data.id, data.location))
+            else:
+                newlinks.append(l)
+        inode.links = newlinks
     log.write(inode, 1)
     inode_map.mark_updated(inum)
 
@@ -566,7 +595,7 @@ def run_cleaner(backend, inode_map, log, repack_inodes=False):
     dirty_inode_data = set()
     for s in inode_map.obsolete_segments:
         filename = backend.loc_to_name(s)
-        print "Scanning", filename, "for live data"
+        #print "Scanning", filename, "for live data"
         for item in parse_log(backend.read(filename), filename):
             if item.type in (ITEM_TYPE.DATA, ITEM_TYPE.INODE):
                 if item.inum != 0:
@@ -575,30 +604,40 @@ def run_cleaner(backend, inode_map, log, repack_inodes=False):
                         dirty_inodes.add(item.inum)
                     if item.inum not in dirty_inode_data:
                         for b in inode.links:
-                            if s == b[1][0:2]:
+                            if b[1] is not None and s == b[1][0:2]:
                                 dirty_inode_data.add(item.inum)
                                 break
 
-    print "Inodes to rewrite:", dirty_inodes
-    print "Inodes with data to rewrite:", dirty_inode_data
+    #print "Inodes to rewrite:", dirty_inodes
+    #print "Inodes with data to rewrite:", dirty_inode_data
     for i in sorted(dirty_inodes.union(dirty_inode_data)):
         rewrite_inode(backend, inode_map, i, log, i in dirty_inode_data)
 
 if __name__ == '__main__':
+    benchlog = open('cleaner.log', 'a')
+    benchlog_write("*** START CLEANER RUN ***")
     start_time = time.time()
-    backend = S3Backend("mvrable-bluesky-west", cachedir="/export/cache")
+    backend = S3Backend("mvrable-bluesky-west", cachedir="/tmp/bluesky-cache")
     #backend = FileBackend(".")
     chkpt = load_checkpoint_record(backend)
-    print backend.list()
+    #print backend.list()
+    log_dir = LogDirectory(backend, 1)
     imap = InodeMap()
     imap.build(backend, chkpt)
     print chkpt
 
-    log_dir = LogDirectory(backend, 1)
+    print "Version vector:", imap.version_vector
+    print "Last cleaner log file:", log_dir.seq_num - 1
+    if imap.version_vector.get(1, -1) != log_dir.seq_num - 1:
+        print "Proxy hasn't updated to latest cleaner segment yet!"
+        benchlog_write("waiting for proxy...")
+        sys.exit(0)
+
     run_cleaner(backend, imap, log_dir)
     print "Version vector:", imap.version_vector
     imap.write(backend, log_dir)
     log_dir.close_all()
     end_time = time.time()
-    print "Cleaner running time:", end_time - start_time
     backend.dump_stats()
+    benchlog_write("running_time: %s", end_time - start_time)
+    benchlog_write("")
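The early exit added to the main block turns the cleaner and the proxy into alternating phases: the cleaner only does work once the proxy's checkpoint has absorbed the cleaner's most recent segment, which is why cleaner.log below interleaves "waiting for proxy..." entries with completed runs. A sketch of the handshake condition, with hypothetical names:

    CLEANER_DIR = 1  # log directory number reserved for cleaner output

    def proxy_caught_up(version_vector, next_seq):
        # The cleaner writes segments CLEANER_DIR/0, CLEANER_DIR/1, ...
        # and next_seq is the next sequence number it would use; run
        # only when the proxy's version vector covers the previous one.
        return version_vector.get(CLEANER_DIR, -1) == next_seq - 1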
diff --git a/results/20110815/synwrite-2048M-summary.data b/results/20110815/synwrite-2048M-summary.data
new file mode 100644 (file)
index 0000000..144499f
--- /dev/null
@@ -0,0 +1,13 @@
+2.0     0.0329503261202
+4.0     0.0324957906085
+6.0     0.0322965410105
+8.0     
+10.0    0.0320952405399
+12.0    
+14.0    2.03195491483
+16.0    0.0327706130346
+18.0    
+20.0    0.997681190573
+24.0    0.033307889687
+28.0    
+32.0    0.528391515259
diff --git a/results/20110815/synwrite-256M-summary.data b/results/20110815/synwrite-256M-summary.data
new file mode 100644 (file)
index 0000000..4df40dc
--- /dev/null
@@ -0,0 +1,13 @@
+2.0     0.211334479288
+4.0     0.649392315485
+6.0     0.499975254865
+8.0     0.639862457444
+10.0    0.596910132444
+12.0    0.575741621164
+14.0    0.668904821078
+16.0    0.673272160011
+18.0    0.676902224378
+20.0    0.679619228937
+24.0    0.683999513223
+28.0    0.686800989678
+32.0    0.666428181214
diff --git a/results/20110925/cleaner.data b/results/20110925/cleaner.data
new file mode 100644 (file)
index 0000000..12fae1d
--- /dev/null
@@ -0,0 +1,15 @@
+# bytes_used bytes_wasted bytes_freed s3_get s3_put
+67358975       1069199 9410    26353439        157
+67358983       12789943        0       12143384        16840128
+67358983       36592832        18993913        30128745        25259885
+67358983       51925625        31680172        24777202        33688007
+67358983       63050471        42848645        20596601        16840449
+67358983       45057387        26381295        38455006        33679642
+67358983       57715658        41202181        30594100        25260206
+67358983       50348573        34878155        18985720        16840128
+67358983       37645375        22200052        21088853        16848814
+67358983       32320159        21131934        34740350        33679642
+67358983       44901933        39023442        8445458 8428736
+67358983       14324563        12711535        8428728 8420371
+67358983       10042370        10023208        8971    8979
+67358983       37112   26315   8971    165
diff --git a/results/20110925/cleaner.log b/results/20110925/cleaner.log
new file mode 100644 (file)
index 0000000..8ec95f0
--- /dev/null
@@ -0,0 +1,172 @@
+*** START CLEANER RUN ***
+bytes_used: 67358975
+bytes_wasted: 1069199
+bytes_freed: 9410
+s3_get: 26353439
+s3_put: 157
+running_time: 4.74999809265
+
+*** START CLEANER RUN ***
+bytes_used: 67358975
+bytes_wasted: 9057959
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 12789943
+bytes_freed: 0
+s3_get: 12143384
+s3_put: 16840128
+running_time: 6.54327201843
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 36592832
+bytes_freed: 18993913
+s3_get: 30128745
+s3_put: 25259885
+running_time: 11.4323270321
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 48176903
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 51925625
+bytes_freed: 31680172
+s3_get: 24777202
+s3_put: 33688007
+running_time: 11.3818879128
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 59276654
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 63050471
+bytes_freed: 42848645
+s3_get: 20596601
+s3_put: 16840449
+running_time: 8.70604705811
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 41316709
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 45057387
+bytes_freed: 26381295
+s3_get: 38455006
+s3_put: 33679642
+running_time: 11.7080230713
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 57715658
+bytes_freed: 41202181
+s3_get: 30594100
+s3_put: 25260206
+running_time: 11.7434489727
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 47116877
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 50348573
+bytes_freed: 34878155
+s3_get: 18985720
+s3_put: 16840128
+running_time: 9.43346095085
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 37637010
+bytes_freed: 1602837
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 37645375
+bytes_freed: 22200052
+s3_get: 21088853
+s3_put: 16848814
+running_time: 7.93763995171
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 32294137
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 32320159
+bytes_freed: 21131934
+s3_get: 34740350
+s3_put: 33679642
+running_time: 12.6115119457
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 44867867
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 44901933
+bytes_freed: 39023442
+s3_get: 8445458
+s3_put: 8428736
+running_time: 5.66940903664
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 14307227
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 14324563
+bytes_freed: 12711535
+s3_get: 8428728
+s3_put: 8420371
+running_time: 5.03071594238
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 10033399
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 10042370
+bytes_freed: 10023208
+s3_get: 8971
+s3_put: 8979
+running_time: 0.643241882324
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 28141
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 28141
+bytes_freed: 0
+waiting for proxy...
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 37112
+bytes_freed: 26315
+s3_get: 8971
+s3_put: 165
+running_time: 0.594120025635
+
diff --git a/results/20110925/cleaner.txt b/results/20110925/cleaner.txt
new file mode 100644 (file)
index 0000000..2fa42c9
--- /dev/null
@@ -0,0 +1,22 @@
+Simple cleanerbench run (no cleaner during run):
+
+    Store[s3:mvrable-bluesky-west]: GETS: count=0 sum=0
+    Store[s3:mvrable-bluesky-west]: PUTS: count=69 sum=133523453
+    NFS RPC Messages In: count=524 sum=134267716
+    NFS RPC Messages Out: count=523 sum=62124
+
+    bytes_used: 67358975
+    bytes_wasted: 66164478
+    bytes_freed: 8559426
+    s3_get: 124964027
+    s3_put: 67358983
+    running_time: 34.587624073
+
+
+Repeated, with cleaner running periodically:
+
+    Store[s3:mvrable-bluesky-west]: GETS: count=26 sum=276333
+    Store[s3:mvrable-bluesky-west]: PUTS: count=74 sum=134287730
+    NFS RPC Messages In: count=521 sum=134267448
+    NFS RPC Messages Out: count=520 sum=61828
+
diff --git a/results/20110925/cleaner2.log b/results/20110925/cleaner2.log
new file mode 100644 (file)
index 0000000..98ffb99
--- /dev/null
@@ -0,0 +1,112 @@
+*** START CLEANER RUN ***
+bytes_used: 67358975
+bytes_wasted: 1069199
+bytes_freed: 9410
+s3_get: 26353439
+s3_put: 157
+running_time: 4.74999809265
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 12789943
+bytes_freed: 0
+s3_get: 12143384
+s3_put: 16840128
+running_time: 6.54327201843
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 36592832
+bytes_freed: 18993913
+s3_get: 30128745
+s3_put: 25259885
+running_time: 11.4323270321
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 51925625
+bytes_freed: 31680172
+s3_get: 24777202
+s3_put: 33688007
+running_time: 11.3818879128
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 63050471
+bytes_freed: 42848645
+s3_get: 20596601
+s3_put: 16840449
+running_time: 8.70604705811
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 45057387
+bytes_freed: 26381295
+s3_get: 38455006
+s3_put: 33679642
+running_time: 11.7080230713
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 57715658
+bytes_freed: 41202181
+s3_get: 30594100
+s3_put: 25260206
+running_time: 11.7434489727
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 50348573
+bytes_freed: 34878155
+s3_get: 18985720
+s3_put: 16840128
+running_time: 9.43346095085
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 37645375
+bytes_freed: 22200052
+s3_get: 21088853
+s3_put: 16848814
+running_time: 7.93763995171
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 32320159
+bytes_freed: 21131934
+s3_get: 34740350
+s3_put: 33679642
+running_time: 12.6115119457
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 44901933
+bytes_freed: 39023442
+s3_get: 8445458
+s3_put: 8428736
+running_time: 5.66940903664
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 14324563
+bytes_freed: 12711535
+s3_get: 8428728
+s3_put: 8420371
+running_time: 5.03071594238
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 10042370
+bytes_freed: 10023208
+s3_get: 8971
+s3_put: 8979
+running_time: 0.643241882324
+
+*** START CLEANER RUN ***
+bytes_used: 67358983
+bytes_wasted: 37112
+bytes_freed: 26315
+s3_get: 8971
+s3_put: 165
+running_time: 0.594120025635
+
diff --git a/results/20110925/extract.sh b/results/20110925/extract.sh
new file mode 100755 (executable)
index 0000000..ea704d4
--- /dev/null
@@ -0,0 +1,13 @@
+#!/bin/bash
+#
+# Gather statistics from a cleaner.log file and group them into plottable
+# form.
+
+DATAFILE="$1"
+
+extract() {
+    grep "$1" "$DATAFILE" | cut -f2 -d' '
+}
+
+echo "#extract bytes_used extract bytes_wasted extract bytes_freed extract s3_get extract s3_put"
+paste <(extract bytes_used) <(extract bytes_wasted) <(extract bytes_freed) <(extract s3_get) <(extract s3_put)
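extract.sh pulls the per-run counters out of a benchmark log and pastes them into the whitespace-separated columns of cleaner.data. A rough Python equivalent of the same grouping, assuming a log of completed runs (like cleaner2.log) so all columns have equal length:

    import sys

    FIELDS = ["bytes_used", "bytes_wasted", "bytes_freed", "s3_get", "s3_put"]

    def extract(path):
        # Collect each field's values in log order, then emit one row
        # per cleaner run, mirroring paste(1) over per-field streams.
        columns = dict((f, []) for f in FIELDS)
        for line in open(path):
            for f in FIELDS:
                if line.startswith(f + ":"):
                    columns[f].append(line.split()[1])
        for row in zip(*[columns[f] for f in FIELDS]):
            print "\t".join(row)

    if __name__ == "__main__":
        extract(sys.argv[1])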
diff --git a/results/figures/cleaner.gnuplot b/results/figures/cleaner.gnuplot
new file mode 100644 (file)
index 0000000..1fd5b3d
--- /dev/null
@@ -0,0 +1,16 @@
+load "common.gnuplot"
+
+set grid
+set title "Storage Used: Writes Running Concurrently with Cleaner"
+set ylabel "Cloud Storage Consumed (MB)"
+set key top right
+set noxtics
+set yrange [0:*]
+
+set output "cleaner.eps"
+plot "../20110925/cleaner.data" using 0:(($1+$2)/1024**2) with boxes fill solid 0.0 lt 1 title "Reclaimed", \
+     "../20110925/cleaner.data" using 0:(($1+$2-$3)/1024**2) with boxes fill solid 0.25 lt 1 title "Still Wasted", \
+     "../20110925/cleaner.data" using 0:($1/1024**2) with boxes fill solid 0.5 lt 1 title "Used"
+
+set output "cleaner-rw.eps"
+plot "../20110925/cleaner.data" using 0:($5/1024**2) with boxes fill solid 0.0 lt 1 title "Bytes Written"
diff --git a/results/figures/syn-write.gnuplot b/results/figures/syn-write.gnuplot
index 995dbb3..8820d23 100644 (file)
@@ -12,3 +12,9 @@ plot "../20110405/synwrite-256M-summary.data" using 1:($2*1000):($3*1000) with y
      "../20110405/synwrite-2048M-summary.data" using 1:($2*1000):($3*1000) with yerrorbars title "1 GB Write Buffer", \
      "../20110405/synwrite-256M-summary.data" using 1:($2*1000) with lines lt 1 notitle, \
      "../20110405/synwrite-2048M-summary.data" using 1:($2*1000) with lines lt 2 notitle
+
+set yrange [0:*]
+set output "syn-write10.eps"
+set title "Latency vs. Write Rate with Constrained Upload"
+plot "../20110815/synwrite-256M-summary.data" using 1:($2*1000) with linespoints title "128 MB Write Buffer"
+     #"../20110815/synwrite-2048M-summary.data" using 1:($2*1000) with linespoints title "1 GB Write Buffer"
diff --git a/results/kcompile.txt b/results/kcompile.txt
index 7d141a6..16f3b81 100644 (file)
@@ -135,3 +135,34 @@ S3-East, full segments:
     real    8m7.465s
     user    11m3.393s
     sys     1m30.578s
+
+==============================================================================
+NetApp
+2011-09-17
+
+ [1] real    2m19.113s
+     user    0m21.421s
+     sys     0m5.004s
+
+ [2] real    3m19.273s
+     user    0m3.288s
+     sys     0m3.488s
+
+ [3] real    4m31.247s
+     user    10m55.269s
+     sys     1m30.170s
+
+
+2011-09-18
+ [1] real    2m15.887s
+     user    0m21.949s
+     sys     0m4.864s
+
+ [2] real    3m13.297s
+     user    0m3.352s
+     sys     0m3.496s
+
+ [3] real    4m33.410s
+     user    11m3.057s
+     sys     1m30.874s
+