Add a script to parse the S3 and NFS traffic statistics from a run
author Michael Vrable <mvrable@cs.ucsd.edu>
Fri, 11 Mar 2011 00:40:58 +0000 (16:40 -0800)
committer Michael Vrable <mvrable@cs.ucsd.edu>
Fri, 11 Mar 2011 00:40:58 +0000 (16:40 -0800)
microbench/parse-stats.py [new file with mode: 0755]

diff --git a/microbench/parse-stats.py b/microbench/parse-stats.py
new file mode 100755 (executable)
index 0000000..c690e9f
--- /dev/null
@@ -0,0 +1,57 @@
+#!/usr/bin/python2
+#
+# Parse the periodically-dumped statistics counters from the BlueSky proxy.
+# Can be used to reconstruct the costs for interactions with S3 over time.
+#
+# To plot S3 (standard region) costs:
+#   plot "stats.data" using 1:($2 * 0.01e-4 + $4 * 0.01e-3 + $3 * 0.15/2**30 + $5 * 0.10/2**30) with lines
+#
+# NFS operation counts:
+#   plot "stats.data" using 1:6 with linespoints title "NFS In", "stats.data" using 1:8 with linespoints title "NFS Out"
+
+import os, re, sys
+
+counter_map = [  # (regex tried with match() on a counter's name, output category)
+    (re.compile(r"Store.*GETS"), 'store-get'),
+    (re.compile(r"Store.*PUTS"), 'store-put'),
+    (re.compile(r"NFS RPC.*In"), 'nfs-in'),
+    (re.compile(r"NFS RPC.*Out"), 'nfs-out'),
+]  # list order is the order in which process_file writes the columns
+
+def process_file(fp, outfile):
+    """Convert a BlueSky proxy statistics dump into tab-separated rows.
+
+    fp is an iterable of text lines and outfile is any object with a
+    write() method.  A line starting with "****" ends the current sample,
+    a "time=<number>" line sets the timestamp used for later rows, and
+    each "<name>: count=<n> sum=<n>" line is added to every counter_map
+    category whose pattern matches <name>.  A sample with at least one
+    matched counter is written as one row: the timestamp followed by a
+    count and a sum column for each category, in counter_map order.
+    """
+    # Compile once instead of handing the pattern strings to re.match on
+    # every input line.
+    time_re = re.compile(r"^time=([\d.]+)")
+    counter_re = re.compile(r"^(.*): count=(\d+) sum=(\d+)")
+
+    timestamp = 0
+    stats = {}
+
+    # Every category produces two data columns (count, then sum), so the
+    # header labels both; one label per category would leave the names
+    # misaligned with the data from the third column onwards.
+    outfile.write("# Timestamp\t")
+    for (k, v) in counter_map:
+        outfile.write("%s-count\t%s-sum\t" % (v, v))
+    outfile.write("\n")
+
+    def dump():
+        # Emit the accumulated sample, if any counter matched, then reset.
+        if stats:
+            outfile.write("%f\t" % (timestamp,))
+            for (k, v) in counter_map:
+                outfile.write("%d\t%d\t" % stats.get(v, (0, 0)))
+            outfile.write("\n")
+            stats.clear()
+
+    for line in fp:
+        if line.startswith("****"):
+            dump()
+
+        m = time_re.match(line)
+        if m:
+            timestamp = float(m.group(1))
+
+        m = counter_re.match(line)
+        if m:
+            count, total = int(m.group(2)), int(m.group(3))
+            for (k, v) in counter_map:
+                if k.match(m.group(1)):
+                    old = stats.get(v, (0, 0))
+                    stats[v] = (old[0] + count, old[1] + total)
+
+    # Flush whatever was accumulated after the last separator line.
+    dump()
+
+if __name__ == '__main__':  # run as a script: read the dump on stdin, write the table to stdout
+    process_file(sys.stdin, sys.stdout)