3 # Parse the periodically-dumped statistics counters from the BlueSky proxy.
4 # Can be used to reconstruct the costs for interactions with S3 over time.
6 # To plot S3 (standard region) costs:
7 # plot "stats.data" using 1:($2 * 0.01e-4 + $4 * 0.01e-3 + $3 * 0.15/2**30 + $5 * 0.10/2**30) with lines
9 # NFS operation counts:
10 # plot "stats.data" using 1:6 with linespoints title "NFS In", "stats.data" using 1:8 with linespoints title "NFS Out"
# Each entry pairs a compiled regex with an output label.  The regex is
# matched (anchored at the start) against the counter name that precedes
# ": count=..." on a statistics line; matching counters are summed under the
# label.  Entry order fixes the column order of the output rows.
# NOTE(review): the enclosing list's opening/closing lines are not visible in
# this view -- confirm these tuples sit inside the counter_map definition.
15 (re.compile(r"Store.*GETS"), 'store-get'),
16 (re.compile(r"Store.*PUTS"), 'store-put'),
17 (re.compile(r"NFS RPC.*In"), 'nfs-in'),
18 (re.compile(r"NFS RPC.*Out"), 'nfs-out'),
# Read statistics-dump lines from `fp` and write tab-separated rows of
# accumulated counters to `outfile` (any object with a write() method).
# NOTE(review): indentation and several statements of this function (guards
# on the regex matches, the loop over `fp`, the row-dump wrapper) are not
# visible in this view; the comments below cover only the visible lines.
21 def process_file(fp, outfile):
# Header: "# Timestamp" followed by one label per counter_map entry.
# NOTE(review): each counter produces two data columns (count, sum) below,
# but the header names only one column per counter.
25 outfile.write("# Timestamp\t")
26 for (k, v) in counter_map:
27 outfile.write("%s\t" % (v,))
# Data row: the current timestamp, then a (count, sum) pair per counter in
# counter_map order; counters with no data in this interval print as 0 0.
32 outfile.write("%f\t" % (timestamp,))
33 for (k, v) in counter_map:
34 outfile.write("%d\t%d\t" % (stats.get(v, (0, 0))))
# Lines beginning with "****" are tested for here; presumably they delimit
# one statistics dump from the next -- the guarded action is not visible.
39 if line.startswith("****"):
# "time=<number>" at the start of a line supplies the row timestamp.
# NOTE(review): m is None when the line does not match; confirm a guard
# precedes the group() call.
42 m = re.match(r"^time=([\d.]+)", line)
44 timestamp = float(m.group(1))
# "<name>: count=<int> sum=<int>" lines carry one counter sample.
46 m = re.match(r"^(.*): count=(\d+) sum=(\d+)", line)
48 vals = (int(m.group(2)), int(m.group(3)))
# Add the sample to every label whose pattern matches the counter name, so
# several proxy counters can be folded into a single output column pair.
49 for (k, v) in counter_map:
50 if k.match(m.group(1)):
51 if v not in stats: stats[v] = (0, 0)
52 stats[v] = (stats[v][0] + vals[0], stats[v][1] + vals[1])
# When run as a script, act as a filter: read the statistics dump from
# standard input and write the tabular data to standard output.
56 if __name__ == '__main__':
57 process_file(sys.stdin, sys.stdout)