# files with a specified working set size, and measure the response time to do
# so.
-import json, os, random, sys, threading, time
+import json, math, os, random, sys, threading, time
THREADS = 1
+def percentile(rank, elements):
+    """Return the interpolated value at fraction `rank` of `elements`.
+
+    `rank` is a fraction in [0.0, 1.0]; `elements` must be sorted and
+    non-empty (callers guard the empty case before calling).
+    """
+
+    # Convert the fraction [0.0, 1.0] to a fractional index in elements.
+    index = rank * (len(elements) - 1)
+
+    # Bracket the fractional index between its neighboring integer
+    # indices, then linearly interpolate between the two values.
+    # ('lo'/'hi' rather than 'prev'/'next', which shadowed a builtin.)
+    lo = int(math.floor(index))
+    hi = int(math.ceil(index))
+    frac = index - lo
+
+    return (1.0 - frac) * elements[lo] + frac * elements[hi]
+
class WorkerThread:
"""Performs a mix of file system operations and records the performance."""
+    # Attribute names that define a run's configuration; get_params()
+    # reads exactly these via getattr.
+    PARAMS = ['duration', 'write_fraction', 'wss_count', 'tot_count',
+        'filesize', 'target_ops']
+
    def __init__(self):
+        """Create an idle worker with the default benchmark parameters."""
        self.stats = []
-        self.duration = 10.0 # Seconds for which to run
-        self.write_fraction = 0.3 # Fraction of operations which are writes
-        self.wss_count = 16 # Files in the working set
-        self.tot_count = 32 # Total number of files created
-        self.filesize = 32 * 1024 # Size of files to work with
-        self.target_ops = 2 # Target operations/second/thread
+        self.duration = 7200.0 # Seconds for which to run
+        self.write_fraction = 0.5 # Fraction of operations which are writes
+        # NOTE(review): wss_count == tot_count, so the working set now
+        # spans every file created -- confirm this is intentional.
+        self.wss_count = 2048 # Files in the working set
+        self.tot_count = 2048 # Total number of files created
+        self.filesize = 256 * 1024 # Size of files to work with
+        self.target_ops = 10 # Target operations/second/thread
+
+    def get_params(self):
+        """Return the benchmark's tunable parameters as a name -> value dict."""
+        return dict((p, getattr(self, p)) for p in self.PARAMS)
def setup(self):
for i in range(self.tot_count):
fp.close()
return ('read', filename)
+def print_distribution_stats(stats):
+    """Print the count, mean, and key percentiles of a latency sample."""
+    ordered = sorted(stats)
+    count = len(ordered)
+    print " Count:", count
+    if count == 0: return
+    print " Average:", sum(ordered) / count
+    points = [("Min", 0.0), ("Med", 0.5), ("90%", 0.9),
+              ("95%", 0.95), ("Max", 1.0)]
+    for (label, frac) in points:
+        print " %s: %s" % (label, percentile(frac, ordered))
+
def run_stats(stats):
    duration = max(x[0] for x in stats) - min(x[0] for x in stats)
-    latencies = [x[1] for x in stats]
-    latencies.sort()
    print "Experiment duration:", duration
-    print "Operation count:", len(stats)
-    print "Latencies:", latencies
-    print "Average latency:", sum(latencies) / len(latencies)
+    # Report read and write latency distributions separately.  Each stat
+    # entry is (timestamp, latency, (operation, filename)).
+    print "READS:"
+    print_distribution_stats([x[1] for x in stats if x[2][0] == 'read'])
+    print "WRITES:"
+    print_distribution_stats([x[1] for x in stats if x[2][0] == 'write'])
if __name__ == '__main__':
    workers = []
    threads = []
+    results = []
    for i in range(THREADS):
        w = WorkerThread()
-        if i == 0: w.setup()
+        # NOTE(review): setup() is left disabled -- presumably the data
+        # files survive from a prior run; confirm before a fresh run.
+        #if i == 0: w.setup()
        t = threading.Thread(target=w.run)
        threads.append(t)
        workers.append(w)
        t.start()
+
+    # Echo the run parameters up front so a live run is identifiable.
+    print json.dumps(workers[0].get_params(), indent=2)
+
-    for t in threads:
-        t.join()
-        results += w.stats
+    # Pair each thread with its own worker when collecting stats: 'w'
+    # here previously referred to the last-created worker for every
+    # iteration, so with THREADS > 1 stats were miscounted.
+    for w, t in zip(workers, threads):
+        t.join()
+        results += w.stats
    results.sort()
-    print json.dumps(results, indent=2)
+    # Persist the raw results (with parameters) for offline analysis.
+    fp = open('/tmp/results.json', 'w')
+    fp.write(json.dumps(workers[0].get_params(), indent=2) + "\n\n")
+    fp.write(json.dumps(results, indent=2))
+    fp.close()
    run_stats(results)