Update the workload generator: add a linear-interpolation percentile helper, report separate read/write latency distributions, print the run parameters, and save raw results to /tmp/results.json; raise the default workload to a two-hour, 2048-file, 50%-write run. Also include some results from runs.
[bluesky.git] / microbench / workload.py
index d459b9d..3ec8942 100755 (executable)
@@ -4,21 +4,43 @@
 # files with a specified working set size, and measure the response time to do
 # so.
 
-import json, os, random, sys, threading, time
+import json, math, os, random, sys, threading, time
 
 THREADS = 1
 
+def percentile(rank, elements):
+    """Return the value at a fraction rank from the beginning of the list."""
+
+    # Convert a fraction [0.0, 1.0] to a fractional index in elements.
+    rank = rank * (len(elements) - 1)
+
+    # Take the floor and ceiling of the fractional index, then interpolate.
+    lo = int(math.floor(rank))
+    hi = int(math.ceil(rank))
+    frac = rank - lo
+
+    return (1.0 - frac) * elements[lo] + frac * elements[hi]
+
 class WorkerThread:
     """Performs a mix of file system operations and records the performance."""
 
+    PARAMS = ['duration', 'write_fraction', 'wss_count', 'tot_count',
+              'filesize', 'target_ops']
+
     def __init__(self):
         self.stats = []
-        self.duration = 10.0        # Seconds for which to run
-        self.write_fraction = 0.3   # Fraction of operations which are writes
-        self.wss_count = 16         # Files in the working set
-        self.tot_count = 32         # Total number of files created
-        self.filesize = 32 * 1024   # Size of files to work with
-        self.target_ops = 2         # Target operations/second/thread
+        self.duration = 7200.0      # Seconds for which to run
+        self.write_fraction = 0.5   # Fraction of operations which are writes
+        self.wss_count = 2048       # Files in the working set
+        self.tot_count = 2048       # Total number of files created
+        self.filesize = 256 * 1024  # Size of files to work with
+        self.target_ops = 10        # Target operations/second/thread
+
+    def get_params(self):
+        params = {}
+        for p in self.PARAMS:
+            params[p] = getattr(self, p)
+        return params
 
     def setup(self):
         for i in range(self.tot_count):
@@ -56,25 +78,38 @@ class WorkerThread:
             fp.close()
             return ('read', filename)
 
+def print_distribution_stats(stats):
+    stats = sorted(stats)
+    print "  Count:", len(stats)
+    if len(stats) == 0: return
+    print "  Average:", sum(stats) / len(stats)
+    for (s, p) in [("Min", 0.0), ("Med", 0.5), ("90%", 0.9),
+                   ("95%", 0.95), ("Max", 1.0)]:
+        print "  %s: %s" % (s, percentile(p, stats))
+
 def run_stats(stats):
     duration = max(x[0] for x in stats) - min(x[0] for x in stats)
-    latencies = [x[1] for x in stats]
-    latencies.sort()
     print "Experiment duration:", duration
-    print "Operation count:", len(stats)
-    print "Latencies:", latencies
-    print "Average latency:", sum(latencies) / len(latencies)
+    print "READS:"
+    print_distribution_stats([x[1] for x in stats if x[2][0] == 'read'])
+    print "WRITES:"
+    print_distribution_stats([x[1] for x in stats if x[2][0] == 'write'])
 
 if __name__ == '__main__':
     workers = []
     threads = []
     for i in range(THREADS):
         w = WorkerThread()
-        if i == 0: w.setup()
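+        # Note: setup() is disabled; this run reuses files created by an earlier invocation.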
+        #if i == 0: w.setup()
         t = threading.Thread(target=w.run)
         threads.append(t)
         workers.append(w)
         t.start()
+
+    print json.dumps(workers[0].get_params(), indent=2)
+
     for t in threads:
         t.join()
 
@@ -83,5 +118,8 @@ if __name__ == '__main__':
         results += w.stats
     results.sort()
 
-    print json.dumps(results, indent=2)
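+    # Save the run parameters and raw per-operation results for offline analysis.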
+    fp = open('/tmp/results.json', 'w')
+    fp.write(json.dumps(workers[0].get_params(), indent=2) + "\n\n")
+    fp.write(json.dumps(results, indent=2))
+    fp.close()
     run_stats(results)
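
With the new defaults, the parameter block printed at startup and written at the top of /tmp/results.json comes out roughly as follows (key order may vary, since get_params() builds a plain dict):

{
  "duration": 7200.0,
  "write_fraction": 0.5,
  "wss_count": 2048,
  "tot_count": 2048,
  "filesize": 262144,
  "target_ops": 10
}

For offline analysis, /tmp/results.json can be split back into its two JSON documents at the blank line between them. A minimal sketch in the same Python 2 style as the script; load_results() is a name invented here, and the record layout [timestamp, latency, [op, filename]] is inferred from run_stats():

import json, math

def percentile(rank, elements):
    # Same linear-interpolation percentile as in workload.py.
    rank = rank * (len(elements) - 1)
    lo, hi = int(math.floor(rank)), int(math.ceil(rank))
    frac = rank - lo
    return (1.0 - frac) * elements[lo] + frac * elements[hi]

def load_results(path='/tmp/results.json'):
    # The file holds the parameter dict, a blank line, then the
    # sorted list of [timestamp, latency, [op, filename]] records.
    (params, results) = open(path).read().split('\n\n', 1)
    return json.loads(params), json.loads(results)

params, results = load_results()
for op in ('read', 'write'):
    latencies = sorted(r[1] for r in results if r[2][0] == op)
    if not latencies: continue
    print "%sS:" % op.upper()
    for (label, p) in [("Med", 0.5), ("90%", 0.9), ("95%", 0.95)]:
        print "  %s: %s" % (label, percentile(p, latencies))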