diff --git a/microbench/workload.py b/microbench/workload.py
index d459b9d..0ce8ea0 100755
--- a/microbench/workload.py
+++ b/microbench/workload.py
@@ -4,21 +4,43 @@
 # files with a specified working set size, and measure the response time to do
 # so.
 
-import json, os, random, sys, threading, time
+import json, math, os, random, sys, threading, time
 
 THREADS = 1
 
+def percentile(rank, elements):
+    """Return the value at fractional rank [0.0, 1.0] within a sorted list,
+    linearly interpolating between neighboring elements as needed."""
+
+    # Convert a fraction in [0.0, 1.0] to a fractional index into elements.
+    rank = rank * (len(elements) - 1)
+
+    # Take the floor and ceiling of the fractional index, then interpolate
+    # linearly between the two surrounding elements.
+    prev = int(math.floor(rank))
+    next = int(math.ceil(rank))
+    frac = rank - prev
+
+    return (1.0 - frac) * elements[prev] + frac * elements[next]
+
 class WorkerThread:
     """Performs a mix of file system operations and records the performance."""
 
+    PARAMS = ['duration', 'write_fraction', 'wss_count', 'tot_count',
+              'filesize', 'target_ops']
+
     def __init__(self):
         self.stats = []
-        self.duration = 10.0        # Seconds for which to run
-        self.write_fraction = 0.3   # Fraction of operations which are writes
-        self.wss_count = 16         # Files in the working set
-        self.tot_count = 32         # Total number of files created
+        self.duration = 1800.0      # Seconds for which to run
+        self.write_fraction = 0.5   # Fraction of operations which are writes
+        self.wss_count = 2048       # Files in the working set
+        self.tot_count = 2048       # Total number of files created
         self.filesize = 32 * 1024   # Size of files to work with
-        self.target_ops = 2         # Target operations/second/thread
+        self.target_ops = 40        # Target operations/second/thread
+
+    def get_params(self):
+        """Return the tunable benchmark parameters as a dictionary."""
+        params = {}
+        for p in self.PARAMS:
+            params[p] = getattr(self, p)
+        return params
 
     def setup(self):
         for i in range(self.tot_count):
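
As a quick sanity check of the interpolation above (illustrative, not part of
the patch): percentile() returns the exact order statistics at the endpoints
and interpolates between neighbors elsewhere.

    assert percentile(0.0, [1.0, 2.0, 3.0, 4.0]) == 1.0
    assert percentile(0.5, [1.0, 2.0, 3.0, 4.0]) == 2.5   # index 1.5, midway
    assert percentile(1.0, [1.0, 2.0, 3.0, 4.0]) == 4.0
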
@@ -36,7 +58,7 @@ class WorkerThread:
             info = self._operation()
             time2 = time.time()
             self.stats.append((time1, time2 - time1, info))
-            print self.stats[-1]
+            #print self.stats[-1]
             delay = time1 + (1.0 / self.target_ops) - time2
             if delay > 0: time.sleep(delay)
 
@@ -56,25 +78,43 @@ class WorkerThread:
             fp.close()
             return ('read', filename)
 
+def print_distribution_stats(stats):
+    """Print the count, mean, and selected percentiles of a list of samples."""
+    stats = sorted(stats)
+    print "  Count:", len(stats)
+    if len(stats) == 0: return
+    print "  Average:", sum(stats) / len(stats)
+    for (s, p) in [("Min", 0.0), ("Med", 0.5), ("90%", 0.9),
+                   ("95%", 0.95), ("Max", 1.0)]:
+        print "  %s: %s" % (s, percentile(p, stats))
+
 def run_stats(stats):
     duration = max(x[0] for x in stats) - min(x[0] for x in stats)
     latencies = [x[1] for x in stats]
     latencies.sort()
     print "Experiment duration:", duration
-    print "Operation count:", len(stats)
-    print "Latencies:", latencies
-    print "Average latency:", sum(latencies) / len(latencies)
+    print "READS:"
+    print_distribution_stats([x[1] for x in stats if x[2][0] == 'read'])
+    print "WRITES:"
+    print_distribution_stats([x[1] for x in stats if x[2][0] == 'write'])
 
-if __name__ == '__main__':
+# Shared results log: each run appends its parameter block and its raw
+# per-operation records as JSON documents.
+fp = open('/tmp/results.json', 'a')
+
+def run(filecount, writefrac, filesize):
+    """Run one benchmark configuration and record the results."""
     workers = []
     threads = []
     for i in range(THREADS):
         w = WorkerThread()
+        w.write_fraction = writefrac
+        w.wss_count = w.tot_count = filecount
+        w.filesize = filesize
         if i == 0: w.setup()
         t = threading.Thread(target=w.run)
         threads.append(t)
         workers.append(w)
         t.start()
+
+    print json.dumps(workers[0].get_params(), indent=2)
+
     for t in threads:
         t.join()
 
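
Each record appended to self.stats in the run loop is a
(start_time, latency, info) tuple, where info is ('read', filename) or
('write', filename); run_stats() above splits the samples on info[0] to
report the read and write latency distributions separately. For example
(hypothetical values):

    # (start time,      latency, (op kind, file))
    #  (1300000000.123,  0.0042, ('read', 'file-17'))
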
@@ -83,5 +123,15 @@ if __name__ == '__main__':
         results += w.stats
     results.sort()
 
-    print json.dumps(results, indent=2)
+    fp.write(json.dumps(workers[0].get_params(), indent=2) + "\n\n")
+    fp.write(json.dumps(results, indent=2))
+    fp.write("\n\n")
     run_stats(results)
+
+if __name__ == '__main__':
+    for filesize in [32, 256, 2048]:            # file size, KiB
+        for totsize in [256, 512, 1024]:        # total data volume, MiB
+            # Scale the file count so every run touches totsize MiB in total.
+            filecount = totsize * 1024 / filesize
+            for writefrac in [0.0, 0.5]:
+                run(filecount, writefrac, filesize * 1024)
+    fp.close()
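
Since each run() appends two JSON documents (the parameter block, then the raw
results) to /tmp/results.json, the file is a stream of concatenated JSON
rather than a single document. A minimal reader sketch under that assumption
(load_results is a hypothetical helper, not part of the patch):

    import json

    def load_results(path='/tmp/results.json'):
        """Yield each JSON document appended to the results log, in order."""
        decoder = json.JSONDecoder()
        data = open(path).read()
        idx = 0
        while idx < len(data):
            # Skip the blank-line separators between documents.
            while idx < len(data) and data[idx].isspace():
                idx += 1
            if idx >= len(data):
                break
            obj, idx = decoder.raw_decode(data, idx)
            yield obj

Documents alternate between parameter dictionaries and result lists, so
pairing them back up is a matter of reading two at a time.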