Add a few more test scripts.
author: Michael Vrable <mvrable@cs.ucsd.edu>
Wed, 26 May 2010 21:17:41 +0000 (14:17 -0700)
committer: Michael Vrable <mvrable@cs.ucsd.edu>
Wed, 26 May 2010 21:17:41 +0000 (14:17 -0700)
cloudbench/paralleltest.py [new file with mode: 0755]
parsetrace/trace-summary.py [new file with mode: 0755]

diff --git a/cloudbench/paralleltest.py b/cloudbench/paralleltest.py
new file mode 100755 (executable)
index 0000000..f1b0be2
--- /dev/null
@@ -0,0 +1,77 @@
+#!/usr/bin/python
+#
+# Run a series of simple test requests against S3 for gathering some basic
+# performance numbers.
+
+import boto, time
+from boto.s3.connection import SubdomainCallingFormat
+from boto.s3.key import Key
+import sys, threading, time
+import azure
+
+BUCKET_NAME = 'mvrable-benchmark-west'
+SIZES = [64, 4096, 32 << 10, 256 << 10, 1 << 20, 4 << 20, 32 << 20]
+
class S3TestConnection:
    """Wrapper around a boto S3 connection to the benchmark bucket.

    Each put/get helper times a single request and prints the object
    name followed by the elapsed wall-clock seconds.
    """

    def __init__(self):
        # Plain HTTP (is_secure=False) so TLS handshakes do not skew
        # the request timings being measured.
        self.conn = boto.connect_s3(is_secure=False,
                                    calling_format=SubdomainCallingFormat())
        self.bucket = self.conn.get_bucket(BUCKET_NAME)

    def put_object(self, name, size):
        # Upload `size` bytes of filler data and report elapsed time.
        payload = 'A' * size
        key = Key(self.bucket, name)
        started = time.time()
        key.set_contents_from_string(payload)
        elapsed = time.time() - started
        print("%s: %f" % (name, elapsed))

    def get_object(self, name):
        # Download the named object (contents discarded) and report
        # elapsed time.
        key = Key(self.bucket, name)
        started = time.time()
        key.get_contents_as_string()
        elapsed = time.time() - started
        print("%s: %f" % (name, elapsed))
+
def parallel_get(name, connections, delay1=0.0):
    """Fetch object `name` once on every connection, in parallel.

    One thread is spawned per connection.  After the first thread is
    started we optionally sleep `delay1` seconds so that one request
    gets a head start on the rest.  Blocks until all downloads finish,
    then pauses one second so consecutive runs do not overlap.
    """
    print("Get: %s x %d" % (name, len(connections)))
    workers = [threading.Thread(target=conn.get_object, args=(name,))
               for conn in connections]
    for idx, worker in enumerate(workers):
        worker.start()
        if idx == 0:
            time.sleep(delay1)
    for worker in workers:
        worker.join()
    time.sleep(1.0)
+
def run_test():
    """Time GET requests for every benchmark object (4 repeats x size).

    The upload pass is currently disabled (the put_object call is
    commented out), so the first loop is a deliberate no-op kept for
    easy re-enabling; the objects are assumed to already exist in the
    bucket.
    """
    print("==== S3 ====")
    conn = S3TestConnection()
    for rep in range(4):
        for sz in SIZES:
            # Upload pass disabled; objects assumed present.
            # conn.put_object('file-%d-%d' % (sz, rep), sz)
            pass

    # Fresh connection for the download pass.
    conn = S3TestConnection()
    for rep in range(4):
        for sz in SIZES:
            conn.get_object('file-%d-%d' % (sz, rep))
+
if __name__ == '__main__':
    # Pass 1: Identical downloads in parallel
    connections = [S3TestConnection() for _ in range(8)]
    # Rebinds the module-level SIZES list (drops 64-byte and 32 MB cases).
    SIZES = [4096, 32 << 10, 256 << 10, 1 << 20, 4 << 20]
    # NOTE(review): PRIME and the extra connection `c` below are never
    # used in this pass — presumably leftovers from an earlier version.
    PRIME = (1 << 20) + (1 << 10)
    c = S3TestConnection()
    for size in SIZES:
        for i in range(32):
            parallel_get('file-%d-%d' % (size, i), connections)

    # Pass 2: Downloads in parallel, but downloads staggered so one request
    # arrives earlier
    connections = [S3TestConnection() for _ in range(8)]
    SIZES = [4096, 32 << 10, 256 << 10, 1 << 20, 4 << 20]
    # NOTE(review): PRIME and `c` are unused here as well.
    PRIME = (1 << 20) + (1 << 10)
    c = S3TestConnection()
    for size in SIZES:
        for i in range(32):
            parallel_get('file-%d-%d' % (size, i), connections, delay1=1.0)
diff --git a/parsetrace/trace-summary.py b/parsetrace/trace-summary.py
new file mode 100755 (executable)
index 0000000..d162cbc
--- /dev/null
@@ -0,0 +1,29 @@
+#!/usr/bin/python
+
+import json, sys
+
# Summarize per-connection statistics from the JSON trace files named on
# the command line, printing one tab-separated line per connection:
# event count, total delay, mean delay, total transfer time.

times = []  # NOTE(review): never appended to, so the final loop is a no-op

for path in sys.argv[1:]:
    stats = json.load(open(path))
    for idx, conn_stats in enumerate(stats):
        name = "%s-%d" % (path, idx)
        event_count = 0
        event_locs = []
        delay_total = 0.0
        if 'interpacket_times' in conn_stats:
            for entry in conn_stats['interpacket_times']:
                # entry layout appears to be [gap, location, flag, ...];
                # everything past the first two fields is a flag string.
                flags = entry[2:]
                if 'LAST_PACKET_SHORT' in flags and '9000' in flags:
                    event_count += 1
                    delay_total += entry[0]
                    event_locs.append(entry[1])
            # finish_latency is in microseconds; convert to seconds.
            total_time = conn_stats['finish_latency'] * 1e-6
            # NOTE(review): the "or True" forces output for every
            # connection; drop it to report only connections with events.
            if event_count > 0 or True:
                #print "%s: %d %s" % (name, event_count, event_locs)
                mean_delay = event_count and delay_total / event_count or 0.0
                print("%d\t%s\t%s\t%f"
                      % (event_count, delay_total, mean_delay, total_time))

for t in times:
    print("%f\t%s\t%s" % (t[0], t[1], ','.join(t[2])))
+