More cloud storage performance-measurement scripts.
author Michael Vrable <mvrable@cs.ucsd.edu>
Tue, 4 May 2010 23:53:14 +0000 (16:53 -0700)
committer Michael Vrable <mvrable@cs.ucsd.edu>
Tue, 4 May 2010 23:53:14 +0000 (16:53 -0700)
cloudbench/cloudtest.py
parsetrace/analyze-tcp.py
parsetrace/delay-graph.py [new file with mode: 0755]
parsetrace/latency-graph.py [new file with mode: 0755]
parsetrace/split-trace.py

diff --git a/cloudbench/cloudtest.py b/cloudbench/cloudtest.py
index 0755688..3743282 100755 (executable)
@@ -58,16 +58,16 @@ def run_test():
         for size in SIZES:
             c.get_object('file-%d-%d' % (size, repeat), size)
 
-    print "==== AZURE ===="
-    c = AzureTestConnection()
-    for repeat in range(4):
-        for size in SIZES:
-            c.put_object('file-%d-%d' % (size, repeat), size)
-
-    c = AzureTestConnection()
-    for repeat in range(4):
-        for size in SIZES:
-            c.get_object('file-%d-%d' % (size, repeat), size)
+#    print "==== AZURE ===="
+#    c = AzureTestConnection()
+#    for repeat in range(4):
+#        for size in SIZES:
+#            c.put_object('file-%d-%d' % (size, repeat), size)
+#
+#    c = AzureTestConnection()
+#    for repeat in range(4):
+#        for size in SIZES:
+#            c.get_object('file-%d-%d' % (size, repeat), size)
 
 if __name__ == '__main__':
     run_test()
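
The Azure half of run_test() is disabled by commenting it out rather than removing it. As a sketch only, the same loop structure could take the provider list from the command line; S3TestConnection, AzureTestConnection, and SIZES are assumed to be defined elsewhere in cloudtest.py:

import sys

# Hedged alternative: select providers on the command line instead of
# commenting code in and out.  The connection classes and SIZES are the
# ones assumed to exist in this file.
PROVIDERS = {'s3': S3TestConnection, 'azure': AzureTestConnection}

def run_selected(names):
    for n in names:
        print "==== %s ====" % n.upper()
        factory = PROVIDERS[n]

        c = factory()                      # upload pass
        for repeat in range(4):
            for size in SIZES:
                c.put_object('file-%d-%d' % (size, repeat), size)

        c = factory()                      # download pass on a fresh connection
        for repeat in range(4):
            for size in SIZES:
                c.get_object('file-%d-%d' % (size, repeat), size)

if __name__ == '__main__':
    run_selected(sys.argv[1:] or ['s3'])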
diff --git a/parsetrace/analyze-tcp.py b/parsetrace/analyze-tcp.py
index 47a3d7b..83125e7 100755 (executable)
@@ -4,12 +4,15 @@
 # determine as much as possible about the performance of that connection.
 # (Specifically designed for measuring performance of fetches to Amazon S3.)
 
-import impacket, pcapy, re, sys
+import impacket, json, pcapy, re, sys
 import impacket.ImpactDecoder, impacket.ImpactPacket
 
 # Estimate of the network RTT
 RTT_EST = 0.03
 
+def dump_data(obj):
+    return json.dumps(obj, sort_keys=True, indent=2)
+
 class Packet:
     def __init__(self, connection, ts, pkt):
         self.connection = connection
@@ -90,16 +93,23 @@ def split_trace(packets, predicate, before=True):
 
 def analyze_get(packets):
     packets = iter(packets)
-
-    # First packet is the GET request itself
     p = packets.next()
-    if not(p.direction > 0 and p.data.startswith('GET')):
-        print "Doesn't seem to be a GET request..."
-        return
 
     start_ts = p.ts
     id_out = p.id
 
+    # Check for connection establishment (SYN/SYN-ACK) and use that to estimate
+    # the network RTT.
+    if p.tcp.get_SYN():
+        p = packets.next()
+        #print "Connection establishment: RTT is", p.ts - start_ts
+        return {'syn_rtt': p.ts - start_ts}
+
+    # Otherwise, we expect the first packet to be the GET request itself
+    if not(p.direction > 0 and p.data.startswith('GET')):
+        #print "Doesn't seem to be a GET request..."
+        return
+
     # Find the first response packet containing data
     while not(p.direction < 0 and p.datalen > 0):
         p = packets.next()
@@ -107,30 +117,48 @@ def analyze_get(packets):
     resp_ts = p.ts
     id_in = p.id
     start_seq = p.seq[0]
+    tot_bytes = (p.seq[1] - start_seq) & 0xffffffff
+    spacings = []
 
-    print "Response time:", resp_ts - start_ts
+    #print "Response time:", resp_ts - start_ts
 
     # Scan through the incoming packets, looking for gaps in either the IP ID
     # field or in the timing
     last_ts = resp_ts
     for p in packets:
         gap = False
+        bytenr = (p.seq[1] - start_seq) & 0xffffffff
         if not p.direction < 0: continue
+        if p.tcp.get_FIN(): continue
+        spacings.append((p.ts - last_ts, bytenr))
         if p.id != (id_in + 1) & 0xffff:
             gap = True
-            print "Sequence number gap at", id_in
+            #print "Sequence number gap at", id_in
         if p.ts - last_ts > 2 * RTT_EST:
             gap = True
-            print "Long gap of", p.ts - last_ts
+            #print "Long gap of", p.ts - last_ts
         elif p.ts - last_ts > RTT_EST / 2:
             gap = True
-            print "Short gap of", p.ts - last_ts
+            #print "Short gap of", p.ts - last_ts
         if gap:
-            print "    [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]"
+            #print "    [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]"
+            pass
         if p.datalen not in (1448, 1460):
-            print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq
+            #print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq
+            pass
+        if (p.seq[0] - start_seq) & 0xffffffff != tot_bytes:
+            #print "Packet out of order; got sequence number %d, expected %d" \
+            #        % ((p.seq[0] - start_seq) & 0xffffffff, tot_bytes)
+            pass
         last_ts = p.ts
         id_in = p.id
+        tot_bytes = max(tot_bytes, bytenr)
+
+    #print "Transferred %d bytes in %s seconds, initial response after %s" % (tot_bytes, last_ts - start_ts, resp_ts - start_ts)
+    return {'bytes': tot_bytes,
+            'start_latency': resp_ts - start_ts,
+            'finish_latency': last_ts - start_ts,
+            'interpacket_times': spacings}
 
 if __name__ == '__main__':
     for f in sys.argv[1:]:
@@ -139,17 +167,22 @@ if __name__ == '__main__':
         ts = 0.0
         def request_start(p):
             return p.direction > 0 and p.datalen > 0
+        result_list = []
         for s in split_trace(conn.packets, request_start):
             s = list(s)
             if False:
                 for p in s:
-                    if p.ts - ts > 0.01:
-                        print "----"
-                    if p.ts - ts > 2 * RTT_EST:
-                        print "LONG DELAY\n----"
+                    #if p.ts - ts > 0.01:
+                        #print "----"
+                    #if p.ts - ts > 2 * RTT_EST:
+                        #print "LONG DELAY\n----"
                     ts = p.ts
-                    print p
-                    if p.direction > 0 and p.datalen > 0:
-                        print "Request:", repr(p.data)
-            analyze_get(s)
-            print "===="
+                    #print p
+                    #if p.direction > 0 and p.datalen > 0:
+                        #print "Request:", repr(p.data)
+            results = analyze_get(s)
+            if results is not None:
+                result_list.append(results)
+            #print "===="
+
+        print dump_data(result_list)
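
With these changes analyze-tcp.py prints, for each trace file, a JSON list with one dictionary per request: connection-establishment records carry only 'syn_rtt', while GET records carry 'bytes', 'start_latency', 'finish_latency', and 'interpacket_times' (pairs of gap-since-previous-data-packet and byte offset reached). A minimal consumer sketch, with a purely illustrative file name:

import json

stats = json.load(open('s3-trace-0.json'))   # hypothetical output file
for s in stats:
    if 'syn_rtt' in s:
        # Connection setup: only the SYN/SYN-ACK round trip is recorded.
        print "handshake RTT: %f s" % s['syn_rtt']
    elif 'bytes' in s:
        # A GET: total bytes, time to first data packet, time to last packet.
        print "%d bytes, first data after %f s, done after %f s" \
            % (s['bytes'], s['start_latency'], s['finish_latency'])
        for gap, offset in s['interpacket_times']:
            pass   # gap in seconds since previous data packet, byte offset reached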
diff --git a/parsetrace/delay-graph.py b/parsetrace/delay-graph.py
new file mode 100755 (executable)
index 0000000..8030397
--- /dev/null
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+
+import json, sys
+
+times = []
+
+for f in sys.argv[1:]:
+    stats = json.load(open(f))
+    for s in stats:
+        if 'interpacket_times' in s:
+            times += [x[0] for x in s['interpacket_times']]
+
+for t in times:
+    print "%f" % (t,)
diff --git a/parsetrace/latency-graph.py b/parsetrace/latency-graph.py
new file mode 100755 (executable)
index 0000000..dcff23c
--- /dev/null
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+
+import json, sys
+
+for f in sys.argv[1:]:
+    stats = json.load(open(f))
+    for s in stats:
+        try:
+            size = s['bytes']
+            lat1 = s['start_latency']
+            lat2 = s['finish_latency']
+            print "%d\t%f\t%f" % (size, lat1, lat2)
+        except KeyError:
+            pass
diff --git a/parsetrace/split-trace.py b/parsetrace/split-trace.py
index e1ff68a..caf77ee 100755 (executable)
@@ -7,7 +7,8 @@ import impacket, itertools, pcapy, re, socket, subprocess, sys
 import impacket.ImpactDecoder, impacket.ImpactPacket
 
 # Domain names for cloud service providers, whose traces we want to pull out.
-DOMAINS = ['.amazon.com', '.core.windows.net']
+DOMAINS = ['.amazon.com', '.amazonaws.com', '.core.windows.net',
+           '204.246.162.', '87.238.86.']
 
 # The collection of flows we've seen.  The value associated with each flow is a
 # sequence number indicating in what order we saw the flows in the trace.
@@ -53,7 +54,10 @@ for file in sys.argv[1:]:
         matches = False
         for d in DOMAINS:
             if name.endswith(d): matches = True
-        if not matches: continue
+            if name.startswith(d): matches = True
+        if not matches:
+            print "Host", name, "not recognized, skipping"
+            continue
 
         filter = "tcp and (host %s and host %s) and (port %d and port %d)" \
             % (src, dst, sport, dport)
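
The expanded DOMAINS list mixes hostname suffixes (matched with endswith) and raw IP-address prefixes (matched with startswith); a flow is kept when either test succeeds for any entry. The matching rule in isolation, with hypothetical inputs:

DOMAINS = ['.amazon.com', '.amazonaws.com', '.core.windows.net',
           '204.246.162.', '87.238.86.']

def is_cloud_endpoint(name):
    # name is either a reverse-DNS hostname or a bare dotted-quad address.
    for d in DOMAINS:
        if name.endswith(d) or name.startswith(d):
            return True
    return False

# e.g. is_cloud_endpoint('bucket.s3.amazonaws.com') -> True   (hypothetical host)
#      is_cloud_endpoint('204.246.162.17')          -> True   (example address)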