More cloud storage performance-measurement scripts.

author Michael Vrable <mvrable@cs.ucsd.edu>

Tue, 4 May 2010 23:53:14 +0000 (16:53 -0700)

committer Michael Vrable <mvrable@cs.ucsd.edu>

Tue, 4 May 2010 23:53:14 +0000 (16:53 -0700)
author Michael Vrable <mvrable@cs.ucsd.edu>
Tue, 4 May 2010 23:53:14 +0000 (16:53 -0700)
committer Michael Vrable <mvrable@cs.ucsd.edu>
Tue, 4 May 2010 23:53:14 +0000 (16:53 -0700)
diff --git a/cloudbench/cloudtest.py b/cloudbench/cloudtest.py

index 0755688..3743282 100755 (executable)
--- a/cloudbench/cloudtest.py
+++ b/cloudbench/cloudtest.py
@@ -58,16 +58,16 @@ def run_test():
          for size in SIZES:
              c.get_object('file-%d-%d' % (size, repeat), size)
  
-    print "==== AZURE ===="
-    c = AzureTestConnection()
-    for repeat in range(4):
-        for size in SIZES:
-            c.put_object('file-%d-%d' % (size, repeat), size)
-
-    c = AzureTestConnection()
-    for repeat in range(4):
-        for size in SIZES:
-            c.get_object('file-%d-%d' % (size, repeat), size)
+#    print "==== AZURE ===="
+#    c = AzureTestConnection()
+#    for repeat in range(4):
+#        for size in SIZES:
+#            c.put_object('file-%d-%d' % (size, repeat), size)
+#
+#    c = AzureTestConnection()
+#    for repeat in range(4):
+#        for size in SIZES:
+#            c.get_object('file-%d-%d' % (size, repeat), size)
  
  if __name__ == '__main__':
      run_test()
diff --git a/parsetrace/analyze-tcp.py b/parsetrace/analyze-tcp.py

index 47a3d7b..83125e7 100755 (executable)
--- a/parsetrace/analyze-tcp.py
+++ b/parsetrace/analyze-tcp.py
@@ -4,12 +4,15 @@
  # determine as much as possible about the performance of that connection.
  # (Specifically designed for measuring performance of fetches to Amazon S3.)
  
-import impacket, pcapy, re, sys
+import impacket, json, pcapy, re, sys
  import impacket.ImpactDecoder, impacket.ImpactPacket
  
  # Estimate of the network RTT
  RTT_EST = 0.03
  
+def dump_data(obj):  # Serialize obj (not a global) as indented JSON text with sorted keys.
+    return json.dumps(obj, sort_keys=True, indent=2)
+
  class Packet:
      def __init__(self, connection, ts, pkt):
          self.connection = connection
@@ -90,16 +93,23 @@ def split_trace(packets, predicate, before=True):
  
  def analyze_get(packets):
      packets = iter(packets)
-
-    # First packet is the GET request itself
      p = packets.next()
-    if not(p.direction > 0 and p.data.startswith('GET')):
-        print "Doesn't seem to be a GET request..."
-        return
  
      start_ts = p.ts
      id_out = p.id
  
+    # Check for connection establishment (SYN/SYN-ACK) and use that to estimate
+    # the network RTT.
+    if p.tcp.get_SYN():
+        p = packets.next()
+        #print "Connection establishment: RTT is", p.ts - start_ts
+        return {'syn_rtt': p.ts - start_ts}
+
+    # Otherwise, we expect the first packet to be the GET request itself
+    if not(p.direction > 0 and p.data.startswith('GET')):
+        #print "Doesn't seem to be a GET request..."
+        return
+
      # Find the first response packet containing data
      while not(p.direction < 0 and p.datalen > 0):
          p = packets.next()
@@ -107,30 +117,48 @@ def analyze_get(packets):
      resp_ts = p.ts
      id_in = p.id
      start_seq = p.seq[0]
+    tot_bytes = (p.seq[1] - start_seq) & 0xffffffff
+    spacings = []
  
-    print "Response time:", resp_ts - start_ts
+    #print "Response time:", resp_ts - start_ts
  
      # Scan through the incoming packets, looking for gaps in either the IP ID
      # field or in the timing
      last_ts = resp_ts
      for p in packets:
          gap = False
+        bytenr = (p.seq[1] - start_seq) & 0xffffffff
          if not p.direction < 0: continue
+        if p.tcp.get_FIN(): continue
+        spacings.append((p.ts - last_ts, bytenr))
          if p.id != (id_in + 1) & 0xffff:
              gap = True
-            print "Sequence number gap at", id_in
+            #print "Sequence number gap at", id_in
          if p.ts - last_ts > 2 * RTT_EST:
              gap = True
-            print "Long gap of", p.ts - last_ts
+            #print "Long gap of", p.ts - last_ts
          elif p.ts - last_ts > RTT_EST / 2:
              gap = True
-            print "Short gap of", p.ts - last_ts
+            #print "Short gap of", p.ts - last_ts
          if gap:
-            print "    [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]"
+            #print "    [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]"
+            pass
          if p.datalen not in (1448, 1460):
-            print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq
+            #print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq
+            pass
+        if (p.seq[0] - start_seq) & 0xffffffff != tot_bytes:
+            #print "Packet out of order; got sequence number %d, expected %d" \
+            #        % ((p.seq[0] - start_seq) & 0xffffffff, tot_bytes)
+            pass
          last_ts = p.ts
          id_in = p.id
+        tot_bytes = max(tot_bytes, bytenr)
+
+    #print "Transferred %d bytes in %s seconds, initial response after %s" % (tot_bytes, last_ts - start_ts, resp_ts - start_ts)
+    return {'bytes': tot_bytes,
+            'start_latency': resp_ts - start_ts,
+            'finish_latency': last_ts - start_ts,
+            'interpacket_times': spacings}
  
  if __name__ == '__main__':
      for f in sys.argv[1:]:
@@ -139,17 +167,22 @@ if __name__ == '__main__':
          ts = 0.0
          def request_start(p):
              return p.direction > 0 and p.datalen > 0
+        result_list = []
          for s in split_trace(conn.packets, request_start):
              s = list(s)
              if False:
                  for p in s:
-                    if p.ts - ts > 0.01:
-                        print "----"
-                    if p.ts - ts > 2 * RTT_EST:
-                        print "LONG DELAY\n----"
+                    #if p.ts - ts > 0.01:
+                        #print "----"
+                    #if p.ts - ts > 2 * RTT_EST:
+                        #print "LONG DELAY\n----"
                      ts = p.ts
-                    print p
-                    if p.direction > 0 and p.datalen > 0:
-                        print "Request:", repr(p.data)
-            analyze_get(s)
-            print "===="
+                    #print p
+                    #if p.direction > 0 and p.datalen > 0:
+                        #print "Request:", repr(p.data)
+            results = analyze_get(s)
+            if results is not None:
+                result_list.append(results)
+            #print "===="
+
+        print dump_data(result_list)
diff --git a/parsetrace/delay-graph.py b/parsetrace/delay-graph.py

new file mode 100755 (executable)

index 0000000..8030397
--- /dev/null
+++ b/parsetrace/delay-graph.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+
+import json, sys
+
+times = []  # inter-packet delays collected across all input files
+
+for f in sys.argv[1:]:
+    stats = json.load(open(f))  # each argument is a JSON file holding a list of records
+    for s in stats:
+        if 'interpacket_times' in s:  # records lacking this key are ignored
+            times += [x[0] for x in s['interpacket_times']]  # keep only the first element of each entry
+
+for t in times:
+    print "%f" % (t,)
diff --git a/parsetrace/latency-graph.py b/parsetrace/latency-graph.py

new file mode 100755 (executable)

index 0000000..dcff23c
--- /dev/null
+++ b/parsetrace/latency-graph.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python
+
+import json, sys
+
+for f in sys.argv[1:]:
+    stats = json.load(open(f))  # each argument is a JSON file holding a list of records
+    for s in stats:
+        try:
+            size = s['bytes']
+            lat1 = s['start_latency']
+            lat2 = s['finish_latency']
+            print "%d\t%f\t%f" % (size, lat1, lat2)  # one tab-separated row per complete record
+        except KeyError:  # record lacks a required key: skip it, but let other errors surface
+            pass
diff --git a/parsetrace/split-trace.py b/parsetrace/split-trace.py

index e1ff68a..caf77ee 100755 (executable)
--- a/parsetrace/split-trace.py
+++ b/parsetrace/split-trace.py
@@ -7,7 +7,8 @@ import impacket, itertools, pcapy, re, socket, subprocess, sys
  import impacket.ImpactDecoder, impacket.ImpactPacket
  
  # Domain names for cloud service providers, whose traces we want to pull out.
-DOMAINS = ['.amazon.com', '.core.windows.net']
+DOMAINS = ['.amazon.com', '.amazonaws.com', '.core.windows.net',
+           '204.246.162.', '87.238.86.']
  
  # The collection of flows we've seen.  The value associated with each flow is a
  # sequence number indicating in what order we saw the flows in the trace.
@@ -53,7 +54,10 @@ for file in sys.argv[1:]:
          matches = False
          for d in DOMAINS:
              if name.endswith(d): matches = True
-        if not matches: continue
+            if name.startswith(d): matches = True
+        if not matches:
+            print "Host", name, "not recognized, skipping"
+            continue
  
          filter = "tcp and (host %s and host %s) and (port %d and port %d)" \
              % (src, dst, sport, dport)
author	Michael Vrable <mvrable@cs.ucsd.edu>
	Tue, 4 May 2010 23:53:14 +0000 (16:53 -0700)
committer	Michael Vrable <mvrable@cs.ucsd.edu>
	Tue, 4 May 2010 23:53:14 +0000 (16:53 -0700)
cloudbench/cloudtest.py		patch \| blob \| history
parsetrace/analyze-tcp.py		patch \| blob \| history
parsetrace/delay-graph.py	[new file with mode: 0755]	patch \| blob
parsetrace/latency-graph.py	[new file with mode: 0755]	patch \| blob
parsetrace/split-trace.py		patch \| blob \| history