Add proper per-file copyright notices/licenses and top-level license.
[bluesky.git] / parsetrace / analyze-tcp.py
index 54b32dc..fb58b6a 100755 (executable)
@@ -4,11 +4,14 @@
 # determine as much as possible about the performance of that connection.
 # (Specifically designed for measuring performance of fetches to Amazon S3.)
 
-import impacket, pcapy, re, sys
+import impacket, json, pcapy, re, sys
 import impacket.ImpactDecoder, impacket.ImpactPacket
 
 # Estimate of the network RTT
-RTT_EST = 0.03
+RTT_EST = 0.03 * 1e6
+
+def dump_data(obj):  # Return obj (not the global result_list) as key-sorted, 2-space-indented JSON text.
+    return json.dumps(obj, sort_keys=True, indent=2)
 
 class Packet:
     def __init__(self, connection, ts, pkt):
@@ -62,7 +65,7 @@ class TcpAnalysis:
         if self.start_time is None:
             self.start_time = ts
         ts -= self.start_time
-        pkt = Packet(self, ts * 1e-6, self.decoder.decode(data))
+        pkt = Packet(self, ts, self.decoder.decode(data))
         self.packets.append(pkt)
 
 def split_trace(packets, predicate, before=True):
@@ -88,18 +91,26 @@ def split_trace(packets, predicate, before=True):
     if len(segment) > 0:
         yield segment
 
-def analyze_get(packets):
+def analyze_get(packets, prev_time = None):
     packets = iter(packets)
-
-    # First packet is the GET request itself
     p = packets.next()
-    if not(p.direction > 0 and p.data.startswith('GET')):
-        print "Doesn't seem to be a GET request..."
-        return
 
     start_ts = p.ts
     id_out = p.id
 
+    # Check for connection establishment (SYN/SYN-ACK) and use that to estimate
+    # the network RTT.
+    if p.tcp.get_SYN():
+        addr = p.ip.get_ip_dst()
+        p = packets.next()
+        #print "Connection establishment: RTT is", p.ts - start_ts
+        return {'syn_rtt': (p.ts - start_ts) / 1e6, 'addr': addr}
+
+    # Otherwise, we expect the first packet to be the GET request itself
+    if not(p.direction > 0 and p.data.startswith('GET')):
+        #print "Doesn't seem to be a GET request..."
+        return
+
     # Find the first response packet containing data
     while not(p.direction < 0 and p.datalen > 0):
         p = packets.next()
@@ -107,49 +118,75 @@ def analyze_get(packets):
     resp_ts = p.ts
     id_in = p.id
     start_seq = p.seq[0]
+    tot_bytes = (p.seq[1] - start_seq) & 0xffffffff
+    spacings = []
 
-    print "Response time:", resp_ts - start_ts
+    #print "Response time:", resp_ts - start_ts
 
     # Scan through the incoming packets, looking for gaps in either the IP ID
     # field or in the timing
     last_ts = resp_ts
+    last_was_short = False
     for p in packets:
         gap = False
+        flags = []
+        bytenr = (p.seq[1] - start_seq) & 0xffffffff
         if not p.direction < 0: continue
+        if p.tcp.get_FIN(): continue
+
+        if last_was_short:
+            flags.append('LAST_PACKET_SHORT')
+        last_was_short = False
         if p.id != (id_in + 1) & 0xffff:
             gap = True
-            print "Sequence number gap at", id_in
-        if p.ts - last_ts > 2 * RTT_EST:
-            gap = True
-            print "Long gap of", p.ts - last_ts
-        elif p.ts - last_ts > RTT_EST / 2:
-            gap = True
-            print "Short gap of", p.ts - last_ts
-        if gap:
-            print "    [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]"
-        if p.datalen < 1460:
-            print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq
+            flags.append('IPID_GAP')
+        if p.datalen not in (1448, 1460):
+            last_was_short = True
+        if (p.seq[0] - start_seq) & 0xffffffff != tot_bytes:
+            flags.append('OUT_OF_ORDER')
+        if ((p.seq[0] - start_seq) & 0xffffffff) % 9000 == 0:
+            flags.append('9000')
+        spacings.append(((p.ts - last_ts) / 1e6, bytenr) + tuple(flags))
         last_ts = p.ts
         id_in = p.id
+        tot_bytes = max(tot_bytes, bytenr)
+
+    #print "Transferred %d bytes in %s seconds, initial response after %s" % (tot_bytes, last_ts - start_ts, resp_ts - start_ts)
+    if prev_time is not None:
+        prev_delay = start_ts - prev_time
+    else:
+        prev_delay = 0
+    return {'bytes': tot_bytes,
+            'start_latency': resp_ts - start_ts,
+            'finish_latency': last_ts - start_ts,
+            'interpacket_times': spacings,
+            'delay_from_previous': prev_delay}
 
 if __name__ == '__main__':
     for f in sys.argv[1:]:
         conn = TcpAnalysis()
         conn.process_file(f)
-        ts = 0.0
+        ts = 0
         def request_start(p):
             return p.direction > 0 and p.datalen > 0
+        result_list = []
+        prev_time = None
         for s in split_trace(conn.packets, request_start):
             s = list(s)
             if False:
                 for p in s:
-                    if p.ts - ts > 0.01:
-                        print "----"
-                    if p.ts - ts > 2 * RTT_EST:
-                        print "LONG DELAY\n----"
+                    #if p.ts - ts > 0.01:
+                        #print "----"
+                    #if p.ts - ts > 2 * RTT_EST:
+                        #print "LONG DELAY\n----"
                     ts = p.ts
-                    print p
-                    if p.direction > 0 and p.datalen > 0:
-                        print "Request:", repr(p.data)
-            analyze_get(s)
-            print "===="
+                    #print p
+                    #if p.direction > 0 and p.datalen > 0:
+                        #print "Request:", repr(p.data)
+            results = analyze_get(s, prev_time)
+            if results is not None:
+                result_list.append(results)
+            prev_time = s[-1].ts
+            #print "===="
+
+        print dump_data(result_list)