Simple tool for analyzing a dump of a single S3 connection.
authorMichael Vrable <mvrable@cs.ucsd.edu>
Thu, 29 Apr 2010 23:07:21 +0000 (16:07 -0700)
committerMichael Vrable <mvrable@cs.ucsd.edu>
Thu, 29 Apr 2010 23:07:21 +0000 (16:07 -0700)
Will try to extract information about any delays in the transfers.

parsetrace/analyze-tcp.py [new file with mode: 0755]

diff --git a/parsetrace/analyze-tcp.py b/parsetrace/analyze-tcp.py
new file mode 100755 (executable)
index 0000000..54b32dc
--- /dev/null
@@ -0,0 +1,155 @@
+#!/usr/bin/python
+#
+# Read a pcap dump containing a single TCP connection and analyze it to
+# determine as much as possible about the performance of that connection.
+# (Specifically designed for measuring performance of fetches to Amazon S3.)
+
+import impacket, pcapy, re, sys
+import impacket.ImpactDecoder, impacket.ImpactPacket
+
+# Estimate of the network RTT
+RTT_EST = 0.03
+
+class Packet:
+    def __init__(self, connection, ts, pkt):
+        self.connection = connection
+        self.ts = ts
+        self.pkt = pkt
+        self.ip = self.pkt.child()
+        self.tcp = self.ip.child()
+
+        self.datalen = self.ip.get_ip_len() - self.ip.get_header_size() \
+                        - self.tcp.get_header_size()
+        self.data = self.tcp.get_data_as_string()[0:self.datalen]
+
+        self.seq = (self.tcp.get_th_seq(), self.tcp.get_th_seq() + self.datalen)
+        self.ack = self.tcp.get_th_ack()
+        self.id = self.ip.get_ip_id()
+
+        if self.tcp.get_th_sport() == 80:
+            # Incoming packet
+            self.direction = -1
+        elif self.tcp.get_th_dport() == 80:
+            # Outgoing packet
+            self.direction = 1
+        else:
+            self.direction = 0
+
+    def __repr__(self):
+        return "<Packet[%s]: id=%d seq=%d..%d ack=%d %s>" % \
+            ({-1: '<', 1: '>', 0: '?'}[self.direction], self.id,
+             self.seq[0], self.seq[1], self.ack, self.ts)
+
+class TcpAnalysis:
+    def __init__(self):
+        self.start_time = None
+        self.decoder = impacket.ImpactDecoder.EthDecoder()
+        self.packets = []
+
+    def process_file(self, filename):
+        """Load a pcap file and process the packets contained in it."""
+
+        p = pcapy.open_offline(filename)
+        p.setfilter(r"ip proto \tcp")
+        assert p.datalink() == pcapy.DLT_EN10MB
+        p.loop(0, self.packet_handler)
+
+    def packet_handler(self, header, data):
+        """Callback function run by the pcap parser for each packet."""
+
+        (sec, us) = header.getts()
+        ts = sec * 1000000 + us
+        if self.start_time is None:
+            self.start_time = ts
+        ts -= self.start_time
+        pkt = Packet(self, ts * 1e-6, self.decoder.decode(data))
+        self.packets.append(pkt)
+
+def split_trace(packets, predicate, before=True):
+    """Split a sequence of packets apart where packets satisfy the predicate.
+
+    If before is True (default), the split happens just before the matching
+    packet; otherwise it happens just after.
+    """
+
+    segment = []
+    for p in packets:
+        if predicate(p):
+            if before:
+                if len(segment) > 0:
+                    yield segment
+                segment = [p]
+            else:
+                segment.append(p)
+                yield segment
+                segment = []
+        else:
+            segment.append(p)
+    if len(segment) > 0:
+        yield segment
+
+def analyze_get(packets):
+    packets = iter(packets)
+
+    # First packet is the GET request itself
+    p = packets.next()
+    if not(p.direction > 0 and p.data.startswith('GET')):
+        print "Doesn't seem to be a GET request..."
+        return
+
+    start_ts = p.ts
+    id_out = p.id
+
+    # Find the first response packet containing data
+    while not(p.direction < 0 and p.datalen > 0):
+        p = packets.next()
+
+    resp_ts = p.ts
+    id_in = p.id
+    start_seq = p.seq[0]
+
+    print "Response time:", resp_ts - start_ts
+
+    # Scan through the incoming packets, looking for gaps in either the IP ID
+    # field or in the timing
+    last_ts = resp_ts
+    for p in packets:
+        gap = False
+        if not p.direction < 0: continue
+        if p.id != (id_in + 1) & 0xffff:
+            gap = True
+            print "Sequence number gap at", id_in
+        if p.ts - last_ts > 2 * RTT_EST:
+            gap = True
+            print "Long gap of", p.ts - last_ts
+        elif p.ts - last_ts > RTT_EST / 2:
+            gap = True
+            print "Short gap of", p.ts - last_ts
+        if gap:
+            print "    [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]"
+        if p.datalen < 1460:
+            print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq
+        last_ts = p.ts
+        id_in = p.id
+
+if __name__ == '__main__':
+    for f in sys.argv[1:]:
+        conn = TcpAnalysis()
+        conn.process_file(f)
+        ts = 0.0
+        def request_start(p):
+            return p.direction > 0 and p.datalen > 0
+        for s in split_trace(conn.packets, request_start):
+            s = list(s)
+            if False:
+                for p in s:
+                    if p.ts - ts > 0.01:
+                        print "----"
+                    if p.ts - ts > 2 * RTT_EST:
+                        print "LONG DELAY\n----"
+                    ts = p.ts
+                    print p
+                    if p.direction > 0 and p.datalen > 0:
+                        print "Request:", repr(p.data)
+            analyze_get(s)
+            print "===="