--- /dev/null
+#!/usr/bin/python
+#
+# Read a pcap dump containing a single TCP connection and analyze it to
+# determine as much as possible about the performance of that connection.
+# (Specifically designed for measuring performance of fetches to Amazon S3.)
+
+import impacket, pcapy, re, sys
+import impacket.ImpactDecoder, impacket.ImpactPacket
+
+# Estimate of the network RTT
+RTT_EST = 0.03
+
+class Packet:
+ def __init__(self, connection, ts, pkt):
+ self.connection = connection
+ self.ts = ts
+ self.pkt = pkt
+ self.ip = self.pkt.child()
+ self.tcp = self.ip.child()
+
+ self.datalen = self.ip.get_ip_len() - self.ip.get_header_size() \
+ - self.tcp.get_header_size()
+ self.data = self.tcp.get_data_as_string()[0:self.datalen]
+
+ self.seq = (self.tcp.get_th_seq(), self.tcp.get_th_seq() + self.datalen)
+ self.ack = self.tcp.get_th_ack()
+ self.id = self.ip.get_ip_id()
+
+ if self.tcp.get_th_sport() == 80:
+ # Incoming packet
+ self.direction = -1
+ elif self.tcp.get_th_dport() == 80:
+ # Outgoing packet
+ self.direction = 1
+ else:
+ self.direction = 0
+
+ def __repr__(self):
+ return "<Packet[%s]: id=%d seq=%d..%d ack=%d %s>" % \
+ ({-1: '<', 1: '>', 0: '?'}[self.direction], self.id,
+ self.seq[0], self.seq[1], self.ack, self.ts)
+
+class TcpAnalysis:
+ def __init__(self):
+ self.start_time = None
+ self.decoder = impacket.ImpactDecoder.EthDecoder()
+ self.packets = []
+
+ def process_file(self, filename):
+ """Load a pcap file and process the packets contained in it."""
+
+ p = pcapy.open_offline(filename)
+ p.setfilter(r"ip proto \tcp")
+ assert p.datalink() == pcapy.DLT_EN10MB
+ p.loop(0, self.packet_handler)
+
+ def packet_handler(self, header, data):
+ """Callback function run by the pcap parser for each packet."""
+
+ (sec, us) = header.getts()
+ ts = sec * 1000000 + us
+ if self.start_time is None:
+ self.start_time = ts
+ ts -= self.start_time
+ pkt = Packet(self, ts * 1e-6, self.decoder.decode(data))
+ self.packets.append(pkt)
+
+def split_trace(packets, predicate, before=True):
+ """Split a sequence of packets apart where packets satisfy the predicate.
+
+ If before is True (default), the split happens just before the matching
+ packet; otherwise it happens just after.
+ """
+
+ segment = []
+ for p in packets:
+ if predicate(p):
+ if before:
+ if len(segment) > 0:
+ yield segment
+ segment = [p]
+ else:
+ segment.append(p)
+ yield segment
+ segment = []
+ else:
+ segment.append(p)
+ if len(segment) > 0:
+ yield segment
+
+def analyze_get(packets):
+ packets = iter(packets)
+
+ # First packet is the GET request itself
+ p = packets.next()
+ if not(p.direction > 0 and p.data.startswith('GET')):
+ print "Doesn't seem to be a GET request..."
+ return
+
+ start_ts = p.ts
+ id_out = p.id
+
+ # Find the first response packet containing data
+ while not(p.direction < 0 and p.datalen > 0):
+ p = packets.next()
+
+ resp_ts = p.ts
+ id_in = p.id
+ start_seq = p.seq[0]
+
+ print "Response time:", resp_ts - start_ts
+
+ # Scan through the incoming packets, looking for gaps in either the IP ID
+ # field or in the timing
+ last_ts = resp_ts
+ for p in packets:
+ gap = False
+ if not p.direction < 0: continue
+ if p.id != (id_in + 1) & 0xffff:
+ gap = True
+ print "Sequence number gap at", id_in
+ if p.ts - last_ts > 2 * RTT_EST:
+ gap = True
+ print "Long gap of", p.ts - last_ts
+ elif p.ts - last_ts > RTT_EST / 2:
+ gap = True
+ print "Short gap of", p.ts - last_ts
+ if gap:
+ print " [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]"
+ if p.datalen < 1460:
+ print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq
+ last_ts = p.ts
+ id_in = p.id
+
+if __name__ == '__main__':
+ for f in sys.argv[1:]:
+ conn = TcpAnalysis()
+ conn.process_file(f)
+ ts = 0.0
+ def request_start(p):
+ return p.direction > 0 and p.datalen > 0
+ for s in split_trace(conn.packets, request_start):
+ s = list(s)
+ if False:
+ for p in s:
+ if p.ts - ts > 0.01:
+ print "----"
+ if p.ts - ts > 2 * RTT_EST:
+ print "LONG DELAY\n----"
+ ts = p.ts
+ print p
+ if p.direction > 0 and p.datalen > 0:
+ print "Request:", repr(p.data)
+ analyze_get(s)
+ print "===="