3 # Read a pcap dump containing a single TCP connection and analyze it to
4 # determine as much as possible about the performance of that connection.
5 # (Specifically designed for measuring performance of fetches to Amazon S3.)
7 import impacket, json, pcapy, re, sys
8 import impacket.ImpactDecoder, impacket.ImpactPacket
10 # Estimate of the network RTT
14 return json.dumps(result_list, sort_keys=True, indent=2)
17 def __init__(self, connection, ts, pkt):
18 self.connection = connection
21 self.ip = self.pkt.child()
22 self.tcp = self.ip.child()
24 self.datalen = self.ip.get_ip_len() - self.ip.get_header_size() \
25 - self.tcp.get_header_size()
26 self.data = self.tcp.get_data_as_string()[0:self.datalen]
28 self.seq = (self.tcp.get_th_seq(), self.tcp.get_th_seq() + self.datalen)
29 self.ack = self.tcp.get_th_ack()
30 self.id = self.ip.get_ip_id()
32 if self.tcp.get_th_sport() == 80:
35 elif self.tcp.get_th_dport() == 80:
42 return "<Packet[%s]: id=%d seq=%d..%d ack=%d %s>" % \
43 ({-1: '<', 1: '>', 0: '?'}[self.direction], self.id,
44 self.seq[0], self.seq[1], self.ack, self.ts)
48 self.start_time = None
49 self.decoder = impacket.ImpactDecoder.EthDecoder()
def process_file(self, filename):
    """Load a pcap file and process the packets contained in it.

    Opens `filename` as an offline capture, installs a filter that keeps
    only TCP-over-IP packets, and passes each remaining packet to
    `self.packet_handler`.

    Raises:
        ValueError: if the capture's link type is not Ethernet
            (pcapy.DLT_EN10MB).
    """

    p = pcapy.open_offline(filename)

    # The backslash escapes `tcp`, which is a reserved word in the pcap
    # filter grammar and must be escaped when used as a protocol name.
    p.setfilter(r"ip proto \tcp")

    # Check the link type with an explicit raise instead of `assert`:
    # asserts are removed under `python -O`, which would let a non-Ethernet
    # capture through to the packet handler unchecked.
    # NOTE(review): self.decoder appears to be an Ethernet decoder -- confirm.
    linktype = p.datalink()
    if linktype != pcapy.DLT_EN10MB:
        raise ValueError("%s: unsupported link type %r (expected Ethernet)"
                         % (filename, linktype))

    # A packet count of 0 lets the loop run until the capture is exhausted.
    p.loop(0, self.packet_handler)
60 def packet_handler(self, header, data):
61 """Callback function run by the pcap parser for each packet."""
63 (sec, us) = header.getts()
64 ts = sec * 1000000 + us
65 if self.start_time is None:
68 pkt = Packet(self, ts, self.decoder.decode(data))
69 self.packets.append(pkt)
71 def split_trace(packets, predicate, before=True):
72 """Split a sequence of packets apart where packets satisfy the predicate.
74 If before is True (default), the split happens just before the matching
75 packet; otherwise it happens just after.
94 def analyze_get(packets):
95 packets = iter(packets)
101 # Check for connection establishment (SYN/SYN-ACK) and use that to estimate
104 addr = p.ip.get_ip_dst()
106 #print "Connection establishment: RTT is", p.ts - start_ts
107 return {'syn_rtt': (p.ts - start_ts) / 1e6, 'addr': addr}
109 # Otherwise, we expect the first packet to be the GET request itself
110 if not(p.direction > 0 and p.data.startswith('GET')):
111 #print "Doesn't seem to be a GET request..."
114 # Find the first response packet containing data
115 while not(p.direction < 0 and p.datalen > 0):
121 tot_bytes = (p.seq[1] - start_seq) & 0xffffffff
124 #print "Response time:", resp_ts - start_ts
126 # Scan through the incoming packets, looking for gaps in either the IP ID
127 # field or in the timing
129 last_was_short = False
133 bytenr = (p.seq[1] - start_seq) & 0xffffffff
134 if not p.direction < 0: continue
135 if p.tcp.get_FIN(): continue
138 flags.append('LAST_PACKET_SHORT')
139 last_was_short = False
140 if p.id != (id_in + 1) & 0xffff:
142 flags.append('IPID_GAP')
143 if p.datalen not in (1448, 1460):
144 last_was_short = True
145 if (p.seq[0] - start_seq) & 0xffffffff != tot_bytes:
146 flags.append('OUT_OF_ORDER')
147 if ((p.seq[0] - start_seq) & 0xffffffff) % 9000 == 0:
149 spacings.append(((p.ts - last_ts) / 1e6, bytenr) + tuple(flags))
152 tot_bytes = max(tot_bytes, bytenr)
154 #print "Transferred %d bytes in %s seconds, initial response after %s" % (tot_bytes, last_ts - start_ts, resp_ts - start_ts)
155 return {'bytes': tot_bytes,
156 'start_latency': resp_ts - start_ts,
157 'finish_latency': last_ts - start_ts,
158 'interpacket_times': spacings}
160 if __name__ == '__main__':
161 for f in sys.argv[1:]:
165 def request_start(p):
166 return p.direction > 0 and p.datalen > 0
168 for s in split_trace(conn.packets, request_start):
172 #if p.ts - ts > 0.01:
174 #if p.ts - ts > 2 * RTT_EST:
175 #print "LONG DELAY\n----"
178 #if p.direction > 0 and p.datalen > 0:
179 #print "Request:", repr(p.data)
180 results = analyze_get(s)
181 if results is not None:
182 result_list.append(results)
185 print dump_data(result_list)