Print flow identification in TCP parsing output.
[bluesky.git] / parsetrace / parse.py
1 #!/usr/bin/python
2
3 import impacket, pcapy, re, sys
4 import impacket.ImpactDecoder
5
# States of the per-connection HTTP request/response tracker, in the order
# a single transfer moves through them.
(STATE_START,
 STATE_REQ_SENT,
 STATE_REQ_ACKED,
 STATE_RESP_START) = range(4)

# Absolute timestamp of the first packet handled; later timestamps are
# rebased against it.  Stays None until a packet has been seen.
start_time = None

# Maps a flow key to the Connection object tracking that flow.
flows = {}

# One tab-separated record per completed transfer is written here.
logfile = open('times.data', 'w')
16
def seq_after(x, y):
    """Return True when x is at or after y in 32-bit sequence number space.

    The difference x - y is reduced modulo 2**32 so that wrap-around is
    handled; x counts as "after" y when it is fewer than 2**31 ahead.
    """
    wrap = 1 << 32
    half_window = 1 << 31
    distance = (x - y) % wrap
    return distance < half_window
22
class Connection:
    """Tracks one TCP flow and times the HTTP transfers carried on it.

    Packets are fed in through process(), which advances a small state
    machine (STATE_START -> STATE_REQ_SENT -> STATE_REQ_ACKED ->
    STATE_RESP_START) and records timing samples in self.times.  Each
    sample is a tuple (seconds since the request was sent, response bytes
    seen so far).  finish_transfer() reports the collected samples and
    resets the connection for the next request.
    """

    # Number of Connection objects created so far; used to assign ids.
    counter = 0

    def __init__(self, endpoints):
        """Create an idle connection.

        endpoints identifies the flow; it is stored unchanged and written
        to the log next to each transfer record.
        """
        self.endpoints = endpoints
        # NOTE(review): every packet handed to process() is retained here;
        # nothing in the code visible in this file reads the list back.
        self.packets = []
        self.state = STATE_START
        self.id = Connection.counter
        self.times = []
        self.transfer_count = 0
        # Per-request bookkeeping, filled in by process() as the state
        # machine advances.
        self.startseq = None    # sequence number just past the request
        self.starttime = None   # timestamp of the request packet
        self.respseq = None     # server sequence number when request was ACKed
        Connection.counter += 1

    def finish_transfer(self):
        """Report the transfer in progress (if any) and return to idle.

        When timing samples were collected, a summary is printed to stdout
        and one tab-separated record is appended to the module-level
        logfile.  The samples are then discarded and the state is reset to
        STATE_START, whether or not anything was reported.
        """
        if self.times:
            # The first sample is taken when the request is acknowledged.
            rtt = self.times[0][0]
            # Time of the first sample that carries response bytes; falls
            # back to 0.0 when no response data was recorded at all.
            start = next((t[0] for t in self.times if t[1] > 0), 0.0)
            end = self.times[-1][0]
            data = self.times[-1][1]
            # Single-argument print calls produce the same output under
            # Python 2 and also parse under Python 3.
            print("Connection %d Transfer #%d"
                  % (self.id, self.transfer_count))
            print("Network RTT: %s" % (rtt,))
            print("Additional response delay: %s" % (start - rtt,))
            print("Transfer time: %s" % (end - start,))
            if end - start > 0:
                print("Bandwidth: %s" % (data / (end - start),))
            print("")
            logfile.write("%d\t%d\t%d\t%f\t%f\t%f\t# %s\n"
                          % (self.id, self.transfer_count, data,
                             rtt, start - rtt, end - start, self.endpoints))
            self.transfer_count += 1
        self.times = []
        self.state = STATE_START

    def process(self, timestamp, packet):
        """Advance the state machine with one decoded packet of this flow.

        timestamp is the packet time; differences between timestamps are
        divided by 1e6 before being stored, and the caller in this file
        passes microseconds.  packet is the decoded frame whose child() is
        the IP layer and whose grandchild is the TCP layer.
        """
        # Use the packet that was passed in rather than a module global.
        ip = packet.child()
        tcp = ip.child()
        self.packets.append(packet)

        # TCP payload length: IP total length minus both header sizes.
        datalen = ip.get_ip_len() - ip.get_header_size() - tcp.get_header_size()
        data = tcp.get_data_as_string()[0:datalen]

        if tcp.get_th_sport() == 80:
            # Incoming packet
            direction = -1
        elif tcp.get_th_dport() == 80:
            # Outgoing packet
            direction = 1
        else:
            direction = 0

        # (first sequence number, sequence number just past the payload)
        seq = (tcp.get_th_seq(), tcp.get_th_seq() + datalen)
        ack = tcp.get_th_ack()

        # The checks below are deliberately independent ifs, not elifs: a
        # single packet may move the connection through several states.

        # Previous request finished
        if self.state == STATE_RESP_START and direction > 0 \
                and data.startswith('GET /'):
            self.finish_transfer()

        # New request seen on an idle connection...
        if self.state == STATE_START and direction > 0 \
                and data.startswith('GET /'):
            self.startseq = seq[1]
            self.starttime = timestamp
            self.state = STATE_REQ_SENT

        # Request is acknowledged, but response not yet seen
        if self.state == STATE_REQ_SENT and direction < 0 \
                and seq_after(ack, self.startseq):
            self.state = STATE_REQ_ACKED
            self.respseq = seq[0]
            self.times.append(((timestamp - self.starttime) / 1e6, 0))

        # Response header to request has been seen
        if self.state == STATE_REQ_ACKED and direction < 0 \
                and data.startswith("HTTP/1."):
            self.state = STATE_RESP_START

        # Data packet in response
        if self.state == STATE_RESP_START and direction < 0 and datalen > 0:
            self.times.append(((timestamp - self.starttime) / 1e6,
                               seq[1] - self.respseq))
106
def handler(header, data):
    """Per-packet capture callback: decode a frame and hand it to its flow.

    header.getts() supplies the capture time as (seconds, microseconds);
    it is converted to microseconds and rebased so the first packet seen
    is at time zero.  data is the raw frame, decoded with the module-level
    decoder.  The Connection for the packet's flow is created on first
    sight and then asked to process the packet.
    """
    global start_time
    global pkt

    seconds, micros = header.getts()
    now = seconds * 1000000 + micros
    if start_time is None:
        start_time = now
    elapsed = now - start_time

    # The decoded frame is also published as the module global pkt.
    pkt = decoder.decode(data)
    ip_layer = pkt.child()
    tcp_layer = ip_layer.child()

    # Sorting the two (address, port) endpoints gives both directions of a
    # conversation the same dictionary key.
    endpoints = [
        (ip_layer.get_ip_src(), tcp_layer.get_th_sport()),
        (ip_layer.get_ip_dst(), tcp_layer.get_th_dport()),
    ]
    endpoints.sort()
    flow = tuple(endpoints)

    connection = flows.get(flow)
    if connection is None:
        connection = Connection(flow)
        flows[flow] = connection

    connection.process(elapsed, pkt)
127
def process(filename):
    """Run every TCP packet of one capture file through handler().

    Opens filename as an offline capture, restricts it to TCP, requires an
    Ethernet link type, and installs an Ethernet decoder as the module
    global used by handler().  After the capture has been read, each known
    connection is asked to report any transfer still in progress.
    """
    global decoder

    capture = pcapy.open_offline(filename)
    # In the filter language "\tcp" is the escaped keyword tcp; the raw
    # string keeps Python from reading "\t" as a tab.
    capture.setfilter(r"ip proto \tcp")
    assert capture.datalink() == pcapy.DLT_EN10MB

    decoder = impacket.ImpactDecoder.EthDecoder()
    capture.loop(0, handler)

    # Report transfers that were still open when the capture ended.
    for connection in flows.values():
        connection.finish_transfer()
138
if __name__ == '__main__':
    # Each command-line argument names a capture file to analyse, in order.
    for trace_path in sys.argv[1:]:
        process(trace_path)