# Split a tcpdump trace apart into multiple files, each containing a single TCP
# flow.
6 import impacket, itertools, pcapy, re, socket, subprocess, sys
7 import impacket.ImpactDecoder, impacket.ImpactPacket
# Domain-name suffixes and IP-address prefixes identifying the cloud service
# providers whose traces we want to pull out.
10 DOMAINS = ['.amazon.com', '.amazonaws.com', '.core.windows.net',
11 '204.246.162.', '87.238.86.']
13 # The collection of flows we've seen. The value associated with each flow is a
14 # sequence number indicating in what order we saw the flows in the trace.
# Step 1: Parse the input file and extract a listing of all the TCP flows that
# we see in it.
def handler(header, data):
    """Record the TCP flow that one captured packet belongs to.

    The (header, data) signature matches a pcapy packet callback; presumably
    this function is registered with the pcapy reader elsewhere in the file
    (TODO confirm).

    The raw frame is decoded with the module-level ``decoder``.  A flow is
    identified by its two (address, port) endpoints, ordered by port and then
    by address, so packets travelling in either direction map to the same
    key.  The first time a flow is seen it is stored in the module-level
    ``flows`` dict with the next unused sequence number.

    Args:
        header: per-packet capture header; not used here.
        data: raw bytes of the captured frame.
    """
    pkt = decoder.decode(data)
    # Walk down the decoded layers: link-layer frame -> IP datagram -> TCP
    # segment.
    ip = pkt.child()
    tcp = ip.child()

    src = (ip.get_ip_src(), tcp.get_th_sport())
    dst = (ip.get_ip_dst(), tcp.get_th_dport())

    # Canonical ordering (port first, then address) makes the key independent
    # of packet direction and puts the lower-numbered port first.
    flow = tuple(sorted([src, dst], key=lambda end: (end[1], end[0])))

    # Number a flow only when it first appears, so the value reflects the
    # order in which flows were first seen rather than the latest packet.
    if flow not in flows:
        flows[flow] = max(itertools.chain(flows.values(), [0])) + 1
32 p = pcapy.open_offline(filename)
33 p.setfilter(r"ip proto \tcp")
34 assert p.datalink() == pcapy.DLT_EN10MB
35 decoder = impacket.ImpactDecoder.EthDecoder()
38 for file in sys.argv[1:]:
39 print "Scanning %s..." % (file,)
43 for (((dst, dport), (src, sport)), seq) in flows.items():
# Filter to find just the relevant flows.  Right now we want only flows
# to port 80 (both S3 and Azure use that as the service port when
# unencrypted, which is what we use).  We probably ought to apply
# another filter on IP address in case there happened to be any other
# HTTP flows during the trace capture.
49 if dport != 80: continue
51 name = socket.gethostbyaddr(dst)[0]
56 if name.endswith(d): matches = True
57 if name.startswith(d): matches = True
59 print "Host", name, "not recognized, skipping"
62 filter = "tcp and (host %s and host %s) and (port %d and port %d)" \
63 % (src, dst, sport, dport)
64 filters[seq] = (filter, name)
67 for (_, (filter, name)) in sorted(filters.items()):
68 print "%d: %s" % (n, filter)
69 subprocess.check_call(['tcpdump', '-s0', '-r', file, '-w',
70 'trace-%03d-%s' % (n, name),