X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=parsetrace%2Fsplit-trace.py;h=6af8c724ff40b2a3af4287e25338fa441d6059ac;hb=388030970805a70cb4fad34ade5e3de7a3607a57;hp=e1ff68a7cad806cd84831043057c4aa3350ec665;hpb=9d32f47c84c4b8aae2ec6fd63d1a4f008228e82f;p=bluesky.git

diff --git a/parsetrace/split-trace.py b/parsetrace/split-trace.py
index e1ff68a..6af8c72 100755
--- a/parsetrace/split-trace.py
+++ b/parsetrace/split-trace.py
@@ -7,12 +7,24 @@ import impacket, itertools, pcapy, re, socket, subprocess, sys
 import impacket.ImpactDecoder, impacket.ImpactPacket
 
 # Domain names for cloud service providers, whose traces we want to pull out.
-DOMAINS = ['.amazon.com', '.core.windows.net']
+DOMAINS = ['.amazon.com', '.amazonaws.com', '.core.windows.net',
+           '204.246.', '87.238.']
 
 # The collection of flows we've seen. The value associated with each flow is a
 # sequence number indicating in what order we saw the flows in the trace.
 flows = {}
 
+# Reverse-resolve an IP address to a host name, falling back to the address
+# itself when the lookup fails.  The mutable default `cache` is intentional:
+# it memoizes results so each address is resolved at most once per run.
+def ip_lookup(host, cache={}):
+    if host not in cache:
+        try:
+            cache[host] = socket.gethostbyaddr(host)[0]
+        except socket.error:
+            cache[host] = host
+    return cache[host]
+
 # Step 1: Parse the input file and extract a listing of all the flows that we
 # care about.
 def handler(header, data):
@@ -46,14 +58,14 @@ for file in sys.argv[1:]:
         # another filter on IP address in case there happened to be any other
         # HTTP flows during the trace capture.
         if dport != 80: continue
-        try:
-            name = socket.gethostbyaddr(dst)[0]
-        except:
-            name = dst
+        name = ip_lookup(dst)
         matches = False
         for d in DOMAINS:
             if name.endswith(d): matches = True
-        if not matches: continue
+            if name.startswith(d): matches = True
+        if not matches:
+            print "Host", name, "not recognized, skipping"
+            continue
 
         filter = "tcp and (host %s and host %s) and (port %d and port %d)" \
             % (src, dst, sport, dport)