X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=parsetrace%2Fsplit-trace.py;h=6af8c724ff40b2a3af4287e25338fa441d6059ac;hb=7f01069131555a0e9ab332e578a9eb2815e1e12d;hp=caf77eecedc66c35b85b7b4e73d44babad16ba28;hpb=9af190d3571bd1dbc6f58a20639863abb5b50fed;p=bluesky.git

diff --git a/parsetrace/split-trace.py b/parsetrace/split-trace.py
index caf77ee..6af8c72 100755
--- a/parsetrace/split-trace.py
+++ b/parsetrace/split-trace.py
@@ -8,12 +8,27 @@ import impacket.ImpactDecoder, impacket.ImpactPacket
 
 # Domain names for cloud service providers, whose traces we want to pull out.
 DOMAINS = ['.amazon.com', '.amazonaws.com', '.core.windows.net',
-           '204.246.162.', '87.238.86.']
+           '204.246.', '87.238.']
 
 # The collection of flows we've seen. The value associated with each flow is a
 # sequence number indicating in what order we saw the flows in the trace.
 flows = {}
 
+def ip_lookup(host, cache={}):
+    """Return the reverse-DNS name for host, or host itself if lookup fails.
+
+    Results (including failures) are memoized in cache; the mutable default
+    is intentional so that the cache persists across calls.
+    """
+    if host not in cache:
+        try:
+            cache[host] = socket.gethostbyaddr(host)[0]
+        except Exception:
+            # Best effort: fall back to the raw address when it cannot be
+            # resolved.
+            cache[host] = host
+    return cache[host]
+
 # Step 1: Parse the input file and extract a listing of all the flows that we
 # care about.
 def handler(header, data):
@@ -47,10 +62,7 @@ for file in sys.argv[1:]:
         # another filter on IP address in case there happened to be any other
         # HTTP flows during the trace capture.
         if dport != 80: continue
-        try:
-            name = socket.gethostbyaddr(dst)[0]
-        except:
-            name = dst
+        name = ip_lookup(dst)
         matches = False
         for d in DOMAINS:
             if name.endswith(d): matches = True