# Hostname suffixes (plus what appear to be IP-address prefixes, stored as
# plain strings) for the cloud service providers whose traces we want to pull
# out.
# NOTE(review): the matching code visible in handler() tests each entry with
# name.endswith(d); a dotted-quad address would not end with a prefix such as
# '204.246.' -- confirm how the IP entries are meant to match.
DOMAINS = ['.amazon.com', '.amazonaws.com', '.core.windows.net',
- '204.246.162.', '87.238.86.']
+ '204.246.', '87.238.']
# The collection of flows we've seen. The value associated with each flow is a
# sequence number indicating in what order we saw the flows in the trace.
flows = {}
+def ip_lookup(host, cache={}):
+    """Return the reverse-DNS name for `host`, memoizing each answer.
+
+    host:  address string to resolve.
+    cache: dict mapping already-seen hosts to their resolved names. The
+           mutable default is intentional: it is shared across calls so each
+           distinct host triggers at most one (potentially slow) DNS query.
+
+    Returns the primary hostname reported by socket.gethostbyaddr(), or
+    `host` itself when the lookup fails. Failures are cached too, so an
+    unresolvable host is not retried.
+    """
+    if host not in cache:
+        try:
+            # Resolve the address we were asked about (the cache key), not
+            # whatever a global named `dst` happens to hold.
+            cache[host] = socket.gethostbyaddr(host)[0]
+        except (socket.error, UnicodeError):
+            # Best effort: fall back to the raw address. Catching only lookup
+            # errors lets Ctrl-C interrupt a long run of DNS queries.
+            cache[host] = host
+    return cache[host]
+
# Step 1: Parse the input file and extract a listing of all the flows that we
# care about.
def handler(header, data):
# another filter on IP address in case there happened to be any other
# HTTP flows during the trace capture.
if dport != 80: continue
- try:
- name = socket.gethostbyaddr(dst)[0]
- except:
- name = dst
+ name = ip_lookup(dst)
matches = False
for d in DOMAINS:
if name.endswith(d): matches = True