--- /dev/null
+#!/usr/bin/python
+#
+# Split a tcpdump trace apart into multiple files, each containing a single TCP
+# flow.
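+#
+# Usage: pass one or more pcap trace files on the command line. For each
+# input trace, the matching flows are extracted with tcpdump into files named
+# trace-NNN-<hostname> in the current directory.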
+
+import impacket, itertools, pcapy, re, socket, subprocess, sys
+import impacket.ImpactDecoder, impacket.ImpactPacket
+
+# Domain names for the cloud service providers whose traces we want to pull
+# out.
+DOMAINS = ['.amazon.com', '.core.windows.net']
+
+# The collection of flows we've seen. The value associated with each flow is a
+# sequence number indicating in what order we saw the flows in the trace.
+flows = {}
+
+# Step 1: Parse the input file and extract a listing of all the flows that we
+# care about.
+def handler(header, data):
+    pkt = decoder.decode(data)
+    ip = pkt.child()
+    tcp = ip.child()
+    src = (ip.get_ip_src(), tcp.get_th_sport())
+    dst = (ip.get_ip_dst(), tcp.get_th_dport())
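+    # Canonicalize the endpoint pair so that both directions of a connection
+    # map to the same flow key: sort by port first, then by address.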
+    flow = tuple(sorted([src, dst], key=lambda end: (end[1], end[0])))
+    if flow not in flows:
+        flows[flow] = max(itertools.chain(flows.values(), [0])) + 1
+
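+# Scan a single pcap file, feeding every TCP/IP packet to handler() above.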
+def scan(filename):
+    global decoder
+    p = pcapy.open_offline(filename)
+    p.setfilter(r"ip proto \tcp")
+    assert p.datalink() == pcapy.DLT_EN10MB
+    decoder = impacket.ImpactDecoder.EthDecoder()
+    p.loop(0, handler)
+
+for file in sys.argv[1:]:
+    print "Scanning %s..." % (file,)
+    scan(file)
+
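+    # Step 2: Decide which of the flows we saw are interesting (port-80 flows
+    # to one of the providers in DOMAINS) and build a tcpdump filter
+    # expression for each one.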
+    filters = {}
+    for (((dst, dport), (src, sport)), seq) in flows.items():
+        # Keep only the relevant flows. Right now that means flows to port
+        # 80, since both S3 and Azure use it as the service port for the
+        # unencrypted transfers we run. We probably ought to apply a further
+        # filter on IP address in case any other HTTP flows happened to be
+        # captured during the trace.
+        if dport != 80: continue
+        try:
+            name = socket.gethostbyaddr(dst)[0]
+        except socket.error:
+            name = dst
+        if not any(name.endswith(d) for d in DOMAINS): continue
+
+        filter = "tcp and (host %s and host %s) and (port %d and port %d)" \
+            % (src, dst, sport, dport)
+        filters[seq] = (filter, name)
+
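+    # Step 3: Run tcpdump once per selected flow to extract it from the input
+    # trace into its own file.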
+    n = 0
+    for (_, (filter, name)) in sorted(filters.items()):
+        print "%d: %s" % (n, filter)
+        subprocess.check_call(['tcpdump', '-s0', '-r', file, '-w',
+                               'trace-%03d-%s' % (n, name),
+                               filter])
+        n += 1
--- /dev/null
+#!/usr/bin/python
+#
+# Run a series of simple test requests against S3 for gathering some basic
+# performance numbers.
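+#
+# Assumes boto credentials are already configured (e.g. in ~/.boto or via the
+# AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY environment variables) and that
+# the bucket named by BUCKET_NAME already exists.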
+
+import boto, time
+from boto.s3.connection import SubdomainCallingFormat
+from boto.s3.key import Key
+import azure
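+# NOTE: "azure" here appears to be a local helper module exposing a
+# Connection class with a make_request() method, not the official Azure SDK.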
+
+BUCKET_NAME = 'mvrable-benchmark'
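+# Object sizes to test, from 64 bytes up to 32 MiB.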
+SIZES = [64, 4096, 32 << 10, 256 << 10, 1 << 20, 4 << 20, 32 << 20]
+
+class S3TestConnection:
+    def __init__(self):
+        self.conn = boto.connect_s3(is_secure=False,
+                                    calling_format=SubdomainCallingFormat())
+        self.bucket = self.conn.get_bucket(BUCKET_NAME)
+
+    def put_object(self, name, size):
+        buf = 'A' * size
+        k = Key(self.bucket, name)
+        start_time = time.time()
+        k.set_contents_from_string(buf)
+        print "%s: %f" % (name, time.time() - start_time)
+
+    def get_object(self, name, size):
+        k = Key(self.bucket, name)
+        start_time = time.time()
+        buf = k.get_contents_as_string()
+        print "%s: %f" % (name, time.time() - start_time)
+
+class AzureTestConnection:
+    def __init__(self):
+        self.conn = azure.Connection()
+
+    def put_object(self, name, size):
+        buf = 'A' * size
+        start_time = time.time()
+        self.conn.make_request('/benchmark/' + name, 'PUT', buf,
+                               {'x-ms-blob-type': 'BlockBlob'})
+        print "%s: %f" % (name, time.time() - start_time)
+
+    def get_object(self, name, size):
+        start_time = time.time()
+        self.conn.make_request('/benchmark/' + name, 'GET')
+        print "%s: %f" % (name, time.time() - start_time)
+
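+# Upload and then download one object of each size in SIZES, four times each,
+# first against S3 and then against Azure; each request prints "name: seconds".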
+def run_test():
+    print "==== S3 ===="
+    c = S3TestConnection()
+    for repeat in range(4):
+        for size in SIZES:
+            c.put_object('file-%d-%d' % (size, repeat), size)
+
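+    # Use a fresh connection for the download pass, presumably so the GETs do
+    # not reuse the warmed-up connection from the uploads.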
+    c = S3TestConnection()
+    for repeat in range(4):
+        for size in SIZES:
+            c.get_object('file-%d-%d' % (size, repeat), size)
+
+    print "==== AZURE ===="
+    c = AzureTestConnection()
+    for repeat in range(4):
+        for size in SIZES:
+            c.put_object('file-%d-%d' % (size, repeat), size)
+
+    c = AzureTestConnection()
+    for repeat in range(4):
+        for size in SIZES:
+            c.get_object('file-%d-%d' % (size, repeat), size)
+
+if __name__ == '__main__':
+    run_test()