From 9af190d3571bd1dbc6f58a20639863abb5b50fed Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Tue, 4 May 2010 16:53:14 -0700 Subject: [PATCH] More cloud storage performance-measurement scripts. --- cloudbench/cloudtest.py | 20 +++++----- parsetrace/analyze-tcp.py | 75 ++++++++++++++++++++++++++----------- parsetrace/delay-graph.py | 14 +++++++ parsetrace/latency-graph.py | 14 +++++++ parsetrace/split-trace.py | 8 +++- 5 files changed, 98 insertions(+), 33 deletions(-) create mode 100755 parsetrace/delay-graph.py create mode 100755 parsetrace/latency-graph.py diff --git a/cloudbench/cloudtest.py b/cloudbench/cloudtest.py index 0755688..3743282 100755 --- a/cloudbench/cloudtest.py +++ b/cloudbench/cloudtest.py @@ -58,16 +58,16 @@ def run_test(): for size in SIZES: c.get_object('file-%d-%d' % (size, repeat), size) - print "==== AZURE ====" - c = AzureTestConnection() - for repeat in range(4): - for size in SIZES: - c.put_object('file-%d-%d' % (size, repeat), size) - - c = AzureTestConnection() - for repeat in range(4): - for size in SIZES: - c.get_object('file-%d-%d' % (size, repeat), size) +# print "==== AZURE ====" +# c = AzureTestConnection() +# for repeat in range(4): +# for size in SIZES: +# c.put_object('file-%d-%d' % (size, repeat), size) +# +# c = AzureTestConnection() +# for repeat in range(4): +# for size in SIZES: +# c.get_object('file-%d-%d' % (size, repeat), size) if __name__ == '__main__': run_test() diff --git a/parsetrace/analyze-tcp.py b/parsetrace/analyze-tcp.py index 47a3d7b..83125e7 100755 --- a/parsetrace/analyze-tcp.py +++ b/parsetrace/analyze-tcp.py @@ -4,12 +4,15 @@ # determine as much as possible about the performance of that connection. # (Specifically designed for measuring performance of fetches to Amazon S3.) 
-import impacket, pcapy, re, sys +import impacket, json, pcapy, re, sys import impacket.ImpactDecoder, impacket.ImpactPacket # Estimate of the network RTT RTT_EST = 0.03 +def dump_data(obj): + return json.dumps(result_list, sort_keys=True, indent=2) + class Packet: def __init__(self, connection, ts, pkt): self.connection = connection self.ts = ts self.pkt = pkt @@ -90,16 +93,23 @@ def split_trace(packets, predicate, before=True): def analyze_get(packets): packets = iter(packets) - - # First packet is the GET request itself p = packets.next() - if not(p.direction > 0 and p.data.startswith('GET')): - print "Doesn't seem to be a GET request..." - return start_ts = p.ts id_out = p.id + # Check for connection establishment (SYN/SYN-ACK) and use that to estimate + # the network RTT. + if p.tcp.get_SYN(): + p = packets.next() + #print "Connection establishment: RTT is", p.ts - start_ts + return {'syn_rtt': p.ts - start_ts} + + # Otherwise, we expect the first packet to be the GET request itself + if not(p.direction > 0 and p.data.startswith('GET')): + #print "Doesn't seem to be a GET request..." 
+ return + # Find the first response packet containing data while not(p.direction < 0 and p.datalen > 0): p = packets.next() @@ -107,30 +117,48 @@ def analyze_get(packets): resp_ts = p.ts id_in = p.id start_seq = p.seq[0] + tot_bytes = (p.seq[1] - start_seq) & 0xffffffff + spacings = [] - print "Response time:", resp_ts - start_ts + #print "Response time:", resp_ts - start_ts # Scan through the incoming packets, looking for gaps in either the IP ID # field or in the timing last_ts = resp_ts for p in packets: gap = False + bytenr = (p.seq[1] - start_seq) & 0xffffffff if not p.direction < 0: continue + if p.tcp.get_FIN(): continue + spacings.append((p.ts - last_ts, bytenr)) if p.id != (id_in + 1) & 0xffff: gap = True - print "Sequence number gap at", id_in + #print "Sequence number gap at", id_in if p.ts - last_ts > 2 * RTT_EST: gap = True - print "Long gap of", p.ts - last_ts + #print "Long gap of", p.ts - last_ts elif p.ts - last_ts > RTT_EST / 2: gap = True - print "Short gap of", p.ts - last_ts + #print "Short gap of", p.ts - last_ts if gap: - print " [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]" + #print " [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]" + pass if p.datalen not in (1448, 1460): - print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq + #print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq + pass + if (p.seq[0] - start_seq) & 0xffffffff != tot_bytes: + #print "Packet out of order; got sequence number %d, expected %d" \ + # % ((p.seq[0] - start_seq) & 0xffffffff, tot_bytes) + pass last_ts = p.ts id_in = p.id + tot_bytes = max(tot_bytes, bytenr) + + #print "Transferred %d bytes in %s seconds, initial response after %s" % (tot_bytes, last_ts - start_ts, resp_ts - start_ts) + return {'bytes': tot_bytes, + 'start_latency': resp_ts - start_ts, + 'finish_latency': last_ts - start_ts, + 'interpacket_times': spacings} if __name__ == '__main__': for f in 
sys.argv[1:]: @@ -139,17 +167,22 @@ if __name__ == '__main__': ts = 0.0 def request_start(p): return p.direction > 0 and p.datalen > 0 + result_list = [] for s in split_trace(conn.packets, request_start): s = list(s) if False: for p in s: - if p.ts - ts > 0.01: - print "----" - if p.ts - ts > 2 * RTT_EST: - print "LONG DELAY\n----" + #if p.ts - ts > 0.01: + #print "----" + #if p.ts - ts > 2 * RTT_EST: + #print "LONG DELAY\n----" ts = p.ts - print p - if p.direction > 0 and p.datalen > 0: - print "Request:", repr(p.data) - analyze_get(s) - print "====" + #print p + #if p.direction > 0 and p.datalen > 0: + #print "Request:", repr(p.data) + results = analyze_get(s) + if results is not None: + result_list.append(results) + #print "====" + + print dump_data(result_list) diff --git a/parsetrace/delay-graph.py b/parsetrace/delay-graph.py new file mode 100755 index 0000000..8030397 --- /dev/null +++ b/parsetrace/delay-graph.py @@ -0,0 +1,14 @@ +#!/usr/bin/python + +import json, sys + +times = [] + +for f in sys.argv[1:]: + stats = json.load(open(f)) + for s in stats: + if 'interpacket_times' in s: + times += [x[0] for x in s['interpacket_times']] + +for t in times: + print "%f" % (t,) diff --git a/parsetrace/latency-graph.py b/parsetrace/latency-graph.py new file mode 100755 index 0000000..dcff23c --- /dev/null +++ b/parsetrace/latency-graph.py @@ -0,0 +1,14 @@ +#!/usr/bin/python + +import json, sys + +for f in sys.argv[1:]: + stats = json.load(open(f)) + for s in stats: + try: + size = s['bytes'] + lat1 = s['start_latency'] + lat2 = s['finish_latency'] + print "%d\t%f\t%f" % (size, lat1, lat2) + except: + pass diff --git a/parsetrace/split-trace.py b/parsetrace/split-trace.py index e1ff68a..caf77ee 100755 --- a/parsetrace/split-trace.py +++ b/parsetrace/split-trace.py @@ -7,7 +7,8 @@ import impacket, itertools, pcapy, re, socket, subprocess, sys import impacket.ImpactDecoder, impacket.ImpactPacket # Domain names for cloud service providers, whose traces we want to pull 
out. -DOMAINS = ['.amazon.com', '.core.windows.net'] +DOMAINS = ['.amazon.com', '.amazonaws.com', '.core.windows.net', + '204.246.162.', '87.238.86.'] # The collection of flows we've seen. The value associated with each flow is a # sequence number indicating in what order we saw the flows in the trace. @@ -53,7 +54,10 @@ for file in sys.argv[1:]: matches = False for d in DOMAINS: if name.endswith(d): matches = True - if not matches: continue + if name.startswith(d): matches = True + if not matches: + print "Host", name, "not recognized, skipping" + continue filter = "tcp and (host %s and host %s) and (port %d and port %d)" \ % (src, dst, sport, dport) -- 2.20.1