From: Michael Vrable
Date: Sat, 8 May 2010 02:12:51 +0000 (-0700)
Subject: Improve analysis of S3 packet traces.
X-Git-Url: http://git.vrable.net/?p=bluesky.git;a=commitdiff_plain;h=04778bf2a2063b7be540b5be45c7451d00af1b81

Improve analysis of S3 packet traces.
---

diff --git a/.gitignore b/.gitignore
index c44556a..1394e44 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+*.pyc
 CMakeFiles
 CMakeCache.txt
 Makefile
diff --git a/cloudbench/cloudtest.py b/cloudbench/cloudtest.py
index 0755688..31c188a 100755
--- a/cloudbench/cloudtest.py
+++ b/cloudbench/cloudtest.py
@@ -24,7 +24,7 @@ class S3TestConnection:
         k.set_contents_from_string(buf)
         print "%s: %f" % (name, time.time() - start_time)
 
-    def get_object(self, name, size):
+    def get_object(self, name):
         k = Key(self.bucket, name)
         start_time = time.time()
         buf = k.get_contents_as_string()
@@ -41,7 +41,7 @@ class AzureTestConnection:
                                {'x-ms-blob-type': 'BlockBlob'})
         print "%s: %f" % (name, time.time() - start_time)
 
-    def get_object(self, name, size):
+    def get_object(self, name):
         start_time = time.time()
         self.conn.make_request('/benchmark/' + name, 'GET')
         print "%s: %f" % (name, time.time() - start_time)
@@ -56,7 +56,7 @@ def run_test():
     c = S3TestConnection()
     for repeat in range(4):
         for size in SIZES:
-            c.get_object('file-%d-%d' % (size, repeat), size)
+            c.get_object('file-%d-%d' % (size, repeat))
 
     print "==== AZURE ===="
     c = AzureTestConnection()
@@ -67,7 +67,20 @@ def run_test():
     c = AzureTestConnection()
     for repeat in range(4):
         for size in SIZES:
-            c.get_object('file-%d-%d' % (size, repeat), size)
+            c.get_object('file-%d-%d' % (size, repeat))
 
 if __name__ == '__main__':
-    run_test()
+    #run_test()
+    SIZES = [4096, 32 << 10, 256 << 10, 1 << 20, 4 << 20]
+    PRIME = (1 << 20) + (1 << 10)
+    c = AzureTestConnection()
+    for size in SIZES:
+        c.put_object('file-%d-%d' % (size, 0), size)
+    c.put_object('file-%d-%d' % (PRIME, 0), PRIME)
+
+    for size in SIZES:
+        for n in range(50):
+            c = AzureTestConnection()
+            c.get_object('file-%d-%d' % (PRIME, 0))
+            time.sleep(2.0)
+            c.get_object('file-%d-%d' % (size, 0))
diff --git a/parsetrace/analyze-tcp.py b/parsetrace/analyze-tcp.py
index b83fb82..d050d08 100755
--- a/parsetrace/analyze-tcp.py
+++ b/parsetrace/analyze-tcp.py
@@ -8,7 +8,7 @@ import impacket, json, pcapy, re, sys
 import impacket.ImpactDecoder, impacket.ImpactPacket
 
 # Estimate of the network RTT
-RTT_EST = 0.03
+RTT_EST = 0.03 * 1e6
 
 def dump_data(obj):
     return json.dumps(result_list, sort_keys=True, indent=2)
@@ -65,7 +65,7 @@
         if self.start_time is None:
             self.start_time = ts
         ts -= self.start_time
-        pkt = Packet(self, ts * 1e-6, self.decoder.decode(data))
+        pkt = Packet(self, ts, self.decoder.decode(data))
         self.packets.append(pkt)
 
 def split_trace(packets, predicate, before=True):
@@ -104,7 +104,7 @@ def analyze_get(packets):
             addr = p.ip.get_ip_dst()
         p = packets.next()
         #print "Connection establishment: RTT is", p.ts - start_ts
-        return {'syn_rtt': p.ts - start_ts, 'addr': addr}
+        return {'syn_rtt': (p.ts - start_ts) / 1e6, 'addr': addr}
 
     # Otherwise, we expect the first packet to be the GET request itself
     if not(p.direction > 0 and p.data.startswith('GET')):
@@ -126,31 +126,27 @@ def analyze_get(packets):
     # Scan through the incoming packets, looking for gaps in either the IP ID
     # field or in the timing
     last_ts = resp_ts
+    last_was_short = False
     for p in packets:
         gap = False
+        flags = []
         bytenr = (p.seq[1] - start_seq) & 0xffffffff
         if not p.direction < 0: continue
         if p.tcp.get_FIN(): continue
-        spacings.append((p.ts - last_ts, bytenr))
+
+        if last_was_short:
+            flags.append('LAST_PACKET_SHORT')
+            last_was_short = False
         if p.id != (id_in + 1) & 0xffff:
             gap = True
-            #print "Sequence number gap at", id_in
-        if p.ts - last_ts > 2 * RTT_EST:
-            gap = True
-            #print "Long gap of", p.ts - last_ts
-        elif p.ts - last_ts > RTT_EST / 2:
-            gap = True
-            #print "Short gap of", p.ts - last_ts
-        if gap:
-            #print "    [occurred after", p.seq[0] - start_seq, "bytes, time", p.ts, "sec]"
-            pass
+            flags.append('IPID_GAP')
         if p.datalen not in (1448, 1460):
-            #print "Short packet of", p.datalen, "bytes, brings total to", p.seq[1] - start_seq
-            pass
+            last_was_short = True
         if (p.seq[0] - start_seq) & 0xffffffff != tot_bytes:
-            #print "Packet out of order; got sequence number %d, expected %d" \
-            #    % ((p.seq[0] - start_seq) & 0xffffffff, tot_bytes)
-            pass
+            flags.append('OUT_OF_ORDER')
+        if ((p.seq[0] - start_seq) & 0xffffffff) % 9000 == 0:
+            flags.append('9000')
+        spacings.append(((p.ts - last_ts) / 1e6, bytenr) + tuple(flags))
         last_ts = p.ts
         id_in = p.id
         tot_bytes = max(tot_bytes, bytenr)
@@ -165,7 +161,7 @@ if __name__ == '__main__':
     for f in sys.argv[1:]:
         conn = TcpAnalysis()
         conn.process_file(f)
-        ts = 0.0
+        ts = 0
         def request_start(p):
             return p.direction > 0 and p.datalen > 0
         result_list = []
diff --git a/parsetrace/delay-graph.py b/parsetrace/delay-graph.py
index 8030397..3c10247 100755
--- a/parsetrace/delay-graph.py
+++ b/parsetrace/delay-graph.py
@@ -8,7 +8,8 @@ for f in sys.argv[1:]:
     stats = json.load(open(f))
     for s in stats:
         if 'interpacket_times' in s:
-            times += [x[0] for x in s['interpacket_times']]
+            times += [(x[0], f, x[2:]) for x in s['interpacket_times']
+                      if x[0] > 0.005]
 
 for t in times:
-    print "%f" % (t,)
+    print "%f\t%s\t%s" % (t[0], t[1], ','.join(t[2]))
diff --git a/parsetrace/gen-cdf.py b/parsetrace/gen-cdf.py
index e5bd828..f5abe5a 100755
--- a/parsetrace/gen-cdf.py
+++ b/parsetrace/gen-cdf.py
@@ -2,10 +2,14 @@
 #
 # Convert a file with a sequence of data values to a CDF ready for gnuplot.
 
-import sys
+import re, sys
 
-data = [float(s.strip()) for s in sys.stdin]
+def split_line(l):
+    m = re.match(r"^([-+\d.e]+)(.*)$", l)
+    return (float(m.group(1)), m.group(2))
+
+data = [split_line(s.strip()) for s in sys.stdin]
 data.sort()
 for i in range(len(data)):
-    sys.stdout.write("%s\t%s\n" % ((i + 1.0) / len(data), data[i]))
+    sys.stdout.write("%s\t%s\n" % ((i + 1.0) / len(data), ''.join(map(str, data[i]))))
 