From c6542451a09acfc326f1578635f86291652b4f81 Mon Sep 17 00:00:00 2001
From: Michael Vrable <mvrable@cs.ucsd.edu>
Date: Wed, 5 May 2010 13:46:44 -0700
Subject: [PATCH] More S3 performance evaluation scripts.

---
 cloudbench/cloudtest.py   | 20 ++++++++++----------
 parsetrace/gen-cdf.py     | 11 +++++++++++
 parsetrace/split-trace.py | 15 ++++++++++-----
 3 files changed, 31 insertions(+), 15 deletions(-)
 create mode 100755 parsetrace/gen-cdf.py

diff --git a/cloudbench/cloudtest.py b/cloudbench/cloudtest.py
index 3743282..0755688 100755
--- a/cloudbench/cloudtest.py
+++ b/cloudbench/cloudtest.py
@@ -58,16 +58,16 @@ def run_test():
         for size in SIZES:
             c.get_object('file-%d-%d' % (size, repeat), size)
 
-#    print "==== AZURE ===="
-#    c = AzureTestConnection()
-#    for repeat in range(4):
-#        for size in SIZES:
-#            c.put_object('file-%d-%d' % (size, repeat), size)
-#
-#    c = AzureTestConnection()
-#    for repeat in range(4):
-#        for size in SIZES:
-#            c.get_object('file-%d-%d' % (size, repeat), size)
+    print "==== AZURE ===="
+    c = AzureTestConnection()
+    for repeat in range(4):
+        for size in SIZES:
+            c.put_object('file-%d-%d' % (size, repeat), size)
+
+    c = AzureTestConnection()
+    for repeat in range(4):
+        for size in SIZES:
+            c.get_object('file-%d-%d' % (size, repeat), size)
 
 if __name__ == '__main__':
     run_test()
diff --git a/parsetrace/gen-cdf.py b/parsetrace/gen-cdf.py
new file mode 100755
index 0000000..e5bd828
--- /dev/null
+++ b/parsetrace/gen-cdf.py
@@ -0,0 +1,11 @@
+#!/usr/bin/python
+#
+# Convert a file with a sequence of data values to a CDF ready for gnuplot.
+# Reads one number per line on stdin; writes "fraction<TAB>value" rows.
+
+import sys
+
+# Skip blank lines (float() already ignores surrounding whitespace).
+data = sorted(float(s) for s in sys.stdin if s.strip())
+for rank, value in enumerate(data, 1):
+    sys.stdout.write("%s\t%s\n" % (float(rank) / len(data), value))
diff --git a/parsetrace/split-trace.py b/parsetrace/split-trace.py
index caf77ee..6af8c72 100755
--- a/parsetrace/split-trace.py
+++ b/parsetrace/split-trace.py
@@ -8,12 +8,20 @@ import impacket.ImpactDecoder, impacket.ImpactPacket
 
 # Domain names for cloud service providers, whose traces we want to pull out.
 DOMAINS = ['.amazon.com', '.amazonaws.com', '.core.windows.net',
-           '204.246.162.', '87.238.86.']
+           '204.246.', '87.238.']  # NOTE(review): IP prefixes vs. name.endswith(d) -- confirm
 
 # The collection of flows we've seen.  The value associated with each flow is a
 # sequence number indicating in what order we saw the flows in the trace.
 flows = {}
 
+def ip_lookup(host, cache={}):  # reverse-DNS name for host; shared default dict memoizes across calls
+    if host not in cache:  # resolve each distinct address at most once
+        try:
+            cache[host] = socket.gethostbyaddr(host)[0]  # use the argument, not a caller's global
+        except socket.error:  # lookup failed (herror/gaierror): fall back to the raw address
+            cache[host] = host
+    return cache[host]
+
 # Step 1: Parse the input file and extract a listing of all the flows that we
 # care about.
 def handler(header, data):
@@ -47,10 +55,7 @@ for file in sys.argv[1:]:
         # another filter on IP address in case there happened to be any other
         # HTTP flows during the trace capture.
         if dport != 80: continue
-        try:
-            name = socket.gethostbyaddr(dst)[0]
-        except:
-            name = dst
+        name = ip_lookup(dst)
         matches = False
         for d in DOMAINS:
             if name.endswith(d): matches = True
-- 
2.20.1