X-Git-Url: http://git.vrable.net/?p=bluesky.git;a=blobdiff_plain;f=cloudbench%2Fparalleltest.py;h=19ee695b6f7bd4ae2adae53cc2e8eba9fc80b455;hp=5084b62229e734d127a14f07b9c035e166ce8223;hb=HEAD;hpb=b49973e78300a264aa8e3b985aa125e0e0e89d8c diff --git a/cloudbench/paralleltest.py b/cloudbench/paralleltest.py index 5084b62..19ee695 100755 --- a/cloudbench/paralleltest.py +++ b/cloudbench/paralleltest.py @@ -1,5 +1,32 @@ #!/usr/bin/python # +# Copyright (C) 2010 The Regents of the University of California +# Written by Michael Vrable +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + # Run a series of simple test requests against S3 for gathering some basic # performance numbers. @@ -10,7 +37,7 @@ import sys, threading, time, Queue import azure BUCKET_NAME = 'mvrable-benchmark' -SIZES = [64, 4096, 32 << 10, 256 << 10, 1 << 20, 4 << 20, 32 << 20] +SIZES = [(1 << s) for s in range(12, 23)] class S3TestConnection: def __init__(self): @@ -48,16 +75,86 @@ def parallel_get(name, connections, delay1=0.0): res = [] while not q.empty(): res.append(q.get()) - return res -def run_test(size, threads, num): + if len(res) == len(connections): + return res + +def parallel_multiget(names, connections, repeat=1): + requests = Queue.Queue() + results = [[threading.Lock(), None] for n in names] + for i in range(len(names)): + for _ in range(repeat): + requests.put((names[i], results[i])) + + threads = [] + def launcher(c, requests): + while True: + try: + (n, r) = requests.get(block=False) + # Possible data race here but it should be harmless + if r[1] is None: + res = c.get_object(n) + r[0].acquire() + if r[1] is None: r[1] = time.time() + r[0].release() + requests.task_done() + except Queue.Empty: + return + for i in range(len(connections)): + c = connections[i] + threads.append(threading.Thread(target=launcher, args=(c, requests))) + start_time = time.time() + for i in range(len(threads)): + threads[i].start() + requests.join() + + return max(x[1] for x in results) - start_time + +def run_test(size, threads, num, logfile=sys.stdout, delay=1.0): connections = [S3TestConnection() for _ in range(threads)] for i in range(num): + print " ...test", i res = parallel_get('file-%d-%d' % (size, i), connections) - print res - time.sleep(1.0) + if res is not None: + logfile.write(str(min(res)) + "\n") + if delay > 0: + time.sleep(delay) + +connections = [S3TestConnection() for _ in range(128)] +logfile = open('multifetch-simulation.data', 'a') +for s in [(1 << s) for s in range(16, 27)]: + print "Priming objects: %d-byte objects" % (s,) + run_test(s, 1, 100, open('/dev/null', 'w'), 0.0) + + for blocksize in [x << 20 for x in (4, 8, 16, 32, 64, 128)]: + if s > blocksize: continue + for t in [4, 2, 1]: + for rep in range(10): + count = blocksize // s + print "Running tests: %d-byte blocks, %d-byte objects, %d parallel fetches" % (blocksize, s, t) + print "Object count:", count + if count * t > len(connections): + conns = connections + else: + conns = connections[0 : count * t] + + objects = ['file-%d-%d' % (s, i % 100) for i in range(count)] + r = parallel_multiget(objects, conns, t) + print r + logfile.write('%s\t%s\t%s\t%s\t%s\n' % (s, blocksize >> 20, t, len(conns), r)) + logfile.flush() + time.sleep(2.0) +sys.exit(0) + +for s in SIZES: + print "Priming objects: %d-byte objects" % (s,) + logfile = open('/dev/null', 'w') + run_test(s, 1, 100, logfile, 0.0) -run_test(32768, 4, 500) + for t in [4, 2, 1]: + print "Running tests: %d-byte objects, %d parallel fetches" % (s, t) + logfile = open('parallel-%d-%d.data' % (s, t), 'w') + run_test(s, t, 100, logfile) sys.exit(0) if __name__ == '__main__':