# Parse the sfsres log file generated by SPECsfs to generate more detailed
# latency statistics than in the sfssum summary file.
#
# Usage: parse-sfsres.py SFSRES_FILE [STATS_FILE]
#
# NOTE(review): this text is the post-patch state of results/parse-sfsres.py
# (bluesky.git, blob 34c9a09), re-flowed from a unified diff whose newlines
# had been collapsed.  Anything before the leading comment of the original
# file (e.g. an interpreter line) was outside the diff and is not reproduced;
# lines that fell between diff hunks are marked NOTE(review) below.

import re
import subprocess
import sys


def extract_re(lines, regexp):
    """Return the first match of regexp against the start of any line.

    regexp may be a pattern string or a compiled pattern.  Returns the match
    object of the first line that matches (using re.match semantics), or None
    when no line matches.
    """
    if isinstance(regexp, str):
        # NOTE(review): this line and the loop header below fall between two
        # diff hunks and are reconstructed -- confirm against the repository.
        regexp = re.compile(regexp)
    for l in lines:
        m = regexp.match(l)
        if m:
            return m
    return None


# Operations for which one output column is written per run.
OPERATIONS = ('read', 'write', 'create', 'setattr', 'lookup', 'getattr')

# Names of the counters in each statistics row, in column order after the
# leading timestamp column.
STATSDATA = ('getop', 'getbyte', 'putop', 'putbyte',
             'nfsincount', 'nfsinbyte', 'nfsoutcount', 'nfsoutbyte')

# Cost charged per unit of each counter in STATSDATA (same order).
# NOTE(review): presumably currency per request / per byte -- confirm units.
COSTS = (0.01e-4, 0.15/1024**3, 0.01e-3, 0.10/1024**3, 0, 0, 0, 0)

# Each run's throughput (ops/sec) is scaled by this many seconds to estimate
# the number of operations; presumably the 5-minute measurement window.
MEASUREMENT_SECONDS = 300

# Running totals over all runs handled by parse_run().
op_sum = 0
stat_sum = [0 for _ in STATSDATA]


def parse_date(datestr):
    """Convert a date string to integer seconds since the epoch.

    The conversion is delegated to `/bin/date -d DATESTR +%s`.

    Raises OSError if /bin/date cannot be executed and ValueError if it
    rejects the string or prints something that is not an integer.
    """
    p = subprocess.Popen(['/bin/date', '-d', datestr, '+%s'],
                         stdout=subprocess.PIPE)
    # communicate() drains the pipe, waits for exit and closes the handle.
    out, _ = p.communicate()
    if p.returncode != 0:
        raise ValueError("cannot parse date: %r" % (datestr,))
    return int(out.strip())


def find_stats(statsdata, timestamp):
    """Return (time, counters) for the first row later than timestamp.

    statsdata is a sequence of rows [time, counter, counter, ...].  Rows are
    scanned in order and the first one whose time is strictly greater than
    timestamp is returned; if there is none, the last row is returned.
    NOTE(review): this is only meaningful if rows are in increasing time
    order -- confirm against the producer of the statistics file.

    Raises IndexError if statsdata is empty.
    """
    for s in statsdata:
        if s[0] > timestamp:
            return (s[0], s[1:])
    return (statsdata[-1][0], statsdata[-1][1:])


def parse_run(lines, timestamp, outfp=sys.stdout, statsdata=None):
    """Parse one benchmark section and write its result row to outfp.

    lines     -- the text lines of one section of the sfsres log
    timestamp -- accepted for interface compatibility; the finish time that
                 is actually used is read from the "SFS Aggregate Results"
                 line inside the section
    outfp     -- file-like object that receives the output
    statsdata -- rows as returned by parse_stats(), or None/empty if no
                 statistics are available

    Writes a "# finish_timestamp" comment, a comment with the counter deltas
    when both a finish time and statistics are available, and one
    tab-separated row: requested load, throughput, response time, then
    element 5 of the per-operation table for each name in OPERATIONS
    ('-' if that operation has no table row).  Adds the run to the
    module-level op_sum / stat_sum totals.

    A section without a requested-load or throughput line produces no output
    apart from a warning on stderr.
    """
    global op_sum
    if statsdata is None:
        statsdata = []

    requested_load = extract_re(lines, r"\s*Requested Load.*= (\d+)")
    results = extract_re(lines, r"SFS NFS THROUGHPUT:\s*([\d.]+).*RESPONSE TIME:\s*([\d.]+) Msec/Op")
    if requested_load is None or results is None:
        sys.stderr.write("Skipping section without load/throughput results\n")
        return
    load = int(requested_load.group(1))

    # The finish time comes from the aggregate-results line; it stays None
    # if that line is absent or its date cannot be converted.
    finish = None
    m = extract_re(lines, r"SFS Aggregate Results.*, (.*)")
    if m is not None:
        try:
            finish = parse_date(m.group(1))
        except (OSError, ValueError):
            finish = None

    # Extract the table of per-operation counts, response times, etc.: a
    # name followed by nine numeric columns (optionally suffixed with '%').
    regexp = re.compile(r"^(\w+)" + r"\s*([\d.]+)%?" * 9)
    # NOTE(review): the initialisation of `table` falls between two diff
    # hunks and is reconstructed -- confirm against the repository.
    table = {}
    for l in lines:
        m = regexp.match(l)
        if m:
            try:
                table[m.group(1)] = [float(m.group(i)) for i in range(2, 11)]
            except ValueError:
                # The pattern also accepts tokens such as "1.2.3"; ignore
                # rows whose columns are not valid numbers.
                pass

    # Search for statistics on uploads/downloads in the time interval when
    # the benchmark is running.  We have the ending time.  SPECsfs runs for
    # the 10 minutes prior, and uses the last 5 minutes of data.  Let's use
    # the time from 6 minutes prior to 1 minute prior, to give another
    # 5-minute period with a bit of a buffer after in case timing is
    # slightly off.  Without a finish time or statistics there is nothing
    # to correlate, so the delta is simply omitted.
    stat_delta = None
    if finish is not None and statsdata:
        (t1, s1) = find_stats(statsdata, finish - 6*60)
        (t2, s2) = find_stats(statsdata, finish - 1*60)
        stat_delta = [after - before for before, after in zip(s1, s2)]

    outfp.write("# finish_timestamp: " + str(finish) + "\n")
    if stat_delta is not None:
        outfp.write("# in %s seconds: stats are %s\n" % (t2 - t1, stat_delta))
    outfp.write("%d\t%s\t%s" % (load, results.group(1), results.group(2)))
    for o in OPERATIONS:
        # NOTE(review): column 5 is presumably the mean response time --
        # confirm against the sfsres table header.
        val = table[o][5] if o in table else '-'
        outfp.write("\t%s" % (val,))
    outfp.write("\n")

    # The throughput pattern admits a decimal point; go through float so a
    # value such as "99.0" does not abort the run after its row was written.
    op_sum += int(float(results.group(1)))
    if stat_delta is not None:
        # Accumulate in place so stat_sum always keeps one slot per STATSDATA
        # name even if a statistics row has a different number of columns.
        for i, delta in enumerate(stat_delta[:len(stat_sum)]):
            stat_sum[i] += delta


def _per_op(value, total_ops):
    """Return value divided by total_ops, or '-' when no operations ran."""
    if not total_ops:
        return '-'
    return value / float(total_ops)


def parse_sfsres(fp, statsdata):
    """Parse a whole sfsres log and print per-run rows plus a summary.

    fp        -- iterable of text lines (e.g. an open file)
    statsdata -- rows as returned by parse_stats(); may be empty

    Sections are delimited by lines consisting of some text followed by a
    space and at least 32 asterisks.  Each non-empty section is handed to
    parse_run(); afterwards the accumulated totals and the cost computed
    from COSTS are written to standard output.
    """
    sys.stdout.write("# target_ops actual_ops latency_avg")
    for o in OPERATIONS:
        sys.stdout.write(" " + o)
    sys.stdout.write("\n")

    separator = re.compile(r"^([^*]+) \*{32,}$")
    timestamp = None
    run_data = []
    for line in fp:
        m = separator.match(line)
        if m:
            if run_data:
                parse_run(run_data, timestamp, outfp=sys.stdout,
                          statsdata=statsdata)
            run_data = []
            timestamp = m.group(1)
        else:
            run_data.append(line)
    if run_data:
        parse_run(run_data, timestamp, outfp=sys.stdout, statsdata=statsdata)

    total_ops = op_sum * MEASUREMENT_SECONDS
    sys.stdout.write("\n")
    sys.stdout.write("Total SFS operations: %s\n" % (total_ops,))
    sys.stdout.write("Statistics:\n")
    cost = 0.0
    for name, total, unit_cost in zip(STATSDATA, stat_sum, COSTS):
        cost += total * unit_cost
        sys.stdout.write("%s: %s (%s)\n"
                         % (name, total, _per_op(total, total_ops)))
    sys.stdout.write("Total cost: %s (%s per op)\n"
                     % (cost, _per_op(cost, total_ops)))


def parse_stats(statsfile):
    """Read whitespace-separated numeric rows from statsfile.

    Lines starting with '#' and blank lines are skipped; every other line is
    split on whitespace and converted to a list of floats.

    Raises ValueError if a field is not a valid number.
    """
    datapoints = []
    for line in statsfile:
        if line.startswith('#'):
            continue
        fields = line.split()
        if not fields:
            # An empty row would later break find_stats(), which reads s[0].
            continue
        datapoints.append([float(x) for x in fields])
    return datapoints


def main(argv):
    """Command-line entry point; returns the process exit status."""
    if len(argv) < 2:
        sys.stderr.write("Usage: parse-sfsres.py SFSRES_FILE [STATS_FILE]\n")
        return 2

    # The statistics file is optional and best-effort: if it is missing or
    # malformed, continue without the per-run counter deltas.
    statsdata = []
    if len(argv) > 2:
        try:
            with open(argv[2]) as input_stats:
                statsdata = parse_stats(input_stats)
        except (IOError, OSError, ValueError) as e:
            sys.stderr.write("Warning: ignoring statistics file: %s\n" % (e,))
            statsdata = []

    with open(argv[1]) as input_sfsres:
        parse_sfsres(input_sfsres, statsdata)
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))