From: Michael Vrable Date: Tue, 5 Apr 2011 20:50:08 +0000 (-0700) Subject: Merge branch 'master' of git+ssh://niniel.sysnet.ucsd.edu/home/mvrable/local/bluesky X-Git-Url: http://git.vrable.net/?p=bluesky.git;a=commitdiff_plain;h=5ab537ebebcb0abef91bd032c822d466a2e898e1;hp=d775a59d76e74416f162933dbe39e81ad266da60 Merge branch 'master' of git+ssh://niniel.sysnet.ucsd.edu/home/mvrable/local/bluesky --- diff --git a/bluesky/cache.c b/bluesky/cache.c index b20960c..8cd2fb3 100644 --- a/bluesky/cache.c +++ b/bluesky/cache.c @@ -433,27 +433,32 @@ static int compute_pressure(BlueSkyFS *fs) if (g_atomic_int_get(&fs->cache_dirty) + g_atomic_int_get(&fs->cache_log_dirty) > bluesky_watermark_high_dirty) + { + g_print("pressure: too much dirty data (2)\n"); return 2; + } /* Too much uncommitted data in the journal on disk, not yet flushed to the * cloud? */ - printf("Dirty journals: %d to %d\n", - fs->log->journal_watermark, fs->log->seq_num); + /*printf("Dirty journals: %d to %d\n", + fs->log->journal_watermark, fs->log->seq_num);*/ int dirty_limit; dirty_limit = bluesky_options.cache_size / (LOG_SEGMENT_SIZE / 1024) / 2; int dirty_journals = fs->log->seq_num - fs->log->journal_watermark + 1; if (dirty_journals > 1 && dirty_journals >= dirty_limit) { - printf("Too many dirty journals (%d >= %d)\n", + printf("pressure: too many dirty journals (%d >= %d) (2)\n", dirty_journals, dirty_limit); return 2; } /* LEVEL 1 */ - if (g_atomic_int_get(&fs->cache_dirty) < bluesky_watermark_medium_dirty) + if (g_atomic_int_get(&fs->cache_dirty) > bluesky_watermark_medium_dirty) { + g_print("pressure: too much dirty data (1)\n"); return 1; + } if (dirty_journals > 1 && dirty_journals > dirty_limit / 2) { - printf("Many dirty journals (%d), should start writeback\n", + printf("pressure: many dirty journals (%d), should start writeback (1)\n", dirty_journals); return 1; } diff --git a/bluesky/cleaner.c b/bluesky/cleaner.c index 5f241d4..a892e72 100644 --- a/bluesky/cleaner.c +++ b/bluesky/cleaner.c @@ -115,7 +115,8 @@ static BlueSkyCleanerItem *bluesky_cleaner_find_checkpoint(BlueSkyFS *fs) if (last_segment == NULL) return NULL; - g_print("Last cloud log segment: %s\n", last_segment); + g_print("Last cloud log segment: %s (processed up to %d)\n", + last_segment, fs->log_state->latest_cleaner_seq_seen); int seq = atoi(last_segment + 13); g_free(last_segment); @@ -156,7 +157,7 @@ static BlueSkyCleanerItem *bluesky_cleaner_find_checkpoint(BlueSkyFS *fs) BlueSkyCleanerItem *checkpoint = bluesky_cleaner_deserialize(data); checkpoint->location.directory = BLUESKY_CLOUD_DIR_CLEANER; - checkpoint->location.directory = seq; + checkpoint->location.sequence = seq; bluesky_string_unref(data); return checkpoint; @@ -302,6 +303,12 @@ void bluesky_cleaner_merge(BlueSkyFS *fs) return; } + if (checkpoint->type != LOGTYPE_CHECKPOINT) { + g_warning("Last cleaner object not a checkpoint; cleaning probably in progress."); + bluesky_cleaner_item_free(checkpoint); + return; + } + /* Iterate over each of the inode map sections in the checkpoint */ for (int i = 0; i < checkpoint->links->len; i++) { BlueSkyCleanerLink *link = &g_array_index(checkpoint->links, @@ -331,7 +338,7 @@ void bluesky_cleaner_merge(BlueSkyFS *fs) bluesky_cleaner_item_free(imap); } - fs->log_state->latest_cleaner_seq_seen = checkpoint->location.directory; + fs->log_state->latest_cleaner_seq_seen = checkpoint->location.sequence; bluesky_cleaner_item_free(checkpoint); } diff --git a/microbench/32kreadbench.py b/microbench/32kreadbench.py new file mode 100755 index 0000000..1c07cd6 --- /dev/null +++ b/microbench/32kreadbench.py @@ -0,0 +1,24 @@ +#!/usr/bin/python + +import os, random, sys, time + +def read_files(files, rate=1.0): + while True: + f = random.sample(files, 1)[0] + + start = time.time() + fp = open(f, 'r') + blocks = (16 << 20) / 32768 + offset = random.randrange(blocks) * 32768 + fp.seek(offset) + fp.read(32768) + print time.time() - start + fp.close() + +if __name__ == '__main__': + all_files = [] + for (path, dirs, files) in iter(os.walk(".")): + for f in files: + all_files.append(os.path.join(path, f)) + print len(all_files), "files total" + read_files(all_files) diff --git a/microbench/run-synread.sh b/microbench/run-synread.sh index d3d8cc9..b247b5b 100755 --- a/microbench/run-synread.sh +++ b/microbench/run-synread.sh @@ -27,10 +27,10 @@ run_synbench() { BLUESKY_RUN_NAME=$basename-$(($s / 1024))-c1 run_cmd $BENCHER run-synread - SYNREAD_OUTSTANDING=8 - SYNREAD_PROCS=2 - BLUESKY_RUN_NAME=$basename-$(($s / 1024))-c16 - run_cmd $BENCHER run-synread + #SYNREAD_OUTSTANDING=8 + #SYNREAD_PROCS=2 + #BLUESKY_RUN_NAME=$basename-$(($s / 1024))-c16 + #run_cmd $BENCHER run-synread done run_cmd $PROXY stop-proxy @@ -39,10 +39,10 @@ run_synbench() { } SYNREAD_DURATION=120 -for cache in 0 4 12 20 28; do +for cache in 0 4 8 12 16 20 24 28 32; do BLUESKY_CACHE_SIZE=$(($cache * 1024 * 1024)) if [ $cache -eq 0 ]; then BLUESKY_CACHE_SIZE=$((64 * 1024)) fi - run_synbench "syntest2-${cache}G" + run_synbench "syntest-${cache}G" done diff --git a/microbench/run-synwrite.sh b/microbench/run-synwrite.sh index ce66b3e..bc219a8 100755 --- a/microbench/run-synwrite.sh +++ b/microbench/run-synwrite.sh @@ -6,7 +6,7 @@ BASEDIR=$(dirname $(which $0)) BLUESKY_TARGET=s3:mvrable-bluesky-west BLUESKY_EXTRA_OPTS="BLUESKY_OPT_NO_CRYPTO=1" -rates="1 2 4 8 10 12 14 16 32" +rates="2 4 6 8 10 12 14 16 18 20 24 28 32" run_synbench() { basename=$(date +%Y%m%d)-$1 @@ -20,7 +20,7 @@ run_synbench() { for SYNWRITE_RATE in $rates; do run_cmd $BENCHER run-synwrite - sleep 30 + sleep 120 done run_cmd $PROXY stop-proxy @@ -28,7 +28,6 @@ run_synbench() { wait $proxy_pid } -#for BLUESKY_CACHE_SIZE in $((2048 * 1024)); do -for BLUESKY_CACHE_SIZE in $((256 * 1024)); do +for BLUESKY_CACHE_SIZE in $((256 * 1024)) $((2048 * 1024)); do run_synbench "write100c-$(($BLUESKY_CACHE_SIZE / 1024))M" done diff --git a/results/20110317/CLEANER b/results/20110317/CLEANER new file mode 100644 index 0000000..6044f90 --- /dev/null +++ b/results/20110317/CLEANER @@ -0,0 +1,83 @@ +Proxy run as: + +BLUESKY_CACHE_SIZE=$((8 * 1024 * 1024)) BLUESKY_TARGET=s3:mvrable-bluesky-west BLUESKY_STATS_OUT=/export2/20110317-cleaner.stats /scratch/bluesky.git/nfs3/nfsproxy + +Create 1 GB of files: +for i in {0..1023}; do dd if=/dev/zero of=file-$i bs=1M count=1; done + +Thu Mar 17 21:26:36 PDT 2011 +Log files written: up to log-00000000-00000261 + +Thu Mar 17 21:29:00 PDT 2011 +Dirty 25% of files with cleanbench.py +Log files written up to log-00000000-00000329 + +Thu Mar 17 21:29:45 PDT 2011 +Run cleaner, completes around Thu Mar 17 21:32:35 PDT 2011 +max log segments: log-00000000-00000329, log-00000001-00000030 + +============================================================================== + +Proxy run as: + +BLUESKY_CACHE_SIZE=$((4 * 1024 * 1024)) BLUESKY_TARGET=s3:mvrable-bluesky-west BLUESKY_STATS_OUT=/export2/20110317-cleaner.stats /scratch/bluesky.git/nfs3/nfsproxy + +Create 1 GB of files: +for i in {0..1023}; do dd if=/dev/zero of=file-$i bs=1M count=1; done +Fri Mar 18 00:44:58 PDT 2011 + +Log files written: up to log-00000000-00002066 + +Fri Mar 18 00:49:14 PDT 2011 +Dirty 25% of files with cleanbench.py +Log files written up to log-00000000-00002585 + +Run cleaner +max log segments: log-00000000-00002585, log-00000001-00000235 +Fri Mar 18 01:15:04 PDT 2011 + +============================================================================== + +Proxy run as: + +BLUESKY_CACHE_SIZE=$((4 * 1024 * 1024)) BLUESKY_TARGET=s3:mvrable-bluesky-west BLUESKY_STATS_OUT=/export2/20110318-cleaner.stats /scratch/bluesky.git/nfs3/nfsproxy + +Create 1 GB of files: +for i in {0..8191}; do dd if=/dev/zero of=file-$i bs=1M count=1; done +Fri Mar 18 01:47:03 PDT 2011 + +Log files written: up to log-00000000-00002067 + +Dirty 25% of files with cleanbench.py +Log files written up to log-00000000-00002586 +Fri Mar 18 01:50:25 PDT 2011 + +Run cleaner +Fri Mar 18 01:51:20 PDT 2011 +writeout shortly before Fri Mar 18 02:16:55 PDT 2011 +max log segments: + +============================================================================== + +Proxy run as: + +BLUESKY_CACHE_SIZE=$((1024 * 1024)) BLUESKY_TARGET=s3:mvrable-bluesky-west BLUESKY_STATS_OUT=/export2/20110318-cleaner.stats /scratch/bluesky.git/nfs3/nfsproxy + +Create 1 GB of files: +for i in {0..1023}; do dd if=/dev/zero of=file-$i bs=1M count=1; done +Fri Mar 18 01:47:03 PDT 2011 + +Log files up to: log-00000000-00000265 log-00000001-00000002 + +Fri Mar 18 09:18:02 PDT 2011 +Dirty 25% of files with cleanbench.py +Fri Mar 18 09:19:30 PDT 2011 +Log files up to: log-00000000-00000336 log-00000001-00000002 + +Run cleaner +Fri Mar 18 09:20:22 PDT 2011 +Fri Mar 18 09:24:23 PDT 2011 +max log segments: log-00000000-00000337 log-00000001-00000036 + +Re-run cleaner (to check for deletable segments): +Fri Mar 18 09:25:48 PDT 2011 diff --git a/results/20110317/read-latencies.data b/results/20110317/read-latencies.data new file mode 100644 index 0000000..57fde23 --- /dev/null +++ b/results/20110317/read-latencies.data @@ -0,0 +1,33 @@ +# Four-way comparison of read latencies +# +# First column is attempted ops per second, then: +# Native NFS +# Bluesky NFS +# Native Samba +# Bluesky CIFS +50 0.6 0.6 1.12 0.46 +100 2.41 7.59 2.01 7.3 +150 2.91 6.26 2.53 7.33 +200 3.23 8.41 2.96 10.09 +250 4.4 14.58 3.54 13.12 +300 4.28 13.14 3.55 11.38 +350 4.89 12.05 5.39 13.22 +400 6.27 13.81 4.7 14.88 +450 8.11 15.96 4.55 14.47 +500 10.22 21.02 5.32 16.12 +550 12.07 22.01 6.73 18.46 +600 13.39 25.23 7.89 17.12 +650 18.83 27.13 9.22 20.57 +700 18.22 33.35 11.29 24.76 +750 18.31 34.67 10.85 25.48 +800 19.03 37.45 11.14 +850 20.26 37.22 13.21 +900 24.35 41.63 10.58 +950 24.05 45.03 11.74 +1000 22.29 49.88 16.19 +1050 27.05 51.77 +1100 24.07 59.15 +1150 24.67 57.78 +1200 26.07 60.72 +1250 28.53 67.99 + diff --git a/results/20110318-synread/1024k-c1.data b/results/20110318-synread/1024k-c1.data new file mode 100644 index 0000000..7b16114 --- /dev/null +++ b/results/20110318-synread/1024k-c1.data @@ -0,0 +1,9 @@ +0 4.18348623853 4.18348623853 239 +4 5.08181818182 5.08181818182 197 +8 4.03603603604 4.03603603604 247 +12 4.9009009009 4.9009009009 204 +16 5.52252252252 5.52252252252 180 +20 5.59459459459 5.59459459459 178 +24 8.31818181818 8.31818181818 119 +28 11.6272727273 11.6272727273 86 +32 21.0990990991 21.0990990991 47 diff --git a/results/20110318-synread/128k-c1.data b/results/20110318-synread/128k-c1.data new file mode 100644 index 0000000..f812fdc --- /dev/null +++ b/results/20110318-synread/128k-c1.data @@ -0,0 +1,9 @@ +0 7.6036036036 0.95045045045 131 +4 13.7207207207 1.71509009009 72 +8 8.97297297297 1.12162162162 111 +12 9.28828828829 1.16103603604 107 +16 12.1981981982 1.52477477477 81 +20 13.1891891892 1.64864864865 75 +24 16.6396396396 2.07995495495 60 +28 26.8378378378 3.35472972973 37 +32 47.036036036 5.8795045045 21 diff --git a/results/20110318-synread/32k-c1.data b/results/20110318-synread/32k-c1.data new file mode 100644 index 0000000..30e2d07 --- /dev/null +++ b/results/20110318-synread/32k-c1.data @@ -0,0 +1,9 @@ +0 13.4324324324 0.419763513514 74 +4 15.027027027 0.469594594595 66 +8 16.2432432432 0.507601351351 61 +12 16.3153153153 0.509853603604 61 +16 21.2522522523 0.664132882883 47 +20 20.954954955 0.654842342342 47 +24 29.7477477477 0.929617117117 33 +28 42.9099099099 1.34093468468 23 +32 73.9459459459 2.31081081081 13 diff --git a/results/figures/specsfs.gnuplot b/results/figures/specsfs.gnuplot index debc5d3..a085ade 100644 --- a/results/figures/specsfs.gnuplot +++ b/results/figures/specsfs.gnuplot @@ -13,29 +13,23 @@ set xtics nomirror set x2tics auto set grid -set key top left +set key top right set output "spec1.eps" set ylabel "Achieved Operations per Second" -plot "../20110311/sfssum.20110312-native" with linespoints title "Local NFS Server", \ - "../20110307/sfssum.20110307-ec2-west2" with linespoints title "EC2 NFS Server", \ +plot "../20110311/sfssum.20110312-native" with linespoints title "Local NFS", \ "../20110311/sfssum.20110311-s3-west" with linespoints title "BlueSky", \ "../20110311/sfssum.20110311-s3-west-4kfs" with linespoints title "BlueSky (4K blocks)", \ "../20110311/sfssum.20110312-s3-west-fullseg" with linespoints title "BlueSky (full fetches)", \ - "../20110311/sfssum.20110312-s3-west-noseg" with linespoints title "BlueSky (no segments)", \ - "../20110311/sfssum.20110315-s3-west-noagg" with linespoints title "BlueSky (no read aggregation)", \ "../20110311/sfssum.20110313-azure" with linespoints title "Azure" set output "spec2.eps" set ylabel "Operation Latency (ms)" set yrange [0:50] -plot "../20110311/sfssum.20110312-native" using 1:3 with linespoints title "Local NFS Server", \ - "../20110307/sfssum.20110307-ec2-west2" using 1:3 with linespoints title "EC2 NFS Server", \ +plot "../20110311/sfssum.20110312-native" using 1:3 with linespoints title "Local NFS", \ "../20110311/sfssum.20110311-s3-west" using 1:3 with linespoints title "BlueSky", \ "../20110311/sfssum.20110311-s3-west-4kfs" using 1:3 with linespoints title "BlueSky (4K blocks)", \ "../20110311/sfssum.20110312-s3-west-fullseg" using 1:3 with linespoints title "BlueSky (full fetches)", \ - "../20110311/sfssum.20110312-s3-west-noseg" using 1:3 with linespoints title "BlueSky (no segments)", \ - "../20110311/sfssum.20110315-s3-west-noagg" using 1:3 with linespoints title "BlueSky (no read aggregation)", \ "../20110311/sfssum.20110313-azure" using 1:3 with linespoints title "Azure" XMAX=1600 @@ -76,3 +70,18 @@ set output "spec-cifs2.eps" set ylabel "Operation Latency (ms)" plot "../20110227a/sfssum.20110227-samba" using 1:3 with linespoints title "Samba", \ "../20110317/sfssum.20110317-cifs" using 1:3 with linespoints title "BlueSky" + +# Four-way comparison of read latencies among native/BlueSky with NFS/CIFS +XMAX=750 +set xrange [0:XMAX] +set x2range [0:XMAX*OPS_WSS_SCALE] +set xtics nomirror +set x2tics auto +set yrange [*:*] + +set output "spec-read-latencies.eps" +set ylabel "Operation Latency (ms)" +plot "../20110317/read-latencies.data" using 1:2 with linespoints title "Native NFS", \ + "../20110317/read-latencies.data" using 1:3 with linespoints title "BlueSky NFS", \ + "../20110317/read-latencies.data" using 1:4 with linespoints title "Samba (CIFS)", \ + "../20110317/read-latencies.data" using 1:5 with linespoints title "BlueSky CIFS" diff --git a/results/figures/syn-read.gnuplot b/results/figures/syn-read.gnuplot index af974bf..26c2795 100644 --- a/results/figures/syn-read.gnuplot +++ b/results/figures/syn-read.gnuplot @@ -8,28 +8,28 @@ set xrange [0:100] set output "syn-read-1.eps" set title "Single-Client Request Stream" set key top right -plot "../20110316-synread/32k-c1.data" using (100*$1/32):4 with linespoints title "32 KB", \ - "../20110316-synread/128k-c1.data" using (100*$1/32):4 with linespoints title "128 KB", \ - "../20110316-synread/1024k-c1.data" using (100*$1/32):4 with linespoints title "1024 KB" +plot "../20110318-synread/32k-c1.data" using (100*$1/32):4 with linespoints title "32 KB", \ + "../20110318-synread/128k-c1.data" using (100*$1/32):4 with linespoints title "128 KB", \ + "../20110318-synread/1024k-c1.data" using (100*$1/32):4 with linespoints title "1024 KB" -set output "syn-read-16.eps" -set title "16 Concurrent Request Streams" -set key top left -plot "../20110316-synread/32k-c16.data" using (100*$1/32):4 with linespoints title "32 KB", \ - "../20110316-synread/128k-c16.data" using (100*$1/32):4 with linespoints title "128 KB", \ - "../20110316-synread/1024k-c16.data" using (100*$1/32):4 with linespoints title "1024 KB" +# set output "syn-read-16.eps" +# set title "16 Concurrent Request Streams" +# set key top left +# plot "../20110316-synread/32k-c16.data" using (100*$1/32):4 with linespoints title "32 KB", \ +# "../20110316-synread/128k-c16.data" using (100*$1/32):4 with linespoints title "128 KB", \ +# "../20110316-synread/1024k-c16.data" using (100*$1/32):4 with linespoints title "1024 KB" set ylabel "Read Bandwidth (MB/s)" set output "syn-read-1b.eps" set title "Single-Client Request Stream" set key top left -plot "../20110316-synread/32k-c1.data" using (100*$1/32):3 with linespoints title "32 KB", \ - "../20110316-synread/128k-c1.data" using (100*$1/32):3 with linespoints title "128 KB", \ - "../20110316-synread/1024k-c1.data" using (100*$1/32):3 with linespoints title "1024 KB" +plot "../20110318-synread/32k-c1.data" using (100*$1/32):3 with linespoints title "32 KB", \ + "../20110318-synread/128k-c1.data" using (100*$1/32):3 with linespoints title "128 KB", \ + "../20110318-synread/1024k-c1.data" using (100*$1/32):3 with linespoints title "1024 KB" -set output "syn-read-16b.eps" -set title "16 Concurrent Request Streams" -set key top right -plot "../20110316-synread/32k-c16.data" using (100*$1/32):3 with linespoints title "32 KB", \ - "../20110316-synread/128k-c16.data" using (100*$1/32):3 with linespoints title "128 KB", \ - "../20110316-synread/1024k-c16.data" using (100*$1/32):3 with linespoints title "1024 KB" +# set output "syn-read-16b.eps" +# set title "16 Concurrent Request Streams" +# set key top right +# plot "../20110316-synread/32k-c16.data" using (100*$1/32):3 with linespoints title "32 KB", \ +# "../20110316-synread/128k-c16.data" using (100*$1/32):3 with linespoints title "128 KB", \ +# "../20110316-synread/1024k-c16.data" using (100*$1/32):3 with linespoints title "1024 KB" diff --git a/results/parse-sfsres.py b/results/parse-sfsres.py index 34c9a09..8d84ac1 100755 --- a/results/parse-sfsres.py +++ b/results/parse-sfsres.py @@ -28,6 +28,7 @@ def parse_date(datestr): return int(d.strip()) def find_stats(statsdata, timestamp): + if statsdata is None: return (0, [0] * len(STATSDATA)) for s in statsdata: if s[0] > timestamp: return (s[0], s[1:]) return (statsdata[-1][0], statsdata[-1][1:]) @@ -121,6 +122,6 @@ if __name__ == '__main__': input_stats = open(sys.argv[2]) statsdata = parse_stats(input_stats) except: - statsdata = [] + statsdata = None parse_sfsres(input_sfsres, statsdata)