From 3361e337c993f58adefddbf52621e5197229bb84 Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Fri, 10 Dec 2010 15:02:15 -0800 Subject: [PATCH] Commit a basic but functional online cleaner implementation. The cleaner needn't be aware of parallelism; the proxy will handle merging of data. --- bluesky/bluesky-private.h | 1 + bluesky/cleaner.c | 46 +++++++++++++++++++++++++++++++++++++++ bluesky/inode.c | 2 +- cleaner/cleaner | 2 +- 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/bluesky/bluesky-private.h b/bluesky/bluesky-private.h index b188ac2..be7a2d3 100644 --- a/bluesky/bluesky-private.h +++ b/bluesky/bluesky-private.h @@ -477,6 +477,7 @@ gboolean bluesky_checkpoint_load(BlueSkyFS *fs); /* Merging of log state with the work of the cleaner. */ void bluesky_cleaner_merge(BlueSkyFS *fs); +void bluesky_cleaner_thread_launch(BlueSkyFS *fs); #ifdef __cplusplus } diff --git a/bluesky/cleaner.c b/bluesky/cleaner.c index d7e29eb..0fc39df 100644 --- a/bluesky/cleaner.c +++ b/bluesky/cleaner.c @@ -155,6 +155,8 @@ static BlueSkyCleanerItem *bluesky_cleaner_find_checkpoint(BlueSkyFS *fs) g_mutex_unlock(cachefile->lock); BlueSkyCleanerItem *checkpoint = bluesky_cleaner_deserialize(data); + checkpoint->location.directory = BLUESKY_CLOUD_DIR_CLEANER; + checkpoint->location.directory = seq; bluesky_string_unref(data); return checkpoint; @@ -200,6 +202,29 @@ gboolean needs_merging(BlueSkyFS *fs, BlueSkyCloudPointer location) return TRUE; } +/* For an inode which has been modified by the cleaner and must be flushed out + * to cloud storage, mark it as appropriately dirty. We will bypass writingt + * the inode to the journal if possible--but if there have ben other + * uncommitted changes besides what the cleaner did then we will force a + * journal write as well since the cloud shouldn't contain newer data than the + * journal. Inode must be locked. */ +static void cleaner_flush_inode(BlueSkyInode *inode) +{ + // if (inode->change_commit != inode->change_count) { + if (TRUE) { + /* bluesky_inode_start_sync schedules a flush to the cloud so we're all + * done. */ + bluesky_inode_start_sync(inode); + return; + } + + g_assert(inode->unlogged_list == NULL); + + bluesky_list_unlink(&inode->fs->dirty_list, inode->dirty_list); + inode->dirty_list = bluesky_list_prepend(&inode->fs->dirty_list, inode); + inode->change_cloud = inode->change_count; +} + static void merge_inode(BlueSkyFS *fs, BlueSkyCleanerItem *cleaner_inode) { /* There are two versions we are concerned with: cleaner_ is for the data @@ -254,6 +279,7 @@ static void merge_inode(BlueSkyFS *fs, BlueSkyCleanerItem *cleaner_inode) b->ref->location = cb->location; } } + cleaner_flush_inode(proxy_inode); g_mutex_unlock(proxy_inode->lock); /* Mark the inode as modified so it will get written back to the cloud. We @@ -305,5 +331,25 @@ void bluesky_cleaner_merge(BlueSkyFS *fs) bluesky_cleaner_item_free(imap); } + fs->log_state->latest_cleaner_seq_seen = checkpoint->location.directory; bluesky_cleaner_item_free(checkpoint); } + +/* Run the cleaner as a background task. */ +static gpointer cleaner_thread(BlueSkyFS *fs) +{ + while (TRUE) { + struct timespec delay; + delay.tv_sec = 30; + delay.tv_nsec = 0; + nanosleep(&delay, NULL); + bluesky_cleaner_merge(fs); + } + + return NULL; +} + +void bluesky_cleaner_thread_launch(BlueSkyFS *fs) +{ + g_thread_create((GThreadFunc)cleaner_thread, fs, FALSE, NULL); +} diff --git a/bluesky/inode.c b/bluesky/inode.c index dd20cc9..cabaa34 100644 --- a/bluesky/inode.c +++ b/bluesky/inode.c @@ -132,7 +132,7 @@ BlueSkyFS *bluesky_init_fs(gchar *name, BlueSkyStore *store, bluesky_inode_do_sync(root); } - bluesky_cleaner_merge(fs); + bluesky_cleaner_thread_launch(fs); return fs; } diff --git a/cleaner/cleaner b/cleaner/cleaner index 675819b..c7b3d84 100755 --- a/cleaner/cleaner +++ b/cleaner/cleaner @@ -473,7 +473,7 @@ def run_cleaner(backend, inode_map, log, repack_inodes=False): # Determine which segments are poorly utilized and should be cleaned. We # need better heuristics here. for (s, u) in sorted(inode_map.util.segments.items()): - if (float(u[1]) / u[0] < 0.6 or u[1] < 32768) and u[1] > 0: + if (float(u[1]) / u[0] < 0.6) and u[1] > 0: print "Should clean segment", s loc = backend.name_to_loc(s) if s: inode_map.obsolete_segments.add(loc) -- 2.20.1