From a5b5871865301712bc22ffc3d86a425dc0b45991 Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Mon, 7 Dec 2009 15:24:21 -0800 Subject: [PATCH] More work on writeback caching. --- bluesky/bluesky-private.h | 2 ++ bluesky/bluesky.h | 3 +++ bluesky/cache.c | 49 ++++++++++++++++++++++++++++++++++++--- bluesky/file.c | 2 +- bluesky/inode.c | 12 ++++++++-- 5 files changed, 62 insertions(+), 6 deletions(-) diff --git a/bluesky/bluesky-private.h b/bluesky/bluesky-private.h index d00f7c3..e51c40f 100644 --- a/bluesky/bluesky-private.h +++ b/bluesky/bluesky-private.h @@ -110,6 +110,8 @@ void bluesky_store_sync(BlueSkyStore *store); void bluesky_store_add_barrier(BlueSkyStoreAsync *barrier, BlueSkyStoreAsync *async); +void bluesky_inode_start_sync(BlueSkyInode *inode, BlueSkyStoreAsync *barrier); + #ifdef __cplusplus } #endif diff --git a/bluesky/bluesky.h b/bluesky/bluesky.h index 17a9540..bd598e0 100644 --- a/bluesky/bluesky.h +++ b/bluesky/bluesky.h @@ -146,6 +146,9 @@ typedef struct { * set to the current time. If the inode is clean, it is set to zero. */ int64_t change_time; + /* Additional state for tracking cache writeback status. */ + uint64_t change_pending; /* change_count version currently being committed to storage */ + int64_t atime; /* Microseconds since the Unix epoch */ int64_t ctime; int64_t mtime; diff --git a/bluesky/cache.c b/bluesky/cache.c index f953a64..912c5fd 100644 --- a/bluesky/cache.c +++ b/bluesky/cache.c @@ -13,8 +13,29 @@ #include "bluesky-private.h" +#define WRITEBACK_DELAY (5 * 1000000) + /* Filesystem caching and cache coherency. */ +static void writeback_complete(gpointer a, gpointer i) +{ + BlueSkyInode *inode = (BlueSkyInode *)i; + + g_log("bluesky/flushd", G_LOG_LEVEL_DEBUG, + "Writeback for inode %"PRIu64" complete", inode->inum); + + g_mutex_lock(inode->lock); + + inode->change_commit = inode->change_pending; + if (inode->change_count == inode->change_commit) { + /* If inode is no longer dirty... 
*/ + inode->change_time = 0; + } + inode->change_pending = 0; /* Writeback is over either way; let flushd_inode start the next one. */ + + g_mutex_unlock(inode->lock); +} + static void flushd_inode(gpointer key, gpointer value, gpointer user_data) { BlueSkyFS *fs = (BlueSkyFS *)user_data; @@ -24,12 +45,34 @@ static void flushd_inode(gpointer key, gpointer value, gpointer user_data) if (inode->change_count == inode->change_commit) return; + if (inode->change_pending) { + /* Waiting for an earlier writeback to finish, so don't start a new + * writeback yet. */ + return; + } + + uint64_t elapsed = bluesky_get_current_time() - inode->change_time; + if (elapsed < WRITEBACK_DELAY) { + /* Give a bit more time before starting writeback. */ + return; + } + + inode->change_pending = inode->change_count; + g_log("bluesky/flushd", G_LOG_LEVEL_DEBUG, "Starting flush of inode %"PRIu64, inode->inum); - if (inode->type == BLUESKY_REGULAR) - bluesky_file_flush(inode); - bluesky_inode_flush(fs, inode); + /* Create a store barrier. All operations part of the writeback will be + * added to this barrier, so when the barrier completes we know that the + * writeback is finished. 
*/ + BlueSkyStoreAsync *barrier = bluesky_store_async_new(fs->store); + barrier->op = STORE_OP_BARRIER; + + bluesky_inode_start_sync(inode, barrier); + + bluesky_store_async_add_notifier(barrier, writeback_complete, inode); + bluesky_store_async_submit(barrier); + bluesky_store_async_unref(barrier); } /* Scan through the cache for dirty data and start flushing it to stable diff --git a/bluesky/file.c b/bluesky/file.c index f7e8fca..8e16e91 100644 --- a/bluesky/file.c +++ b/bluesky/file.c @@ -127,7 +127,7 @@ void bluesky_file_write(BlueSkyInode *inode, uint64_t offset, } bluesky_inode_update_ctime(inode, 1); - bluesky_inode_flush(inode->fs, inode); + //bluesky_inode_flush(inode->fs, inode); } void bluesky_file_read(BlueSkyInode *inode, uint64_t offset, diff --git a/bluesky/inode.c b/bluesky/inode.c index 438e59f..40f40ba 100644 --- a/bluesky/inode.c +++ b/bluesky/inode.c @@ -29,7 +29,8 @@ int64_t bluesky_get_current_time() /* Update an inode to indicate that a modification was made. This increases * the change counter, updates the ctime to the current time, and optionally - * updates the mtime. inode must already be locked. */ + * updates the mtime. This also makes the inode contents subject to writeback + * to storage in the future. inode must already be locked. */ void bluesky_inode_update_ctime(BlueSkyInode *inode, gboolean update_mtime) { int64_t now = bluesky_get_current_time(); @@ -219,12 +220,17 @@ void bluesky_inode_flush(BlueSkyFS *fs, BlueSkyInode *inode) } /* Start writeback of an inode and all associated data. 
*/ -void bluesky_inode_start_sync(BlueSkyInode *inode) +void bluesky_inode_start_sync(BlueSkyInode *inode, BlueSkyStoreAsync *barrier) { + BlueSkyFS *fs = inode->fs; + GString *buf = g_string_new(""); bluesky_serialize_inode(buf, inode); BlueSkyRCStr *data = bluesky_string_new_from_gstring(buf); + if (inode->type == BLUESKY_REGULAR) + bluesky_file_flush(inode); + char key[64]; sprintf(key, "inode-%016"PRIx64, inode->inum); @@ -233,6 +239,8 @@ void bluesky_inode_start_sync(BlueSkyInode *inode) async->key = g_strdup(key); async->data = data; bluesky_store_async_submit(async); + if (barrier != NULL) + bluesky_store_add_barrier(barrier, async); bluesky_store_async_unref(async); } -- 2.20.1