From: Michael Vrable Date: Fri, 20 Aug 2010 20:51:31 +0000 (-0700) Subject: Back out dirty reference tracking, as the design was flawed. X-Git-Url: http://git.vrable.net/?p=bluesky.git;a=commitdiff_plain;h=9b1a8397b18ba54e6ad6cc306e1baaecda6fe1fe Back out dirty reference tracking, as the design was flawed. Objects can be written to the journal but not to the cloud--for example, if a data block is written to the journal but overwritten before the file is flushed to the cloud. This write-combining is good, but the old code for tracking when a journal segment could be reclaimed couldn't handle this. So, back out that dirty reference tracking code, in preparation for replacing it with another approach. --- diff --git a/bluesky/bluesky-private.h b/bluesky/bluesky-private.h index c527ee6..8601228 100644 --- a/bluesky/bluesky-private.h +++ b/bluesky/bluesky-private.h @@ -192,13 +192,6 @@ struct _BlueSkyCloudLog { int location_flags; int pending_read, pending_write; - // If the object is not yet flushed to cloud storage but is written to a - // journal file locally, a reference to that journal file so that we can - // keep the dirty_refs count updated. When the object is deleted or - // becomes clean, decrement the dirty_refs counter of the journal file and - // set this pointer to NULL. - BlueSkyCacheFile *dirty_journal; - // A stable identifier for the object (only changes when authenticated data // is written out, but stays the same when the in-cloud cleaner relocates // the object). @@ -293,11 +286,6 @@ struct _BlueSkyCacheFile { BlueSkyLog *log; gboolean fetching, ready; // Cloud data: downloading or ready for use int64_t atime; // Access time, for cache management - - /* Journal: Count of objects which are not yet committed to cloud storage - * but need to be; a non-zero value prevents the journal file from being - * deleted. */ - gint dirty_refs; }; BlueSkyLog *bluesky_log_new(const char *log_directory); diff --git a/bluesky/cloudlog.c b/bluesky/cloudlog.c index 8b88f64..e842fbf 100644 --- a/bluesky/cloudlog.c +++ b/bluesky/cloudlog.c @@ -158,8 +158,6 @@ void bluesky_cloudlog_unref(BlueSkyCloudLog *log) } g_array_unref(log->links); bluesky_string_unref(log->data); - if (log->dirty_journal != NULL) - g_atomic_int_add(&log->dirty_journal->dirty_refs, -1); g_free(log); } } @@ -300,10 +298,6 @@ static void cloudlog_flush_complete(BlueSkyStoreAsync *async, item->pending_write &= ~CLOUDLOG_CLOUD; item->location_flags |= CLOUDLOG_CLOUD; bluesky_cloudlog_stats_update(item, 1); - if (item->dirty_journal != NULL) { - g_atomic_int_add(&item->dirty_journal->dirty_refs, -1); - item->dirty_journal = NULL; - } g_mutex_unlock(item->lock); bluesky_cloudlog_unref(item); diff --git a/bluesky/debug.c b/bluesky/debug.c index 0a5f254..207cb7b 100644 --- a/bluesky/debug.c +++ b/bluesky/debug.c @@ -56,9 +56,9 @@ static void cache_dump(gpointer key, gpointer value, gpointer user_data) BlueSkyCacheFile *cache = (BlueSkyCacheFile *)value; int64_t age = bluesky_get_current_time() - cache->atime; - g_print("%s addr=%p mapcount=%d refcount=%d dirty=%d atime_age=%f", + g_print("%s addr=%p mapcount=%d refcount=%d atime_age=%f", cache->filename, cache->addr, cache->mapcount, cache->refcount, - cache->dirty_refs, age / 1e6); + age / 1e6); if (cache->fetching) g_print(" (fetching)"); g_print("\n"); diff --git a/bluesky/log.c b/bluesky/log.c index a904fe1..5fd6e28 100644 --- a/bluesky/log.c +++ b/bluesky/log.c @@ -217,15 +217,6 @@ static gpointer log_thread(gpointer d) offset += sizeof(header) + sizeof(footer) + item->data->len; - /* Since we have just written a new dirty object to the journal, - * increment the count of live dirty objects in that journal file. The - * count will be decremented when objects are deleted or written to the - * cloud. */ - if (!(item->location_flags & CLOUDLOG_CLOUD)) { - g_atomic_int_add(&log->current_log->dirty_refs, 1); - item->dirty_journal = log->current_log; - } - /* Replace the log item's string data with a memory-mapped copy of the * data, now that it has been written to the log file. (Even if it * isn't yet on disk, it should at least be in the page cache and so @@ -522,17 +513,16 @@ void bluesky_cachefile_gc(BlueSkyFS *fs) * then we'll just skip the file on this pass. */ if (g_mutex_trylock(cachefile->lock)) { int64_t age = bluesky_get_current_time() - cachefile->atime; - g_print("%s addr=%p mapcount=%d refcount=%d dirty=%d atime_age=%f", + g_print("%s addr=%p mapcount=%d refcount=%d atime_age=%f", cachefile->filename, cachefile->addr, cachefile->mapcount, - cachefile->refcount, cachefile->dirty_refs, age / 1e6); + cachefile->refcount, age / 1e6); if (cachefile->fetching) g_print(" (fetching)"); g_print("\n"); if (g_atomic_int_get(&fs->log->disk_used) > DISK_CACHE_SIZE_TARGET && g_atomic_int_get(&cachefile->refcount) == 0 - && g_atomic_int_get(&cachefile->mapcount) == 0 - && g_atomic_int_get(&cachefile->dirty_refs) == 0) + && g_atomic_int_get(&cachefile->mapcount) == 0) { g_print(" ...deleting\n"); if (unlinkat(fs->log->dirfd, cachefile->filename, 0) < 0) {