From: Michael Vrable Date: Tue, 19 Oct 2010 20:43:58 +0000 (-0700) Subject: Move disk cache cleanup code to cache.c from log.c X-Git-Url: http://git.vrable.net/?p=bluesky.git;a=commitdiff_plain;h=0ca475013b6d6da3fa520001f6f6a50cab482299 Move disk cache cleanup code to cache.c from log.c --- diff --git a/bluesky/cache.c b/bluesky/cache.c index b05baf8..d8ef5fa 100644 --- a/bluesky/cache.c +++ b/bluesky/cache.c @@ -6,10 +6,20 @@ * TODO: Licensing */ +#define _GNU_SOURCE +#define _ATFILE_SOURCE + +#include #include +#include #include #include +#include #include +#include +#include +#include +#include #include "bluesky-private.h" @@ -272,6 +282,98 @@ static void flushd_clean(BlueSkyFS *fs) g_mutex_unlock(fs->lock); } +/* Scan through all currently-stored files in the journal/cache and garbage + * collect old unused ones, if needed. */ +static void gather_cachefiles(gpointer key, gpointer value, gpointer user_data) +{ + GList **files = (GList **)user_data; + *files = g_list_prepend(*files, value); +} + +static gint compare_cachefiles(gconstpointer a, gconstpointer b) +{ + int64_t ta, tb; + + ta = ((BlueSkyCacheFile *)a)->atime; + tb = ((BlueSkyCacheFile *)b)->atime; + if (ta < tb) + return -1; + else if (ta > tb) + return 1; + else + return 0; +} + +void bluesky_cachefile_gc(BlueSkyFS *fs) +{ + GList *files = NULL; + + g_mutex_lock(fs->log->mmap_lock); + g_hash_table_foreach(fs->log->mmap_cache, gather_cachefiles, &files); + + /* Sort based on atime. The atime should be stable since it shouln't be + * updated except by threads which can grab the mmap_lock, which we already + * hold. */ + files = g_list_sort(files, compare_cachefiles); + + /* Walk the list of files, starting with the oldest, deleting files if + * possible until enough space has been reclaimed. */ + g_print("\nScanning cache: (total size = %d kB)\n", fs->log->disk_used); + while (files != NULL) { + BlueSkyCacheFile *cachefile = (BlueSkyCacheFile *)files->data; + /* Try to lock the structure, but if the lock is held by another thread + * then we'll just skip the file on this pass. */ + if (g_mutex_trylock(cachefile->lock)) { + int64_t age = bluesky_get_current_time() - cachefile->atime; + g_print("%s addr=%p mapcount=%d refcount=%d atime_age=%f", + cachefile->filename, cachefile->addr, cachefile->mapcount, + cachefile->refcount, age / 1e6); + if (cachefile->fetching) + g_print(" (fetching)"); + g_print("\n"); + + gboolean deletion_candidate = FALSE; + if (g_atomic_int_get(&fs->log->disk_used) + > bluesky_options.cache_size + && g_atomic_int_get(&cachefile->refcount) == 0 + && g_atomic_int_get(&cachefile->mapcount) == 0) + { + deletion_candidate = TRUE; + } + + /* Don't allow journal files to be reclaimed until all data is + * known to be durably stored in the cloud. */ + if (cachefile->type == CLOUDLOG_JOURNAL + && cachefile->log_seq >= fs->log->journal_watermark) + { + deletion_candidate = FALSE; + } + + if (deletion_candidate) { + g_print(" ...deleting\n"); + if (unlinkat(fs->log->dirfd, cachefile->filename, 0) < 0) { + fprintf(stderr, "Unable to unlink journal %s: %m\n", + cachefile->filename); + } + + g_atomic_int_add(&fs->log->disk_used, -(cachefile->len / 1024)); + g_hash_table_remove(fs->log->mmap_cache, cachefile->filename); + g_mutex_unlock(cachefile->lock); + g_mutex_free(cachefile->lock); + g_cond_free(cachefile->cond); + g_free(cachefile->filename); + g_free(cachefile); + } else { + g_mutex_unlock(cachefile->lock); + } + } + files = g_list_delete_link(files, files); + } + g_list_free(files); + + g_mutex_unlock(fs->log->mmap_lock); +} + /* Run the flush daemon for a single iteration, though if it is already * executing returns immediately. */ static gpointer flushd_task(BlueSkyFS *fs) diff --git a/bluesky/log.c b/bluesky/log.c index d007eca..76d218a 100644 --- a/bluesky/log.c +++ b/bluesky/log.c @@ -534,98 +534,6 @@ void bluesky_mmap_unref(BlueSkyCacheFile *mmap) } } -/* Scan through all currently-stored files in the journal/cache and garbage - * collect old unused ones, if needed. */ -static void gather_cachefiles(gpointer key, gpointer value, gpointer user_data) -{ - GList **files = (GList **)user_data; - *files = g_list_prepend(*files, value); -} - -static gint compare_cachefiles(gconstpointer a, gconstpointer b) -{ - int64_t ta, tb; - - ta = ((BlueSkyCacheFile *)a)->atime; - tb = ((BlueSkyCacheFile *)b)->atime; - if (ta < tb) - return -1; - else if (ta > tb) - return 1; - else - return 0; -} - -void bluesky_cachefile_gc(BlueSkyFS *fs) -{ - GList *files = NULL; - - g_mutex_lock(fs->log->mmap_lock); - g_hash_table_foreach(fs->log->mmap_cache, gather_cachefiles, &files); - - /* Sort based on atime. The atime should be stable since it shouln't be - * updated except by threads which can grab the mmap_lock, which we already - * hold. */ - files = g_list_sort(files, compare_cachefiles); - - /* Walk the list of files, starting with the oldest, deleting files if - * possible until enough space has been reclaimed. */ - g_print("\nScanning cache: (total size = %d kB)\n", fs->log->disk_used); - while (files != NULL) { - BlueSkyCacheFile *cachefile = (BlueSkyCacheFile *)files->data; - /* Try to lock the structure, but if the lock is held by another thread - * then we'll just skip the file on this pass. */ - if (g_mutex_trylock(cachefile->lock)) { - int64_t age = bluesky_get_current_time() - cachefile->atime; - g_print("%s addr=%p mapcount=%d refcount=%d atime_age=%f", - cachefile->filename, cachefile->addr, cachefile->mapcount, - cachefile->refcount, age / 1e6); - if (cachefile->fetching) - g_print(" (fetching)"); - g_print("\n"); - - gboolean deletion_candidate = FALSE; - if (g_atomic_int_get(&fs->log->disk_used) - > bluesky_options.cache_size - && g_atomic_int_get(&cachefile->refcount) == 0 - && g_atomic_int_get(&cachefile->mapcount) == 0) - { - deletion_candidate = TRUE; - } - - /* Don't allow journal files to be reclaimed until all data is - * known to be durably stored in the cloud. */ - if (cachefile->type == CLOUDLOG_JOURNAL - && cachefile->log_seq >= fs->log->journal_watermark) - { - deletion_candidate = FALSE; - } - - if (deletion_candidate) { - g_print(" ...deleting\n"); - if (unlinkat(fs->log->dirfd, cachefile->filename, 0) < 0) { - fprintf(stderr, "Unable to unlink journal %s: %m\n", - cachefile->filename); - } - - g_atomic_int_add(&fs->log->disk_used, -(cachefile->len / 1024)); - g_hash_table_remove(fs->log->mmap_cache, cachefile->filename); - g_mutex_unlock(cachefile->lock); - g_mutex_free(cachefile->lock); - g_cond_free(cachefile->cond); - g_free(cachefile->filename); - g_free(cachefile); - } else { - g_mutex_unlock(cachefile->lock); - } - } - files = g_list_delete_link(files, files); - } - g_list_free(files); - - g_mutex_unlock(fs->log->mmap_lock); -} - /******************************* JOURNAL REPLAY ******************************* * The journal replay code is used to recover filesystem state after a * filesystem restart. We first look for the most recent commit record in the