From: Michael Vrable Date: Wed, 4 Aug 2010 22:32:00 +0000 (-0700) Subject: Rework caching of data blocks to eliminate double-caching. X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=f2586554fd55cd3cc1be65ac33d1122d00372133;p=bluesky.git Rework caching of data blocks to eliminate double-caching. Previously data could be cached both as a cloud log item and as a string at the inode level. Now cache as a string for dirty data and a cloud log item for clean data. --- diff --git a/bluesky/bluesky-private.h b/bluesky/bluesky-private.h index 0f86f45..418c1e5 100644 --- a/bluesky/bluesky-private.h +++ b/bluesky/bluesky-private.h @@ -165,6 +165,7 @@ typedef struct { } BlueSkyCloudPointer; typedef enum { + LOGTYPE_INVALID = -1, LOGTYPE_UNKNOWN = 0, LOGTYPE_DATA = 1, LOGTYPE_INODE = 2, @@ -209,8 +210,11 @@ struct _BlueSkyCloudLog { // Pointers to other objects GArray *pointers; - // Serialized data, if available in memory (otherwise NULL). + // Serialized data, if available in memory (otherwise NULL), and a lock + // count which tracks if there are users that require the data to be kept + // around. BlueSkyRCStr *data; + int data_lock_count; }; /* Serialize objects into a log segment to be written to the cloud. */ diff --git a/bluesky/bluesky.h b/bluesky/bluesky.h index 8846488..e9e40d8 100644 --- a/bluesky/bluesky.h +++ b/bluesky/bluesky.h @@ -272,15 +272,14 @@ typedef struct { #define BLUESKY_MAX_FILE_SIZE (BLUESKY_BLOCK_SIZE << 24) typedef enum { BLUESKY_BLOCK_ZERO = 0, /* Data is all zeroes, not explicitly stored */ - BLUESKY_BLOCK_REF = 1, /* Reference to key/value store, not cached */ - BLUESKY_BLOCK_CACHED = 2, /* Data is cached in memory, clean */ - BLUESKY_BLOCK_DIRTY = 3, /* Data needs to be committed to store */ + BLUESKY_BLOCK_REF = 1, /* Reference to cloud log item, data clean */ + BLUESKY_BLOCK_DIRTY = 2, /* Data needs to be committed to store */ } BlueSkyBlockType; typedef struct { BlueSkyBlockType type; - BlueSkyRCStr *data; /* Pointer to data in memory if cached */ - BlueSkyCloudLog *cloudref; /* Reference to cloud log entry with data */ + BlueSkyCloudLog *ref; /* if REF: cloud log entry with data */ + BlueSkyRCStr *dirty; /* if DIRTY: raw data in memory */ } BlueSkyBlock; BlueSkyFS *bluesky_init_fs(gchar *name, BlueSkyStore *store); diff --git a/bluesky/cloudlog.c b/bluesky/cloudlog.c index 0d91e0d..6b80233 100644 --- a/bluesky/cloudlog.c +++ b/bluesky/cloudlog.c @@ -132,6 +132,7 @@ void bluesky_cloudlog_unref(BlueSkyCloudLog *log) g_hash_table_remove(fs->locations, &log->id); g_mutex_unlock(fs->lock); + log->type = LOGTYPE_INVALID; g_mutex_free(log->lock); g_cond_free(log->cond); g_array_unref(log->pointers); @@ -190,6 +191,7 @@ BlueSkyCloudPointer bluesky_cloudlog_serialize(BlueSkyCloudLog *log, BlueSkyCloudLog *log2 = (BlueSkyCloudLog *)g_hash_table_lookup(log->fs->locations, &id); // TODO: refcount + bluesky_cloudlog_fetch(log2); g_assert(log2 != NULL); bluesky_cloudlog_ref(log2); g_mutex_unlock(log->fs->lock); @@ -247,6 +249,7 @@ void bluesky_cloudlog_write_log(BlueSkyFS *fs) while (state->inode_list != NULL) { BlueSkyCloudLog *log = (BlueSkyCloudLog *)state->inode_list->data; bluesky_cloudlog_serialize(log, state); + bluesky_cloudlog_unref(log); state->inode_list = g_list_delete_link(state->inode_list, state->inode_list); } diff --git a/bluesky/debug.c b/bluesky/debug.c index 1df8382..6242ba6 100644 --- a/bluesky/debug.c +++ b/bluesky/debug.c @@ -45,7 +45,8 @@ static void cloudlog_dump(gpointer key, gpointer value, gpointer user_data) } g_print(": refs=%d ty=%d inode=%"PRIu64" locs=%x log@(%d,%d) cloud@(%d,%d,%d)\n", log->refcount, - log->type, log->inum, log->location_flags, + log->type, log->inum, + log->location_flags | (log->data != NULL ? 0x100 : 0), log->log_seq, log->log_offset, log->location.directory, log->location.sequence, log->location.offset); } diff --git a/bluesky/file.c b/bluesky/file.c index b329531..977618d 100644 --- a/bluesky/file.c +++ b/bluesky/file.c @@ -31,31 +31,33 @@ void bluesky_block_touch(BlueSkyInode *inode, uint64_t i) switch (block->type) { case BLUESKY_BLOCK_ZERO: - block->data = bluesky_string_new(g_malloc0(block_len), block_len); + block->dirty = bluesky_string_new(g_malloc0(block_len), block_len); break; case BLUESKY_BLOCK_REF: + // FIXME: locking on the cloudlog? bluesky_block_fetch(inode, block, NULL); - g_assert(block->type == BLUESKY_BLOCK_CACHED); - /* Fall through */ - case BLUESKY_BLOCK_CACHED: + bluesky_string_ref(block->ref->data); + block->dirty = bluesky_string_dup(block->ref->data); + break; case BLUESKY_BLOCK_DIRTY: - block->data = bluesky_string_dup(block->data); + block->dirty = bluesky_string_dup(block->dirty); break; } - if (block->type != BLUESKY_BLOCK_CACHED + /*if (block->type != BLUESKY_BLOCK_CACHED && block->type != BLUESKY_BLOCK_DIRTY) - g_atomic_int_add(&inode->fs->cache_total, 1); + g_atomic_int_add(&inode->fs->cache_total, 1); //FIXME */ if (block->type != BLUESKY_BLOCK_DIRTY) g_atomic_int_add(&inode->fs->cache_dirty, 1); block->type = BLUESKY_BLOCK_DIRTY; - bluesky_cloudlog_unref(block->cloudref); - block->cloudref = NULL; + bluesky_cloudlog_unref(block->ref); + block->ref = NULL; } /* Set the size of a file. This will truncate or extend the file as needed. * Newly-allocated bytes are zeroed. */ +// FIXME void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) { g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE); @@ -82,11 +84,11 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) if (b->type != BLUESKY_BLOCK_ZERO && (b->type == BLUESKY_BLOCK_REF - || b->data->len < BLUESKY_BLOCK_SIZE)) { + || b->dirty->len < BLUESKY_BLOCK_SIZE)) { bluesky_block_touch(inode, inode->blocks->len - 1); - gsize old_size = b->data->len; - bluesky_string_resize(b->data, BLUESKY_BLOCK_SIZE); - memset(&b->data->data[old_size], 0, + gsize old_size = b->dirty->len; + bluesky_string_resize(b->dirty, BLUESKY_BLOCK_SIZE); + memset(&b->dirty->data[old_size], 0, BLUESKY_BLOCK_SIZE - old_size); } } @@ -96,13 +98,13 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) /* Delete blocks from a file. Must reclaim memory. */ for (guint i = inode->blocks->len; i < blocks; i++) { BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i); - if (b->type == BLUESKY_BLOCK_CACHED + /* if (b->type == BLUESKY_BLOCK_CACHED || b->type == BLUESKY_BLOCK_DIRTY) - g_atomic_int_add(&inode->fs->cache_total, -1); + g_atomic_int_add(&inode->fs->cache_total, -1); FIXME */ if (b->type == BLUESKY_BLOCK_DIRTY) g_atomic_int_add(&inode->fs->cache_dirty, -1); - bluesky_string_unref(b->data); - bluesky_cloudlog_unref(b->cloudref); + bluesky_string_unref(b->dirty); + bluesky_cloudlog_unref(b->ref); } g_array_set_size(inode->blocks, blocks); } @@ -115,13 +117,13 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) if (b->type != BLUESKY_BLOCK_ZERO) { bluesky_block_touch(inode, blocks - 1); - gsize old_size = b->data->len; + gsize old_size = b->dirty->len; gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE; - bluesky_string_resize(b->data, new_size); + bluesky_string_resize(b->dirty, new_size); if (new_size > old_size) { - memset(&b->data->data[old_size], 0, new_size - old_size); + memset(&b->dirty->data[old_size], 0, new_size - old_size); } } } @@ -140,6 +142,8 @@ void bluesky_file_write(BlueSkyInode *inode, uint64_t offset, if (len == 0) return; + // TODO: Optimization: If we are entirely overwriting a block we don't need + // to fetch it frm storage first. while (len > 0) { uint64_t block_num = offset / BLUESKY_BLOCK_SIZE; gint block_offset = offset % BLUESKY_BLOCK_SIZE; @@ -148,7 +152,7 @@ void bluesky_file_write(BlueSkyInode *inode, uint64_t offset, bluesky_block_touch(inode, block_num); BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, block_num); - memcpy(&b->data->data[block_offset], data, bytes); + memcpy(&b->dirty->data[block_offset], data, bytes); offset += bytes; data += bytes; @@ -168,6 +172,7 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset, g_return_if_fail(offset < inode->size); g_return_if_fail(len <= inode->size - offset); +#if 0 /* Start fetches on any data blocks that we will need for this read. */ BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store); barrier->op = STORE_OP_BARRIER; @@ -191,6 +196,7 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset, if (bluesky_verbose) { g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete."); } +#endif while (len > 0) { uint64_t block_num = offset / BLUESKY_BLOCK_SIZE; @@ -199,17 +205,17 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset, BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, block_num); - switch (b->type) { - case BLUESKY_BLOCK_ZERO: + if (b->type == BLUESKY_BLOCK_ZERO) { memset(buf, 0, bytes); - break; - case BLUESKY_BLOCK_REF: - bluesky_block_fetch(inode, b, NULL); - /* Fall through */ - case BLUESKY_BLOCK_CACHED: - case BLUESKY_BLOCK_DIRTY: - memcpy(buf, &b->data->data[block_offset], bytes); - break; + } else { + BlueSkyRCStr *data = NULL; + if (b->type == BLUESKY_BLOCK_REF) { + bluesky_block_fetch(inode, b, NULL); + data = b->ref->data; + } else if (b->type == BLUESKY_BLOCK_DIRTY) { + data = b->dirty; + } + memcpy(buf, &data->data[block_offset], bytes); } offset += bytes; @@ -224,13 +230,11 @@ void bluesky_block_fetch(BlueSkyInode *inode, BlueSkyBlock *block, if (block->type != BLUESKY_BLOCK_REF) return; - g_mutex_lock(block->cloudref->lock); - bluesky_cloudlog_fetch(block->cloudref); - block->data = block->cloudref->data; - bluesky_string_ref(block->data); - g_mutex_unlock(block->cloudref->lock); - block->type = BLUESKY_BLOCK_CACHED; - g_atomic_int_add(&inode->fs->cache_total, 1); + g_mutex_lock(block->ref->lock); + bluesky_cloudlog_fetch(block->ref); + g_mutex_unlock(block->ref->lock); + block->type = BLUESKY_BLOCK_REF; + g_atomic_int_add(&inode->fs->cache_total, 1); //FIXME } /* Write the given block to cloud-backed storage and mark it clean. */ @@ -242,22 +246,21 @@ void bluesky_block_flush(BlueSkyInode *inode, BlueSkyBlock *block, if (block->type != BLUESKY_BLOCK_DIRTY) return; - bluesky_cloudlog_unref(block->cloudref); - - BlueSkyRCStr *data = block->data; + g_assert(block->ref == NULL); BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs); cloudlog->type = LOGTYPE_DATA; cloudlog->inum = inode->inum; - cloudlog->data = data; - bluesky_string_ref(data); + cloudlog->data = block->dirty; // String ownership is transferred bluesky_cloudlog_sync(cloudlog); + bluesky_cloudlog_ref(cloudlog); // Reference for log_items list *log_items = g_list_prepend(*log_items, cloudlog); bluesky_cloudlog_insert(cloudlog); - block->cloudref = cloudlog; + block->ref = cloudlog; // Uses initial reference from _new() - block->type = BLUESKY_BLOCK_CACHED; + block->type = BLUESKY_BLOCK_REF; + block->dirty = NULL; g_atomic_int_add(&fs->cache_dirty, -1); } @@ -279,25 +282,17 @@ void bluesky_file_drop_cached(BlueSkyInode *inode) for (int i = 0; i < inode->blocks->len; i++) { BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i); - if (b->type == BLUESKY_BLOCK_CACHED) { - if (bluesky_verbose) { - g_log("bluesky/cache", G_LOG_LEVEL_DEBUG, - "Dropping block %d of inode %"PRIu64" from cache", - i, inode->inum); - g_log("bluesky/cache", G_LOG_LEVEL_DEBUG, - " (reference count was %d)", b->data->refcount); + if (b->type == BLUESKY_BLOCK_REF) { + g_mutex_lock(b->ref->lock); + if (b->ref->data != NULL + && g_atomic_int_get(&b->ref->data_lock_count) == 0 + && (b->ref->location_flags != 0)) + { + bluesky_string_unref(b->ref->data); + b->ref->data = NULL; } - - bluesky_string_unref(b->data); - b->data = NULL; - b->type = BLUESKY_BLOCK_REF; + g_mutex_unlock(b->ref->lock); g_atomic_int_add(&inode->fs->cache_total, -1); - g_mutex_lock(b->cloudref->lock); - if (b->cloudref->location_flags & CLOUDLOG_JOURNAL) { - bluesky_string_unref(b->cloudref->data); - b->cloudref->data = NULL; - } - g_mutex_unlock(b->cloudref->lock); } } } diff --git a/bluesky/inode.c b/bluesky/inode.c index 6e9c0c8..a76b4b9 100644 --- a/bluesky/inode.c +++ b/bluesky/inode.c @@ -173,8 +173,8 @@ void bluesky_inode_unref(BlueSkyInode *inode) if (b->type == BLUESKY_BLOCK_DIRTY) { g_error("Deleting an inode with dirty file data!"); } - bluesky_cloudlog_unref(b->cloudref); - bluesky_string_unref(b->data); + bluesky_cloudlog_unref(b->ref); + bluesky_string_unref(b->dirty); } g_array_unref(inode->blocks); break; @@ -299,7 +299,6 @@ void bluesky_inode_start_sync(BlueSkyInode *inode) GString *buf = g_string_new(""); bluesky_serialize_inode(buf, inode); - BlueSkyRCStr *data = bluesky_string_new_from_gstring(buf); char key[64]; sprintf(key, "inode-%016"PRIx64, inode->inum); @@ -307,26 +306,23 @@ void bluesky_inode_start_sync(BlueSkyInode *inode) BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs); cloudlog->type = LOGTYPE_INODE; cloudlog->inum = inode->inum; - cloudlog->data = data; - bluesky_string_ref(data); + cloudlog->data = bluesky_string_new_from_gstring(buf); if (inode->type == BLUESKY_REGULAR) { for (int i = 0; i < inode->blocks->len; i++) { BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i); - if (b->type == BLUESKY_BLOCK_CACHED - || b->type == BLUESKY_BLOCK_REF) - { - BlueSkyCloudID id = b->cloudref->id; + if (b->type == BLUESKY_BLOCK_REF) { + BlueSkyCloudID id = b->ref->id; g_array_append_val(cloudlog->pointers, id); } } } - if (inode->committed_item != NULL) - bluesky_cloudlog_unref(inode->committed_item); + bluesky_cloudlog_unref(inode->committed_item); inode->committed_item = cloudlog; bluesky_cloudlog_sync(cloudlog); + bluesky_cloudlog_ref(cloudlog); log_items = g_list_prepend(log_items, cloudlog); bluesky_cloudlog_insert(cloudlog); diff --git a/bluesky/log.c b/bluesky/log.c index 55daf7a..cec46cb 100644 --- a/bluesky/log.c +++ b/bluesky/log.c @@ -82,6 +82,7 @@ static void log_commit(BlueSkyLog *log) g_cond_signal(item->cond); g_mutex_unlock(item->lock); log->committed = g_slist_delete_link(log->committed, log->committed); + bluesky_cloudlog_unref(item); batchsize++; } @@ -153,6 +154,7 @@ static gpointer log_thread(gpointer d) if ((item->location_flags | item->pending_write) & CLOUDLOG_JOURNAL) { g_mutex_unlock(item->lock); bluesky_cloudlog_unref(item); + g_atomic_int_add(&item->data_lock_count, -1); continue; } @@ -199,6 +201,7 @@ static gpointer log_thread(gpointer d) offset += sizeof(header) + sizeof(footer) + item->data->len; log->committed = g_slist_prepend(log->committed, item); + g_atomic_int_add(&item->data_lock_count, -1); g_mutex_unlock(item->lock); /* Force an if there are no other log items currently waiting to be @@ -235,6 +238,7 @@ BlueSkyLog *bluesky_log_new(const char *log_directory) void bluesky_log_item_submit(BlueSkyCloudLog *item, BlueSkyLog *log) { bluesky_cloudlog_ref(item); + g_atomic_int_add(&item->data_lock_count, 1); g_async_queue_push(log->queue, item); } diff --git a/bluesky/serialize.c b/bluesky/serialize.c index 44233a1..a9e4182 100644 --- a/bluesky/serialize.c +++ b/bluesky/serialize.c @@ -95,8 +95,8 @@ void bluesky_serialize_inode(GString *out, BlueSkyInode *inode) BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i); BlueSkyCloudID id; memset(&id, 0, sizeof(id)); - if (b->cloudref != NULL) - id = b->cloudref->id; + if (b->ref != NULL) + id = b->ref->id; g_string_append_len(out, (const char *)&id, sizeof(id)); } break;