From a82b60b3b683840a7074110831bcbaa16a40f0eb Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Wed, 25 Aug 2010 13:24:34 -0700 Subject: [PATCH] Add an inode map data structure to track the location of inodes in logs. --- bluesky/CMakeLists.txt | 4 +- bluesky/bluesky-private.h | 36 +++++++++++ bluesky/bluesky.h | 4 ++ bluesky/cloudlog.c | 10 +++ bluesky/debug.c | 25 ++++++++ bluesky/imap.c | 125 ++++++++++++++++++++++++++++++++++++++ bluesky/inode.c | 1 + 7 files changed, 203 insertions(+), 2 deletions(-) create mode 100644 bluesky/imap.c diff --git a/bluesky/CMakeLists.txt b/bluesky/CMakeLists.txt index 04be126..a3a58bc 100644 --- a/bluesky/CMakeLists.txt +++ b/bluesky/CMakeLists.txt @@ -3,8 +3,8 @@ include_directories("${LIBS3_BUILD_DIR}/include" ${KVSTORE_DIR}) link_directories("${LIBS3_BUILD_DIR}/lib" ${KVSTORE_DIR}) add_library(bluesky SHARED - cache.c cloudlog.c crc32c.c crypto.c debug.c dir.c file.c init.c - inode.c log.c serialize.c store.c store-bdb.c store-kv.cc + cache.c cloudlog.c crc32c.c crypto.c debug.c dir.c file.c imap.c + init.c inode.c log.c serialize.c store.c store-bdb.c store-kv.cc store-multi.c store-s3.c util.c) add_executable(bluesky-test main.c) diff --git a/bluesky/bluesky-private.h b/bluesky/bluesky-private.h index 7652cff..e5b5b58 100644 --- a/bluesky/bluesky-private.h +++ b/bluesky/bluesky-private.h @@ -314,6 +314,42 @@ typedef struct { gboolean complete; } SerializedRecord; +/***** Inode map management *****/ + +/* Mapping information for a single inode number. These are grouped together + * into InodeMapRange objects. */ +typedef struct { + uint64_t inum; + + /* The ID of the most recent version of the inode. */ + BlueSkyCloudID id; + + /* The location where that version is written in the cloud. */ + BlueSkyCloudPointer location; + + /* If the cloud log entry exists in memory, then a pointer to it, otherwise + * NULL. */ + BlueSkyCloudLog *item; +} InodeMapEntry; + +typedef struct { + /* Starting and ending inode number values that fall in this section. + * Endpoint values are inclusive. */ + uint64_t start, end; + + /* A sorted list (by inode number) of InodeMapEntry objects. */ + GSequence *map_entries; + + /* The location where this inode map section is stored in the cloud. */ + BlueSkyCloudPointer location; + + /* Have there been changes that require writing this section out again? */ + gboolean dirty; +} InodeMapRange; + +InodeMapEntry *bluesky_inode_map_lookup(GSequence *inode_map, uint64_t inum, + int action); + #ifdef __cplusplus } #endif diff --git a/bluesky/bluesky.h b/bluesky/bluesky.h index ca3d130..54e8450 100644 --- a/bluesky/bluesky.h +++ b/bluesky/bluesky.h @@ -178,6 +178,10 @@ typedef struct { /* Mapping of object identifiers (blocks, inodes) to physical location (in * the local cache or in the logs in the cloud). */ GHashTable *locations; + + /* The inode map, which maps inode numbers to the location of the most + * recent version. */ + GSequence *inode_map; } BlueSkyFS; /* Inode number of the root directory. */ diff --git a/bluesky/cloudlog.c b/bluesky/cloudlog.c index 0356c7d..40067fd 100644 --- a/bluesky/cloudlog.c +++ b/bluesky/cloudlog.c @@ -265,6 +265,16 @@ BlueSkyCloudPointer bluesky_cloudlog_serialize(BlueSkyCloudLog *log, g_string_append_len(state->data, (const char *)&header, sizeof(header)); g_string_append_len(state->data, log->data->data, log->data->len); + /* If the object we flushed was an inode, update the inode map. */ + if (log->type == LOGTYPE_INODE) { + g_mutex_lock(fs->lock); + InodeMapEntry *entry = bluesky_inode_map_lookup(fs->inode_map, + log->inum, 1); + entry->id = log->id; + entry->location = log->location; + g_mutex_unlock(fs->lock); + } + /* TODO: We should mark the objects as committed on the cloud until the * data is flushed and acknowledged. */ log->pending_write |= CLOUDLOG_CLOUD; diff --git a/bluesky/debug.c b/bluesky/debug.c index 207cb7b..77df845 100644 --- a/bluesky/debug.c +++ b/bluesky/debug.c @@ -64,6 +64,27 @@ static void cache_dump(gpointer key, gpointer value, gpointer user_data) g_print("\n"); } + +void inode_map_dump(GSequence *inode_map) +{ + GSequenceIter *i, *j; + + g_print("\nInode map dump:\n"); + for (i = g_sequence_get_begin_iter(inode_map); + !g_sequence_iter_is_end(i); i = g_sequence_iter_next(i)) + { + InodeMapRange *range = (InodeMapRange *)g_sequence_get(i); + + g_print(" Range [%"PRIu64", %"PRIu64"]\n", range->start, range->end); + + for (j = g_sequence_get_begin_iter(range->map_entries); + !g_sequence_iter_is_end(j); j = g_sequence_iter_next(j)) + { + InodeMapEntry *entry = (InodeMapEntry *)g_sequence_get(j); + g_print(" Entry %"PRIu64"\n", entry->inum); + } + } +} /* Dump a summary of filesystem state as it is cached in memory. */ void bluesky_debug_dump(BlueSkyFS *fs) { @@ -102,6 +123,10 @@ void bluesky_debug_dump(BlueSkyFS *fs) g_print("\nJournal/Cache Files:\n"); g_hash_table_foreach(fs->log->mmap_cache, cache_dump, fs); g_print("\n"); + + g_mutex_lock(fs->lock); + inode_map_dump(fs->inode_map); + g_mutex_unlock(fs->lock); } /* Statistics counters: for operation counts, bytes transferred, etc. */ diff --git a/bluesky/imap.c b/bluesky/imap.c new file mode 100644 index 0000000..e9dd1d9 --- /dev/null +++ b/bluesky/imap.c @@ -0,0 +1,125 @@ +/* Blue Sky: File Systems in the Cloud + * + * Copyright (C) 2009 The Regents of the University of California + * Written by Michael Vrable + * + * TODO: Licensing + */ + +#include +#include +#include +#include +#include + +#include "bluesky-private.h" + +/* Inode maps. There is both an in-memory representation as well as the + * serialized form in the cloud. + * + * The inode map is broken up into pieces by partitioning in the inode number + * space. The checkpoint region will contain pointers to each of these map + * ranges. */ + +/* Roughly how many inodes to try to put in each inode map range. */ +static int target_imap_size = 4096; + +/* Comparison function for lookuping up inode map entries. Reads a 64-bit + * value from the pointed-to address to do the comparison, so we require that + * the InodeMapEntry and InodeMapRange structures start with the 64-bit value + * they are sorted by. */ +static gint compare(gconstpointer a, gconstpointer b, gpointer user_data) +{ + uint64_t x = *(uint64_t *)a; + uint64_t y = *(uint64_t *)b; + + if (x < y) + return -1; + else if (x > y) + return 1; + else + return 0; +} + +/* Look up the inode map entry for the given inode number. If action is +1, + * create a new entry if it does not already exist; if it is -1 then delete the + * entry instead. If 0, return the entry if it is already present. A non-zero + * action value will mark the inode map range as dirty. */ +InodeMapEntry *bluesky_inode_map_lookup(GSequence *inode_map, uint64_t inum, + int action) +{ + GSequenceIter *i; + + /* First, scan through to find the inode map section that covers the + * appropriate range. Create one if it does not exist but we were asked to + * create an entry. */ + InodeMapRange *range = NULL; + + i = g_sequence_search(inode_map, &inum, compare, NULL); + i = g_sequence_iter_prev(i); + if (!g_sequence_iter_is_end(i)) + range = (InodeMapRange *)g_sequence_get(i); + if (range == NULL || inum < range->start || inum > range->end) { + if (action <= 0) + return NULL; + + /* Create a new range. First, determine bounds on the range enpoints + * based on neighboring ranges. Then, shrink the size of the range to + * a reasonable size that covers the needed inode number. */ + range = g_new0(InodeMapRange, 1); + range->start = 0; + range->end = G_MAXUINT64; + range->map_entries = g_sequence_new(NULL); + range->dirty = TRUE; + + g_print("Creating inode map range, 1: start=%"PRIu64" end=%"PRIu64"\n", + range->start, range->end); + + if (!g_sequence_iter_is_begin(i) && !g_sequence_iter_is_end(i)) + range->start = ((InodeMapRange *)g_sequence_get(i))->end + 1; + i = g_sequence_iter_next(i); + if (!g_sequence_iter_is_end(i)) + range->end = ((InodeMapRange *)g_sequence_get(i))->start - 1; + + g_print("Creating inode map range, 2: start=%"PRIu64" end=%"PRIu64"\n", + range->start, range->end); + g_assert(inum >= range->start && inum <= range->end); + + range->start = MAX(range->start, + inum & ~(uint64_t)(target_imap_size - 1)); + range->end = MIN(range->end, range->start + target_imap_size - 1); + + g_print("Creating inode map range, 3: start=%"PRIu64" end=%"PRIu64"\n", + range->start, range->end); + + g_sequence_insert_sorted(inode_map, range, compare, NULL); + } + + /* Next, try to find the entry within this range of the inode map. */ + InodeMapEntry *entry = NULL; + i = g_sequence_search(range->map_entries, &inum, compare, NULL); + i = g_sequence_iter_prev(i); + if (!g_sequence_iter_is_end(i)) + entry = (InodeMapEntry *)g_sequence_get(i); + if (entry == NULL || inum != entry->inum) { + if (action <= 0) + return NULL; + + entry = g_new0(InodeMapEntry, 1); + entry->inum = inum; + g_sequence_insert_sorted(range->map_entries, entry, compare, NULL); + + g_print("Created inode map entry for %"PRIu64"\n", inum); + } + + if (action != 0) + range->dirty = TRUE; + + /* Were we requested to delete the item? */ + if (action < 0) { + g_sequence_remove(i); + entry = NULL; + } + + return entry; +} diff --git a/bluesky/inode.c b/bluesky/inode.c index 8748164..a79bcc1 100644 --- a/bluesky/inode.c +++ b/bluesky/inode.c @@ -89,6 +89,7 @@ BlueSkyFS *bluesky_new_fs(gchar *name) fs->flushd_cond = g_cond_new(); fs->locations = g_hash_table_new(bluesky_cloudlog_hash, bluesky_cloudlog_equal); + fs->inode_map = g_sequence_new(NULL); fs->log_state = g_new0(BlueSkyCloudLogState, 1); fs->log_state->data = g_string_new(""); -- 2.20.1