From: Michael Vrable Date: Thu, 22 Jul 2010 21:51:37 +0000 (-0700) Subject: Initial work on cloud log-structured storage. X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=810fdc7bdf0cd13aaa7c88d3c5af5aae24e77254;p=bluesky.git Initial work on cloud log-structured storage. Right now this is just the first work towards tracking what objects are stored where (a log in the cloud, in local memory, on local disk, etc.). --- diff --git a/bluesky/CMakeLists.txt b/bluesky/CMakeLists.txt index daaeccb..04be126 100644 --- a/bluesky/CMakeLists.txt +++ b/bluesky/CMakeLists.txt @@ -3,9 +3,9 @@ include_directories("${LIBS3_BUILD_DIR}/include" ${KVSTORE_DIR}) link_directories("${LIBS3_BUILD_DIR}/lib" ${KVSTORE_DIR}) add_library(bluesky SHARED - cache.c crc32c.c crypto.c debug.c dir.c file.c init.c inode.c log.c - serialize.c store.c store-bdb.c store-kv.cc store-multi.c - store-s3.c util.c) + cache.c cloudlog.c crc32c.c crypto.c debug.c dir.c file.c init.c + inode.c log.c serialize.c store.c store-bdb.c store-kv.cc + store-multi.c store-s3.c util.c) add_executable(bluesky-test main.c) set(CMAKE_C_FLAGS "-Wall -std=gnu99 ${CMAKE_C_FLAGS}") diff --git a/bluesky/bluesky-private.h b/bluesky/bluesky-private.h index 83a383e..baeb141 100644 --- a/bluesky/bluesky-private.h +++ b/bluesky/bluesky-private.h @@ -176,6 +176,71 @@ BlueSkyLogItem *bluesky_log_item_new(); void bluesky_log_item_submit(BlueSkyLogItem *item, BlueSkyLog *log); void bluesky_log_item_finish(BlueSkyLogItem *item); +/* Writing of data to the cloud in log segments and tracking the location of + * various pieces of data (both where in the cloud and where cached locally). 
+ * */ + +typedef struct { + char bytes[16]; +} BlueSkyCloudID; + +typedef struct { + uint32_t directory; + uint32_t sequence; + uint32_t offset; + uint32_t size; +} BlueSkyCloudPointer; + +typedef enum { + LOGTYPE_UNKNOWN = 0, + LOGTYPE_DATA = 1, + LOGTYPE_INODE = 2, + LOGTYPE_INODE_MAP = 3, + LOGTYPE_CHECKPOINT = 4, + LOGTYPE_CHECKPOINT_PTR = 5, +} BlueSkyCloudLogType; + +/* A record which tracks an object which has been written to a local log, + * cached locally, and/or written to the cloud. */ +#define CLOUDLOG_JOURNAL 0x01 +#define CLOUDLOG_CACHE 0x02 +#define CLOUDLOG_CLOUD 0x04 +typedef struct { + gint refcount; + + BlueSkyFS *fs; + + BlueSkyCloudLogType type; + + // Bitmask of CLOUDLOG_* flags indicating where the object exists. + int location_flags; + + // A stable identifier for the object (only changes when authenticated data + // is written out, but stays the same when the in-cloud cleaner relocates + // the object). + BlueSkyCloudID id; + + // The inode which owns this data, if any, and an offset. + uint64_t inum; + int32_t inum_offset; + + // The location of the object in the cloud, if available. + BlueSkyCloudPointer location; + + // TODO: Location in journal/cache + + // Serialized data, if available in memory (otherwise NULL). + BlueSkyRCStr *data; +} BlueSkyCloudLog; + +gboolean bluesky_cloudlog_equal(gconstpointer a, gconstpointer b); +guint bluesky_cloudlog_hash(gconstpointer a); +BlueSkyCloudLog *bluesky_cloudlog_new(BlueSkyFS *fs); +void bluesky_cloudlog_ref(BlueSkyCloudLog *log); +void bluesky_cloudlog_unref(BlueSkyCloudLog *log); +BlueSkyLogItem *bluesky_cloudlog_sync(BlueSkyCloudLog *log); +void bluesky_cloudlog_insert(BlueSkyCloudLog *log); + #ifdef __cplusplus } #endif diff --git a/bluesky/bluesky.h b/bluesky/bluesky.h index 2255a17..9543d9c 100644 --- a/bluesky/bluesky.h +++ b/bluesky/bluesky.h @@ -146,6 +146,10 @@ typedef struct { /* Mutex for the flush daemon, to prevent concurrent execution. 
*/ GMutex *flushd_lock; + + /* Mapping of object identifiers (blocks, inodes) to physical location (in + * the local cache or in the logs in the cloud). */ + GHashTable *locations; } BlueSkyFS; /* Inode number of the root directory. */ diff --git a/bluesky/cloudlog.c b/bluesky/cloudlog.c new file mode 100644 index 0000000..0c5db98 --- /dev/null +++ b/bluesky/cloudlog.c @@ -0,0 +1,92 @@ +/* Blue Sky: File Systems in the Cloud + * + * Copyright (C) 2009 The Regents of the University of California + * Written by Michael Vrable + * + * TODO: Licensing + */ + +#include <stdint.h> +#include <glib.h> +#include <string.h> + +#include "bluesky-private.h" + +/* The locations hash table in the file system is used to map objects to their locations. Objects are named using 128-bit identifiers (BlueSkyCloudID). */ + +typedef struct { + BlueSkyCloudID id; + + BlueSkyCloudPointer *cloud_loc; +} BlueSkyLocationEntry; + +BlueSkyCloudID bluesky_cloudlog_new_id() +{ + BlueSkyCloudID id; + bluesky_crypt_random_bytes((uint8_t *)&id.bytes, sizeof(id)); + return id; +} + +gboolean bluesky_cloudlog_equal(gconstpointer a, gconstpointer b) +{ + BlueSkyCloudID *id1 = (BlueSkyCloudID *)a, *id2 = (BlueSkyCloudID *)b; + + return memcmp(id1, id2, sizeof(BlueSkyCloudID)) == 0; +} + +guint bluesky_cloudlog_hash(gconstpointer a) +{ + BlueSkyCloudID *id = (BlueSkyCloudID *)a; + + // Assume that bits in the ID are randomly chosen so that any subset of the + // bits can be used as a hash key. + return *(guint *)(&id->bytes); +} + +/* Formatting of cloud log segments. This handles grouping items together + * before writing a batch to the cloud, handling indirection through items like + * the inode map, etc. 
*/ + +BlueSkyCloudLog *bluesky_cloudlog_new(BlueSkyFS *fs) +{ + BlueSkyCloudLog *log = g_new0(BlueSkyCloudLog, 1); + + log->fs = fs; + log->type = LOGTYPE_UNKNOWN; + log->id = bluesky_cloudlog_new_id(); + g_atomic_int_set(&log->refcount, 1); + + return log; +} + +void bluesky_cloudlog_ref(BlueSkyCloudLog *log) +{ + g_atomic_int_inc(&log->refcount); +} + +void bluesky_cloudlog_unref(BlueSkyCloudLog *log) +{ + if (g_atomic_int_dec_and_test(&log->refcount)) { + g_print("Cloud log refcount dropped to zero.\n"); + } +} + +/* Start a write of the object to the local log. */ +BlueSkyLogItem *bluesky_cloudlog_sync(BlueSkyCloudLog *log) +{ + BlueSkyLogItem *log_item = bluesky_log_item_new(); + log_item->key = g_strdup("cloudlog"); + log_item->data = log->data; + bluesky_string_ref(log->data); + bluesky_log_item_submit(log_item, log->fs->log); + return log_item; +} + +/* Add the given entry to the global hash table containing cloud log entries. + * Takes ownership of the caller's reference. */ +void bluesky_cloudlog_insert(BlueSkyCloudLog *log) +{ + g_mutex_lock(log->fs->lock); + g_hash_table_insert(log->fs->locations, &log->id, log); + g_mutex_unlock(log->fs->lock); +} diff --git a/bluesky/debug.c b/bluesky/debug.c index b409c74..3496608 100644 --- a/bluesky/debug.c +++ b/bluesky/debug.c @@ -35,6 +35,16 @@ static void inode_dump(gpointer key, gpointer value, gpointer user_data) inode->change_count, inode->change_commit); } +static void cloudlog_dump(gpointer key, gpointer value, gpointer user_data) +{ + BlueSkyCloudLog *log = (BlueSkyCloudLog *)value; + + for (int i = 0; i < sizeof(BlueSkyCloudID); i++) { + g_print("%02x", (uint8_t)(log->id.bytes[i])); + } + g_print(": inode=%"PRIu64" locs=%x\n", log->inum, log->location_flags); +} + /* Dump a summary of filesystem state as it is cached in memory. 
*/ void bluesky_debug_dump(BlueSkyFS *fs) { @@ -58,6 +68,10 @@ void bluesky_debug_dump(BlueSkyFS *fs) g_print("\n"); g_hash_table_foreach(fs->inodes, inode_dump, fs); + + g_print("\nLog Objects:\n"); + g_hash_table_foreach(fs->locations, cloudlog_dump, fs); + g_print("\n"); } /* Statistics counters: for operation counts, bytes transferred, etc. */ diff --git a/bluesky/inode.c b/bluesky/inode.c index e53c602..3e8d686 100644 --- a/bluesky/inode.c +++ b/bluesky/inode.c @@ -86,6 +86,8 @@ BlueSkyFS *bluesky_new_fs(gchar *name) fs->next_inum = BLUESKY_ROOT_INUM + 1; fs->store = bluesky_store_new("file"); fs->flushd_lock = g_mutex_new(); + fs->locations = g_hash_table_new(bluesky_cloudlog_hash, + bluesky_cloudlog_equal); return fs; } @@ -313,16 +315,19 @@ void bluesky_inode_start_sync(BlueSkyInode *inode, BlueSkyStoreAsync *barrier) char key[64]; sprintf(key, "inode-%016"PRIx64, inode->inum); - BlueSkyLogItem *log_item = bluesky_log_item_new(); - log_item->key = g_strdup(key); - log_item->data = data; + BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs); + cloudlog->type = LOGTYPE_DATA; + cloudlog->inum = inode->inum; + cloudlog->data = data; bluesky_string_ref(data); - bluesky_log_item_submit(log_item, fs->log); - log_items = g_list_prepend(log_items, log_item); + + log_items = g_list_prepend(log_items, bluesky_cloudlog_sync(cloudlog)); + + bluesky_cloudlog_insert(cloudlog); /* Wait for all log items to be committed to disk. */ while (log_items != NULL) { - log_item = (BlueSkyLogItem *)log_items->data; + BlueSkyLogItem *log_item = (BlueSkyLogItem *)log_items->data; bluesky_log_item_finish(log_item); log_items = g_list_delete_link(log_items, log_items); }