From 9f0e2f7de7d919d6a83944f0e7fdbd07cb6c4189 Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Sun, 31 Oct 2010 14:53:05 -0700 Subject: [PATCH] Convert rangeset implementation from a hashtable to GSequence. This gives log(N) performance, but allows us to easily tell if a request falls in the middle of an object and check for overlaps on inserts (though that part isn't implemented yet). --- bluesky/bluesky-private.h | 5 ++-- bluesky/bluesky.h | 11 +++++++-- bluesky/cloudlog.c | 9 +++---- bluesky/log.c | 18 ++++++++++++-- bluesky/util.c | 50 +++++++++++++++++++++++++++++++++------ 5 files changed, 76 insertions(+), 17 deletions(-) diff --git a/bluesky/bluesky-private.h b/bluesky/bluesky-private.h index 2f69166..0f9d29d 100644 --- a/bluesky/bluesky-private.h +++ b/bluesky/bluesky-private.h @@ -395,8 +395,9 @@ void bluesky_log_finish_all(GList *log_items); BlueSkyCloudLog *bluesky_log_get_commit_point(BlueSkyFS *fs); void bluesky_log_write_commit_point(BlueSkyFS *fs, BlueSkyCloudLog *marker); -BlueSkyRCStr *bluesky_log_map_object(BlueSkyFS *fs, int log_dir, int log_seq, - int log_offset, int log_size); +BlueSkyRCStr *bluesky_log_map_object(BlueSkyFS *fs, int log_dir, + int log_seq, int log_offset, int log_size, + gboolean map_data); void bluesky_mmap_unref(BlueSkyCacheFile *mmap); void bluesky_cachefile_unref(BlueSkyCacheFile *cachefile); diff --git a/bluesky/bluesky.h b/bluesky/bluesky.h index 87c58a2..8fbcc0d 100644 --- a/bluesky/bluesky.h +++ b/bluesky/bluesky.h @@ -87,11 +87,18 @@ void bluesky_string_resize(BlueSkyRCStr *string, gsize len); struct BlueSkyRangeset; typedef struct BlueSkyRangeset BlueSkyRangeset; +typedef struct { + uint64_t start, length; + gpointer data; +} BlueSkyRangesetItem; + BlueSkyRangeset *bluesky_rangeset_new(); void bluesky_rangeset_free(BlueSkyRangeset *rangeset); gboolean bluesky_rangeset_insert(BlueSkyRangeset *rangeset, - int start, int length, gpointer data); -gpointer bluesky_rangeset_lookup(BlueSkyRangeset *rangeset, int start); + uint64_t start, uint64_t length, + gpointer data); +const BlueSkyRangesetItem *bluesky_rangeset_lookup(BlueSkyRangeset *rangeset, + uint64_t offset); /* Storage interface. This presents a key-value store abstraction, and can * have multiple implementations: in-memory, on-disk, in-cloud. */ diff --git a/bluesky/cloudlog.c b/bluesky/cloudlog.c index 4d52d54..ebbdeeb 100644 --- a/bluesky/cloudlog.c +++ b/bluesky/cloudlog.c @@ -287,7 +287,7 @@ void bluesky_cloudlog_fetch(BlueSkyCloudLog *log) BlueSkyRCStr *raw = NULL; if ((log->location_flags | log->pending_write) & CLOUDLOG_JOURNAL) { raw = bluesky_log_map_object(log->fs, -1, log->log_seq, - log->log_offset, log->log_size); + log->log_offset, log->log_size, FALSE); } if (raw == NULL && (log->location_flags & CLOUDLOG_CLOUD)) { @@ -296,7 +296,8 @@ void bluesky_cloudlog_fetch(BlueSkyCloudLog *log) log->location.directory, log->location.sequence, log->location.offset, - log->location.size); + log->location.size, + FALSE); } g_assert(raw != NULL); @@ -313,7 +314,7 @@ void bluesky_cloudlog_fetch(BlueSkyCloudLog *log) bluesky_cloudlog_stats_update(log, -1); offset = log->log_offset + sizeof(struct log_header); log->data = bluesky_log_map_object(log->fs, -1, log->log_seq, - offset, log->data_size); + offset, log->data_size, TRUE); bluesky_cloudlog_stats_update(log, 1); } @@ -323,7 +324,7 @@ void bluesky_cloudlog_fetch(BlueSkyCloudLog *log) offset = log->location.offset + sizeof(struct cloudlog_header); log->data = bluesky_log_map_object(log->fs, log->location.directory, log->location.sequence, - offset, log->data_size); + offset, log->data_size, TRUE); bluesky_cloudlog_stats_update(log, 1); } diff --git a/bluesky/log.c b/bluesky/log.c index 8801b33..38606c1 100644 --- a/bluesky/log.c +++ b/bluesky/log.c @@ -468,8 +468,14 @@ BlueSkyCacheFile *bluesky_cachefile_lookup(BlueSkyFS *fs, return map; } +/* The arguments are mostly straightforward. log_dir is -1 for access from the + * journal, and non-negative for access to a cloud log segment. map_data + * should be TRUE for the case that are mapping just the data of an item where + * we have already parsed the item headers; this surpresses the error when the + * access is not to the first bytes of the item. */ BlueSkyRCStr *bluesky_log_map_object(BlueSkyFS *fs, int log_dir, - int log_seq, int log_offset, int log_size) + int log_seq, int log_offset, int log_size, + gboolean map_data) { if (page_size == 0) { page_size = getpagesize(); @@ -485,9 +491,17 @@ BlueSkyRCStr *bluesky_log_map_object(BlueSkyFS *fs, int log_dir, /* Log segments fetched from the cloud might only be partially-fetched. * Check whether the object we are interested in is available. */ if (log_dir >= 0) { - if (!bluesky_rangeset_lookup(map->items, log_offset)) { + const BlueSkyRangesetItem *rangeitem; + rangeitem = bluesky_rangeset_lookup(map->items, log_offset); + if (rangeitem == NULL || rangeitem->start != log_offset) { g_warning("log-%d: Item at offset %d does not seem to be available\n", log_seq, log_offset); } + if (map_data && rangeitem != NULL + && log_offset > rangeitem->start + && log_size <= rangeitem->length - (log_offset - rangeitem->start)) + { + g_warning(" ...allowing access to middle of log item"); + } } if (map->addr == NULL) { diff --git a/bluesky/util.c b/bluesky/util.c index be378ef..4e8b011 100644 --- a/bluesky/util.c +++ b/bluesky/util.c @@ -229,30 +229,66 @@ BlueSkyInode *bluesky_list_tail(GList *head) * should be extended later to track properly. */ struct BlueSkyRangeset { - GHashTable *hashtable; + GSequence *seq; }; +static gint compare64(gconstpointer a, gconstpointer b, gpointer user_data) +{ + uint64_t x = *(uint64_t *)a; + uint64_t y = *(uint64_t *)b; + + if (x < y) + return -1; + else if (x > y) + return 1; + else + return 0; +} + BlueSkyRangeset *bluesky_rangeset_new() { BlueSkyRangeset *rangeset = g_new(BlueSkyRangeset, 1); - rangeset->hashtable = g_hash_table_new(g_direct_hash, g_direct_equal); + rangeset->seq = g_sequence_new(g_free); return rangeset; } void bluesky_rangeset_free(BlueSkyRangeset *rangeset) { - g_hash_table_unref(rangeset->hashtable); + g_sequence_free(rangeset->seq); g_free(rangeset); } gboolean bluesky_rangeset_insert(BlueSkyRangeset *rangeset, - int start, int length, gpointer data) + uint64_t start, uint64_t length, + gpointer data) { - g_hash_table_insert(rangeset->hashtable, GINT_TO_POINTER(start), data); + GSequenceIter *i; + i = g_sequence_search(rangeset->seq, &start, compare64, NULL); + i = g_sequence_iter_prev(i); + + /* TODO: Checks that no other item overlaps. */ + + BlueSkyRangesetItem *item = g_new(BlueSkyRangesetItem, 1); + item->start = start; + item->length = length; + item->data = data; + g_sequence_insert_sorted(rangeset->seq, item, compare64, NULL); + return TRUE; } -gpointer bluesky_rangeset_lookup(BlueSkyRangeset *rangeset, int start) +const BlueSkyRangesetItem *bluesky_rangeset_lookup(BlueSkyRangeset *rangeset, + uint64_t offset) { - return g_hash_table_lookup(rangeset->hashtable, GINT_TO_POINTER(start)); + GSequenceIter *i; + i = g_sequence_search(rangeset->seq, &offset, compare64, NULL); + i = g_sequence_iter_prev(i); + if (g_sequence_iter_is_end(i)) + return NULL; + + BlueSkyRangesetItem *item = (BlueSkyRangesetItem *)g_sequence_get(i); + if (offset >= item->start && offset < item->start + item->length) + return item; + else + return NULL; } -- 2.20.1