From 2ad1881ef34f84f3dc8ded636ada9b21e3fd906b Mon Sep 17 00:00:00 2001
From: Michael Vrable
Date: Sun, 5 Sep 2010 22:27:34 -0700
Subject: [PATCH] Improve object deserialization: properly parse object headers.

---
 bluesky/bluesky-private.h | 31 ++++++++++++++++
 bluesky/cloudlog.c        | 44 ++++++++++++++++-------
 bluesky/imap.c            |  9 +++++
 bluesky/log.c             | 37 ++++---------------
 bluesky/serialize.c       | 93 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 171 insertions(+), 43 deletions(-)

diff --git a/bluesky/bluesky-private.h b/bluesky/bluesky-private.h
index 9f973a3..f1f8ffc 100644
--- a/bluesky/bluesky-private.h
+++ b/bluesky/bluesky-private.h
@@ -181,6 +181,33 @@ typedef enum {
     LOGTYPE_CHECKPOINT_PTR = 5,
 } BlueSkyCloudLogType;
 
+/* Headers that go on items in local log segments and cloud log segments. */
+struct log_header {
+    uint32_t magic;             // HEADER_MAGIC
+    uint8_t type;               // Object type + '0'
+    uint32_t offset;            // Starting byte offset of the log header
+    uint32_t size1;             // Size of the data item (bytes)
+    uint32_t size2;             // Size of the list of linked object IDs (bytes)
+    uint32_t size3;             // Size of the list of link locations (bytes)
+    uint64_t inum;              // Inode which owns this data, if any
+    BlueSkyCloudID id;          // Object identifier
+} __attribute__((packed));
+
+struct log_footer {
+    uint32_t magic;             // FOOTER_MAGIC
+    uint32_t crc;               // Computed from log_header to log_footer.magic
+} __attribute__((packed));
+
+struct cloudlog_header {
+    char magic[4];              // CLOUDLOG_MAGIC
+    uint8_t type;               // Object type + '0'
+    BlueSkyCloudID id;          // Object identifier
+    uint32_t size1, size2, size3;   // Sizes of the three data sections (bytes)
+} __attribute__((packed));
+
+#define JOURNAL_MAGIC "\nLog"
+#define CLOUDLOG_MAGIC "AgI-"
+
 /* A record which tracks an object which has been written to a local log,
  * cached, locally, and/or written to the cloud. */
 #define CLOUDLOG_JOURNAL 0x01
@@ -208,6 +235,9 @@ struct _BlueSkyCloudLog {
     uint64_t inum;
     int32_t inum_offset;
 
+    // The size of encrypted object data, not including any headers
+    int data_size;
+
     // The location of the object in the cloud, if available.
     BlueSkyCloudPointer location;
 
@@ -245,6 +275,7 @@ void bluesky_cloudlog_unref(BlueSkyCloudLog *log);
 void bluesky_cloudlog_stats_update(BlueSkyCloudLog *log, int type);
 void bluesky_cloudlog_sync(BlueSkyCloudLog *log);
 void bluesky_cloudlog_insert(BlueSkyCloudLog *log);
+BlueSkyCloudLog *bluesky_cloudlog_get(BlueSkyFS *fs, BlueSkyCloudID id);
 void bluesky_cloudlog_fetch(BlueSkyCloudLog *log);
 BlueSkyCloudPointer bluesky_cloudlog_serialize(BlueSkyCloudLog *log,
                                                BlueSkyFS *fs);
diff --git a/bluesky/cloudlog.c b/bluesky/cloudlog.c
index beaf1b4..cc186a5 100644
--- a/bluesky/cloudlog.c
+++ b/bluesky/cloudlog.c
@@ -180,14 +180,29 @@ void bluesky_cloudlog_insert(BlueSkyCloudLog *log)
     g_mutex_unlock(log->fs->lock);
 }
 
-struct cloudlog_header {
-    char magic[4];
-    uint8_t type;
-    BlueSkyCloudID id;
-    uint32_t size1, size2, size3;
-} __attribute__((packed));
+/* Look up the cloud log entry for the given ID.  If the item does not exist,
+ * create an empty entry, keyed by the ID stored inside the new item, that can
+ * later be filled in when the real item is loaded.  The returned item has a
+ * reference held.  If a null (all-zero) ID is given then NULL is returned. */
+BlueSkyCloudLog *bluesky_cloudlog_get(BlueSkyFS *fs, BlueSkyCloudID id)
+{
+    static const BlueSkyCloudID id0 = {{0}};
 
-#define CLOUDLOG_MAGIC "AgI-"
+    if (memcmp(&id, &id0, sizeof(BlueSkyCloudID)) == 0)
+        return NULL;
+
+    g_mutex_lock(fs->lock);
+    BlueSkyCloudLog *item;
+    item = g_hash_table_lookup(fs->locations, &id);
+    if (item == NULL) {
+        item = bluesky_cloudlog_new(fs, &id);
+        g_hash_table_insert(fs->locations, &item->id, item);
+    } else {
+        bluesky_cloudlog_ref(item);
+    }
+    g_mutex_unlock(fs->lock);
+    return item;
+}
 
 /* Ensure that a cloud log item is loaded in memory, and if not read it in.
  * TODO: Make asynchronous, and make this also fetch from the cloud.  Right now
@@ -197,20 +212,23 @@ void bluesky_cloudlog_fetch(BlueSkyCloudLog *log)
     if (log->data != NULL)
         return;
 
+    int offset;
+
     if ((log->location_flags | log->pending_write) & CLOUDLOG_JOURNAL) {
         bluesky_cloudlog_stats_update(log, -1);
+        offset = log->log_offset + sizeof(struct log_header);
         log->data = bluesky_log_map_object(log->fs, -1, log->log_seq,
-                                           log->log_offset, log->log_size);
+                                           offset, log->data_size);
         bluesky_cloudlog_stats_update(log, 1);
     }
 
     if (log->data == NULL && (log->location_flags & CLOUDLOG_CLOUD)) {
         log->location_flags &= ~CLOUDLOG_JOURNAL;
         bluesky_cloudlog_stats_update(log, -1);
+        offset = log->location.offset + sizeof(struct cloudlog_header);
         log->data = bluesky_log_map_object(log->fs, log->location.directory,
                                            log->location.sequence,
-                                           log->location.offset,
-                                           log->location.size);
+                                           offset, log->data_size);
         bluesky_cloudlog_stats_update(log, 1);
     }
 
@@ -251,8 +269,8 @@ BlueSkyCloudPointer bluesky_cloudlog_serialize(BlueSkyCloudLog *log,
     /* TODO: Right now offset/size are set to the raw data, but we should add
      * header parsing to the code which loads objects back in. */
     log->location = state->location;
-    log->location.offset = state->data->len + sizeof(struct cloudlog_header);
-    log->location.size = data1->len;
+    log->location.offset = state->data->len;
+    log->data_size = data1->len;
 
     struct cloudlog_header header;
     memcpy(header.magic, CLOUDLOG_MAGIC, 4);
@@ -267,6 +285,8 @@ BlueSkyCloudPointer bluesky_cloudlog_serialize(BlueSkyCloudLog *log,
     g_string_append_len(state->data, data2->str, data2->len);
     g_string_append_len(state->data, data3->str, data3->len);
 
+    log->location.size = state->data->len - log->location.offset;
+
     /* If the object we flushed was an inode, update the inode map. */
     if (log->type == LOGTYPE_INODE) {
         g_mutex_lock(fs->lock);
diff --git a/bluesky/imap.c b/bluesky/imap.c
index 484f24f..dc24b58 100644
--- a/bluesky/imap.c
+++ b/bluesky/imap.c
@@ -178,3 +178,12 @@ BlueSkyCloudLog *bluesky_inode_map_serialize(BlueSkyFS *fs)
     log->data = bluesky_string_new_from_gstring(buf);
     return log;
 }
+
+/* Fetch the serialized inode map; rebuilding the map from it is still TODO. */
+void bluesky_inode_map_deserialize(BlueSkyFS *fs, BlueSkyCloudLog *imap)
+{
+    g_mutex_lock(imap->lock);
+    bluesky_cloudlog_fetch(imap);
+    g_assert(imap->data != NULL);
+    g_mutex_unlock(imap->lock);
+}
diff --git a/bluesky/log.c b/bluesky/log.c
index c879ccd..7f2eea4 100644
--- a/bluesky/log.c
+++ b/bluesky/log.c
@@ -41,22 +41,6 @@
 #define HEADER_MAGIC 0x676f4c0a
 #define FOOTER_MAGIC 0x2e435243
 
-struct log_header {
-    uint32_t magic;             // HEADER_MAGIC
-    uint8_t type;               // Object type + '0'
-    uint32_t offset;            // Starting byte offset of the log header
-    uint32_t size1;             // Size of the data item (bytes)
-    uint32_t size2;             //
-    uint32_t size3;             //
-    uint64_t inum;              // Inode which owns this data, if any
-    BlueSkyCloudID id;          // Object identifier
-} __attribute__((packed));
-
-struct log_footer {
-    uint32_t magic;             // FOOTER_MAGIC
-    uint32_t crc;               // Computed from log_header to log_footer.magic
-} __attribute__((packed));
-
 static void writebuf(int fd, const char *buf, size_t len)
 {
     while (len > 0) {
@@ -226,10 +210,11 @@ static gpointer log_thread(gpointer d)
         writebuf(log->fd, (const char *)&footer, sizeof(footer));
 
         item->log_seq = log->seq_num;
-        item->log_offset = offset + sizeof(header);
-        item->log_size = item->data->len;
+        item->log_offset = offset;
+        item->log_size = size;
+        item->data_size = item->data->len;
 
-        offset += sizeof(header) + sizeof(footer) + item->data->len;
+        offset += size;
 
         g_string_free(data1, TRUE);
         g_string_free(data2, TRUE);
@@ -718,18 +703,8 @@ static void bluesky_replay_scan_journal2(BlueSkyFS *fs, GList **objects,
             + GUINT32_FROM_LE(header->size2)
             + GUINT32_FROM_LE(header->size3);
 
-        g_mutex_lock(fs->lock);
-        BlueSkyCloudLog *log_item;
-        log_item = g_hash_table_lookup(fs->locations, &header->id);
-        if (log_item == NULL) {
-            log_item = bluesky_cloudlog_new(fs, &header->id);
-            g_hash_table_insert(fs->locations, &log_item->id, log_item);
-            g_mutex_lock(log_item->lock);
-        } else {
-            bluesky_cloudlog_ref(log_item);
-            g_mutex_lock(log_item->lock);
-        }
-        g_mutex_unlock(fs->lock);
+        BlueSkyCloudLog *log_item = bluesky_cloudlog_get(fs, header->id);
+        g_mutex_lock(log_item->lock);
         *objects = g_list_prepend(*objects, log_item);
 
         log_item->inum = GUINT64_FROM_LE(header->inum);
diff --git a/bluesky/serialize.c b/bluesky/serialize.c
index 792b01a..1659eec 100644
--- a/bluesky/serialize.c
+++ b/bluesky/serialize.c
@@ -270,3 +270,96 @@ void bluesky_serialize_cloudlog(BlueSkyCloudLog *log,
         }
     }
 }
+
+/* Deserialize data from the journal or a cloud segment back into the in-memory
+ * cloud log item format.
+ *
+ * item: the cloud log item to update; its cached data is dropped and its list
+ *       of links is replaced by the references parsed from the object.
+ * data: start of the serialized object, beginning with its header.
+ * len:  number of bytes available at data.
+ *
+ * The format (journal or cloud log) is detected from the magic number; if the
+ * magic number is not recognized a warning is logged and item is unchanged. */
+void bluesky_deserialize_cloudlog(BlueSkyCloudLog *item,
+                                  const char *data,
+                                  size_t len)
+{
+    const char *data1, *data2, *data3;
+    size_t len1, len2, len3;
+    int type;
+    BlueSkyCloudID id;
+    g_assert(len > 4);
+
+    /* Auto-detect the format: either the journal or cloud log, based on the
+     * magic number at the start */
+    if (memcmp(data, JOURNAL_MAGIC, 4) == 0) {
+        g_assert(len >= sizeof(struct log_header));
+        struct log_header *header = (struct log_header *)data;
+        type = header->type - '0';
+        len1 = GUINT32_FROM_LE(header->size1);
+        len2 = GUINT32_FROM_LE(header->size2);
+        len3 = GUINT32_FROM_LE(header->size3);
+        id = header->id;
+        data1 = data + sizeof(struct log_header);
+        data2 = data1 + len1;
+        data3 = data2 + len2;
+        g_assert((size_t)(data3 + len3 - data) <= len);
+    } else if (memcmp(data, CLOUDLOG_MAGIC, 4) == 0) {
+        g_assert(len >= sizeof(struct cloudlog_header));
+        struct cloudlog_header *header = (struct cloudlog_header *)data;
+        type = header->type - '0';
+        len1 = GUINT32_FROM_LE(header->size1);
+        len2 = GUINT32_FROM_LE(header->size2);
+        len3 = GUINT32_FROM_LE(header->size3);
+        id = header->id;
+        data1 = data + sizeof(struct cloudlog_header);
+        data2 = data1 + len1;
+        data3 = data2 + len2;
+        /* Cloud objects carry no footer, so the three sections may end
+         * exactly at the end of the buffer. */
+        g_assert((size_t)(data3 + len3 - data) <= len);
+    } else {
+        /* Neither magic number matched: the section lengths and pointers were
+         * never set, so there is nothing that can safely be parsed. */
+        g_warning("Deserializing unrecognized cloud log item (bad magic)");
+        return;
+    }
+
+    BlueSkyFS *fs = item->fs;
+
+    bluesky_string_unref(item->data);
+    item->data = NULL;
+    //item->location_flags = CLOUDLOG_JOURNAL;
+
+    /* Section 2 is an array of object IDs; section 3 holds one location record
+     * for each of those IDs that is non-null. */
+    int link_count = len2 / sizeof(BlueSkyCloudID);
+    GArray *new_links = g_array_new(FALSE, TRUE, sizeof(BlueSkyCloudLog *));
+    for (int i = 0; i < link_count; i++) {
+        BlueSkyCloudID link_id;
+        g_assert(len2 >= sizeof(link_id));
+        memcpy(&link_id, data2, sizeof(link_id));
+        data2 += sizeof(link_id); len2 -= sizeof(link_id);
+
+        BlueSkyCloudLog *ref = bluesky_cloudlog_get(fs, link_id);
+        if (ref != NULL) {
+            g_mutex_lock(ref->lock);
+            g_assert(len3 >= sizeof(ref->location));
+            memcpy(&ref->location, data3, sizeof(ref->location));
+            data3 += sizeof(ref->location); len3 -= sizeof(ref->location);
+            g_mutex_unlock(ref->lock);
+        }
+
+        g_array_append_val(new_links, ref);
+    }
+
+    /* Release the references held by the old link list before replacing it. */
+    for (guint i = 0; i < item->links->len; i++) {
+        BlueSkyCloudLog *c = g_array_index(item->links,
+                                           BlueSkyCloudLog *, i);
+        bluesky_cloudlog_unref(c);
+    }
+    g_array_unref(item->links);
+    item->links = new_links;
+}
-- 
2.20.1