X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=bluesky%2Fcleaner.c;h=b13983ea68c8185bf8678d91d9fb665e101652ba;hb=6d6488b277fe7e57b25bcade9c6759688e2bcdea;hp=ca61d94357e8e6e69b00a84e382824f2bad6011b;hpb=e692553e85c46324aaeb36c6e737339ddae115a0;p=bluesky.git diff --git a/bluesky/cleaner.c b/bluesky/cleaner.c index ca61d94..b13983e 100644 --- a/bluesky/cleaner.c +++ b/bluesky/cleaner.c @@ -21,23 +21,106 @@ * rewriting log segments where needed and deleting old segments, is handled by * the in-cloud cleaner component. */ +/* A specialized function for loading an object from the cloud, used just by + * the cleaner. When the cleaner runs, it will produce new objects that have + * the same object ID as before, differing only in the outgoing link locations + * as a result of cleaning. We can't use the normal functions for loading + * objects since those can only deal with one version of an object. Instead, + * we load the cleaned object with this function, and then merge the state into + * the official object as needed. + * + * The bluesky_cleaner_deserialize function is like + * bluesky_deserialize_cloudlog, but does a more limited deserialization and + * keeps the returned item entirely separate from any in-memory BlueSkyCloudLog + * objects. */ + +typedef struct { + BlueSkyCloudLogType type; + BlueSkyCloudID id; + BlueSkyCloudPointer location; + uint64_t inum; + GArray *links; +} BlueSkyCleanerItem; + +typedef struct { + BlueSkyCloudID id; + BlueSkyCloudPointer location; +} BlueSkyCleanerLink; + +static BlueSkyCleanerItem *bluesky_cleaner_deserialize(BlueSkyRCStr *raw) +{ + const char *data = raw->data; + size_t len = raw->len; + const char *data1, *data2, *data3; + size_t len1, len2, len3; + + g_assert(len > 4); + if (len < sizeof(struct cloudlog_header) + || memcmp(data, CLOUDLOG_MAGIC, 4) != 0) + { + g_warning("Deserializing garbage cloud log item from cleaner!"); + return NULL; + }; + + struct cloudlog_header *header = (struct cloudlog_header *)data; + len1 = GUINT32_FROM_LE(header->size1); + len2 = GUINT32_FROM_LE(header->size2); + len3 = GUINT32_FROM_LE(header->size3); + data1 = data + sizeof(struct cloudlog_header); + data2 = data1 + len1; + data3 = data2 + len2; + g_assert(data3 + len3 - data <= len); + + BlueSkyCleanerItem *item = g_new0(BlueSkyCleanerItem, 1); + item->type = header->type - '0'; + item->inum = GUINT64_FROM_LE(header->inum); + memcpy(&item->id, &header->id, sizeof(BlueSkyCloudID)); + + int link_count = len2 / sizeof(BlueSkyCloudID); + g_print("Outgoing links: %d\n", link_count); + item->links = g_array_new(FALSE, TRUE, sizeof(BlueSkyCleanerLink)); + for (int i = 0; i < link_count; i++) { + BlueSkyCleanerLink link; + + g_assert(len2 >= sizeof(link.id)); + memcpy(&link.id, data2, sizeof(link.id)); + data2 += sizeof(link.id); len2 -= sizeof(link.id); + + g_assert(len3 >= sizeof(link.location)); + memcpy(&link.location, data3, sizeof(link.location)); + data3 += sizeof(link.location); len3 -= sizeof(link.location); + + g_array_append_val(item->links, link); + } + + return item; +} + +static void bluesky_cleaner_item_free(BlueSkyCleanerItem *item) +{ + if (item == NULL) + return; + g_array_unref(item->links); + g_free(item); +} + /* Check the cleaner's logs to find the a more recent checkpoint record. This * should be called occasionally to see if the cleaner has done any work since * our last check. */ -void bluesky_cleaner_find_checkpoint(BlueSkyFS *fs) +static BlueSkyCleanerItem *bluesky_cleaner_find_checkpoint(BlueSkyFS *fs) { char *prefix = g_strdup_printf("log-%08d", BLUESKY_CLOUD_DIR_CLEANER); char *last_segment = bluesky_store_lookup_last(fs->store, prefix); g_free(prefix); if (last_segment == NULL) - return; + return NULL; g_print("Last cloud log segment: %s\n", last_segment); int seq = atoi(last_segment + 13); g_free(last_segment); if (seq <= fs->log_state->latest_cleaner_seq_seen) - return; + return NULL; g_print("New log segment appeared in cleaner directory: %d\n", seq); @@ -59,10 +142,92 @@ void bluesky_cleaner_find_checkpoint(BlueSkyFS *fs) length = item->length; } - if (length > 0) { - g_print("Last object: %"PRIu64" + %"PRIu64"\n", offset, length); - } + if (length == 0) + return NULL; + + g_print("Found a cleaner checkpoint record.\n"); + + BlueSkyRCStr *data = bluesky_cachefile_map_raw(cachefile, offset, length); + bluesky_cachefile_unref(cachefile); + g_mutex_unlock(cachefile->lock); + BlueSkyCleanerItem *checkpoint = bluesky_cleaner_deserialize(data); + bluesky_string_unref(data); + + return checkpoint; +} + +static BlueSkyCleanerItem *cleaner_load_item(BlueSkyFS *fs, + BlueSkyCloudPointer location) +{ + g_print("Loading item %d/%d/%d...\n", location.directory, location.sequence, location.offset); + + BlueSkyCacheFile *cachefile; + cachefile = bluesky_cachefile_lookup(fs, location.directory, + location.sequence, TRUE); + while (!cachefile->complete) + g_cond_wait(cachefile->cond, cachefile->lock); + + /* TODO: Ought to check that we are loading an item which validated? */ + BlueSkyRCStr *data = bluesky_cachefile_map_raw(cachefile, location.offset, + location.size); bluesky_cachefile_unref(cachefile); g_mutex_unlock(cachefile->lock); + + BlueSkyCleanerItem *item = bluesky_cleaner_deserialize(data); + bluesky_string_unref(data); + + return item; +} + +/* Does the item at the given cloud location from the cleaner need merging? An + * item in the primary log does not need to be merged, as by definition we + * already know about it. Similarly, old items in the cleaner's log--those + * that we have already seen from a previous merge--do not need to be mergd + * again. */ +gboolean needs_merging(BlueSkyFS *fs, BlueSkyCloudPointer location) +{ + if (location.directory == BLUESKY_CLOUD_DIR_PRIMARY) + return FALSE; + + return TRUE; +} + +void bluesky_cleaner_merge(BlueSkyFS *fs) +{ + BlueSkyCleanerItem *checkpoint = bluesky_cleaner_find_checkpoint(fs); + + if (checkpoint == NULL) { + g_warning("Unable to load cleaner checkpoint record!"); + return; + } + + /* Iterate over each of the inode map sections in the checkpoint */ + for (int i = 0; i < checkpoint->links->len; i++) { + BlueSkyCleanerLink *link = &g_array_index(checkpoint->links, + BlueSkyCleanerLink, i); + /*if (!needs_merging(fs, link->location)) + continue; */ + + BlueSkyCleanerItem *imap = cleaner_load_item(fs, link->location); + if (imap == NULL) { + g_warning("Unable to load cleaner inode map"); + continue; + } + + /* Iterate over all inodes found in the inode map section */ + for (int j = 0; j < imap->links->len; j++) { + BlueSkyCleanerLink *link = &g_array_index(imap->links, + BlueSkyCleanerLink, j); + BlueSkyCleanerItem *inode = cleaner_load_item(fs, link->location); + if (inode != NULL) { + g_print("Got inode %"PRIu64"\n", inode->inum); + } + bluesky_cleaner_item_free(inode); + } + + bluesky_cleaner_item_free(imap); + } + + bluesky_cleaner_item_free(checkpoint); }