X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=bluesky%2Fcleaner.c;h=3ca15b8adfff87ec4affd402b310c95c66c86d87;hb=173738187e4798ff42d3be002f65c142c3b2abc9;hp=ca61d94357e8e6e69b00a84e382824f2bad6011b;hpb=e692553e85c46324aaeb36c6e737339ddae115a0;p=bluesky.git diff --git a/bluesky/cleaner.c b/bluesky/cleaner.c index ca61d94..3ca15b8 100644 --- a/bluesky/cleaner.c +++ b/bluesky/cleaner.c @@ -21,23 +21,106 @@ * rewriting log segments where needed and deleting old segments, is handled by * the in-cloud cleaner component. */ +/* A specialized function for loading an object from the cloud, used just by + * the cleaner. When the cleaner runs, it will produce new objects that have + * the same object ID as before, differing only in the outgoing link locations + * as a result of cleaning. We can't use the normal functions for loading + * objects since those can only deal with one version of an object. Instead, + * we load the cleaned object with this function, and then merge the state into + * the official object as needed. + * + * The bluesky_cleaner_deserialize function is like + * bluesky_deserialize_cloudlog, but does a more limited deserialization and + * keeps the returned item entirely separate from any in-memory BlueSkyCloudLog + * objects. */ + +typedef struct { + BlueSkyCloudLogType type; + BlueSkyCloudID id; + BlueSkyCloudPointer location; + uint64_t inum; + GArray *links; +} BlueSkyCleanerItem; + +typedef struct { + BlueSkyCloudID id; + BlueSkyCloudPointer location; +} BlueSkyCleanerLink; + +static BlueSkyCleanerItem *bluesky_cleaner_deserialize(BlueSkyRCStr *raw) +{ + const char *data = raw->data; + size_t len = raw->len; + const char *data1, *data2, *data3; + size_t len1, len2, len3; + + g_assert(len > 4); + if (len < sizeof(struct cloudlog_header) + || memcmp(data, CLOUDLOG_MAGIC, 4) != 0) + { + g_warning("Deserializing garbage cloud log item from cleaner!"); + return NULL; + }; + + struct cloudlog_header *header = (struct cloudlog_header *)data; + len1 = GUINT32_FROM_LE(header->size1); + len2 = GUINT32_FROM_LE(header->size2); + len3 = GUINT32_FROM_LE(header->size3); + data1 = data + sizeof(struct cloudlog_header); + data2 = data1 + len1; + data3 = data2 + len2; + g_assert(data3 + len3 - data <= len); + + BlueSkyCleanerItem *item = g_new0(BlueSkyCleanerItem, 1); + item->type = header->type - '0'; + item->inum = GUINT64_FROM_LE(header->inum); + memcpy(&item->id, &header->id, sizeof(BlueSkyCloudID)); + + int link_count = len2 / sizeof(BlueSkyCloudID); + g_print("Outgoing links: %d\n", link_count); + item->links = g_array_new(FALSE, TRUE, sizeof(BlueSkyCleanerLink)); + for (int i = 0; i < link_count; i++) { + BlueSkyCleanerLink link; + + g_assert(len2 >= sizeof(link.id)); + memcpy(&link.id, data2, sizeof(link.id)); + data2 += sizeof(link.id); len2 -= sizeof(link.id); + + g_assert(len3 >= sizeof(link.location)); + memcpy(&link.location, data3, sizeof(link.location)); + data3 += sizeof(link.location); len3 -= sizeof(link.location); + + g_array_append_val(item->links, link); + } + + return item; +} + +static void bluesky_cleaner_item_free(BlueSkyCleanerItem *item) +{ + if (item == NULL) + return; + g_array_unref(item->links); + g_free(item); +} + /* Check the cleaner's logs to find the a more recent checkpoint record. This * should be called occasionally to see if the cleaner has done any work since * our last check. */ -void bluesky_cleaner_find_checkpoint(BlueSkyFS *fs) +static BlueSkyCleanerItem *bluesky_cleaner_find_checkpoint(BlueSkyFS *fs) { char *prefix = g_strdup_printf("log-%08d", BLUESKY_CLOUD_DIR_CLEANER); char *last_segment = bluesky_store_lookup_last(fs->store, prefix); g_free(prefix); if (last_segment == NULL) - return; + return NULL; g_print("Last cloud log segment: %s\n", last_segment); int seq = atoi(last_segment + 13); g_free(last_segment); if (seq <= fs->log_state->latest_cleaner_seq_seen) - return; + return NULL; g_print("New log segment appeared in cleaner directory: %d\n", seq); @@ -59,10 +142,214 @@ void bluesky_cleaner_find_checkpoint(BlueSkyFS *fs) length = item->length; } - if (length > 0) { - g_print("Last object: %"PRIu64" + %"PRIu64"\n", offset, length); + if (length == 0) { + bluesky_cachefile_unref(cachefile); + g_mutex_unlock(cachefile->lock); + return NULL; } + g_print("Found a cleaner checkpoint record.\n"); + + BlueSkyRCStr *data = bluesky_cachefile_map_raw(cachefile, offset, length); bluesky_cachefile_unref(cachefile); g_mutex_unlock(cachefile->lock); + + BlueSkyCleanerItem *checkpoint = bluesky_cleaner_deserialize(data); + checkpoint->location.directory = BLUESKY_CLOUD_DIR_CLEANER; + checkpoint->location.directory = seq; + bluesky_string_unref(data); + + return checkpoint; +} + +static BlueSkyCleanerItem *cleaner_load_item(BlueSkyFS *fs, + BlueSkyCloudPointer location) +{ + g_print("Loading item %d/%d/%d...\n", location.directory, location.sequence, location.offset); + + BlueSkyCacheFile *cachefile; + cachefile = bluesky_cachefile_lookup(fs, location.directory, + location.sequence, TRUE); + while (!cachefile->complete) + g_cond_wait(cachefile->cond, cachefile->lock); + + /* TODO: Ought to check that we are loading an item which validated? */ + BlueSkyRCStr *data = bluesky_cachefile_map_raw(cachefile, location.offset, + location.size); + bluesky_cachefile_unref(cachefile); + g_mutex_unlock(cachefile->lock); + + BlueSkyCleanerItem *item = bluesky_cleaner_deserialize(data); + bluesky_string_unref(data); + + return item; +} + +/* Does the item at the given cloud location from the cleaner need merging? An + * item in the primary log does not need to be merged, as by definition we + * already know about it. Similarly, old items in the cleaner's log--those + * that we have already seen from a previous merge--do not need to be mergd + * again. */ +gboolean needs_merging(BlueSkyFS *fs, BlueSkyCloudPointer location) +{ + if (location.directory == BLUESKY_CLOUD_DIR_PRIMARY) + return FALSE; + + if (location.directory == BLUESKY_CLOUD_DIR_CLEANER + && location.sequence <= fs->log_state->latest_cleaner_seq_seen) + return FALSE; + + return TRUE; +} + +/* For an inode which has been modified by the cleaner and must be flushed out + * to cloud storage, mark it as appropriately dirty. We will bypass writingt + * the inode to the journal if possible--but if there have ben other + * uncommitted changes besides what the cleaner did then we will force a + * journal write as well since the cloud shouldn't contain newer data than the + * journal. Inode must be locked. */ +static void cleaner_flush_inode(BlueSkyInode *inode) +{ + // if (inode->change_commit != inode->change_count) { + if (TRUE) { + /* bluesky_inode_start_sync schedules a flush to the cloud so we're all + * done. */ + bluesky_inode_start_sync(inode); + return; + } + + g_assert(inode->unlogged_list == NULL); + + bluesky_list_unlink(&inode->fs->dirty_list, inode->dirty_list); + inode->dirty_list = bluesky_list_prepend(&inode->fs->dirty_list, inode); + inode->change_cloud = inode->change_count; +} + +static void merge_inode(BlueSkyFS *fs, BlueSkyCleanerItem *cleaner_inode) +{ + /* There are two versions we are concerned with: cleaner_ is for the data + * stored in the cleaner's log, and proxy_ is for our most recent version, + * which the cleaner might or might not know about. */ + uint64_t inum = cleaner_inode->inum; + + g_print("Merging inode %"PRIu64" from cleaner\n", inum); + + g_mutex_lock(fs->lock); + InodeMapEntry *entry = bluesky_inode_map_lookup(fs->inode_map, inum, 0); + if (entry == NULL) { + /* Inode doesn't exist: it was probably deleted so keep it that way. */ + g_mutex_unlock(fs->lock); + return; + } + + BlueSkyCloudLog *proxy_item = entry->item; + g_mutex_lock(proxy_item->lock); + //BlueSkyCloudPointer proxy_location = entry->item->location; + //BlueSkyCloudID proxy_id = entry->item->id; + g_mutex_unlock(proxy_item->lock); + g_mutex_unlock(fs->lock); + + /* If the cleaner and the proxy have the same ID, and if the proxy's + * in-memory copy is unmodified, then we can simply use the cleaner's + * version of the inode. */ + /* TODO */ + + /* Merge file data together for a regular file. Iterate over the file + * blocks in the proxy's copy of the inode. If the block ID is unchanged + * in the cleaner but the location was updated, then update the location in + * the cleaner because the block was relocated. Otherwise ignore the + * cleaner's version for that block because the proxy's information is more + * recent. */ + BlueSkyInode *proxy_inode = bluesky_get_inode(fs, inum); + g_mutex_lock(proxy_inode->lock); + if (proxy_inode->type == BLUESKY_REGULAR) { + for (int i = 0; i < proxy_inode->blocks->len; i++) { + BlueSkyBlock *b = &g_array_index(proxy_inode->blocks, + BlueSkyBlock, i); + if (b->type != BLUESKY_BLOCK_REF) + continue; + if (i >= cleaner_inode->links->len) + continue; + BlueSkyCleanerLink *cb = &g_array_index(cleaner_inode->links, + BlueSkyCleanerLink, i); + if (memcmp(&b->ref->id, &cb->id, sizeof(BlueSkyCloudPointer)) != 0) + continue; + + g_print(" Updating block %d pointer\n", i); + b->ref->location = cb->location; + } + } + cleaner_flush_inode(proxy_inode); + g_mutex_unlock(proxy_inode->lock); + + /* Mark the inode as modified so it will get written back to the cloud. We + * don't actually need to force a synchronous write to our local journal + * since there have been no logical modifications. */ +#if 0 + g_mutex_lock(inode->fs->lock); + bluesky_list_unlink(&inode->fs->unlogged_list, inode->unlogged_list); + inode->unlogged_list = bluesky_list_prepend(&inode->fs->unlogged_list, inode); + g_mutex_unlock(inode->fs->lock); +#endif +} + +void bluesky_cleaner_merge(BlueSkyFS *fs) +{ + BlueSkyCleanerItem *checkpoint = bluesky_cleaner_find_checkpoint(fs); + + if (checkpoint == NULL) { + g_warning("Unable to load cleaner checkpoint record!"); + return; + } + + /* Iterate over each of the inode map sections in the checkpoint */ + for (int i = 0; i < checkpoint->links->len; i++) { + BlueSkyCleanerLink *link = &g_array_index(checkpoint->links, + BlueSkyCleanerLink, i); + if (!needs_merging(fs, link->location)) + continue; + + BlueSkyCleanerItem *imap = cleaner_load_item(fs, link->location); + if (imap == NULL) { + g_warning("Unable to load cleaner inode map"); + continue; + } + + /* Iterate over all inodes found in the inode map section */ + for (int j = 0; j < imap->links->len; j++) { + BlueSkyCleanerLink *link = &g_array_index(imap->links, + BlueSkyCleanerLink, j); + if (!needs_merging(fs, link->location)) + continue; + BlueSkyCleanerItem *inode = cleaner_load_item(fs, link->location); + if (inode != NULL) { + merge_inode(fs, inode); + } + bluesky_cleaner_item_free(inode); + } + + bluesky_cleaner_item_free(imap); + } + + fs->log_state->latest_cleaner_seq_seen = checkpoint->location.sequence; + bluesky_cleaner_item_free(checkpoint); +} + +/* Run the cleaner as a background task. */ +static gpointer cleaner_thread(BlueSkyFS *fs) +{ + while (TRUE) { + struct timespec delay; + delay.tv_sec = 30; + delay.tv_nsec = 0; + nanosleep(&delay, NULL); + bluesky_cleaner_merge(fs); + } + + return NULL; +} + +void bluesky_cleaner_thread_launch(BlueSkyFS *fs) +{ + g_thread_create((GThreadFunc)cleaner_thread, fs, FALSE, NULL); }