if (last_segment == NULL)
return NULL;
- g_print("Last cloud log segment: %s\n", last_segment);
+ g_print("Last cloud log segment: %s (processed up to %d)\n",
+ last_segment, fs->log_state->latest_cleaner_seq_seen);
int seq = atoi(last_segment + 13);
g_free(last_segment);
length = item->length;
}
- if (length == 0)
+ if (length == 0) {
+ bluesky_cachefile_unref(cachefile);
+ g_mutex_unlock(cachefile->lock);
return NULL;
+ }
g_print("Found a cleaner checkpoint record.\n");
g_mutex_unlock(cachefile->lock);
BlueSkyCleanerItem *checkpoint = bluesky_cleaner_deserialize(data);
+ checkpoint->location.directory = BLUESKY_CLOUD_DIR_CLEANER;
+ checkpoint->location.sequence = seq;
bluesky_string_unref(data);
return checkpoint;
if (location.directory == BLUESKY_CLOUD_DIR_PRIMARY)
return FALSE;
+ if (location.directory == BLUESKY_CLOUD_DIR_CLEANER
+ && location.sequence <= fs->log_state->latest_cleaner_seq_seen)
+ return FALSE;
+
return TRUE;
}
+/* Schedule write-back of an inode that the cleaner has modified, so that the
+ * updated version reaches cloud storage.
+ *
+ * The intended policy is to bypass writing the inode to the journal when the
+ * cleaner's change is the only outstanding one, and to force a journal write
+ * when there have been other uncommitted changes, since the cloud shouldn't
+ * contain newer data than the journal.  That test is currently disabled: the
+ * condition is hard-wired to TRUE, so every call goes through
+ * bluesky_inode_start_sync() and the journal-bypass branch never runs.
+ *
+ * The inode must be locked by the caller. */
+static void cleaner_flush_inode(BlueSkyInode *inode)
+{
+    /* Intended condition: inode->change_commit != inode->change_count */
+    if (TRUE) {
+        /* bluesky_inode_start_sync also schedules the flush to the cloud,
+         * so there is nothing more to do on this path. */
+        bluesky_inode_start_sync(inode);
+    } else {
+        /* Journal-bypass path: the inode must not be waiting on the
+         * unlogged list.  Requeue it at the head of the filesystem's dirty
+         * list and record the current change count in change_cloud. */
+        g_assert(inode->unlogged_list == NULL);
+
+        bluesky_list_unlink(&inode->fs->dirty_list, inode->dirty_list);
+        inode->dirty_list = bluesky_list_prepend(&inode->fs->dirty_list,
+                                                 inode);
+        inode->change_cloud = inode->change_count;
+    }
+}
+
+/* Fold one inode record from the cleaner's log into the proxy's current copy
+ * of that inode.
+ *
+ *   fs             filesystem whose inode map and inodes are consulted
+ *   cleaner_inode  inode item read from the cleaner's log; its inum selects
+ *                  the inode and its links array is compared index-by-index
+ *                  with the proxy's block list
+ *
+ * There are two versions we are concerned with: the cleaner's (the data
+ * stored in the cleaner's log) and the proxy's (our most recent version,
+ * which the cleaner might or might not know about).  The proxy's version is
+ * treated as the more recent one; the only thing taken from the cleaner is a
+ * new location for a block whose ID is unchanged.
+ *
+ * Returns nothing.  If the inode is no longer in the inode map, or cannot be
+ * loaded, the cleaner's record is ignored. */
+static void merge_inode(BlueSkyFS *fs, BlueSkyCleanerItem *cleaner_inode)
+{
+    uint64_t inum = cleaner_inode->inum;
+
+    g_print("Merging inode %"PRIu64" from cleaner\n", inum);
+
+    g_mutex_lock(fs->lock);
+    InodeMapEntry *entry = bluesky_inode_map_lookup(fs->inode_map, inum, 0);
+    if (entry == NULL) {
+        /* Inode doesn't exist: it was probably deleted so keep it that way. */
+        g_mutex_unlock(fs->lock);
+        return;
+    }
+
+    /* Nothing is read from the proxy's log item yet; its location and id
+     * are what the fast path described below would need.  The lock is
+     * still taken and released exactly as before. */
+    BlueSkyCloudLog *proxy_item = entry->item;
+    g_mutex_lock(proxy_item->lock);
+    //BlueSkyCloudPointer proxy_location = entry->item->location;
+    //BlueSkyCloudID proxy_id = entry->item->id;
+    g_mutex_unlock(proxy_item->lock);
+    g_mutex_unlock(fs->lock);
+
+    /* If the cleaner and the proxy have the same ID, and if the proxy's
+     * in-memory copy is unmodified, then we can simply use the cleaner's
+     * version of the inode. */
+    /* TODO */
+
+    /* fs->lock was released above, so do not assume the inode can still be
+     * obtained; skip the merge rather than dereference a null pointer.
+     * NOTE(review): if bluesky_get_inode hands back a counted reference it
+     * should be released before returning from this function -- confirm
+     * against its definition. */
+    BlueSkyInode *proxy_inode = bluesky_get_inode(fs, inum);
+    if (proxy_inode == NULL)
+        return;
+
+    /* Merge file data together for a regular file.  Iterate over the file
+     * blocks in the proxy's copy of the inode.  If the block ID is unchanged
+     * in the cleaner but the location was updated, then update the location
+     * in the proxy because the block was relocated.  Otherwise ignore the
+     * cleaner's version for that block because the proxy's information is
+     * more recent. */
+    g_mutex_lock(proxy_inode->lock);
+    if (proxy_inode->type == BLUESKY_REGULAR) {
+        for (int i = 0; i < proxy_inode->blocks->len; i++) {
+            BlueSkyBlock *b = &g_array_index(proxy_inode->blocks,
+                                             BlueSkyBlock, i);
+            if (b->type != BLUESKY_BLOCK_REF)
+                continue;
+            /* The cleaner's record may list fewer blocks than the proxy. */
+            if (i >= cleaner_inode->links->len)
+                continue;
+            BlueSkyCleanerLink *cb = &g_array_index(cleaner_inode->links,
+                                                    BlueSkyCleanerLink, i);
+            /* Compare the two IDs using the size of the ID field itself,
+             * not the size of the (unrelated) location type. */
+            if (memcmp(&b->ref->id, &cb->id, sizeof(b->ref->id)) != 0)
+                continue;
+
+            g_print(" Updating block %d pointer\n", i);
+            b->ref->location = cb->location;
+        }
+    }
+    /* Flush regardless of inode type or of whether any pointer changed. */
+    cleaner_flush_inode(proxy_inode);
+    g_mutex_unlock(proxy_inode->lock);
+
+    /* Mark the inode as modified so it will get written back to the cloud.
+     * We don't actually need to force a synchronous write to our local
+     * journal since there have been no logical modifications.
+     * (Disabled: the code below refers to `inode`, which is not declared in
+     * this function.) */
+#if 0
+    g_mutex_lock(inode->fs->lock);
+    bluesky_list_unlink(&inode->fs->unlogged_list, inode->unlogged_list);
+    inode->unlogged_list = bluesky_list_prepend(&inode->fs->unlogged_list, inode);
+    g_mutex_unlock(inode->fs->lock);
+#endif
+}
+
void bluesky_cleaner_merge(BlueSkyFS *fs)
{
BlueSkyCleanerItem *checkpoint = bluesky_cleaner_find_checkpoint(fs);
return;
}
+ if (checkpoint->type != LOGTYPE_CHECKPOINT) {
+ g_warning("Last cleaner object not a checkpoint; cleaning probably in progress.");
+ bluesky_cleaner_item_free(checkpoint);
+ return;
+ }
+
/* Iterate over each of the inode map sections in the checkpoint */
for (int i = 0; i < checkpoint->links->len; i++) {
BlueSkyCleanerLink *link = &g_array_index(checkpoint->links,
BlueSkyCleanerLink, i);
- /*if (!needs_merging(fs, link->location))
- continue; */
+ if (!needs_merging(fs, link->location))
+ continue;
BlueSkyCleanerItem *imap = cleaner_load_item(fs, link->location);
if (imap == NULL) {
for (int j = 0; j < imap->links->len; j++) {
BlueSkyCleanerLink *link = &g_array_index(imap->links,
BlueSkyCleanerLink, j);
+ if (!needs_merging(fs, link->location))
+ continue;
BlueSkyCleanerItem *inode = cleaner_load_item(fs, link->location);
if (inode != NULL) {
- g_print("Got inode %"PRIu64"\n", inode->inum);
+ merge_inode(fs, inode);
}
bluesky_cleaner_item_free(inode);
}
bluesky_cleaner_item_free(imap);
}
+ fs->log_state->latest_cleaner_seq_seen = checkpoint->location.sequence;
bluesky_cleaner_item_free(checkpoint);
}
+
+/* Background thread body: sleep for 30 seconds, merge the cleaner's latest
+ * results into fs, and repeat.  The loop never exits; the trailing return
+ * exists only to satisfy the thread-function signature. */
+static gpointer cleaner_thread(BlueSkyFS *fs)
+{
+    for (;;) {
+        struct timespec interval = { .tv_sec = 30, .tv_nsec = 0 };
+
+        /* The result is ignored, as is any remaining time if the sleep is
+         * cut short. */
+        nanosleep(&interval, NULL);
+        bluesky_cleaner_merge(fs);
+    }
+
+    return NULL;
+}
+
+/* Start the background cleaner-merge thread for fs.  The thread is created
+ * non-joinable and no GError is requested, so a creation failure is not
+ * reported to the caller. */
+void bluesky_cleaner_thread_launch(BlueSkyFS *fs)
+{
+    GThreadFunc entry_point = (GThreadFunc)cleaner_thread;
+    gboolean joinable = FALSE;
+
+    g_thread_create(entry_point, fs, joinable, NULL);
+}