Allow batched log writes when writing dirty inodes.
[bluesky.git] / bluesky / file.c
index 75768d1..b107a40 100644 (file)
@@ -9,6 +9,7 @@
 #include <stdint.h>
 #include <glib.h>
 #include <string.h>
+#include <inttypes.h>
 
 #include "bluesky-private.h"
 
@@ -30,11 +31,10 @@ void bluesky_block_touch(BlueSkyInode *inode, uint64_t i)
 
     switch (block->type) {
     case BLUESKY_BLOCK_ZERO:
-        g_print("Allocating zero block of size %zd\n", block_len);
         block->data = bluesky_string_new(g_malloc0(block_len), block_len);
         break;
     case BLUESKY_BLOCK_REF:
-        bluesky_block_fetch(inode->fs, block);
+        bluesky_block_fetch(inode->fs, block, NULL);
         g_assert(block->type == BLUESKY_BLOCK_CACHED);
         /* Fall through */
     case BLUESKY_BLOCK_CACHED:
@@ -43,6 +43,12 @@ void bluesky_block_touch(BlueSkyInode *inode, uint64_t i)
         break;
     }
 
+    if (block->type != BLUESKY_BLOCK_CACHED
+            && block->type != BLUESKY_BLOCK_DIRTY)
+        g_atomic_int_add(&inode->fs->cache_total, 1);
+    if (block->type != BLUESKY_BLOCK_DIRTY)
+        g_atomic_int_add(&inode->fs->cache_dirty, 1);
+
     block->type = BLUESKY_BLOCK_DIRTY;
 }
 
@@ -55,25 +61,52 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
     if (size == inode->size)
         return;
 
+    if (bluesky_verbose) {
+        g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
+              "Truncating file to %"PRIi64" bytes", size);
+    }
+
     uint64_t blocks = (size + BLUESKY_BLOCK_SIZE - 1) / BLUESKY_BLOCK_SIZE;
 
     if (blocks > inode->blocks->len) {
         /* Need to add new blocks to the end of a file.  New block structures
          * are automatically zeroed, which initializes them to be pointers to
-         * zero blocks so we don't need to do any more work. */
+         * zero blocks so we don't need to do any more work.  If the
+         * previously-last block in the file is smaller than
+         * BLUESKY_BLOCK_SIZE, extend it to full size. */
+        if (inode->blocks->len > 0) {
+            BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
+                                             inode->blocks->len - 1);
+
+            if (b->type != BLUESKY_BLOCK_ZERO
+                    && (b->type == BLUESKY_BLOCK_REF
+                        || b->data->len < BLUESKY_BLOCK_SIZE)) {
+                bluesky_block_touch(inode, inode->blocks->len - 1);
+                gsize old_size = b->data->len;
+                bluesky_string_resize(b->data, BLUESKY_BLOCK_SIZE);
+                memset(&b->data->data[old_size], 0,
+                       BLUESKY_BLOCK_SIZE - old_size);
+            }
+        }
+
         g_array_set_size(inode->blocks, blocks);
     } else if (blocks < inode->blocks->len) {
         /* Delete blocks from a file.  Must reclaim memory. */
         for (guint i = inode->blocks->len; i < blocks; i++) {
             BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
             g_free(b->ref);
+            if (b->type == BLUESKY_BLOCK_CACHED
+                    || b->type == BLUESKY_BLOCK_DIRTY)
+                g_atomic_int_add(&inode->fs->cache_total, -1);
+            if (b->type == BLUESKY_BLOCK_DIRTY)
+                g_atomic_int_add(&inode->fs->cache_dirty, -1);
             bluesky_string_unref(b->data);
         }
         g_array_set_size(inode->blocks, blocks);
     }
 
-    /* Ensure the last block of the file is properly sized.  If the block is
-     * extended, newly-added bytes must be zeroed. */
+    /* Ensure the new last block of the file is properly sized.  If the block
+     * is extended, newly-added bytes must be zeroed. */
     if (blocks > 0) {
         BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
                                          blocks - 1);
@@ -114,7 +147,6 @@ void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
         BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
                                          block_num);
         memcpy(&b->data->data[block_offset], data, bytes);
-        bluesky_block_flush(inode->fs, b);
 
         offset += bytes;
         data += bytes;
@@ -122,16 +154,42 @@ void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
     }
 
     bluesky_inode_update_ctime(inode, 1);
-    bluesky_inode_flush(inode->fs, inode);
 }
 
 void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
                        char *buf, gint len)
 {
+    if (len == 0 && offset <= inode->size)
+        return;
+
     g_return_if_fail(inode->type == BLUESKY_REGULAR);
     g_return_if_fail(offset < inode->size);
     g_return_if_fail(len <= inode->size - offset);
 
+    /* Start fetches on any data blocks that we will need for this read. */
+    BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store);
+    barrier->op = STORE_OP_BARRIER;
+    uint64_t start_block, end_block;
+    start_block = offset / BLUESKY_BLOCK_SIZE;
+    end_block = (offset + len - 1) / BLUESKY_BLOCK_SIZE;
+    if (bluesky_verbose) {
+        g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
+              "Start prefetch on blocks %"PRIi64" .. %"PRIi64,
+              start_block, end_block);
+    }
+    for (uint64_t i = start_block; i <= end_block; i++) {
+        BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
+                                         i);
+        if (b->type == BLUESKY_BLOCK_REF)
+            bluesky_block_fetch(inode->fs, b, barrier);
+    }
+    bluesky_store_async_submit(barrier);
+    bluesky_store_async_wait(barrier);
+    bluesky_store_async_unref(barrier);
+    if (bluesky_verbose) {
+        g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete.");
+    }
+
     while (len > 0) {
         uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
         gint block_offset = offset % BLUESKY_BLOCK_SIZE;
@@ -144,7 +202,7 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
             memset(buf, 0, bytes);
             break;
         case BLUESKY_BLOCK_REF:
-            bluesky_block_fetch(inode->fs, b);
+            bluesky_block_fetch(inode->fs, b, NULL);
             /* Fall through */
         case BLUESKY_BLOCK_CACHED:
         case BLUESKY_BLOCK_DIRTY:
@@ -160,20 +218,48 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
 
 /* Read the given block from cloud-backed storage if the data is not already
  * cached. */
-void bluesky_block_fetch(BlueSkyFS *fs, BlueSkyBlock *block)
+/* Notifier attached by bluesky_block_fetch() to its STORE_OP_GET request.
+ * data is the BlueSkyBlock being filled.  The block's previous data reference
+ * is released and replaced by async->data; if the store returned no data the
+ * block is backed by a zero-filled buffer of BLUESKY_BLOCK_SIZE bytes instead.
+ * Either way the block ends up BLUESKY_BLOCK_CACHED. */
+static void block_fetch_completion(BlueSkyStoreAsync *async, gpointer data)
+{
+    BlueSkyBlock *block = (BlueSkyBlock *)data;
+
+    bluesky_string_unref(block->data);
+
+    if (async->data != NULL) {
+        /* Take the block's own reference only once we know there is a string
+         * to reference, rather than relying on bluesky_string_ref() accepting
+         * NULL. */
+        block->data = async->data;
+        bluesky_string_ref(block->data);
+    } else {
+        /* The fetch produced nothing: warn and substitute zeroes so that the
+         * block still carries a full-size buffer. */
+        g_warning("Failed to fetch data block from store!\n");
+        block->data = bluesky_string_new(g_malloc0(BLUESKY_BLOCK_SIZE),
+                                         BLUESKY_BLOCK_SIZE);
+    }
+
+    block->type = BLUESKY_BLOCK_CACHED;
+}
+
+/* Fetch the data for a BLUESKY_BLOCK_REF block from the store, using
+ * block->ref as the key; blocks of any other type are left alone.  When
+ * barrier is non-NULL the GET is attached to it and this function returns
+ * without waiting, so the block may still be BLUESKY_BLOCK_REF on return.
+ * With a NULL barrier the call waits on the request before returning.
+ * block_fetch_completion() installs the data and marks the block
+ * BLUESKY_BLOCK_CACHED. */
+void bluesky_block_fetch(BlueSkyFS *fs, BlueSkyBlock *block,
+                         BlueSkyStoreAsync *barrier)
 {
     if (block->type != BLUESKY_BLOCK_REF)
         return;
 
-    BlueSkyRCStr *string = bluesky_store_get(fs->store, block->ref);
+    BlueSkyStoreAsync *async = bluesky_store_async_new(fs->store);
+    async->op = STORE_OP_GET;
+    async->key = g_strdup(block->ref);
+    /* NOTE(review): the notifier keeps a raw pointer to block.  Callers in
+     * this file pass pointers into the inode->blocks GArray, so presumably
+     * the array must not be resized before the request completes -- confirm. */
+    bluesky_store_async_add_notifier(async, (GFunc)block_fetch_completion, block);
+    bluesky_store_async_submit(async);
 
-    bluesky_string_unref(block->data);
-    block->data = string;
-    block->type = BLUESKY_BLOCK_CACHED;
+    if (barrier != NULL)
+        bluesky_store_add_barrier(barrier, async);
+    else
+        bluesky_store_async_wait(async);
+
+    /* Drop this function's handle on the request. */
+    bluesky_store_async_unref(async);
+    /* The block is counted as cached as soon as the fetch is submitted, even
+     * if (in the barrier case) its data has not arrived yet. */
+    g_atomic_int_add(&fs->cache_total, 1);
 }
 
 /* Write the given block to cloud-backed storage and mark it clean. */
-void bluesky_block_flush(BlueSkyFS *fs, BlueSkyBlock *block)
+void bluesky_block_flush(BlueSkyFS *fs, BlueSkyBlock *block,
+                         BlueSkyStoreAsync *barrier,
+                         GList **log_items)
 {
     if (block->type != BLUESKY_BLOCK_DIRTY)
         return;
@@ -181,18 +267,70 @@ void bluesky_block_flush(BlueSkyFS *fs, BlueSkyBlock *block)
     BlueSkyRCStr *data = block->data;
 
     GChecksum *csum = g_checksum_new(G_CHECKSUM_SHA256);
-    g_checksum_update(csum, data->data, data->len);
+    g_checksum_update(csum, (const guchar *)data->data, data->len);
     gchar *name = g_strdup(g_checksum_get_string(csum));
 
-    bluesky_store_put(fs->store, name, data);
+    /* Start commit to the local log. */
+    BlueSkyLogItem *log_item = bluesky_log_item_new();
+    log_item->key = g_strdup(name);
+    log_item->data = data;
+    bluesky_string_ref(data);
+    bluesky_log_item_submit(log_item, fs->log);
+    *log_items = g_list_prepend(*log_items, log_item);
+
+    /* Store the file data asynchronously, and don't bother waiting for a
+     * response. */
+    BlueSkyStoreAsync *async = bluesky_store_async_new(fs->store);
+    async->op = STORE_OP_PUT;
+    async->key = g_strdup(name);
+    bluesky_string_ref(data);
+    async->data = data;
+    bluesky_store_async_submit(async);
+    if (barrier != NULL)
+        bluesky_store_add_barrier(barrier, async);
+    bluesky_store_async_unref(async);
+
     g_free(block->ref);
     block->ref = name;
 
-    /* block->type = BLUESKY_BLOCK_CACHED; */
-    bluesky_string_unref(block->data);
-    block->data = NULL;
-    block->type = BLUESKY_BLOCK_REF;
+    block->type = BLUESKY_BLOCK_CACHED;
+    g_atomic_int_add(&fs->cache_dirty, -1);
 
     g_checksum_free(csum);
-    //bluesky_string_unref(data);
+}
+
+/* Flush all blocks in a file to stable storage.
+ *
+ * Every block of the inode is handed to bluesky_block_flush(), which returns
+ * immediately for blocks that are not BLUESKY_BLOCK_DIRTY.  barrier and
+ * log_items are passed through unchanged: bluesky_block_flush() attaches each
+ * store write to barrier when it is non-NULL and prepends the log item it
+ * creates to *log_items, so log_items must point to a valid list head.
+ * Returns without flushing anything if the inode is not a regular file. */
+void bluesky_file_flush(BlueSkyInode *inode, BlueSkyStoreAsync *barrier,
+                        GList **log_items)
+{
+    g_return_if_fail(inode->type == BLUESKY_REGULAR);
+
+    /* GArray's len is a guint; use an unsigned index so the loop comparison
+     * does not mix signed and unsigned values. */
+    for (guint i = 0; i < inode->blocks->len; i++) {
+        BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
+        bluesky_block_flush(inode->fs, b, barrier, log_items);
+    }
+}
+
+/* Drop clean data blocks for a file from cache.
+ *
+ * Each BLUESKY_BLOCK_CACHED block gives up its reference to the in-memory
+ * data and reverts to BLUESKY_BLOCK_REF, and the filesystem's cache_total
+ * counter is decremented once per dropped block.  Blocks of any other type
+ * (including dirty ones) are left untouched.  Returns without doing anything
+ * if the inode is not a regular file. */
+void bluesky_file_drop_cached(BlueSkyInode *inode)
+{
+    g_return_if_fail(inode->type == BLUESKY_REGULAR);
+
+    /* GArray's len is a guint, so index with an unsigned counter (and print
+     * it with the matching %u conversion). */
+    for (guint i = 0; i < inode->blocks->len; i++) {
+        BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
+        if (b->type != BLUESKY_BLOCK_CACHED)
+            continue;
+
+        if (bluesky_verbose) {
+            g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
+                  "Dropping block %u of inode %"PRIu64" from cache",
+                  i, inode->inum);
+            g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
+                  "  (reference count was %d)", b->data->refcount);
+        }
+
+        /* Release this block's reference to the data.  b->ref is left in
+         * place, which is the key bluesky_block_fetch() uses to reload a
+         * BLUESKY_BLOCK_REF block. */
+        bluesky_string_unref(b->data);
+        b->data = NULL;
+        b->type = BLUESKY_BLOCK_REF;
+        g_atomic_int_add(&inode->fs->cache_total, -1);
+    }
+}