Improve the reading back of objects committed to the journal.
[bluesky.git] / bluesky / file.c
index 6ba39e3..34e103f 100644 (file)
@@ -31,11 +31,10 @@ void bluesky_block_touch(BlueSkyInode *inode, uint64_t i)
 
     switch (block->type) {
     case BLUESKY_BLOCK_ZERO:
-        g_print("Allocating zero block of size %zd\n", block_len);
         block->data = bluesky_string_new(g_malloc0(block_len), block_len);
         break;
     case BLUESKY_BLOCK_REF:
-        bluesky_block_fetch(inode->fs, block);
+        bluesky_block_fetch(inode, block, NULL);
         g_assert(block->type == BLUESKY_BLOCK_CACHED);
         /* Fall through */
     case BLUESKY_BLOCK_CACHED:
@@ -44,7 +43,16 @@ void bluesky_block_touch(BlueSkyInode *inode, uint64_t i)
         break;
     }
 
+    if (block->type != BLUESKY_BLOCK_CACHED
+            && block->type != BLUESKY_BLOCK_DIRTY)
+        g_atomic_int_add(&inode->fs->cache_total, 1);
+    if (block->type != BLUESKY_BLOCK_DIRTY)
+        g_atomic_int_add(&inode->fs->cache_dirty, 1);
+
     block->type = BLUESKY_BLOCK_DIRTY;
+    if (block->cloudref != NULL)
+        bluesky_cloudlog_unref(block->cloudref);
+    block->cloudref = NULL;
 }
 
 /* Set the size of a file.  This will truncate or extend the file as needed.
@@ -56,7 +64,10 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
     if (size == inode->size)
         return;
 
-    g_print("Truncating file to %"PRIi64" bytes\n", size);
+    if (bluesky_verbose) {
+        g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
+              "Truncating file to %"PRIi64" bytes", size);
+    }
 
     uint64_t blocks = (size + BLUESKY_BLOCK_SIZE - 1) / BLUESKY_BLOCK_SIZE;
 
@@ -86,8 +97,13 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
         /* Delete blocks from a file.  Must reclaim memory. */
         for (guint i = inode->blocks->len; i < blocks; i++) {
             BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
-            g_free(b->ref);
+            if (b->type == BLUESKY_BLOCK_CACHED
+                    || b->type == BLUESKY_BLOCK_DIRTY)
+                g_atomic_int_add(&inode->fs->cache_total, -1);
+            if (b->type == BLUESKY_BLOCK_DIRTY)
+                g_atomic_int_add(&inode->fs->cache_dirty, -1);
             bluesky_string_unref(b->data);
+            bluesky_cloudlog_unref(b->cloudref);
         }
         g_array_set_size(inode->blocks, blocks);
     }
@@ -118,8 +134,6 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
 void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
                         const char *data, gint len)
 {
-    g_print("Write %d bytes at offset %"PRIi64"\n", len, offset);
-
     g_return_if_fail(inode->type == BLUESKY_REGULAR);
     g_return_if_fail(offset < inode->size);
     g_return_if_fail(len <= inode->size - offset);
@@ -148,8 +162,6 @@ void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
 void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
                        char *buf, gint len)
 {
-    g_print("Read %d bytes at offset %"PRIi64"\n", len, offset);
-
     if (len == 0 && offset <= inode->size)
         return;
 
@@ -157,6 +169,30 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
     g_return_if_fail(offset < inode->size);
     g_return_if_fail(len <= inode->size - offset);
 
+    /* Start fetches on any data blocks that we will need for this read. */
+    BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store);
+    barrier->op = STORE_OP_BARRIER;
+    uint64_t start_block, end_block;
+    start_block = offset / BLUESKY_BLOCK_SIZE;
+    end_block = (offset + len - 1) / BLUESKY_BLOCK_SIZE;
+    if (bluesky_verbose) {
+        g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
+              "Start prefetch on blocks %"PRIi64" .. %"PRIi64,
+              start_block, end_block);
+    }
+    for (uint64_t i = start_block; i <= end_block; i++) {
+        BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
+                                         i);
+        if (b->type == BLUESKY_BLOCK_REF)
+            bluesky_block_fetch(inode, b, barrier);
+    }
+    bluesky_store_async_submit(barrier);
+    bluesky_store_async_wait(barrier);
+    bluesky_store_async_unref(barrier);
+    if (bluesky_verbose) {
+        g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete.");
+    }
+
     while (len > 0) {
         uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
         gint block_offset = offset % BLUESKY_BLOCK_SIZE;
@@ -169,7 +205,7 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
             memset(buf, 0, bytes);
             break;
         case BLUESKY_BLOCK_REF:
-            bluesky_block_fetch(inode->fs, b);
+            bluesky_block_fetch(inode, b, NULL);
             /* Fall through */
         case BLUESKY_BLOCK_CACHED:
         case BLUESKY_BLOCK_DIRTY:
@@ -183,61 +219,59 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
     }
 }
 
-/* Read the given block from cloud-backed storage if the data is not already
- * cached. */
-void bluesky_block_fetch(BlueSkyFS *fs, BlueSkyBlock *block)
+void bluesky_block_fetch(BlueSkyInode *inode, BlueSkyBlock *block,
+                         BlueSkyStoreAsync *barrier)
 {
     if (block->type != BLUESKY_BLOCK_REF)
         return;
 
-    BlueSkyRCStr *string = bluesky_store_get(fs->store, block->ref);
-
-    bluesky_string_unref(block->data);
-    block->data = string;
+    g_mutex_lock(block->cloudref->lock);
+    bluesky_cloudlog_fetch(block->cloudref);
+    block->data = block->cloudref->data;
+    bluesky_string_ref(block->data);
+    g_mutex_unlock(block->cloudref->lock);
     block->type = BLUESKY_BLOCK_CACHED;
+    g_atomic_int_add(&inode->fs->cache_total, 1);
 }
 
 /* Write the given block to cloud-backed storage and mark it clean. */
-void bluesky_block_flush(BlueSkyFS *fs, BlueSkyBlock *block,
-                         BlueSkyStoreAsync *barrier)
+void bluesky_block_flush(BlueSkyInode *inode, BlueSkyBlock *block,
+                         GList **log_items)
 {
+    BlueSkyFS *fs = inode->fs;
+
     if (block->type != BLUESKY_BLOCK_DIRTY)
         return;
 
-    BlueSkyRCStr *data = block->data;
+    if (block->cloudref != NULL)
+        bluesky_cloudlog_unref(block->cloudref);
 
-    GChecksum *csum = g_checksum_new(G_CHECKSUM_SHA256);
-    g_checksum_update(csum, (const guchar *)data->data, data->len);
-    gchar *name = g_strdup(g_checksum_get_string(csum));
+    BlueSkyRCStr *data = block->data;
 
-    /* Store the file data asynchronously, and don't bother waiting for a
-     * response. */
-    BlueSkyStoreAsync *async = bluesky_store_async_new(fs->store);
-    async->op = STORE_OP_PUT;
-    async->key = g_strdup(name);
+    BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs);
+    cloudlog->type = LOGTYPE_DATA;
+    cloudlog->inum = inode->inum;
+    cloudlog->data = data;
     bluesky_string_ref(data);
-    async->data = data;
-    bluesky_store_async_submit(async);
-    if (barrier != NULL)
-        bluesky_store_add_barrier(barrier, async);
-    bluesky_store_async_unref(async);
+    bluesky_cloudlog_sync(cloudlog);
+    *log_items = g_list_prepend(*log_items, cloudlog);
+    bluesky_cloudlog_insert(cloudlog);
 
-    g_free(block->ref);
-    block->ref = name;
+    block->cloudref = cloudlog;
+    bluesky_cloudlog_ref(cloudlog);
 
     block->type = BLUESKY_BLOCK_CACHED;
-
-    g_checksum_free(csum);
+    g_atomic_int_add(&fs->cache_dirty, -1);
 }
 
 /* Flush all blocks in a file to stable storage. */
-void bluesky_file_flush(BlueSkyInode *inode, BlueSkyStoreAsync *barrier)
+void bluesky_file_flush(BlueSkyInode *inode, GList **log_items)
 {
     g_return_if_fail(inode->type == BLUESKY_REGULAR);
 
     for (int i = 0; i < inode->blocks->len; i++) {
         BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
-        bluesky_block_flush(inode->fs, b, barrier);
+        bluesky_block_flush(inode, b, log_items);
     }
 }
 
@@ -249,13 +283,22 @@ void bluesky_file_drop_cached(BlueSkyInode *inode)
     for (int i = 0; i < inode->blocks->len; i++) {
         BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
         if (b->type == BLUESKY_BLOCK_CACHED) {
-            g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
-                  "Dropping block %d of inode %"PRIu64" from cache",
-                  i, inode->inum);
+            if (bluesky_verbose) {
+                g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
+                      "Dropping block %d of inode %"PRIu64" from cache",
+                      i, inode->inum);
+                g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
+                      "  (reference count was %d)", b->data->refcount);
+            }
 
             bluesky_string_unref(b->data);
             b->data = NULL;
             b->type = BLUESKY_BLOCK_REF;
+            g_atomic_int_add(&inode->fs->cache_total, -1);
+            g_mutex_lock(b->cloudref->lock);
+            bluesky_string_unref(b->cloudref->data);
+            b->cloudref->data = NULL;
+            g_mutex_unlock(b->cloudref->lock);
         }
     }
 }