Rework caching of data blocks to eliminate double-caching.
[bluesky.git] / bluesky / file.c
index b329531..977618d 100644 (file)
@@ -31,31 +31,33 @@ void bluesky_block_touch(BlueSkyInode *inode, uint64_t i)
 
     switch (block->type) {
     case BLUESKY_BLOCK_ZERO:
-        block->data = bluesky_string_new(g_malloc0(block_len), block_len);
+        block->dirty = bluesky_string_new(g_malloc0(block_len), block_len);
         break;
     case BLUESKY_BLOCK_REF:
+        // FIXME: locking on the cloudlog?
         bluesky_block_fetch(inode, block, NULL);
-        g_assert(block->type == BLUESKY_BLOCK_CACHED);
-        /* Fall through */
-    case BLUESKY_BLOCK_CACHED:
+        bluesky_string_ref(block->ref->data);
+        block->dirty = bluesky_string_dup(block->ref->data);
+        break;
     case BLUESKY_BLOCK_DIRTY:
-        block->data = bluesky_string_dup(block->data);
+        block->dirty = bluesky_string_dup(block->dirty);
         break;
     }
 
-    if (block->type != BLUESKY_BLOCK_CACHED
+    /*if (block->type != BLUESKY_BLOCK_CACHED
             && block->type != BLUESKY_BLOCK_DIRTY)
-        g_atomic_int_add(&inode->fs->cache_total, 1);
+        g_atomic_int_add(&inode->fs->cache_total, 1);   //FIXME */
     if (block->type != BLUESKY_BLOCK_DIRTY)
         g_atomic_int_add(&inode->fs->cache_dirty, 1);
 
     block->type = BLUESKY_BLOCK_DIRTY;
-    bluesky_cloudlog_unref(block->cloudref);
-    block->cloudref = NULL;
+    bluesky_cloudlog_unref(block->ref);
+    block->ref = NULL;
 }
 
 /* Set the size of a file.  This will truncate or extend the file as needed.
  * Newly-allocated bytes are zeroed. */
+// FIXME
 void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
 {
     g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE);
@@ -82,11 +84,11 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
 
             if (b->type != BLUESKY_BLOCK_ZERO
                     && (b->type == BLUESKY_BLOCK_REF
-                        || b->data->len < BLUESKY_BLOCK_SIZE)) {
+                        || b->dirty->len < BLUESKY_BLOCK_SIZE)) {
                 bluesky_block_touch(inode, inode->blocks->len - 1);
-                gsize old_size = b->data->len;
-                bluesky_string_resize(b->data, BLUESKY_BLOCK_SIZE);
-                memset(&b->data->data[old_size], 0,
+                gsize old_size = b->dirty->len;
+                bluesky_string_resize(b->dirty, BLUESKY_BLOCK_SIZE);
+                memset(&b->dirty->data[old_size], 0,
                        BLUESKY_BLOCK_SIZE - old_size);
             }
         }
@@ -96,13 +98,13 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
         /* Delete blocks from a file.  Must reclaim memory. */
         for (guint i = inode->blocks->len; i < blocks; i++) {
             BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
-            if (b->type == BLUESKY_BLOCK_CACHED
+            /* if (b->type == BLUESKY_BLOCK_CACHED
                     || b->type == BLUESKY_BLOCK_DIRTY)
-                g_atomic_int_add(&inode->fs->cache_total, -1);
+                g_atomic_int_add(&inode->fs->cache_total, -1); FIXME */
             if (b->type == BLUESKY_BLOCK_DIRTY)
                 g_atomic_int_add(&inode->fs->cache_dirty, -1);
-            bluesky_string_unref(b->data);
-            bluesky_cloudlog_unref(b->cloudref);
+            bluesky_string_unref(b->dirty);
+            bluesky_cloudlog_unref(b->ref);
         }
         g_array_set_size(inode->blocks, blocks);
     }
@@ -115,13 +117,13 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
 
         if (b->type != BLUESKY_BLOCK_ZERO) {
             bluesky_block_touch(inode, blocks - 1);
-            gsize old_size = b->data->len;
+            gsize old_size = b->dirty->len;
             gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE;
 
-            bluesky_string_resize(b->data, new_size);
+            bluesky_string_resize(b->dirty, new_size);
 
             if (new_size > old_size) {
-                memset(&b->data->data[old_size], 0, new_size - old_size);
+                memset(&b->dirty->data[old_size], 0, new_size - old_size);
             }
         }
     }
@@ -140,6 +142,8 @@ void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
     if (len == 0)
         return;
 
+    // TODO: Optimization: If we are entirely overwriting a block we don't need
+    // to fetch it from storage first.
     while (len > 0) {
         uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
         gint block_offset = offset % BLUESKY_BLOCK_SIZE;
@@ -148,7 +152,7 @@ void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
         bluesky_block_touch(inode, block_num);
         BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
                                          block_num);
-        memcpy(&b->data->data[block_offset], data, bytes);
+        memcpy(&b->dirty->data[block_offset], data, bytes);
 
         offset += bytes;
         data += bytes;
@@ -168,6 +172,7 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
     g_return_if_fail(offset < inode->size);
     g_return_if_fail(len <= inode->size - offset);
 
+#if 0
     /* Start fetches on any data blocks that we will need for this read. */
     BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store);
     barrier->op = STORE_OP_BARRIER;
@@ -191,6 +196,7 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
     if (bluesky_verbose) {
         g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete.");
     }
+#endif
 
     while (len > 0) {
         uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
@@ -199,17 +205,17 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
 
         BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
                                          block_num);
-        switch (b->type) {
-        case BLUESKY_BLOCK_ZERO:
+        if (b->type == BLUESKY_BLOCK_ZERO) {
             memset(buf, 0, bytes);
-            break;
-        case BLUESKY_BLOCK_REF:
-            bluesky_block_fetch(inode, b, NULL);
-            /* Fall through */
-        case BLUESKY_BLOCK_CACHED:
-        case BLUESKY_BLOCK_DIRTY:
-            memcpy(buf, &b->data->data[block_offset], bytes);
-            break;
+        } else {
+            BlueSkyRCStr *data = NULL;
+            if (b->type == BLUESKY_BLOCK_REF) {
+                bluesky_block_fetch(inode, b, NULL);
+                data = b->ref->data;
+            } else if (b->type == BLUESKY_BLOCK_DIRTY) {
+                data = b->dirty;
+            }
+            memcpy(buf, &data->data[block_offset], bytes);
         }
 
         offset += bytes;
@@ -224,13 +230,11 @@ void bluesky_block_fetch(BlueSkyInode *inode, BlueSkyBlock *block,
     if (block->type != BLUESKY_BLOCK_REF)
         return;
 
-    g_mutex_lock(block->cloudref->lock);
-    bluesky_cloudlog_fetch(block->cloudref);
-    block->data = block->cloudref->data;
-    bluesky_string_ref(block->data);
-    g_mutex_unlock(block->cloudref->lock);
-    block->type = BLUESKY_BLOCK_CACHED;
-    g_atomic_int_add(&inode->fs->cache_total, 1);
+    g_mutex_lock(block->ref->lock);
+    bluesky_cloudlog_fetch(block->ref);
+    g_mutex_unlock(block->ref->lock);
+    block->type = BLUESKY_BLOCK_REF;
+    g_atomic_int_add(&inode->fs->cache_total, 1);  //FIXME
 }
 
 /* Write the given block to cloud-backed storage and mark it clean. */
@@ -242,22 +246,21 @@ void bluesky_block_flush(BlueSkyInode *inode, BlueSkyBlock *block,
     if (block->type != BLUESKY_BLOCK_DIRTY)
         return;
 
-    bluesky_cloudlog_unref(block->cloudref);
-
-    BlueSkyRCStr *data = block->data;
+    g_assert(block->ref == NULL);
 
     BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs);
     cloudlog->type = LOGTYPE_DATA;
     cloudlog->inum = inode->inum;
-    cloudlog->data = data;
-    bluesky_string_ref(data);
+    cloudlog->data = block->dirty;      // String ownership is transferred
     bluesky_cloudlog_sync(cloudlog);
+    bluesky_cloudlog_ref(cloudlog);     // Reference for log_items list
     *log_items = g_list_prepend(*log_items, cloudlog);
     bluesky_cloudlog_insert(cloudlog);
 
-    block->cloudref = cloudlog;
+    block->ref = cloudlog;              // Uses initial reference from _new()
 
-    block->type = BLUESKY_BLOCK_CACHED;
+    block->type = BLUESKY_BLOCK_REF;
+    block->dirty = NULL;
     g_atomic_int_add(&fs->cache_dirty, -1);
 }
 
@@ -279,25 +282,17 @@ void bluesky_file_drop_cached(BlueSkyInode *inode)
 
     for (int i = 0; i < inode->blocks->len; i++) {
         BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
-        if (b->type == BLUESKY_BLOCK_CACHED) {
-            if (bluesky_verbose) {
-                g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
-                      "Dropping block %d of inode %"PRIu64" from cache",
-                      i, inode->inum);
-                g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
-                      "  (reference count was %d)", b->data->refcount);
+        if (b->type == BLUESKY_BLOCK_REF) {
+            g_mutex_lock(b->ref->lock);
+            if (b->ref->data != NULL
+                && g_atomic_int_get(&b->ref->data_lock_count) == 0
+                && (b->ref->location_flags != 0))
+            {
+                bluesky_string_unref(b->ref->data);
+                b->ref->data = NULL;
             }
-
-            bluesky_string_unref(b->data);
-            b->data = NULL;
-            b->type = BLUESKY_BLOCK_REF;
+            g_mutex_unlock(b->ref->lock);
             g_atomic_int_add(&inode->fs->cache_total, -1);
-            g_mutex_lock(b->cloudref->lock);
-            if (b->cloudref->location_flags & CLOUDLOG_JOURNAL) {
-                bluesky_string_unref(b->cloudref->data);
-                b->cloudref->data = NULL;
-            }
-            g_mutex_unlock(b->cloudref->lock);
         }
     }
 }