/* Core filesystem: handling of regular files and caching of file data. */
/* Mark a given block dirty and make sure that data is faulted in so that it
- * can be written to. */
-void bluesky_block_touch(BlueSkyInode *inode, uint64_t i)
+ * can be written to.
+ *
+ * If preserve is FALSE, the caller is hinting that it is about to overwrite
+ * the entire block: a dirty block is still made available, but its prior
+ * contents may be discarded. Passing preserve = TRUE is always safe. */
+void bluesky_block_touch(BlueSkyInode *inode, uint64_t i, gboolean preserve)
{
g_return_if_fail(i < inode->blocks->len);
BlueSkyBlock *block = &g_array_index(inode->blocks, BlueSkyBlock, i);
switch (block->type) {
case BLUESKY_BLOCK_ZERO:
- block->data = bluesky_string_new(g_malloc0(block_len), block_len);
+ block->dirty = bluesky_string_new(g_malloc0(block_len), block_len);
break;
case BLUESKY_BLOCK_REF:
- bluesky_block_fetch(inode->fs, block, NULL);
- g_assert(block->type == BLUESKY_BLOCK_CACHED);
- /* Fall through */
- case BLUESKY_BLOCK_CACHED:
+ if (preserve) {
+ // FIXME: locking on the cloudlog? block->ref->data is read below
+ // without block->ref->lock being held.
+ bluesky_block_fetch(inode, block, NULL);
+ bluesky_string_ref(block->ref->data);
+ block->dirty = bluesky_string_dup(block->ref->data);
+ } else {
+ block->dirty = bluesky_string_new(g_malloc0(block_len), block_len);
+ }
+ break;
case BLUESKY_BLOCK_DIRTY:
- block->data = bluesky_string_dup(block->data);
+ block->dirty = bluesky_string_dup(block->dirty);
break;
}
- if (block->type != BLUESKY_BLOCK_CACHED
- && block->type != BLUESKY_BLOCK_DIRTY)
- g_atomic_int_add(&inode->fs->cache_total, 1);
if (block->type != BLUESKY_BLOCK_DIRTY)
g_atomic_int_add(&inode->fs->cache_dirty, 1);
block->type = BLUESKY_BLOCK_DIRTY;
+ bluesky_cloudlog_unref(block->ref);
+ block->ref = NULL;
}
/* Set the size of a file. This will truncate or extend the file as needed.
* Newly-allocated bytes are zeroed. */
+// FIXME
void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
{
g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE);
uint64_t blocks = (size + BLUESKY_BLOCK_SIZE - 1) / BLUESKY_BLOCK_SIZE;
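+ /* Calculate the number of bytes used in the last block of the file, both
+ * before (lastblock_old) and after (lastblock_new) the size change. A
+ * non-empty file whose size is a multiple of the block size has a full
+ * last block. */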
+ int lastblock_old, lastblock_new;
+ lastblock_old = inode->size % BLUESKY_BLOCK_SIZE;
+ if (lastblock_old == 0 && inode->size > 0)
+ lastblock_old = BLUESKY_BLOCK_SIZE;
+ lastblock_new = size % BLUESKY_BLOCK_SIZE;
+ if (lastblock_new == 0 && size > 0)
+ lastblock_new = BLUESKY_BLOCK_SIZE;
+
if (blocks > inode->blocks->len) {
/* Need to add new blocks to the end of a file. New block structures
* are automatically zeroed, which initializes them to be pointers to
inode->blocks->len - 1);
if (b->type != BLUESKY_BLOCK_ZERO
- && (b->type == BLUESKY_BLOCK_REF
- || b->data->len < BLUESKY_BLOCK_SIZE)) {
- bluesky_block_touch(inode, inode->blocks->len - 1);
- gsize old_size = b->data->len;
- bluesky_string_resize(b->data, BLUESKY_BLOCK_SIZE);
- memset(&b->data->data[old_size], 0,
+ && lastblock_old < BLUESKY_BLOCK_SIZE) {
+ bluesky_block_touch(inode, inode->blocks->len - 1, TRUE);
+ gsize old_size = b->dirty->len;
+ if (lastblock_old != old_size) {
+ fprintf(stderr,
+ "Warning: last block size = %zd, expected %d\n",
+ old_size, lastblock_old);
+ }
+ bluesky_string_resize(b->dirty, BLUESKY_BLOCK_SIZE);
+ memset(&b->dirty->data[old_size], 0,
BLUESKY_BLOCK_SIZE - old_size);
}
}
g_array_set_size(inode->blocks, blocks);
} else if (blocks < inode->blocks->len) {
/* Delete blocks from a file. Must reclaim memory. */
- for (guint i = inode->blocks->len; i < blocks; i++) {
+ for (guint i = blocks; i < inode->blocks->len; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
- g_free(b->ref);
- if (b->type == BLUESKY_BLOCK_CACHED
- || b->type == BLUESKY_BLOCK_DIRTY)
- g_atomic_int_add(&inode->fs->cache_total, -1);
if (b->type == BLUESKY_BLOCK_DIRTY)
g_atomic_int_add(&inode->fs->cache_dirty, -1);
- bluesky_string_unref(b->data);
+ bluesky_string_unref(b->dirty);
+ bluesky_cloudlog_unref(b->ref);
}
g_array_set_size(inode->blocks, blocks);
}
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
blocks - 1);
- if (b->type != BLUESKY_BLOCK_ZERO) {
- bluesky_block_touch(inode, blocks - 1);
- gsize old_size = b->data->len;
+ gboolean need_resize = TRUE;
+ if (b->type == BLUESKY_BLOCK_ZERO)
+ need_resize = FALSE;
+ else if (size < inode->size && lastblock_new == BLUESKY_BLOCK_SIZE)
+ need_resize = FALSE;
+
+ if (need_resize) {
+ bluesky_block_touch(inode, blocks - 1, TRUE);
+ gsize old_size = b->dirty->len;
gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE;
- bluesky_string_resize(b->data, new_size);
+ bluesky_string_resize(b->dirty, new_size);
if (new_size > old_size) {
- memset(&b->data->data[old_size], 0, new_size - old_size);
+ memset(&b->dirty->data[old_size], 0, new_size - old_size);
}
}
}
gint block_offset = offset % BLUESKY_BLOCK_SIZE;
gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len);
- bluesky_block_touch(inode, block_num);
+ gboolean preserve = TRUE;
+ gsize block_size = BLUESKY_BLOCK_SIZE;
+ if (block_num == inode->blocks->len - 1) {
+ block_size = inode->size - block_num * BLUESKY_BLOCK_SIZE;
+ }
+ if (block_offset == 0 && bytes == block_size) {
+ preserve = FALSE;
+ }
+ bluesky_block_touch(inode, block_num, preserve);
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
block_num);
- memcpy(&b->data->data[block_offset], data, bytes);
+ memcpy(&b->dirty->data[block_offset], data, bytes);
offset += bytes;
data += bytes;
g_return_if_fail(offset < inode->size);
g_return_if_fail(len <= inode->size - offset);
- /* Start fetches on any data blocks that we will need for this read. */
- BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store);
- barrier->op = STORE_OP_BARRIER;
+ BlueSkyProfile *profile = bluesky_profile_get();
+
+ bluesky_profile_add_event(profile,
+ g_strdup_printf("Start file read prefetch"));
uint64_t start_block, end_block;
start_block = offset / BLUESKY_BLOCK_SIZE;
end_block = (offset + len - 1) / BLUESKY_BLOCK_SIZE;
- if (bluesky_verbose) {
- g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
- "Start prefetch on blocks %"PRIi64" .. %"PRIi64,
- start_block, end_block);
- }
for (uint64_t i = start_block; i <= end_block; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
i);
if (b->type == BLUESKY_BLOCK_REF)
- bluesky_block_fetch(inode->fs, b, barrier);
- }
- bluesky_store_async_submit(barrier);
- bluesky_store_async_wait(barrier);
- bluesky_store_async_unref(barrier);
- if (bluesky_verbose) {
- g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete.");
+ bluesky_cloudlog_prefetch(b->ref);
}
+ bluesky_profile_add_event(profile,
+ g_strdup_printf("End file read prefetch"));
+
while (len > 0) {
uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
gint block_offset = offset % BLUESKY_BLOCK_SIZE;
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
block_num);
- switch (b->type) {
- case BLUESKY_BLOCK_ZERO:
+ if (b->type == BLUESKY_BLOCK_ZERO) {
memset(buf, 0, bytes);
- break;
- case BLUESKY_BLOCK_REF:
- bluesky_block_fetch(inode->fs, b, NULL);
- /* Fall through */
- case BLUESKY_BLOCK_CACHED:
- case BLUESKY_BLOCK_DIRTY:
- memcpy(buf, &b->data->data[block_offset], bytes);
- break;
+ } else {
+ BlueSkyRCStr *data = NULL;
+ if (b->type == BLUESKY_BLOCK_REF) {
+ bluesky_block_fetch(inode, b, NULL);
+ data = b->ref->data;
+ } else if (b->type == BLUESKY_BLOCK_DIRTY) {
+ data = b->dirty;
+ }
+ memcpy(buf, &data->data[block_offset], bytes);
}
offset += bytes;
buf += bytes;
len -= bytes;
}
-}
-/* Read the given block from cloud-backed storage if the data is not already
- * cached. */
-static void block_fetch_completion(BlueSkyStoreAsync *async, gpointer data)
-{
- BlueSkyBlock *block = (BlueSkyBlock *)data;
-
- bluesky_string_unref(block->data);
- block->data = async->data;
- bluesky_string_ref(block->data);
-
- if (block->data == NULL) {
- g_warning("Failed to fetch data block from store!\n");
- block->data = bluesky_string_new(g_malloc0(BLUESKY_BLOCK_SIZE),
- BLUESKY_BLOCK_SIZE);
- }
-
- block->type = BLUESKY_BLOCK_CACHED;
+ bluesky_profile_add_event(profile,
+ g_strdup_printf("BlueSky read complete"));
}
-void bluesky_block_fetch(BlueSkyFS *fs, BlueSkyBlock *block,
+void bluesky_block_fetch(BlueSkyInode *inode, BlueSkyBlock *block, // inode is not referenced in this body
BlueSkyStoreAsync *barrier)
{
if (block->type != BLUESKY_BLOCK_REF)
return;
- BlueSkyStoreAsync *async = bluesky_store_async_new(fs->store);
- async->op = STORE_OP_GET;
- async->key = g_strdup(block->ref);
- bluesky_store_async_add_notifier(async, (GFunc)block_fetch_completion, block);
- bluesky_store_async_submit(async);
-
- if (barrier != NULL)
- bluesky_store_add_barrier(barrier, async);
- else
- bluesky_store_async_wait(async);
-
- bluesky_store_async_unref(async);
- g_atomic_int_add(&fs->cache_total, 1);
+ g_mutex_lock(block->ref->lock); // barrier is not consulted; the fetch runs with the log item's lock held
+ bluesky_cloudlog_fetch(block->ref); // NOTE(review): presumably populates block->ref->data -- confirm
+ g_mutex_unlock(block->ref->lock);
+ block->type = BLUESKY_BLOCK_REF; // already REF here: non-REF blocks returned above
}
/* Write the given block to cloud-backed storage and mark it clean. */
-void bluesky_block_flush(BlueSkyFS *fs, BlueSkyBlock *block,
- BlueSkyStoreAsync *barrier,
+void bluesky_block_flush(BlueSkyInode *inode, BlueSkyBlock *block,
GList **log_items)
{
+ BlueSkyFS *fs = inode->fs;
+
if (block->type != BLUESKY_BLOCK_DIRTY)
return;
- BlueSkyRCStr *data = block->data;
+ g_assert(block->ref == NULL); // bluesky_block_touch() clears ref when it marks a block dirty
- BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs);
- gchar *name = bluesky_cloudlog_id_to_string(cloudlog->id);
+ BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs, NULL);
cloudlog->type = LOGTYPE_DATA;
- cloudlog->inum = 0; //FIXME
- cloudlog->data = data;
- bluesky_string_ref(data);
- *log_items = g_list_prepend(*log_items, bluesky_cloudlog_sync(cloudlog));
+ cloudlog->inum = inode->inum;
+ cloudlog->data = block->dirty; // String ownership is transferred to cloudlog; block->dirty is cleared below without an unref
+ bluesky_cloudlog_stats_update(cloudlog, 1); // NOTE(review): presumably accounts for the newly attached data -- confirm
+ bluesky_cloudlog_sync(cloudlog);
+ bluesky_cloudlog_ref(cloudlog); // Reference for log_items list
+ *log_items = g_list_prepend(*log_items, cloudlog); // new log item goes at the head of the caller's list
bluesky_cloudlog_insert(cloudlog);
- /* Store the file data asynchronously, and don't bother waiting for a
- * response. */
- BlueSkyStoreAsync *async = bluesky_store_async_new(fs->store);
- async->op = STORE_OP_PUT;
- async->key = g_strdup(name);
- bluesky_string_ref(data);
- async->data = data;
- bluesky_store_async_submit(async);
- if (barrier != NULL)
- bluesky_store_add_barrier(barrier, async);
- bluesky_store_async_unref(async);
-
- g_free(block->ref);
- block->ref = name;
-
- block->type = BLUESKY_BLOCK_CACHED;
+ block->ref = cloudlog; // Uses initial reference from _new()
+
+ block->type = BLUESKY_BLOCK_REF; // block is clean again; its data is now reachable as block->ref->data
+ block->dirty = NULL; // pointer handed to cloudlog->data above, so only the block's copy is cleared
g_atomic_int_add(&fs->cache_dirty, -1);
}
/* Flush all blocks in a file to stable storage. */
-void bluesky_file_flush(BlueSkyInode *inode, BlueSkyStoreAsync *barrier,
- GList **log_items)
+void bluesky_file_flush(BlueSkyInode *inode, GList **log_items) // returns early unless inode->type is BLUESKY_REGULAR
{
g_return_if_fail(inode->type == BLUESKY_REGULAR);
for (int i = 0; i < inode->blocks->len; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
- bluesky_block_flush(inode->fs, b, barrier, log_items);
+ bluesky_block_flush(inode, b, log_items); // does nothing for non-dirty blocks; each flushed block prepends an item to *log_items
}
}
for (int i = 0; i < inode->blocks->len; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
- if (b->type == BLUESKY_BLOCK_CACHED) {
- if (bluesky_verbose) {
- g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
- "Dropping block %d of inode %"PRIu64" from cache",
- i, inode->inum);
- g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
- " (reference count was %d)", b->data->refcount);
+ if (b->type == BLUESKY_BLOCK_REF) {
+ g_mutex_lock(b->ref->lock);
+ if (b->ref->data != NULL
+ && g_atomic_int_get(&b->ref->data_lock_count) == 0
+ && (b->ref->location_flags != 0))
+ {
+ bluesky_cloudlog_stats_update(b->ref, -1);
+ bluesky_string_unref(b->ref->data);
+ b->ref->data = NULL;
+ bluesky_cloudlog_stats_update(b->ref, 1);
}
-
- bluesky_string_unref(b->data);
- b->data = NULL;
- b->type = BLUESKY_BLOCK_REF;
- g_atomic_int_add(&inode->fs->cache_total, -1);
+ g_mutex_unlock(b->ref->lock);
}
}
}