X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=bluesky%2Ffile.c;h=19363756b17e3fc32ede50a6c4b2f4faa539909a;hb=388030970805a70cb4fad34ade5e3de7a3607a57;hp=57193dd9ee69fefdc3595a6a1df1c27c2990aabe;hpb=ed6f1f04bd4d7a73ed26b3bb0d5e0df1cc0acf42;p=bluesky.git diff --git a/bluesky/file.c b/bluesky/file.c index 57193dd..1936375 100644 --- a/bluesky/file.c +++ b/bluesky/file.c @@ -16,8 +16,12 @@ /* Core filesystem: handling of regular files and caching of file data. */ /* Mark a given block dirty and make sure that data is faulted in so that it - * can be written to. */ -void bluesky_block_touch(BlueSkyInode *inode, uint64_t i) + * can be written to. + * + * If preserve is set to false, this is a hint that the block is about to be + * entirely overwritten. In this case, a dirty block is made available but any + * prior contents might be lost. A value of preserve = TRUE is always safe. */ +void bluesky_block_touch(BlueSkyInode *inode, uint64_t i, gboolean preserve) { g_return_if_fail(i < inode->blocks->len); BlueSkyBlock *block = &g_array_index(inode->blocks, BlueSkyBlock, i); @@ -31,32 +35,34 @@ void bluesky_block_touch(BlueSkyInode *inode, uint64_t i) switch (block->type) { case BLUESKY_BLOCK_ZERO: - block->data = bluesky_string_new(g_malloc0(block_len), block_len); + block->dirty = bluesky_string_new(g_malloc0(block_len), block_len); break; case BLUESKY_BLOCK_REF: - bluesky_block_fetch(inode, block, NULL); - g_assert(block->type == BLUESKY_BLOCK_CACHED); - /* Fall through */ - case BLUESKY_BLOCK_CACHED: + if (preserve) { + // FIXME: locking on the cloudlog? + bluesky_block_fetch(inode, block, NULL); + bluesky_string_ref(block->ref->data); + block->dirty = bluesky_string_dup(block->ref->data); + } else { + block->dirty = bluesky_string_new(g_malloc0(block_len), block_len); + } + break; case BLUESKY_BLOCK_DIRTY: - block->data = bluesky_string_dup(block->data); + block->dirty = bluesky_string_dup(block->dirty); break; } - if (block->type != BLUESKY_BLOCK_CACHED - && block->type != BLUESKY_BLOCK_DIRTY) - g_atomic_int_add(&inode->fs->cache_total, 1); if (block->type != BLUESKY_BLOCK_DIRTY) g_atomic_int_add(&inode->fs->cache_dirty, 1); block->type = BLUESKY_BLOCK_DIRTY; - if (block->cloudref != NULL) - bluesky_cloudlog_unref(block->cloudref); - block->cloudref = NULL; + bluesky_cloudlog_unref(block->ref); + block->ref = NULL; } /* Set the size of a file. This will truncate or extend the file as needed. * Newly-allocated bytes are zeroed. */ +// FIXME void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) { g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE); @@ -71,6 +77,15 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) uint64_t blocks = (size + BLUESKY_BLOCK_SIZE - 1) / BLUESKY_BLOCK_SIZE; + /* Calculate number of bytes in the last block of the file */ + int lastblock_old, lastblock_new; + lastblock_old = inode->size % BLUESKY_BLOCK_SIZE; + if (lastblock_old == 0 && inode->size > 0) + lastblock_old = BLUESKY_BLOCK_SIZE; + lastblock_new = size % BLUESKY_BLOCK_SIZE; + if (lastblock_new == 0 && size > 0) + lastblock_new = BLUESKY_BLOCK_SIZE; + if (blocks > inode->blocks->len) { /* Need to add new blocks to the end of a file. New block structures * are automatically zeroed, which initializes them to be pointers to @@ -82,12 +97,16 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) inode->blocks->len - 1); if (b->type != BLUESKY_BLOCK_ZERO - && (b->type == BLUESKY_BLOCK_REF - || b->data->len < BLUESKY_BLOCK_SIZE)) { - bluesky_block_touch(inode, inode->blocks->len - 1); - gsize old_size = b->data->len; - bluesky_string_resize(b->data, BLUESKY_BLOCK_SIZE); - memset(&b->data->data[old_size], 0, + && lastblock_old < BLUESKY_BLOCK_SIZE) { + bluesky_block_touch(inode, inode->blocks->len - 1, TRUE); + gsize old_size = b->dirty->len; + if (lastblock_old != old_size) { + fprintf(stderr, + "Warning: last block size = %zd, expected %d\n", + old_size, lastblock_old); + } + bluesky_string_resize(b->dirty, BLUESKY_BLOCK_SIZE); + memset(&b->dirty->data[old_size], 0, BLUESKY_BLOCK_SIZE - old_size); } } @@ -95,15 +114,12 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) g_array_set_size(inode->blocks, blocks); } else if (blocks < inode->blocks->len) { /* Delete blocks from a file. Must reclaim memory. */ - for (guint i = inode->blocks->len; i < blocks; i++) { + for (guint i = blocks; i < inode->blocks->len; i++) { BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i); - if (b->type == BLUESKY_BLOCK_CACHED - || b->type == BLUESKY_BLOCK_DIRTY) - g_atomic_int_add(&inode->fs->cache_total, -1); if (b->type == BLUESKY_BLOCK_DIRTY) g_atomic_int_add(&inode->fs->cache_dirty, -1); - bluesky_string_unref(b->data); - bluesky_cloudlog_unref(b->cloudref); + bluesky_string_unref(b->dirty); + bluesky_cloudlog_unref(b->ref); } g_array_set_size(inode->blocks, blocks); } @@ -114,15 +130,21 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, blocks - 1); - if (b->type != BLUESKY_BLOCK_ZERO) { - bluesky_block_touch(inode, blocks - 1); - gsize old_size = b->data->len; + gboolean need_resize = TRUE; + if (b->type == BLUESKY_BLOCK_ZERO) + need_resize = FALSE; + else if (size < inode->size && lastblock_new == BLUESKY_BLOCK_SIZE) + need_resize = FALSE; + + if (need_resize) { + bluesky_block_touch(inode, blocks - 1, TRUE); + gsize old_size = b->dirty->len; gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE; - bluesky_string_resize(b->data, new_size); + bluesky_string_resize(b->dirty, new_size); if (new_size > old_size) { - memset(&b->data->data[old_size], 0, new_size - old_size); + memset(&b->dirty->data[old_size], 0, new_size - old_size); } } } @@ -146,10 +168,18 @@ void bluesky_file_write(BlueSkyInode *inode, uint64_t offset, gint block_offset = offset % BLUESKY_BLOCK_SIZE; gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len); - bluesky_block_touch(inode, block_num); + gboolean preserve = TRUE; + gsize block_size = BLUESKY_BLOCK_SIZE; + if (block_num == inode->blocks->len - 1) { + block_size = inode->size - block_num * BLUESKY_BLOCK_SIZE; + } + if (block_offset == 0 && bytes == block_size) { + preserve = FALSE; + } + bluesky_block_touch(inode, block_num, preserve); BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, block_num); - memcpy(&b->data->data[block_offset], data, bytes); + memcpy(&b->dirty->data[block_offset], data, bytes); offset += bytes; data += bytes; @@ -169,30 +199,23 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset, g_return_if_fail(offset < inode->size); g_return_if_fail(len <= inode->size - offset); - /* Start fetches on any data blocks that we will need for this read. */ - BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store); - barrier->op = STORE_OP_BARRIER; + BlueSkyProfile *profile = bluesky_profile_get(); + + bluesky_profile_add_event(profile, + g_strdup_printf("Start file read prefetch")); uint64_t start_block, end_block; start_block = offset / BLUESKY_BLOCK_SIZE; end_block = (offset + len - 1) / BLUESKY_BLOCK_SIZE; - if (bluesky_verbose) { - g_log("bluesky/file", G_LOG_LEVEL_DEBUG, - "Start prefetch on blocks %"PRIi64" .. %"PRIi64, - start_block, end_block); - } for (uint64_t i = start_block; i <= end_block; i++) { BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i); if (b->type == BLUESKY_BLOCK_REF) - bluesky_block_fetch(inode, b, barrier); - } - bluesky_store_async_submit(barrier); - bluesky_store_async_wait(barrier); - bluesky_store_async_unref(barrier); - if (bluesky_verbose) { - g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete."); + bluesky_cloudlog_prefetch(b->ref); } + bluesky_profile_add_event(profile, + g_strdup_printf("End file read prefetch")); + while (len > 0) { uint64_t block_num = offset / BLUESKY_BLOCK_SIZE; gint block_offset = offset % BLUESKY_BLOCK_SIZE; @@ -200,23 +223,26 @@ void bluesky_file_read(BlueSkyInode *inode, uint64_t offset, BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, block_num); - switch (b->type) { - case BLUESKY_BLOCK_ZERO: + if (b->type == BLUESKY_BLOCK_ZERO) { memset(buf, 0, bytes); - break; - case BLUESKY_BLOCK_REF: - bluesky_block_fetch(inode, b, NULL); - /* Fall through */ - case BLUESKY_BLOCK_CACHED: - case BLUESKY_BLOCK_DIRTY: - memcpy(buf, &b->data->data[block_offset], bytes); - break; + } else { + BlueSkyRCStr *data = NULL; + if (b->type == BLUESKY_BLOCK_REF) { + bluesky_block_fetch(inode, b, NULL); + data = b->ref->data; + } else if (b->type == BLUESKY_BLOCK_DIRTY) { + data = b->dirty; + } + memcpy(buf, &data->data[block_offset], bytes); } offset += bytes; buf += bytes; len -= bytes; } + + bluesky_profile_add_event(profile, + g_strdup_printf("BlueSky read complete")); } void bluesky_block_fetch(BlueSkyInode *inode, BlueSkyBlock *block, @@ -225,13 +251,10 @@ void bluesky_block_fetch(BlueSkyInode *inode, BlueSkyBlock *block, if (block->type != BLUESKY_BLOCK_REF) return; - g_mutex_lock(block->cloudref->lock); - bluesky_cloudlog_fetch(block->cloudref); - block->data = block->cloudref->data; - bluesky_string_ref(block->data); - g_mutex_unlock(block->cloudref->lock); - block->type = BLUESKY_BLOCK_CACHED; - g_atomic_int_add(&inode->fs->cache_total, 1); + g_mutex_lock(block->ref->lock); + bluesky_cloudlog_fetch(block->ref); + g_mutex_unlock(block->ref->lock); + block->type = BLUESKY_BLOCK_REF; } /* Write the given block to cloud-backed storage and mark it clean. */ @@ -243,24 +266,22 @@ void bluesky_block_flush(BlueSkyInode *inode, BlueSkyBlock *block, if (block->type != BLUESKY_BLOCK_DIRTY) return; - if (block->cloudref != NULL) - bluesky_cloudlog_unref(block->cloudref); - - BlueSkyRCStr *data = block->data; + g_assert(block->ref == NULL); - BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs); + BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs, NULL); cloudlog->type = LOGTYPE_DATA; cloudlog->inum = inode->inum; - cloudlog->data = data; - bluesky_string_ref(data); + cloudlog->data = block->dirty; // String ownership is transferred + bluesky_cloudlog_stats_update(cloudlog, 1); bluesky_cloudlog_sync(cloudlog); + bluesky_cloudlog_ref(cloudlog); // Reference for log_items list *log_items = g_list_prepend(*log_items, cloudlog); bluesky_cloudlog_insert(cloudlog); - block->cloudref = cloudlog; - bluesky_cloudlog_ref(cloudlog); + block->ref = cloudlog; // Uses initial reference from _new() - block->type = BLUESKY_BLOCK_CACHED; + block->type = BLUESKY_BLOCK_REF; + block->dirty = NULL; g_atomic_int_add(&fs->cache_dirty, -1); } @@ -282,25 +303,18 @@ void bluesky_file_drop_cached(BlueSkyInode *inode) for (int i = 0; i < inode->blocks->len; i++) { BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i); - if (b->type == BLUESKY_BLOCK_CACHED) { - if (bluesky_verbose) { - g_log("bluesky/cache", G_LOG_LEVEL_DEBUG, - "Dropping block %d of inode %"PRIu64" from cache", - i, inode->inum); - g_log("bluesky/cache", G_LOG_LEVEL_DEBUG, - " (reference count was %d)", b->data->refcount); - } - - bluesky_string_unref(b->data); - b->data = NULL; - b->type = BLUESKY_BLOCK_REF; - g_atomic_int_add(&inode->fs->cache_total, -1); - g_mutex_lock(b->cloudref->lock); - if (b->cloudref->location_flags & CLOUDLOG_JOURNAL) { - bluesky_string_unref(b->cloudref->data); - b->cloudref->data = NULL; + if (b->type == BLUESKY_BLOCK_REF) { + g_mutex_lock(b->ref->lock); + if (b->ref->data != NULL + && g_atomic_int_get(&b->ref->data_lock_count) == 0 + && (b->ref->location_flags != 0)) + { + bluesky_cloudlog_stats_update(b->ref, -1); + bluesky_string_unref(b->ref->data); + b->ref->data = NULL; + bluesky_cloudlog_stats_update(b->ref, 1); } - g_mutex_unlock(b->cloudref->lock); + g_mutex_unlock(b->ref->lock); } } }