Previously, data could be cached both as a cloud log item and as a string at
the inode level. Now dirty data is cached as a string and clean data as a
cloud log item.
} BlueSkyCloudPointer;
typedef enum {
} BlueSkyCloudPointer;
typedef enum {
LOGTYPE_UNKNOWN = 0,
LOGTYPE_DATA = 1,
LOGTYPE_INODE = 2,
LOGTYPE_UNKNOWN = 0,
LOGTYPE_DATA = 1,
LOGTYPE_INODE = 2,
// Pointers to other objects
GArray *pointers;
// Pointers to other objects
GArray *pointers;
- // Serialized data, if available in memory (otherwise NULL).
+ // Serialized data, if available in memory (otherwise NULL), and a lock
+ // count which tracks if there are users that require the data to be kept
+ // around.
};
/* Serialize objects into a log segment to be written to the cloud. */
};
/* Serialize objects into a log segment to be written to the cloud. */
#define BLUESKY_MAX_FILE_SIZE (BLUESKY_BLOCK_SIZE << 24)
typedef enum {
BLUESKY_BLOCK_ZERO = 0, /* Data is all zeroes, not explicitly stored */
#define BLUESKY_MAX_FILE_SIZE (BLUESKY_BLOCK_SIZE << 24)
typedef enum {
BLUESKY_BLOCK_ZERO = 0, /* Data is all zeroes, not explicitly stored */
- BLUESKY_BLOCK_REF = 1, /* Reference to key/value store, not cached */
- BLUESKY_BLOCK_CACHED = 2, /* Data is cached in memory, clean */
- BLUESKY_BLOCK_DIRTY = 3, /* Data needs to be committed to store */
+ BLUESKY_BLOCK_REF = 1, /* Reference to cloud log item, data clean */
+ BLUESKY_BLOCK_DIRTY = 2, /* Data needs to be committed to store */
} BlueSkyBlockType;
typedef struct {
BlueSkyBlockType type;
} BlueSkyBlockType;
typedef struct {
BlueSkyBlockType type;
- BlueSkyRCStr *data; /* Pointer to data in memory if cached */
- BlueSkyCloudLog *cloudref; /* Reference to cloud log entry with data */
+ BlueSkyCloudLog *ref; /* if REF: cloud log entry with data */
+ BlueSkyRCStr *dirty; /* if DIRTY: raw data in memory */
} BlueSkyBlock;
BlueSkyFS *bluesky_init_fs(gchar *name, BlueSkyStore *store);
} BlueSkyBlock;
BlueSkyFS *bluesky_init_fs(gchar *name, BlueSkyStore *store);
g_hash_table_remove(fs->locations, &log->id);
g_mutex_unlock(fs->lock);
g_hash_table_remove(fs->locations, &log->id);
g_mutex_unlock(fs->lock);
+ log->type = LOGTYPE_INVALID;
g_mutex_free(log->lock);
g_cond_free(log->cond);
g_array_unref(log->pointers);
g_mutex_free(log->lock);
g_cond_free(log->cond);
g_array_unref(log->pointers);
BlueSkyCloudLog *log2
= (BlueSkyCloudLog *)g_hash_table_lookup(log->fs->locations, &id);
// TODO: refcount
BlueSkyCloudLog *log2
= (BlueSkyCloudLog *)g_hash_table_lookup(log->fs->locations, &id);
// TODO: refcount
+ bluesky_cloudlog_fetch(log2);
g_assert(log2 != NULL);
bluesky_cloudlog_ref(log2);
g_mutex_unlock(log->fs->lock);
g_assert(log2 != NULL);
bluesky_cloudlog_ref(log2);
g_mutex_unlock(log->fs->lock);
while (state->inode_list != NULL) {
BlueSkyCloudLog *log = (BlueSkyCloudLog *)state->inode_list->data;
bluesky_cloudlog_serialize(log, state);
while (state->inode_list != NULL) {
BlueSkyCloudLog *log = (BlueSkyCloudLog *)state->inode_list->data;
bluesky_cloudlog_serialize(log, state);
+ bluesky_cloudlog_unref(log);
state->inode_list = g_list_delete_link(state->inode_list,
state->inode_list);
}
state->inode_list = g_list_delete_link(state->inode_list,
state->inode_list);
}
}
g_print(": refs=%d ty=%d inode=%"PRIu64" locs=%x log@(%d,%d) cloud@(%d,%d,%d)\n",
log->refcount,
}
g_print(": refs=%d ty=%d inode=%"PRIu64" locs=%x log@(%d,%d) cloud@(%d,%d,%d)\n",
log->refcount,
- log->type, log->inum, log->location_flags,
+ log->type, log->inum,
+ log->location_flags | (log->data != NULL ? 0x100 : 0),
log->log_seq, log->log_offset, log->location.directory,
log->location.sequence, log->location.offset);
}
log->log_seq, log->log_offset, log->location.directory,
log->location.sequence, log->location.offset);
}
switch (block->type) {
case BLUESKY_BLOCK_ZERO:
switch (block->type) {
case BLUESKY_BLOCK_ZERO:
- block->data = bluesky_string_new(g_malloc0(block_len), block_len);
+ block->dirty = bluesky_string_new(g_malloc0(block_len), block_len);
break;
case BLUESKY_BLOCK_REF:
break;
case BLUESKY_BLOCK_REF:
+ // FIXME: locking on the cloudlog?
bluesky_block_fetch(inode, block, NULL);
bluesky_block_fetch(inode, block, NULL);
- g_assert(block->type == BLUESKY_BLOCK_CACHED);
- /* Fall through */
- case BLUESKY_BLOCK_CACHED:
+ bluesky_string_ref(block->ref->data);
+ block->dirty = bluesky_string_dup(block->ref->data);
+ break;
case BLUESKY_BLOCK_DIRTY:
case BLUESKY_BLOCK_DIRTY:
- block->data = bluesky_string_dup(block->data);
+ block->dirty = bluesky_string_dup(block->dirty);
- if (block->type != BLUESKY_BLOCK_CACHED
+ /*if (block->type != BLUESKY_BLOCK_CACHED
&& block->type != BLUESKY_BLOCK_DIRTY)
&& block->type != BLUESKY_BLOCK_DIRTY)
- g_atomic_int_add(&inode->fs->cache_total, 1);
+ g_atomic_int_add(&inode->fs->cache_total, 1); //FIXME */
if (block->type != BLUESKY_BLOCK_DIRTY)
g_atomic_int_add(&inode->fs->cache_dirty, 1);
block->type = BLUESKY_BLOCK_DIRTY;
if (block->type != BLUESKY_BLOCK_DIRTY)
g_atomic_int_add(&inode->fs->cache_dirty, 1);
block->type = BLUESKY_BLOCK_DIRTY;
- bluesky_cloudlog_unref(block->cloudref);
- block->cloudref = NULL;
+ bluesky_cloudlog_unref(block->ref);
+ block->ref = NULL;
}
/* Set the size of a file. This will truncate or extend the file as needed.
* Newly-allocated bytes are zeroed. */
}
/* Set the size of a file. This will truncate or extend the file as needed.
* Newly-allocated bytes are zeroed. */
void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
{
g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE);
void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
{
g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE);
if (b->type != BLUESKY_BLOCK_ZERO
&& (b->type == BLUESKY_BLOCK_REF
if (b->type != BLUESKY_BLOCK_ZERO
&& (b->type == BLUESKY_BLOCK_REF
- || b->data->len < BLUESKY_BLOCK_SIZE)) {
+ || b->dirty->len < BLUESKY_BLOCK_SIZE)) {
bluesky_block_touch(inode, inode->blocks->len - 1);
bluesky_block_touch(inode, inode->blocks->len - 1);
- gsize old_size = b->data->len;
- bluesky_string_resize(b->data, BLUESKY_BLOCK_SIZE);
- memset(&b->data->data[old_size], 0,
+ gsize old_size = b->dirty->len;
+ bluesky_string_resize(b->dirty, BLUESKY_BLOCK_SIZE);
+ memset(&b->dirty->data[old_size], 0,
BLUESKY_BLOCK_SIZE - old_size);
}
}
BLUESKY_BLOCK_SIZE - old_size);
}
}
/* Delete blocks from a file. Must reclaim memory. */
for (guint i = inode->blocks->len; i < blocks; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
/* Delete blocks from a file. Must reclaim memory. */
for (guint i = inode->blocks->len; i < blocks; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
- if (b->type == BLUESKY_BLOCK_CACHED
+ /* if (b->type == BLUESKY_BLOCK_CACHED
|| b->type == BLUESKY_BLOCK_DIRTY)
|| b->type == BLUESKY_BLOCK_DIRTY)
- g_atomic_int_add(&inode->fs->cache_total, -1);
+ g_atomic_int_add(&inode->fs->cache_total, -1); FIXME */
if (b->type == BLUESKY_BLOCK_DIRTY)
g_atomic_int_add(&inode->fs->cache_dirty, -1);
if (b->type == BLUESKY_BLOCK_DIRTY)
g_atomic_int_add(&inode->fs->cache_dirty, -1);
- bluesky_string_unref(b->data);
- bluesky_cloudlog_unref(b->cloudref);
+ bluesky_string_unref(b->dirty);
+ bluesky_cloudlog_unref(b->ref);
}
g_array_set_size(inode->blocks, blocks);
}
}
g_array_set_size(inode->blocks, blocks);
}
if (b->type != BLUESKY_BLOCK_ZERO) {
bluesky_block_touch(inode, blocks - 1);
if (b->type != BLUESKY_BLOCK_ZERO) {
bluesky_block_touch(inode, blocks - 1);
- gsize old_size = b->data->len;
+ gsize old_size = b->dirty->len;
gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE;
gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE;
- bluesky_string_resize(b->data, new_size);
+ bluesky_string_resize(b->dirty, new_size);
if (new_size > old_size) {
if (new_size > old_size) {
- memset(&b->data->data[old_size], 0, new_size - old_size);
+ memset(&b->dirty->data[old_size], 0, new_size - old_size);
+ // TODO: Optimization: If we are entirely overwriting a block we don't need
+ // to fetch it from storage first.
while (len > 0) {
uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
gint block_offset = offset % BLUESKY_BLOCK_SIZE;
while (len > 0) {
uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
gint block_offset = offset % BLUESKY_BLOCK_SIZE;
bluesky_block_touch(inode, block_num);
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
block_num);
bluesky_block_touch(inode, block_num);
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
block_num);
- memcpy(&b->data->data[block_offset], data, bytes);
+ memcpy(&b->dirty->data[block_offset], data, bytes);
offset += bytes;
data += bytes;
offset += bytes;
data += bytes;
g_return_if_fail(offset < inode->size);
g_return_if_fail(len <= inode->size - offset);
g_return_if_fail(offset < inode->size);
g_return_if_fail(len <= inode->size - offset);
/* Start fetches on any data blocks that we will need for this read. */
BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store);
barrier->op = STORE_OP_BARRIER;
/* Start fetches on any data blocks that we will need for this read. */
BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store);
barrier->op = STORE_OP_BARRIER;
if (bluesky_verbose) {
g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete.");
}
if (bluesky_verbose) {
g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete.");
}
while (len > 0) {
uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
while (len > 0) {
uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
block_num);
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
block_num);
- switch (b->type) {
- case BLUESKY_BLOCK_ZERO:
+ if (b->type == BLUESKY_BLOCK_ZERO) {
- break;
- case BLUESKY_BLOCK_REF:
- bluesky_block_fetch(inode, b, NULL);
- /* Fall through */
- case BLUESKY_BLOCK_CACHED:
- case BLUESKY_BLOCK_DIRTY:
- memcpy(buf, &b->data->data[block_offset], bytes);
- break;
+ } else {
+ BlueSkyRCStr *data = NULL;
+ if (b->type == BLUESKY_BLOCK_REF) {
+ bluesky_block_fetch(inode, b, NULL);
+ data = b->ref->data;
+ } else if (b->type == BLUESKY_BLOCK_DIRTY) {
+ data = b->dirty;
+ }
+ memcpy(buf, &data->data[block_offset], bytes);
if (block->type != BLUESKY_BLOCK_REF)
return;
if (block->type != BLUESKY_BLOCK_REF)
return;
- g_mutex_lock(block->cloudref->lock);
- bluesky_cloudlog_fetch(block->cloudref);
- block->data = block->cloudref->data;
- bluesky_string_ref(block->data);
- g_mutex_unlock(block->cloudref->lock);
- block->type = BLUESKY_BLOCK_CACHED;
- g_atomic_int_add(&inode->fs->cache_total, 1);
+ g_mutex_lock(block->ref->lock);
+ bluesky_cloudlog_fetch(block->ref);
+ g_mutex_unlock(block->ref->lock);
+ block->type = BLUESKY_BLOCK_REF;
+ g_atomic_int_add(&inode->fs->cache_total, 1); //FIXME
}
/* Write the given block to cloud-backed storage and mark it clean. */
}
/* Write the given block to cloud-backed storage and mark it clean. */
if (block->type != BLUESKY_BLOCK_DIRTY)
return;
if (block->type != BLUESKY_BLOCK_DIRTY)
return;
- bluesky_cloudlog_unref(block->cloudref);
-
- BlueSkyRCStr *data = block->data;
+ g_assert(block->ref == NULL);
BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs);
cloudlog->type = LOGTYPE_DATA;
cloudlog->inum = inode->inum;
BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs);
cloudlog->type = LOGTYPE_DATA;
cloudlog->inum = inode->inum;
- cloudlog->data = data;
- bluesky_string_ref(data);
+ cloudlog->data = block->dirty; // String ownership is transferred
bluesky_cloudlog_sync(cloudlog);
bluesky_cloudlog_sync(cloudlog);
+ bluesky_cloudlog_ref(cloudlog); // Reference for log_items list
*log_items = g_list_prepend(*log_items, cloudlog);
bluesky_cloudlog_insert(cloudlog);
*log_items = g_list_prepend(*log_items, cloudlog);
bluesky_cloudlog_insert(cloudlog);
- block->cloudref = cloudlog;
+ block->ref = cloudlog; // Uses initial reference from _new()
- block->type = BLUESKY_BLOCK_CACHED;
+ block->type = BLUESKY_BLOCK_REF;
+ block->dirty = NULL;
g_atomic_int_add(&fs->cache_dirty, -1);
}
g_atomic_int_add(&fs->cache_dirty, -1);
}
for (int i = 0; i < inode->blocks->len; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
for (int i = 0; i < inode->blocks->len; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
- if (b->type == BLUESKY_BLOCK_CACHED) {
- if (bluesky_verbose) {
- g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
- "Dropping block %d of inode %"PRIu64" from cache",
- i, inode->inum);
- g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
- " (reference count was %d)", b->data->refcount);
+ if (b->type == BLUESKY_BLOCK_REF) {
+ g_mutex_lock(b->ref->lock);
+ if (b->ref->data != NULL
+ && g_atomic_int_get(&b->ref->data_lock_count) == 0
+ && (b->ref->location_flags != 0))
+ {
+ bluesky_string_unref(b->ref->data);
+ b->ref->data = NULL;
-
- bluesky_string_unref(b->data);
- b->data = NULL;
- b->type = BLUESKY_BLOCK_REF;
+ g_mutex_unlock(b->ref->lock);
g_atomic_int_add(&inode->fs->cache_total, -1);
g_atomic_int_add(&inode->fs->cache_total, -1);
- g_mutex_lock(b->cloudref->lock);
- if (b->cloudref->location_flags & CLOUDLOG_JOURNAL) {
- bluesky_string_unref(b->cloudref->data);
- b->cloudref->data = NULL;
- }
- g_mutex_unlock(b->cloudref->lock);
if (b->type == BLUESKY_BLOCK_DIRTY) {
g_error("Deleting an inode with dirty file data!");
}
if (b->type == BLUESKY_BLOCK_DIRTY) {
g_error("Deleting an inode with dirty file data!");
}
- bluesky_cloudlog_unref(b->cloudref);
- bluesky_string_unref(b->data);
+ bluesky_cloudlog_unref(b->ref);
+ bluesky_string_unref(b->dirty);
}
g_array_unref(inode->blocks);
break;
}
g_array_unref(inode->blocks);
break;
GString *buf = g_string_new("");
bluesky_serialize_inode(buf, inode);
GString *buf = g_string_new("");
bluesky_serialize_inode(buf, inode);
- BlueSkyRCStr *data = bluesky_string_new_from_gstring(buf);
char key[64];
sprintf(key, "inode-%016"PRIx64, inode->inum);
char key[64];
sprintf(key, "inode-%016"PRIx64, inode->inum);
BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs);
cloudlog->type = LOGTYPE_INODE;
cloudlog->inum = inode->inum;
BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs);
cloudlog->type = LOGTYPE_INODE;
cloudlog->inum = inode->inum;
- cloudlog->data = data;
- bluesky_string_ref(data);
+ cloudlog->data = bluesky_string_new_from_gstring(buf);
if (inode->type == BLUESKY_REGULAR) {
for (int i = 0; i < inode->blocks->len; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
if (inode->type == BLUESKY_REGULAR) {
for (int i = 0; i < inode->blocks->len; i++) {
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
- if (b->type == BLUESKY_BLOCK_CACHED
- || b->type == BLUESKY_BLOCK_REF)
- {
- BlueSkyCloudID id = b->cloudref->id;
+ if (b->type == BLUESKY_BLOCK_REF) {
+ BlueSkyCloudID id = b->ref->id;
g_array_append_val(cloudlog->pointers, id);
}
}
}
g_array_append_val(cloudlog->pointers, id);
}
}
}
- if (inode->committed_item != NULL)
- bluesky_cloudlog_unref(inode->committed_item);
+ bluesky_cloudlog_unref(inode->committed_item);
inode->committed_item = cloudlog;
bluesky_cloudlog_sync(cloudlog);
inode->committed_item = cloudlog;
bluesky_cloudlog_sync(cloudlog);
+ bluesky_cloudlog_ref(cloudlog);
log_items = g_list_prepend(log_items, cloudlog);
bluesky_cloudlog_insert(cloudlog);
log_items = g_list_prepend(log_items, cloudlog);
bluesky_cloudlog_insert(cloudlog);
g_cond_signal(item->cond);
g_mutex_unlock(item->lock);
log->committed = g_slist_delete_link(log->committed, log->committed);
g_cond_signal(item->cond);
g_mutex_unlock(item->lock);
log->committed = g_slist_delete_link(log->committed, log->committed);
+ bluesky_cloudlog_unref(item);
if ((item->location_flags | item->pending_write) & CLOUDLOG_JOURNAL) {
g_mutex_unlock(item->lock);
bluesky_cloudlog_unref(item);
if ((item->location_flags | item->pending_write) & CLOUDLOG_JOURNAL) {
g_mutex_unlock(item->lock);
bluesky_cloudlog_unref(item);
+ g_atomic_int_add(&item->data_lock_count, -1);
offset += sizeof(header) + sizeof(footer) + item->data->len;
log->committed = g_slist_prepend(log->committed, item);
offset += sizeof(header) + sizeof(footer) + item->data->len;
log->committed = g_slist_prepend(log->committed, item);
+ g_atomic_int_add(&item->data_lock_count, -1);
g_mutex_unlock(item->lock);
/* Force an if there are no other log items currently waiting to be
g_mutex_unlock(item->lock);
/* Force an if there are no other log items currently waiting to be
void bluesky_log_item_submit(BlueSkyCloudLog *item, BlueSkyLog *log)
{
bluesky_cloudlog_ref(item);
void bluesky_log_item_submit(BlueSkyCloudLog *item, BlueSkyLog *log)
{
bluesky_cloudlog_ref(item);
+ g_atomic_int_add(&item->data_lock_count, 1);
g_async_queue_push(log->queue, item);
}
g_async_queue_push(log->queue, item);
}
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
BlueSkyCloudID id;
memset(&id, 0, sizeof(id));
BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
BlueSkyCloudID id;
memset(&id, 0, sizeof(id));
- if (b->cloudref != NULL)
- id = b->cloudref->id;
+ if (b->ref != NULL)
+ id = b->ref->id;
g_string_append_len(out, (const char *)&id, sizeof(id));
}
break;
g_string_append_len(out, (const char *)&id, sizeof(id));
}
break;