From: Michael Vrable Date: Thu, 12 Nov 2009 02:33:48 +0000 (-0800) Subject: Do not pad the final block of a file with zeroes. X-Git-Url: http://git.vrable.net/?a=commitdiff_plain;h=0325ee9e9afb02b08bdec3497e8cc54cb942989d;p=bluesky.git Do not pad the final block of a file with zeroes. To accomplish this, a resize operation was added for reference-counted strings. --- diff --git a/bluesky/bluesky.h b/bluesky/bluesky.h index 7408849..dbb69bb 100644 --- a/bluesky/bluesky.h +++ b/bluesky/bluesky.h @@ -33,6 +33,7 @@ BlueSkyRCStr *bluesky_string_new(gpointer data, gsize len); void bluesky_string_ref(BlueSkyRCStr *string); void bluesky_string_unref(BlueSkyRCStr *string); BlueSkyRCStr *bluesky_string_dup(BlueSkyRCStr *string); +void bluesky_string_resize(BlueSkyRCStr *string, gsize len); /* Cryptographic operations. */ #define CRYPTO_BLOCK_SIZE 16 /* 128-bit AES */ diff --git a/bluesky/file.c b/bluesky/file.c index 607d611..75768d1 100644 --- a/bluesky/file.c +++ b/bluesky/file.c @@ -21,10 +21,17 @@ void bluesky_block_touch(BlueSkyInode *inode, uint64_t i) g_return_if_fail(i < inode->blocks->len); BlueSkyBlock *block = &g_array_index(inode->blocks, BlueSkyBlock, i); + gsize block_len; + if (i < inode->blocks->len - 1) { + block_len = BLUESKY_BLOCK_SIZE; + } else { + block_len = inode->size - i * BLUESKY_BLOCK_SIZE; + } + switch (block->type) { case BLUESKY_BLOCK_ZERO: - block->data = bluesky_string_new(g_malloc0(BLUESKY_BLOCK_SIZE), - BLUESKY_BLOCK_SIZE); + g_print("Allocating zero block of size %zd\n", block_len); + block->data = bluesky_string_new(g_malloc0(block_len), block_len); break; case BLUESKY_BLOCK_REF: bluesky_block_fetch(inode->fs, block); @@ -65,17 +72,21 @@ void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size) g_array_set_size(inode->blocks, blocks); } - /* If the file size is being decreased, ensure that any trailing data in - * the last block is zeroed. */ - if (size < inode->size) { + /* Ensure the last block of the file is properly sized. 
If the block is + * extended, newly-added bytes must be zeroed. */ + if (blocks > 0) { BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, blocks - 1); + if (b->type != BLUESKY_BLOCK_ZERO) { bluesky_block_touch(inode, blocks - 1); - int end_offset = size % BLUESKY_BLOCK_SIZE; - if (end_offset > 0) { - memset(&b->data->data[end_offset], 0, - BLUESKY_BLOCK_SIZE - end_offset); + gsize old_size = b->data->len; + gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE; + + bluesky_string_resize(b->data, new_size); + + if (new_size > old_size) { + memset(&b->data->data[old_size], 0, new_size - old_size); } } } diff --git a/bluesky/store.c b/bluesky/store.c index 50b47b0..52cc6b3 100644 --- a/bluesky/store.c +++ b/bluesky/store.c @@ -64,59 +64,6 @@ void bluesky_store_put(BlueSkyStore *store, store->impl->put(store->handle, key, val); } -/* Create and return a new reference-counted string. The reference count is - * initially one. The newly-returned string takes ownership of the memory - * pointed at by data, and will call g_free on it when the reference count - * drops to zero. */ -BlueSkyRCStr *bluesky_string_new(gpointer data, gsize len) -{ - BlueSkyRCStr *string = g_new(BlueSkyRCStr, 1); - string->data = data; - string->len = len; - g_atomic_int_set(&string->refcount, 1); - return string; -} - -void bluesky_string_ref(BlueSkyRCStr *string) -{ - if (string == NULL) - return; - - g_atomic_int_inc(&string->refcount); -} - -void bluesky_string_unref(BlueSkyRCStr *string) -{ - if (string == NULL) - return; - - if (g_atomic_int_dec_and_test(&string->refcount)) { - g_free(string->data); - g_free(string); - } -} - -/* Duplicate and return a new reference-counted string, containing a copy of - * the original data, with a reference count of 1. As an optimization, if the - * passed-in string already has a reference count of 1, the original is - * returned. Can be used to make a mutable copy of a shared string. 
For this - * to truly be safe, it is probably needed that there be some type of lock - * protecting access to the string. */ -BlueSkyRCStr *bluesky_string_dup(BlueSkyRCStr *string) -{ - if (string == NULL) - return NULL; - - if (g_atomic_int_dec_and_test(&string->refcount)) { - /* There are no other shared copies, so return this one. */ - g_atomic_int_inc(&string->refcount); - return string; - } else { - return bluesky_string_new(g_memdup(string->data, string->len), - string->len); - } -} - /* Simple in-memory data store for test purposes. */ typedef struct { GMutex *lock; diff --git a/bluesky/util.c b/bluesky/util.c index 1f98990..1f21bbf 100644 --- a/bluesky/util.c +++ b/bluesky/util.c @@ -35,3 +35,71 @@ gchar *bluesky_lowercase(const gchar *s) /* TODO: Unicode handling; for now just do ASCII. */ return g_ascii_strdown(s, -1); } + +/**** Reference-counted strings. ****/ + +/* Create and return a new reference-counted string. The reference count is + * initially one. The newly-returned string takes ownership of the memory + * pointed at by data, and will call g_free on it when the reference count + * drops to zero. */ +BlueSkyRCStr *bluesky_string_new(gpointer data, gsize len) +{ + BlueSkyRCStr *string = g_new(BlueSkyRCStr, 1); + string->data = data; + string->len = len; + g_atomic_int_set(&string->refcount, 1); + return string; +} + +void bluesky_string_ref(BlueSkyRCStr *string) +{ + if (string == NULL) + return; + + g_atomic_int_inc(&string->refcount); +} + +void bluesky_string_unref(BlueSkyRCStr *string) +{ + if (string == NULL) + return; + + if (g_atomic_int_dec_and_test(&string->refcount)) { + g_free(string->data); + g_free(string); + } +} + +/* Duplicate and return a new reference-counted string, containing a copy of + * the original data, with a reference count of 1. As an optimization, if the + * passed-in string already has a reference count of 1, the original is + * returned. Can be used to make a mutable copy of a shared string. 
For this
+ * to truly be safe, it is probably needed that there be some type of lock
+ * protecting access to the string. */
+BlueSkyRCStr *bluesky_string_dup(BlueSkyRCStr *string)
+{
+    if (string == NULL)
+        return NULL;
+
+    if (g_atomic_int_dec_and_test(&string->refcount)) {
+        /* There are no other shared copies, so return this one. */
+        g_atomic_int_inc(&string->refcount);
+        return string;
+    } else {
+        return bluesky_string_new(g_memdup(string->data, string->len),
+                                  string->len);
+    }
+}
+
+/* Resize the data block used by a BlueSkyRCStr to len bytes, updating both
+ * the data pointer and the recorded length.  The data pointer might change,
+ * so it should not be cached across calls; newly-added bytes are not zeroed
+ * here.  To avoid confusing any other users, the caller probably ought
+ * to hold the only reference (call bluesky_string_dup first if needed). */
+void bluesky_string_resize(BlueSkyRCStr *string, gsize len)
+{
+    if (string->len == len)
+        return;
+    string->data = g_realloc(string->data, len);
+    string->len = len;
+}