1 /* Blue Sky: File Systems in the Cloud
3 * Copyright (C) 2009 The Regents of the University of California
4 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
14 #include "bluesky-private.h"
16 /* Core filesystem: handling of regular files and caching of file data. */
18 /* Mark a given block dirty and make sure that data is faulted in so that it
19 * can be written to. */
20 void bluesky_block_touch(BlueSkyInode *inode, uint64_t i)
22 g_return_if_fail(i < inode->blocks->len);
23 BlueSkyBlock *block = &g_array_index(inode->blocks, BlueSkyBlock, i);
26 if (i < inode->blocks->len - 1) {
27 block_len = BLUESKY_BLOCK_SIZE;
29 block_len = inode->size - i * BLUESKY_BLOCK_SIZE;
32 switch (block->type) {
33 case BLUESKY_BLOCK_ZERO:
34 block->data = bluesky_string_new(g_malloc0(block_len), block_len);
36 case BLUESKY_BLOCK_REF:
37 bluesky_block_fetch(inode->fs, block, NULL);
38 g_assert(block->type == BLUESKY_BLOCK_CACHED);
40 case BLUESKY_BLOCK_CACHED:
41 case BLUESKY_BLOCK_DIRTY:
42 block->data = bluesky_string_dup(block->data);
46 if (block->type != BLUESKY_BLOCK_CACHED
47 && block->type != BLUESKY_BLOCK_DIRTY)
48 g_atomic_int_add(&inode->fs->cache_total, 1);
49 if (block->type != BLUESKY_BLOCK_DIRTY)
50 g_atomic_int_add(&inode->fs->cache_dirty, 1);
52 block->type = BLUESKY_BLOCK_DIRTY;
55 /* Set the size of a file. This will truncate or extend the file as needed.
56 * Newly-allocated bytes are zeroed. */
57 void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
59 g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE);
61 if (size == inode->size)
64 if (bluesky_verbose) {
65 g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
66 "Truncating file to %"PRIi64" bytes", size);
69 uint64_t blocks = (size + BLUESKY_BLOCK_SIZE - 1) / BLUESKY_BLOCK_SIZE;
71 if (blocks > inode->blocks->len) {
72 /* Need to add new blocks to the end of a file. New block structures
73 * are automatically zeroed, which initializes them to be pointers to
74 * zero blocks so we don't need to do any more work. If the
75 * previously-last block in the file is smaller than
76 * BLUESKY_BLOCK_SIZE, extend it to full size. */
77 if (inode->blocks->len > 0) {
78 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
79 inode->blocks->len - 1);
81 if (b->type != BLUESKY_BLOCK_ZERO
82 && (b->type == BLUESKY_BLOCK_REF
83 || b->data->len < BLUESKY_BLOCK_SIZE)) {
84 bluesky_block_touch(inode, inode->blocks->len - 1);
85 gsize old_size = b->data->len;
86 bluesky_string_resize(b->data, BLUESKY_BLOCK_SIZE);
87 memset(&b->data->data[old_size], 0,
88 BLUESKY_BLOCK_SIZE - old_size);
92 g_array_set_size(inode->blocks, blocks);
93 } else if (blocks < inode->blocks->len) {
94 /* Delete blocks from a file. Must reclaim memory. */
95 for (guint i = inode->blocks->len; i < blocks; i++) {
96 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
98 if (b->type == BLUESKY_BLOCK_CACHED
99 || b->type == BLUESKY_BLOCK_DIRTY)
100 g_atomic_int_add(&inode->fs->cache_total, -1);
101 if (b->type == BLUESKY_BLOCK_DIRTY)
102 g_atomic_int_add(&inode->fs->cache_dirty, -1);
103 bluesky_string_unref(b->data);
105 g_array_set_size(inode->blocks, blocks);
108 /* Ensure the new last block of the file is properly sized. If the block
109 * is extended, newly-added bytes must be zeroed. */
111 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
114 if (b->type != BLUESKY_BLOCK_ZERO) {
115 bluesky_block_touch(inode, blocks - 1);
116 gsize old_size = b->data->len;
117 gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE;
119 bluesky_string_resize(b->data, new_size);
121 if (new_size > old_size) {
122 memset(&b->data->data[old_size], 0, new_size - old_size);
128 bluesky_inode_update_ctime(inode, 1);
131 void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
132 const char *data, gint len)
134 g_return_if_fail(inode->type == BLUESKY_REGULAR);
135 g_return_if_fail(offset < inode->size);
136 g_return_if_fail(len <= inode->size - offset);
142 uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
143 gint block_offset = offset % BLUESKY_BLOCK_SIZE;
144 gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len);
146 bluesky_block_touch(inode, block_num);
147 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
149 memcpy(&b->data->data[block_offset], data, bytes);
156 bluesky_inode_update_ctime(inode, 1);
159 void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
162 if (len == 0 && offset <= inode->size)
165 g_return_if_fail(inode->type == BLUESKY_REGULAR);
166 g_return_if_fail(offset < inode->size);
167 g_return_if_fail(len <= inode->size - offset);
169 /* Start fetches on any data blocks that we will need for this read. */
170 BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store);
171 barrier->op = STORE_OP_BARRIER;
172 uint64_t start_block, end_block;
173 start_block = offset / BLUESKY_BLOCK_SIZE;
174 end_block = (offset + len - 1) / BLUESKY_BLOCK_SIZE;
175 if (bluesky_verbose) {
176 g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
177 "Start prefetch on blocks %"PRIi64" .. %"PRIi64,
178 start_block, end_block);
180 for (uint64_t i = start_block; i <= end_block; i++) {
181 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
183 if (b->type == BLUESKY_BLOCK_REF)
184 bluesky_block_fetch(inode->fs, b, barrier);
186 bluesky_store_async_submit(barrier);
187 bluesky_store_async_wait(barrier);
188 bluesky_store_async_unref(barrier);
189 if (bluesky_verbose) {
190 g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete.");
194 uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
195 gint block_offset = offset % BLUESKY_BLOCK_SIZE;
196 gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len);
198 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
201 case BLUESKY_BLOCK_ZERO:
202 memset(buf, 0, bytes);
204 case BLUESKY_BLOCK_REF:
205 bluesky_block_fetch(inode->fs, b, NULL);
207 case BLUESKY_BLOCK_CACHED:
208 case BLUESKY_BLOCK_DIRTY:
209 memcpy(buf, &b->data->data[block_offset], bytes);
219 /* Read the given block from cloud-backed storage if the data is not already
221 static void block_fetch_completion(BlueSkyStoreAsync *async, gpointer data)
223 BlueSkyBlock *block = (BlueSkyBlock *)data;
225 bluesky_string_unref(block->data);
226 block->data = async->data;
227 bluesky_string_ref(block->data);
229 if (block->data == NULL) {
230 g_warning("Failed to fetch data block from store!\n");
231 block->data = bluesky_string_new(g_malloc0(BLUESKY_BLOCK_SIZE),
235 block->type = BLUESKY_BLOCK_CACHED;
238 void bluesky_block_fetch(BlueSkyFS *fs, BlueSkyBlock *block,
239 BlueSkyStoreAsync *barrier)
241 if (block->type != BLUESKY_BLOCK_REF)
244 BlueSkyStoreAsync *async = bluesky_store_async_new(fs->store);
245 async->op = STORE_OP_GET;
246 async->key = g_strdup(block->ref);
247 bluesky_store_async_add_notifier(async, (GFunc)block_fetch_completion, block);
248 bluesky_store_async_submit(async);
251 bluesky_store_add_barrier(barrier, async);
253 bluesky_store_async_wait(async);
255 bluesky_store_async_unref(async);
256 g_atomic_int_add(&fs->cache_total, 1);
259 /* Write the given block to cloud-backed storage and mark it clean. */
260 void bluesky_block_flush(BlueSkyFS *fs, BlueSkyBlock *block,
261 BlueSkyStoreAsync *barrier)
263 if (block->type != BLUESKY_BLOCK_DIRTY)
266 BlueSkyRCStr *data = block->data;
268 GChecksum *csum = g_checksum_new(G_CHECKSUM_SHA256);
269 g_checksum_update(csum, (const guchar *)data->data, data->len);
270 gchar *name = g_strdup(g_checksum_get_string(csum));
272 /* Store the file data asynchronously, and don't bother waiting for a
274 BlueSkyStoreAsync *async = bluesky_store_async_new(fs->store);
275 async->op = STORE_OP_PUT;
276 async->key = g_strdup(name);
277 bluesky_string_ref(data);
279 bluesky_store_async_submit(async);
281 bluesky_store_add_barrier(barrier, async);
282 bluesky_store_async_unref(async);
287 block->type = BLUESKY_BLOCK_CACHED;
288 g_atomic_int_add(&fs->cache_dirty, -1);
290 g_checksum_free(csum);
293 /* Flush all blocks in a file to stable storage. */
294 void bluesky_file_flush(BlueSkyInode *inode, BlueSkyStoreAsync *barrier)
296 g_return_if_fail(inode->type == BLUESKY_REGULAR);
298 for (int i = 0; i < inode->blocks->len; i++) {
299 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
300 bluesky_block_flush(inode->fs, b, barrier);
304 /* Drop clean data blocks for a file from cache. */
305 void bluesky_file_drop_cached(BlueSkyInode *inode)
307 g_return_if_fail(inode->type == BLUESKY_REGULAR);
309 for (int i = 0; i < inode->blocks->len; i++) {
310 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
311 if (b->type == BLUESKY_BLOCK_CACHED) {
312 if (bluesky_verbose) {
313 g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
314 "Dropping block %d of inode %"PRIu64" from cache",
316 g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
317 " (reference count was %d)", b->data->refcount);
320 bluesky_string_unref(b->data);
322 b->type = BLUESKY_BLOCK_REF;
323 g_atomic_int_add(&inode->fs->cache_total, -1);