/* Blue Sky: File Systems in the Cloud
 *
 * Copyright (C) 2009  The Regents of the University of California
 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
 */
14 #include "bluesky-private.h"
16 /* Core filesystem: handling of regular files and caching of file data. */
18 /* Mark a given block dirty and make sure that data is faulted in so that it
19 * can be written to. */
20 void bluesky_block_touch(BlueSkyInode *inode, uint64_t i)
22 g_return_if_fail(i < inode->blocks->len);
23 BlueSkyBlock *block = &g_array_index(inode->blocks, BlueSkyBlock, i);
26 if (i < inode->blocks->len - 1) {
27 block_len = BLUESKY_BLOCK_SIZE;
29 block_len = inode->size - i * BLUESKY_BLOCK_SIZE;
32 switch (block->type) {
33 case BLUESKY_BLOCK_ZERO:
34 block->data = bluesky_string_new(g_malloc0(block_len), block_len);
36 case BLUESKY_BLOCK_REF:
37 bluesky_block_fetch(inode, block, NULL);
38 g_assert(block->type == BLUESKY_BLOCK_CACHED);
40 case BLUESKY_BLOCK_CACHED:
41 case BLUESKY_BLOCK_DIRTY:
42 block->data = bluesky_string_dup(block->data);
46 if (block->type != BLUESKY_BLOCK_CACHED
47 && block->type != BLUESKY_BLOCK_DIRTY)
48 g_atomic_int_add(&inode->fs->cache_total, 1);
49 if (block->type != BLUESKY_BLOCK_DIRTY)
50 g_atomic_int_add(&inode->fs->cache_dirty, 1);
52 block->type = BLUESKY_BLOCK_DIRTY;
53 bluesky_cloudlog_unref(block->cloudref);
54 block->cloudref = NULL;
57 /* Set the size of a file. This will truncate or extend the file as needed.
58 * Newly-allocated bytes are zeroed. */
59 void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
61 g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE);
63 if (size == inode->size)
66 if (bluesky_verbose) {
67 g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
68 "Truncating file to %"PRIi64" bytes", size);
71 uint64_t blocks = (size + BLUESKY_BLOCK_SIZE - 1) / BLUESKY_BLOCK_SIZE;
73 if (blocks > inode->blocks->len) {
74 /* Need to add new blocks to the end of a file. New block structures
75 * are automatically zeroed, which initializes them to be pointers to
76 * zero blocks so we don't need to do any more work. If the
77 * previously-last block in the file is smaller than
78 * BLUESKY_BLOCK_SIZE, extend it to full size. */
79 if (inode->blocks->len > 0) {
80 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
81 inode->blocks->len - 1);
83 if (b->type != BLUESKY_BLOCK_ZERO
84 && (b->type == BLUESKY_BLOCK_REF
85 || b->data->len < BLUESKY_BLOCK_SIZE)) {
86 bluesky_block_touch(inode, inode->blocks->len - 1);
87 gsize old_size = b->data->len;
88 bluesky_string_resize(b->data, BLUESKY_BLOCK_SIZE);
89 memset(&b->data->data[old_size], 0,
90 BLUESKY_BLOCK_SIZE - old_size);
94 g_array_set_size(inode->blocks, blocks);
95 } else if (blocks < inode->blocks->len) {
96 /* Delete blocks from a file. Must reclaim memory. */
97 for (guint i = inode->blocks->len; i < blocks; i++) {
98 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
99 if (b->type == BLUESKY_BLOCK_CACHED
100 || b->type == BLUESKY_BLOCK_DIRTY)
101 g_atomic_int_add(&inode->fs->cache_total, -1);
102 if (b->type == BLUESKY_BLOCK_DIRTY)
103 g_atomic_int_add(&inode->fs->cache_dirty, -1);
104 bluesky_string_unref(b->data);
105 bluesky_cloudlog_unref(b->cloudref);
107 g_array_set_size(inode->blocks, blocks);
110 /* Ensure the new last block of the file is properly sized. If the block
111 * is extended, newly-added bytes must be zeroed. */
113 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
116 if (b->type != BLUESKY_BLOCK_ZERO) {
117 bluesky_block_touch(inode, blocks - 1);
118 gsize old_size = b->data->len;
119 gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE;
121 bluesky_string_resize(b->data, new_size);
123 if (new_size > old_size) {
124 memset(&b->data->data[old_size], 0, new_size - old_size);
130 bluesky_inode_update_ctime(inode, 1);
133 void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
134 const char *data, gint len)
136 g_return_if_fail(inode->type == BLUESKY_REGULAR);
137 g_return_if_fail(offset < inode->size);
138 g_return_if_fail(len <= inode->size - offset);
144 uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
145 gint block_offset = offset % BLUESKY_BLOCK_SIZE;
146 gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len);
148 bluesky_block_touch(inode, block_num);
149 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
151 memcpy(&b->data->data[block_offset], data, bytes);
158 bluesky_inode_update_ctime(inode, 1);
161 void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
164 if (len == 0 && offset <= inode->size)
167 g_return_if_fail(inode->type == BLUESKY_REGULAR);
168 g_return_if_fail(offset < inode->size);
169 g_return_if_fail(len <= inode->size - offset);
171 /* Start fetches on any data blocks that we will need for this read. */
172 BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store);
173 barrier->op = STORE_OP_BARRIER;
174 uint64_t start_block, end_block;
175 start_block = offset / BLUESKY_BLOCK_SIZE;
176 end_block = (offset + len - 1) / BLUESKY_BLOCK_SIZE;
177 if (bluesky_verbose) {
178 g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
179 "Start prefetch on blocks %"PRIi64" .. %"PRIi64,
180 start_block, end_block);
182 for (uint64_t i = start_block; i <= end_block; i++) {
183 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
185 if (b->type == BLUESKY_BLOCK_REF)
186 bluesky_block_fetch(inode, b, barrier);
188 bluesky_store_async_submit(barrier);
189 bluesky_store_async_wait(barrier);
190 bluesky_store_async_unref(barrier);
191 if (bluesky_verbose) {
192 g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete.");
196 uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
197 gint block_offset = offset % BLUESKY_BLOCK_SIZE;
198 gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len);
200 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
203 case BLUESKY_BLOCK_ZERO:
204 memset(buf, 0, bytes);
206 case BLUESKY_BLOCK_REF:
207 bluesky_block_fetch(inode, b, NULL);
209 case BLUESKY_BLOCK_CACHED:
210 case BLUESKY_BLOCK_DIRTY:
211 memcpy(buf, &b->data->data[block_offset], bytes);
221 void bluesky_block_fetch(BlueSkyInode *inode, BlueSkyBlock *block,
222 BlueSkyStoreAsync *barrier)
224 if (block->type != BLUESKY_BLOCK_REF)
227 g_mutex_lock(block->cloudref->lock);
228 bluesky_cloudlog_fetch(block->cloudref);
229 block->data = block->cloudref->data;
230 bluesky_string_ref(block->data);
231 g_mutex_unlock(block->cloudref->lock);
232 block->type = BLUESKY_BLOCK_CACHED;
233 g_atomic_int_add(&inode->fs->cache_total, 1);
236 /* Write the given block to cloud-backed storage and mark it clean. */
237 void bluesky_block_flush(BlueSkyInode *inode, BlueSkyBlock *block,
240 BlueSkyFS *fs = inode->fs;
242 if (block->type != BLUESKY_BLOCK_DIRTY)
245 bluesky_cloudlog_unref(block->cloudref);
247 BlueSkyRCStr *data = block->data;
249 BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs);
250 cloudlog->type = LOGTYPE_DATA;
251 cloudlog->inum = inode->inum;
252 cloudlog->data = data;
253 bluesky_string_ref(data);
254 bluesky_cloudlog_sync(cloudlog);
255 *log_items = g_list_prepend(*log_items, cloudlog);
256 bluesky_cloudlog_insert(cloudlog);
258 block->cloudref = cloudlog;
260 block->type = BLUESKY_BLOCK_CACHED;
261 g_atomic_int_add(&fs->cache_dirty, -1);
264 /* Flush all blocks in a file to stable storage. */
265 void bluesky_file_flush(BlueSkyInode *inode, GList **log_items)
267 g_return_if_fail(inode->type == BLUESKY_REGULAR);
269 for (int i = 0; i < inode->blocks->len; i++) {
270 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
271 bluesky_block_flush(inode, b, log_items);
275 /* Drop clean data blocks for a file from cache. */
276 void bluesky_file_drop_cached(BlueSkyInode *inode)
278 g_return_if_fail(inode->type == BLUESKY_REGULAR);
280 for (int i = 0; i < inode->blocks->len; i++) {
281 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
282 if (b->type == BLUESKY_BLOCK_CACHED) {
283 if (bluesky_verbose) {
284 g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
285 "Dropping block %d of inode %"PRIu64" from cache",
287 g_log("bluesky/cache", G_LOG_LEVEL_DEBUG,
288 " (reference count was %d)", b->data->refcount);
291 bluesky_string_unref(b->data);
293 b->type = BLUESKY_BLOCK_REF;
294 g_atomic_int_add(&inode->fs->cache_total, -1);
295 g_mutex_lock(b->cloudref->lock);
296 if (b->cloudref->location_flags & CLOUDLOG_JOURNAL) {
297 bluesky_string_unref(b->cloudref->data);
298 b->cloudref->data = NULL;
300 g_mutex_unlock(b->cloudref->lock);