/* Blue Sky: File Systems in the Cloud
 *
 * Copyright (C) 2009  The Regents of the University of California
 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
 */
#include <inttypes.h>
#include <stdint.h>
#include <string.h>

#include "bluesky-private.h"
16 /* Core filesystem: handling of regular files and caching of file data. */
18 /* Mark a given block dirty and make sure that data is faulted in so that it
19 * can be written to. */
20 void bluesky_block_touch(BlueSkyInode *inode, uint64_t i)
22 g_return_if_fail(i < inode->blocks->len);
23 BlueSkyBlock *block = &g_array_index(inode->blocks, BlueSkyBlock, i);
26 if (i < inode->blocks->len - 1) {
27 block_len = BLUESKY_BLOCK_SIZE;
29 block_len = inode->size - i * BLUESKY_BLOCK_SIZE;
32 switch (block->type) {
33 case BLUESKY_BLOCK_ZERO:
34 block->dirty = bluesky_string_new(g_malloc0(block_len), block_len);
36 case BLUESKY_BLOCK_REF:
37 // FIXME: locking on the cloudlog?
38 bluesky_block_fetch(inode, block, NULL);
39 bluesky_string_ref(block->ref->data);
40 block->dirty = bluesky_string_dup(block->ref->data);
42 case BLUESKY_BLOCK_DIRTY:
43 block->dirty = bluesky_string_dup(block->dirty);
47 if (block->type != BLUESKY_BLOCK_DIRTY)
48 g_atomic_int_add(&inode->fs->cache_dirty, 1);
50 block->type = BLUESKY_BLOCK_DIRTY;
51 bluesky_cloudlog_unref(block->ref);
55 /* Set the size of a file. This will truncate or extend the file as needed.
56 * Newly-allocated bytes are zeroed. */
58 void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
60 g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE);
62 if (size == inode->size)
65 if (bluesky_verbose) {
66 g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
67 "Truncating file to %"PRIi64" bytes", size);
70 uint64_t blocks = (size + BLUESKY_BLOCK_SIZE - 1) / BLUESKY_BLOCK_SIZE;
72 if (blocks > inode->blocks->len) {
73 /* Need to add new blocks to the end of a file. New block structures
74 * are automatically zeroed, which initializes them to be pointers to
75 * zero blocks so we don't need to do any more work. If the
76 * previously-last block in the file is smaller than
77 * BLUESKY_BLOCK_SIZE, extend it to full size. */
78 if (inode->blocks->len > 0) {
79 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
80 inode->blocks->len - 1);
82 if (b->type != BLUESKY_BLOCK_ZERO
83 && (b->type == BLUESKY_BLOCK_REF
84 || b->dirty->len < BLUESKY_BLOCK_SIZE)) {
85 bluesky_block_touch(inode, inode->blocks->len - 1);
86 gsize old_size = b->dirty->len;
87 bluesky_string_resize(b->dirty, BLUESKY_BLOCK_SIZE);
88 memset(&b->dirty->data[old_size], 0,
89 BLUESKY_BLOCK_SIZE - old_size);
93 g_array_set_size(inode->blocks, blocks);
94 } else if (blocks < inode->blocks->len) {
95 /* Delete blocks from a file. Must reclaim memory. */
96 for (guint i = blocks; i < inode->blocks->len; i++) {
97 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
98 if (b->type == BLUESKY_BLOCK_DIRTY)
99 g_atomic_int_add(&inode->fs->cache_dirty, -1);
100 bluesky_string_unref(b->dirty);
101 bluesky_cloudlog_unref(b->ref);
103 g_array_set_size(inode->blocks, blocks);
106 /* Ensure the new last block of the file is properly sized. If the block
107 * is extended, newly-added bytes must be zeroed. */
109 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
112 if (b->type != BLUESKY_BLOCK_ZERO) {
113 bluesky_block_touch(inode, blocks - 1);
114 gsize old_size = b->dirty->len;
115 gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE;
117 bluesky_string_resize(b->dirty, new_size);
119 if (new_size > old_size) {
120 memset(&b->dirty->data[old_size], 0, new_size - old_size);
126 bluesky_inode_update_ctime(inode, 1);
129 void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
130 const char *data, gint len)
132 g_return_if_fail(inode->type == BLUESKY_REGULAR);
133 g_return_if_fail(offset < inode->size);
134 g_return_if_fail(len <= inode->size - offset);
139 // TODO: Optimization: If we are entirely overwriting a block we don't need
140 // to fetch it frm storage first.
142 uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
143 gint block_offset = offset % BLUESKY_BLOCK_SIZE;
144 gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len);
146 bluesky_block_touch(inode, block_num);
147 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
149 memcpy(&b->dirty->data[block_offset], data, bytes);
156 bluesky_inode_update_ctime(inode, 1);
159 void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
162 if (len == 0 && offset <= inode->size)
165 g_return_if_fail(inode->type == BLUESKY_REGULAR);
166 g_return_if_fail(offset < inode->size);
167 g_return_if_fail(len <= inode->size - offset);
170 /* Start fetches on any data blocks that we will need for this read. */
171 BlueSkyStoreAsync *barrier = bluesky_store_async_new(inode->fs->store);
172 barrier->op = STORE_OP_BARRIER;
173 uint64_t start_block, end_block;
174 start_block = offset / BLUESKY_BLOCK_SIZE;
175 end_block = (offset + len - 1) / BLUESKY_BLOCK_SIZE;
176 if (bluesky_verbose) {
177 g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
178 "Start prefetch on blocks %"PRIi64" .. %"PRIi64,
179 start_block, end_block);
181 for (uint64_t i = start_block; i <= end_block; i++) {
182 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
184 if (b->type == BLUESKY_BLOCK_REF)
185 bluesky_block_fetch(inode, b, barrier);
187 bluesky_store_async_submit(barrier);
188 bluesky_store_async_wait(barrier);
189 bluesky_store_async_unref(barrier);
190 if (bluesky_verbose) {
191 g_log("bluesky/file", G_LOG_LEVEL_DEBUG, "Prefetch complete.");
195 uint64_t start_block, end_block;
196 start_block = offset / BLUESKY_BLOCK_SIZE;
197 end_block = (offset + len - 1) / BLUESKY_BLOCK_SIZE;
198 for (uint64_t i = start_block; i <= end_block; i++) {
199 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
201 if (b->type == BLUESKY_BLOCK_REF)
202 bluesky_cloudlog_prefetch(b->ref);
206 uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
207 gint block_offset = offset % BLUESKY_BLOCK_SIZE;
208 gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len);
210 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
212 if (b->type == BLUESKY_BLOCK_ZERO) {
213 memset(buf, 0, bytes);
215 BlueSkyRCStr *data = NULL;
216 if (b->type == BLUESKY_BLOCK_REF) {
217 bluesky_block_fetch(inode, b, NULL);
219 } else if (b->type == BLUESKY_BLOCK_DIRTY) {
222 memcpy(buf, &data->data[block_offset], bytes);
231 void bluesky_block_fetch(BlueSkyInode *inode, BlueSkyBlock *block,
232 BlueSkyStoreAsync *barrier)
234 if (block->type != BLUESKY_BLOCK_REF)
237 g_mutex_lock(block->ref->lock);
238 bluesky_cloudlog_fetch(block->ref);
239 g_mutex_unlock(block->ref->lock);
240 block->type = BLUESKY_BLOCK_REF;
243 /* Write the given block to cloud-backed storage and mark it clean. */
244 void bluesky_block_flush(BlueSkyInode *inode, BlueSkyBlock *block,
247 BlueSkyFS *fs = inode->fs;
249 if (block->type != BLUESKY_BLOCK_DIRTY)
252 g_assert(block->ref == NULL);
254 BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs, NULL);
255 cloudlog->type = LOGTYPE_DATA;
256 cloudlog->inum = inode->inum;
257 cloudlog->data = block->dirty; // String ownership is transferred
258 bluesky_cloudlog_stats_update(cloudlog, 1);
259 bluesky_cloudlog_sync(cloudlog);
260 bluesky_cloudlog_ref(cloudlog); // Reference for log_items list
261 *log_items = g_list_prepend(*log_items, cloudlog);
262 bluesky_cloudlog_insert(cloudlog);
264 block->ref = cloudlog; // Uses initial reference from _new()
266 block->type = BLUESKY_BLOCK_REF;
268 g_atomic_int_add(&fs->cache_dirty, -1);
271 /* Flush all blocks in a file to stable storage. */
272 void bluesky_file_flush(BlueSkyInode *inode, GList **log_items)
274 g_return_if_fail(inode->type == BLUESKY_REGULAR);
276 for (int i = 0; i < inode->blocks->len; i++) {
277 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
278 bluesky_block_flush(inode, b, log_items);
282 /* Drop clean data blocks for a file from cache. */
283 void bluesky_file_drop_cached(BlueSkyInode *inode)
285 g_return_if_fail(inode->type == BLUESKY_REGULAR);
287 for (int i = 0; i < inode->blocks->len; i++) {
288 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
289 if (b->type == BLUESKY_BLOCK_REF) {
290 g_mutex_lock(b->ref->lock);
291 if (b->ref->data != NULL
292 && g_atomic_int_get(&b->ref->data_lock_count) == 0
293 && (b->ref->location_flags != 0))
295 bluesky_cloudlog_stats_update(b->ref, -1);
296 bluesky_string_unref(b->ref->data);
298 bluesky_cloudlog_stats_update(b->ref, 1);
300 g_mutex_unlock(b->ref->lock);