1 /* Blue Sky: File Systems in the Cloud
3 * Copyright (C) 2009 The Regents of the University of California
4 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
14 #include "bluesky-private.h"
16 /* Core filesystem: handling of regular files and caching of file data. */
18 /* Mark a given block dirty and make sure that data is faulted in so that it
 * can be written to.
 *
21 * If preserve is set to false, this is a hint that the block is about to be
22 * entirely overwritten. In this case, a dirty block is made available but any
23 * prior contents might be lost. A value of preserve = TRUE is always safe.
 *
 * inode:    file owning the block; inode->blocks and inode->size are read and
 *           inode->fs->cache_dirty may be incremented.
 * i:        index into inode->blocks; the g_return_if_fail() check rejects
 *           the call when i is not below inode->blocks->len.
 * preserve: see above.
 *
 * After the visible statements have run, the block has type
 * BLUESKY_BLOCK_DIRTY and block->dirty holds its data. */
24 void bluesky_block_touch(BlueSkyInode *inode, uint64_t i, gboolean preserve)
26 g_return_if_fail(i < inode->blocks->len);
27 BlueSkyBlock *block = &g_array_index(inode->blocks, BlueSkyBlock, i);
// Every block before the last one is a full BLUESKY_BLOCK_SIZE; the length
// derived from inode->size is the remainder belonging to the last block.
// NOTE(review): the declaration of block_len is not visible in this listing.
30 if (i < inode->blocks->len - 1) {
31 block_len = BLUESKY_BLOCK_SIZE;
33 block_len = inode->size - i * BLUESKY_BLOCK_SIZE;
36 switch (block->type) {
37 case BLUESKY_BLOCK_ZERO:
// A zero block has no buffer yet: allocate a zero-filled one of block_len.
38 block->dirty = bluesky_string_new(g_malloc0(block_len), block_len);
40 case BLUESKY_BLOCK_REF:
// Fetch the referenced data and build the dirty buffer from it.
// NOTE(review): the extra bluesky_string_ref() just before the dup suggests
// bluesky_string_dup() consumes one reference -- TODO confirm.
42 // FIXME: locking on the cloudlog?
43 bluesky_block_fetch(inode, block, NULL);
44 bluesky_string_ref(block->ref->data);
45 block->dirty = bluesky_string_dup(block->ref->data);
// NOTE(review): presumably the preserve == FALSE path (the branch lines are
// not visible here): old contents are skipped and a zero-filled buffer is
// used instead of fetching.
47 block->dirty = bluesky_string_new(g_malloc0(block_len), block_len);
50 case BLUESKY_BLOCK_DIRTY:
// Already dirty: the existing buffer is passed through bluesky_string_dup()
// (presumably to obtain an unshared copy -- TODO confirm).
51 block->dirty = bluesky_string_dup(block->dirty);
// Only a transition from a non-dirty type bumps the filesystem-wide count of
// dirty blocks.
55 if (block->type != BLUESKY_BLOCK_DIRTY)
56 g_atomic_int_add(&inode->fs->cache_dirty, 1);
58 block->type = BLUESKY_BLOCK_DIRTY;
// Release the block's reference on its cloud log entry.
// NOTE(review): bluesky_block_flush() asserts block->ref == NULL for dirty
// blocks, so block->ref is expected to be cleared after this; that statement
// is not visible in this listing.
59 bluesky_cloudlog_unref(block->ref);
63 /* Set the size of a file. This will truncate or extend the file as needed.
64 * Newly-allocated bytes are zeroed.
 *
 * inode: file to resize; inode->blocks is grown or shrunk to match.
 * size:  new size in bytes; the g_return_if_fail() check rejects values
 *        above BLUESKY_MAX_FILE_SIZE.
 *
 * The visible code ends by calling bluesky_inode_update_ctime(inode, 1). */
66 void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size)
68 g_return_if_fail(size <= BLUESKY_MAX_FILE_SIZE);
// Size unchanged: presumably returns early (the statement is not visible in
// this listing).
70 if (size == inode->size)
// NOTE(review): size is a uint64_t but is formatted with the signed PRIi64;
// PRIu64 would match the argument type.
73 if (bluesky_verbose) {
74 g_log("bluesky/file", G_LOG_LEVEL_DEBUG,
75 "Truncating file to %"PRIi64" bytes", size);
// Number of blocks needed to hold size bytes, rounding up.
78 uint64_t blocks = (size + BLUESKY_BLOCK_SIZE - 1) / BLUESKY_BLOCK_SIZE;
80 if (blocks > inode->blocks->len) {
81 /* Need to add new blocks to the end of a file. New block structures
82 * are automatically zeroed, which initializes them to be pointers to
83 * zero blocks so we don't need to do any more work. If the
84 * previously-last block in the file is smaller than
85 * BLUESKY_BLOCK_SIZE, extend it to full size. */
86 if (inode->blocks->len > 0) {
87 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
88 inode->blocks->len - 1);
// Zero blocks need no work. A reference block is always touched (its length
// is only known once it has a dirty buffer); a dirty block is touched only
// when it is shorter than a full block.
90 if (b->type != BLUESKY_BLOCK_ZERO
91 && (b->type == BLUESKY_BLOCK_REF
92 || b->dirty->len < BLUESKY_BLOCK_SIZE)) {
93 bluesky_block_touch(inode, inode->blocks->len - 1, TRUE);
94 gsize old_size = b->dirty->len;
95 bluesky_string_resize(b->dirty, BLUESKY_BLOCK_SIZE);
// Zero the bytes between the old end of the block and the full block size.
96 memset(&b->dirty->data[old_size], 0,
97 BLUESKY_BLOCK_SIZE - old_size);
101 g_array_set_size(inode->blocks, blocks);
102 } else if (blocks < inode->blocks->len) {
103 /* Delete blocks from a file. Must reclaim memory. */
// NOTE(review): blocks is a uint64_t narrowed to guint here; this relies on
// the BLUESKY_MAX_FILE_SIZE check above keeping the block count in range.
104 for (guint i = blocks; i < inode->blocks->len; i++) {
105 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
// Keep the filesystem-wide dirty-block counter in step with the blocks being
// discarded, then drop the references each block holds.
106 if (b->type == BLUESKY_BLOCK_DIRTY)
107 g_atomic_int_add(&inode->fs->cache_dirty, -1);
108 bluesky_string_unref(b->dirty);
109 bluesky_cloudlog_unref(b->ref);
111 g_array_set_size(inode->blocks, blocks);
114 /* Ensure the new last block of the file is properly sized. If the block
115 * is extended, newly-added bytes must be zeroed. */
117 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
120 if (b->type != BLUESKY_BLOCK_ZERO) {
121 bluesky_block_touch(inode, blocks - 1, TRUE);
122 gsize old_size = b->dirty->len;
// Bytes of the file that fall inside the last block.
123 gsize new_size = size - (blocks - 1) * BLUESKY_BLOCK_SIZE;
125 bluesky_string_resize(b->dirty, new_size);
127 if (new_size > old_size) {
128 memset(&b->dirty->data[old_size], 0, new_size - old_size);
134 bluesky_inode_update_ctime(inode, 1);
/* Copy len bytes from data into a regular file starting at byte offset.
 *
 * The g_return_if_fail() checks require inode to be of type BLUESKY_REGULAR
 * and the written range to lie inside the current file size
 * (offset < inode->size and len <= inode->size - offset), so the visible code
 * never grows the file.
 *
 * Each affected block is made dirty with bluesky_block_touch() and the bytes
 * are copied into its dirty buffer. The visible code ends by calling
 * bluesky_inode_update_ctime(inode, 1).
 *
 * NOTE(review): the statements that advance from one block to the next are
 * not visible in this listing; confirm against the full file. */
137 void bluesky_file_write(BlueSkyInode *inode, uint64_t offset,
138 const char *data, gint len)
140 g_return_if_fail(inode->type == BLUESKY_REGULAR);
141 g_return_if_fail(offset < inode->size);
142 g_return_if_fail(len <= inode->size - offset);
147 // TODO: Optimization: If we are entirely overwriting a block we don't need
148 // to fetch it from storage first. We don't yet handle the case where the
149 // partial last block of a file is entirely overwritten.
// Split the file offset into a block index and a position inside that block;
// bytes is how much of the request fits into this block.
151 uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
152 gint block_offset = offset % BLUESKY_BLOCK_SIZE;
153 gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len);
// A write covering a whole block from its start does not need the old
// contents; presumably preserve is cleared in that case (the assignment is
// not visible in this listing).
155 gboolean preserve = TRUE;
156 if (block_offset == 0 && bytes == BLUESKY_BLOCK_SIZE) {
159 bluesky_block_touch(inode, block_num, preserve);
160 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
162 memcpy(&b->dirty->data[block_offset], data, bytes);
169 bluesky_inode_update_ctime(inode, 1);
/* Read data from a regular file starting at byte offset into buf.
 *
 * NOTE(review): the rest of the parameter list is not visible in this
 * listing; the body uses buf as the destination and len as the byte count.
 *
 * A zero-length read at or before end of file is special-cased before the
 * argument checks. Otherwise the g_return_if_fail() checks require inode to
 * be of type BLUESKY_REGULAR and the range to lie inside the file
 * (offset < inode->size and len <= inode->size - offset).
 *
 * The visible code first calls bluesky_cloudlog_prefetch() on every
 * reference block in the requested range, then copies the data out. Profile
 * events are recorded around the prefetch phase and at completion. */
172 void bluesky_file_read(BlueSkyInode *inode, uint64_t offset,
// Presumably an early return for empty reads (the statement is not visible in
// this listing).
175 if (len == 0 && offset <= inode->size)
178 g_return_if_fail(inode->type == BLUESKY_REGULAR);
179 g_return_if_fail(offset < inode->size);
180 g_return_if_fail(len <= inode->size - offset);
182 BlueSkyProfile *profile = bluesky_profile_get();
184 bluesky_profile_add_event(profile,
185 g_strdup_printf("Start file read prefetch"));
// First and last block index touched by the byte range [offset, offset+len).
186 uint64_t start_block, end_block;
187 start_block = offset / BLUESKY_BLOCK_SIZE;
188 end_block = (offset + len - 1) / BLUESKY_BLOCK_SIZE;
189 for (uint64_t i = start_block; i <= end_block; i++) {
190 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
// Only reference blocks are prefetched.
192 if (b->type == BLUESKY_BLOCK_REF)
193 bluesky_cloudlog_prefetch(b->ref);
196 bluesky_profile_add_event(profile,
197 g_strdup_printf("End file read prefetch"));
// Split the file offset into a block index and a position inside that block;
// bytes is how much of the request can be served from this block.
200 uint64_t block_num = offset / BLUESKY_BLOCK_SIZE;
201 gint block_offset = offset % BLUESKY_BLOCK_SIZE;
202 gint bytes = MIN(BLUESKY_BLOCK_SIZE - block_offset, len);
204 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock,
// Zero blocks carry no buffer, so the output is filled with zero bytes.
206 if (b->type == BLUESKY_BLOCK_ZERO) {
207 memset(buf, 0, bytes);
// Otherwise pick the buffer to copy from according to the block type.
// NOTE(review): the assignments to data in each branch are not visible in
// this listing.
209 BlueSkyRCStr *data = NULL;
210 if (b->type == BLUESKY_BLOCK_REF) {
211 bluesky_block_fetch(inode, b, NULL);
213 } else if (b->type == BLUESKY_BLOCK_DIRTY) {
216 memcpy(buf, &data->data[block_offset], bytes);
224 bluesky_profile_add_event(profile,
225 g_strdup_printf("BlueSky read complete"));
/* Fetch the data behind a reference block.
 *
 * For a block of type BLUESKY_BLOCK_REF, bluesky_cloudlog_fetch() is called
 * on block->ref while block->ref->lock is held.
 *
 * NOTE(review): neither inode nor barrier is used in the lines visible here;
 * confirm against the full file whether that is intentional. */
228 void bluesky_block_fetch(BlueSkyInode *inode, BlueSkyBlock *block,
229 BlueSkyStoreAsync *barrier)
// Other block types presumably return early (the statement is not visible in
// this listing).
231 if (block->type != BLUESKY_BLOCK_REF)
234 g_mutex_lock(block->ref->lock);
235 bluesky_cloudlog_fetch(block->ref);
236 g_mutex_unlock(block->ref->lock);
// NOTE(review): assuming the check above returns for other types, the block
// is already BLUESKY_BLOCK_REF here and this assignment looks redundant.
237 block->type = BLUESKY_BLOCK_REF;
240 /* Write the given block to cloud-backed storage and mark it clean.
 *
 * Only blocks of type BLUESKY_BLOCK_DIRTY are processed. A new data log
 * entry is created for the block's dirty buffer, prepended to *log_items
 * with its own reference, and installed as block->ref; the block then
 * becomes BLUESKY_BLOCK_REF and the filesystem's dirty-block counter is
 * decremented.
 *
 * NOTE(review): the rest of the parameter list is not visible in this
 * listing; the body uses log_items as a pointer to a GList of log entries. */
241 void bluesky_block_flush(BlueSkyInode *inode, BlueSkyBlock *block,
244 BlueSkyFS *fs = inode->fs;
// Non-dirty blocks presumably return early (the statement is not visible in
// this listing).
246 if (block->type != BLUESKY_BLOCK_DIRTY)
// A dirty block is expected not to hold a cloud log reference.
249 g_assert(block->ref == NULL);
251 BlueSkyCloudLog *cloudlog = bluesky_cloudlog_new(fs, NULL);
252 cloudlog->type = LOGTYPE_DATA;
253 cloudlog->inum = inode->inum;
254 cloudlog->data = block->dirty; // String ownership is transferred
255 bluesky_cloudlog_stats_update(cloudlog, 1);
256 bluesky_cloudlog_sync(cloudlog);
257 bluesky_cloudlog_ref(cloudlog); // Reference for log_items list
258 *log_items = g_list_prepend(*log_items, cloudlog);
259 bluesky_cloudlog_insert(cloudlog);
261 block->ref = cloudlog; // Uses initial reference from _new()
// NOTE(review): since ownership of block->dirty moved to the log entry above,
// block->dirty is presumably cleared around here; that statement is not
// visible in this listing.
263 block->type = BLUESKY_BLOCK_REF;
// One fewer dirty block in the filesystem-wide count.
265 g_atomic_int_add(&fs->cache_dirty, -1);
268 /* Flush all blocks in a file to stable storage.
 *
 * inode:     must be of type BLUESKY_REGULAR; the g_return_if_fail() check
 *            rejects anything else.
 * log_items: passed through unchanged to bluesky_block_flush() for every
 *            block of the file. */
269 void bluesky_file_flush(BlueSkyInode *inode, GList **log_items)
271 g_return_if_fail(inode->type == BLUESKY_REGULAR);
// NOTE(review): i is a signed int compared against the unsigned
// inode->blocks->len; an unsigned index would avoid the mixed comparison.
273 for (int i = 0; i < inode->blocks->len; i++) {
274 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
275 bluesky_block_flush(inode, b, log_items);
279 /* Drop clean data blocks for a file from cache.
 *
 * inode: must be of type BLUESKY_REGULAR; the g_return_if_fail() check
 *        rejects anything else.
 *
 * Only blocks of type BLUESKY_BLOCK_REF are considered, and each one is
 * examined while its log entry's lock is held.
 *
 * NOTE(review): this listing ends inside the function, so the remainder of
 * the body is not documented here. */
280 void bluesky_file_drop_cached(BlueSkyInode *inode)
282 g_return_if_fail(inode->type == BLUESKY_REGULAR);
// NOTE(review): i is a signed int compared against the unsigned
// inode->blocks->len.
284 for (int i = 0; i < inode->blocks->len; i++) {
285 BlueSkyBlock *b = &g_array_index(inode->blocks, BlueSkyBlock, i);
286 if (b->type == BLUESKY_BLOCK_REF) {
287 g_mutex_lock(b->ref->lock);
// Data is released only when it is present, its data_lock_count is zero and
// location_flags is non-zero (presumably meaning the entry is stored
// somewhere it can be fetched from again -- TODO confirm).
288 if (b->ref->data != NULL
289 && g_atomic_int_get(&b->ref->data_lock_count) == 0
290 && (b->ref->location_flags != 0))
// The stats update is bracketed: -1 before the data is released, +1 after.
// NOTE(review): b->ref->data is presumably reset between the two calls; that
// statement is not visible in this listing.
292 bluesky_cloudlog_stats_update(b->ref, -1);
293 bluesky_string_unref(b->ref->data);
295 bluesky_cloudlog_stats_update(b->ref, 1);
297 g_mutex_unlock(b->ref->lock);