1 /* Blue Sky: File Systems in the Cloud
3 * Copyright (C) 2009 The Regents of the University of California
4 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
16 #include "bluesky-private.h"
18 /* Inode maps. There is both an in-memory representation as well as the
19 * serialized form in the cloud.
21 * The inode map is broken up into pieces by partitioning in the inode number
22 * space. The checkpoint region will contain pointers to each of these map
25 /* Roughly how many inodes to try to put in each inode map range. */
26 static int target_imap_size = 4096;
28 /* Comparison function for lookuping up inode map entries. Reads a 64-bit
29 * value from the pointed-to address to do the comparison, so we require that
30 * the InodeMapEntry and InodeMapRange structures start with the 64-bit value
31 * they are sorted by. */
32 static gint compare(gconstpointer a, gconstpointer b, gpointer user_data)
34 uint64_t x = *(uint64_t *)a;
35 uint64_t y = *(uint64_t *)b;
45 /* Look up the inode map entry for the given inode number. If action is +1,
46 * create a new entry if it does not already exist; if it is -1 then delete the
47 * entry instead. If 0, return the entry if it is already present. A non-zero
48 * action value will mark the inode map range as dirty. */
49 InodeMapEntry *bluesky_inode_map_lookup(GSequence *inode_map, uint64_t inum,
54 /* First, scan through to find the inode map section that covers the
55 * appropriate range. Create one if it does not exist but we were asked to
57 InodeMapRange *range = NULL;
59 i = g_sequence_search(inode_map, &inum, compare, NULL);
60 i = g_sequence_iter_prev(i);
61 if (!g_sequence_iter_is_end(i))
62 range = (InodeMapRange *)g_sequence_get(i);
63 if (range == NULL || inum < range->start || inum > range->end) {
67 /* Create a new range. First, determine bounds on the range enpoints
68 * based on neighboring ranges. Then, shrink the size of the range to
69 * a reasonable size that covers the needed inode number. */
70 range = g_new0(InodeMapRange, 1);
72 range->end = G_MAXUINT64;
73 range->map_entries = g_sequence_new(NULL);
74 range->serialized = NULL;
76 g_print("Creating inode map range, 1: start=%"PRIu64" end=%"PRIu64"\n",
77 range->start, range->end);
79 if (!g_sequence_iter_is_begin(i) && !g_sequence_iter_is_end(i))
80 range->start = ((InodeMapRange *)g_sequence_get(i))->end + 1;
81 i = g_sequence_iter_next(i);
82 if (!g_sequence_iter_is_end(i))
83 range->end = ((InodeMapRange *)g_sequence_get(i))->start - 1;
85 g_print("Creating inode map range, 2: start=%"PRIu64" end=%"PRIu64"\n",
86 range->start, range->end);
87 g_assert(inum >= range->start && inum <= range->end);
89 range->start = MAX(range->start,
90 inum & ~(uint64_t)(target_imap_size - 1));
91 range->end = MIN(range->end, range->start + target_imap_size - 1);
93 g_print("Creating inode map range, 3: start=%"PRIu64" end=%"PRIu64"\n",
94 range->start, range->end);
96 g_sequence_insert_sorted(inode_map, range, compare, NULL);
99 /* Next, try to find the entry within this range of the inode map. */
100 InodeMapEntry *entry = NULL;
101 i = g_sequence_search(range->map_entries, &inum, compare, NULL);
102 i = g_sequence_iter_prev(i);
103 if (!g_sequence_iter_is_end(i))
104 entry = (InodeMapEntry *)g_sequence_get(i);
105 if (entry == NULL || inum != entry->inum) {
109 entry = g_new0(InodeMapEntry, 1);
111 g_sequence_insert_sorted(range->map_entries, entry, compare, NULL);
113 g_print("Created inode map entry for %"PRIu64"\n", inum);
117 bluesky_cloudlog_unref(range->serialized);
118 range->serialized = NULL;
121 /* Were we requested to delete the item? */
123 g_sequence_remove(i);
130 /* Convert a section of the inode map to serialized form, in preparation for
131 * writing it out to the cloud. */
132 static void bluesky_inode_map_serialize_section(BlueSkyFS *fs,
133 InodeMapRange *range)
135 if (range->serialized != NULL)
138 GString *buf = g_string_new("");
139 BlueSkyCloudLog *log = bluesky_cloudlog_new(fs, NULL);
140 log->type = LOGTYPE_INODE_MAP;
143 GSequenceIter *i = g_sequence_get_begin_iter(range->map_entries);
144 while (!g_sequence_iter_is_end(i)) {
145 InodeMapEntry *entry = (InodeMapEntry *)g_sequence_get(i);
146 uint64_t inum = GUINT64_TO_LE(entry->inum);
147 g_string_append_len(buf, (const char *)&inum, sizeof(inum));
148 g_array_append_val(log->links, entry->item);
149 i = g_sequence_iter_next(i);
152 log->data = bluesky_string_new_from_gstring(buf);
153 bluesky_cloudlog_unref(range->serialized);
154 range->serialized = log;
157 BlueSkyCloudLog *bluesky_inode_map_serialize(BlueSkyFS *fs)
159 GString *buf = g_string_new("");
160 BlueSkyCloudLog *log = bluesky_cloudlog_new(fs, NULL);
161 log->type = LOGTYPE_CHECKPOINT;
164 GSequenceIter *i = g_sequence_get_begin_iter(fs->inode_map);
165 while (!g_sequence_iter_is_end(i)) {
166 InodeMapRange *range = (InodeMapRange *)g_sequence_get(i);
167 uint64_t inum = GUINT64_TO_LE(range->start);
168 g_string_append_len(buf, (const char *)&inum, sizeof(inum));
169 inum = GUINT64_TO_LE(range->end);
170 g_string_append_len(buf, (const char *)&inum, sizeof(inum));
172 if (range->serialized == NULL)
173 bluesky_inode_map_serialize_section(fs, range);
174 bluesky_cloudlog_ref(range->serialized);
175 g_array_append_val(log->links, range->serialized);
176 i = g_sequence_iter_next(i);
179 log->data = bluesky_string_new_from_gstring(buf);
183 /* Reconstruct the inode map from data stored in the cloud. */
184 static void bluesky_inode_map_deserialize(BlueSkyFS *fs, BlueSkyCloudLog *imap)
186 g_mutex_lock(imap->lock);
187 bluesky_cloudlog_fetch(imap);
188 g_assert(imap->data != NULL);
189 g_assert(imap->data->len == 16 * imap->links->len);
190 //uint64_t *inum_range = (uint64_t *)imap->data->data;
191 for (int i = 0; i < imap->links->len; i++) {
192 //int64_t start = GUINT64_FROM_LE(*inum_range++);
193 //int64_t end = GUINT64_FROM_LE(*inum_range++);
194 BlueSkyCloudLog *section = g_array_index(imap->links,
195 BlueSkyCloudLog *, i);
196 g_mutex_lock(section->lock);
197 bluesky_cloudlog_fetch(section);
198 g_print("Loaded cloudlog item (%zd bytes)\n", section->data->len);
200 uint64_t *inum = (uint64_t *)section->data->data;
201 for (int j = 0; j < section->links->len; j++) {
202 InodeMapEntry *entry;
203 entry = bluesky_inode_map_lookup(fs->inode_map, *inum, 1);
205 entry->item = g_array_index(section->links,
206 BlueSkyCloudLog *, j);
207 bluesky_cloudlog_ref(entry->item);
208 entry->id = entry->item->id;
209 entry->location = entry->item->location;
212 g_mutex_unlock(section->lock);
214 g_mutex_unlock(imap->lock);
217 /* Find the most recent checkpoint record in the cloud and reload inode map
218 * data from it to initialize the filesystem. Returns a boolean indicating
219 * whether a checkpoint was found and loaded or not. */
220 gboolean bluesky_checkpoint_load(BlueSkyFS *fs)
222 char *last_segment = bluesky_store_lookup_last(fs->store, "log-");
223 if (last_segment == NULL)
226 g_print("Last cloud log segment: %s\n", last_segment);
227 int seq = atoi(last_segment + 13);
228 fs->log_state->location.sequence = seq + 1;
230 BlueSkyRCStr *last = bluesky_store_get(fs->store, last_segment);
231 g_free(last_segment);
233 g_warning("Unable to fetch last log segment from cloud!");
237 /* Scan through the contents of the last log segment to find a checkpoint
238 * record. We need to do a linear scan since at this point we don't have a
239 * direct pointer; once we have the last commit record then all other data
240 * can be loaded by directly following pointers. */
241 const char *buf = last->data;
242 size_t len = last->len;
243 const char *checkpoint = NULL;
244 size_t checkpoint_size = 0;
245 while (len > sizeof(struct cloudlog_header)) {
246 struct cloudlog_header *header = (struct cloudlog_header *)buf;
247 if (memcmp(header->magic, CLOUDLOG_MAGIC, 4) != 0) {
248 g_warning("Could not parse cloudlog entry!");
251 int size = sizeof(struct cloudlog_header);
252 size += GUINT32_FROM_LE(header->size1);
253 size += GUINT32_FROM_LE(header->size2);
254 size += GUINT32_FROM_LE(header->size3);
256 g_warning("Cloudlog entry is malformed (size too large)!");
259 if (header->type - '0' == LOGTYPE_CHECKPOINT) {
261 checkpoint_size = size;
267 g_print("Found checkpoint record at %zd (size %zd)\n",
268 checkpoint - last->data, checkpoint_size);
270 /* Bootstrap the loading process by manually setting the location of this
272 BlueSkyCloudLog *commit;
273 commit = bluesky_cloudlog_get(fs,
274 ((struct cloudlog_header *)checkpoint)->id);
275 g_mutex_lock(commit->lock);
276 commit->location_flags |= CLOUDLOG_CLOUD;
277 commit->location.directory = 0;
278 commit->location.sequence = seq;
279 commit->location.offset = checkpoint - last->data;
280 commit->location.size = checkpoint_size;
281 g_mutex_unlock(commit->lock);
283 bluesky_inode_map_deserialize(fs, commit);
284 //bluesky_cloudlog_unref(commit);