1 /* Blue Sky: File Systems in the Cloud
3 * Copyright (C) 2009 The Regents of the University of California
4 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
16 #include <sys/syscall.h>
17 #include <sys/types.h>
19 #include "bluesky-private.h"
21 /* Miscellaneous useful functions that don't really fit anywhere else. */
23 bluesky_time_hires bluesky_now_hires()
27 if (clock_gettime(CLOCK_REALTIME, &time) != 0) {
28 perror("clock_gettime");
32 return (int64_t)(time.tv_sec) * 1000000000 + time.tv_nsec;
35 /* Convert a UTF-8 string to lowercase. This can be used to implement
36 * case-insensitive lookups and comparisons, by normalizing all values to
37 * lowercase first. Returns a newly-allocated string as a result. */
38 gchar *bluesky_lowercase(const gchar *s)
40 /* TODO: Unicode handling; for now just do ASCII. */
41 return g_ascii_strdown(s, -1);
44 gboolean bluesky_inode_is_ready(BlueSkyInode *inode)
49 g_mutex_lock(inode->lock);
50 gboolean valid = (inode->type != BLUESKY_PENDING
51 && inode->type != BLUESKY_INVALID);
53 g_mutex_unlock(inode->lock);
58 /**** Reference-counted strings. ****/
60 /* Create and return a new reference-counted string. The reference count is
61 * initially one. The newly-returned string takes ownership of the memory
62 * pointed at by data, and will call g_free on it when the reference count
64 BlueSkyRCStr *bluesky_string_new(gpointer data, gsize len)
66 BlueSkyRCStr *string = g_new(BlueSkyRCStr, 1);
70 g_atomic_int_set(&string->refcount, 1);
74 /* Create a new BlueSkyRCStr from a GString. The GString is destroyed. */
75 BlueSkyRCStr *bluesky_string_new_from_gstring(GString *s)
78 return bluesky_string_new(g_string_free(s, FALSE), len);
81 /* Create a new BlueSkyRCStr from a memory-mapped buffer. */
82 BlueSkyRCStr *bluesky_string_new_from_mmap(BlueSkyCacheFile *mmap,
83 int offset, gsize len)
85 g_assert(offset + len <= mmap->len);
86 g_assert(mmap->addr != NULL);
88 BlueSkyRCStr *string = g_new(BlueSkyRCStr, 1);
90 g_atomic_int_inc(&mmap->mapcount);
91 string->data = (char *)mmap->addr + offset;
93 g_atomic_int_set(&string->refcount, 1);
97 void bluesky_string_ref(BlueSkyRCStr *string)
102 g_atomic_int_inc(&string->refcount);
105 void bluesky_string_unref(BlueSkyRCStr *string)
110 if (g_atomic_int_dec_and_test(&string->refcount)) {
111 if (string->mmap == NULL) {
112 g_free(string->data);
114 bluesky_mmap_unref(string->mmap);
120 /* Duplicate and return a new reference-counted string, containing a copy of
121 * the original data, with a reference count of 1. As an optimization, if the
122 * passed-in string already has a reference count of 1, the original is
123 * returned. Can be used to make a mutable copy of a shared string. For this
124 * to truly be safe, it is probably needed that there be some type of lock
125 * protecting access to the string. */
126 BlueSkyRCStr *bluesky_string_dup(BlueSkyRCStr *string)
131 if (string->mmap != NULL) {
133 s = bluesky_string_new(g_memdup(string->data, string->len),
135 bluesky_string_unref(string);
139 if (g_atomic_int_dec_and_test(&string->refcount)) {
140 /* There are no other shared copies, so return this one. */
141 g_atomic_int_inc(&string->refcount);
144 return bluesky_string_new(g_memdup(string->data, string->len),
149 /* Resize the data block used by a BlueSkyRCStr. The data pointer might change
150 * after making this call, so it should not be cached across calls to this
151 * function. To avoid confusing any other users, the caller probably ought to
152 * hold the only reference to the string (by calling bluesky_string_dup first
154 void bluesky_string_resize(BlueSkyRCStr *string, gsize len)
156 g_assert(string->mmap == NULL);
158 if (string->len == len)
161 g_warn_if_fail(string->refcount == 1);
163 string->data = g_realloc(string->data, len);
167 /* Cache LRU list management functions. These manage the doubly-linked list of
168 * inodes sorted by accessed/modified time. The FS lock should be held while
171 * _remove will unlink an inode from the linked list.
173 * _prepend and _append insert an inode at the head or tail of the linked list,
174 * and return a pointer to the linked list element (which should be stored in
175 * the inode); the inode should not already be in the list.
177 * _head and _tail simply return the first or last item inode in the list. */
178 void bluesky_list_unlink(GList *head, GList *item)
183 if (head->prev == item)
184 head->prev = item->prev;
185 head->next = g_list_delete_link(head->next, item);
188 GList *bluesky_list_prepend(GList *head, BlueSkyInode *inode)
190 head->next = g_list_prepend(head->next, inode);
191 if (head->prev == NULL)
192 head->prev = g_list_last(head->next);
196 GList *bluesky_list_append(GList *head, BlueSkyInode *inode)
198 if (head->next == NULL)
199 return bluesky_list_prepend(head, inode);
201 g_assert(head->prev != NULL && head->prev->next == NULL);
203 GList *link = g_list_alloc();
206 link->prev = head->prev;
207 head->prev->next = link;
212 BlueSkyInode *bluesky_list_head(GList *head)
214 if (head->next == NULL)
217 return (BlueSkyInode *)head->next->data;
220 BlueSkyInode *bluesky_list_tail(GList *head)
222 if (head->prev == NULL)
225 return (BlueSkyInode *)head->prev->data;
228 /**** Range sets. ****/
230 /* These are a data structure which can track a set of discontiguous integer
231 * ranges--such as the partitioning of the inode number space or the bytes in a
232 * log file into objects. This current prototype implementation just tracks
233 * the starting offset with a hash table and doesn't track the length, but
234 * should be extended later to track properly. */
236 struct BlueSkyRangeset {
240 static gint compare64(gconstpointer a, gconstpointer b, gpointer user_data)
242 uint64_t x = *(uint64_t *)a;
243 uint64_t y = *(uint64_t *)b;
253 BlueSkyRangeset *bluesky_rangeset_new()
255 BlueSkyRangeset *rangeset = g_new(BlueSkyRangeset, 1);
256 rangeset->seq = g_sequence_new(g_free);
260 void bluesky_rangeset_free(BlueSkyRangeset *rangeset)
262 g_sequence_free(rangeset->seq);
266 gboolean bluesky_rangeset_insert(BlueSkyRangeset *rangeset,
267 uint64_t start, uint64_t length,
271 i = g_sequence_search(rangeset->seq, &start, compare64, NULL);
272 i = g_sequence_iter_prev(i);
274 /* TODO: Checks that no other item overlaps. */
276 BlueSkyRangesetItem *item = g_new(BlueSkyRangesetItem, 1);
278 item->length = length;
280 g_sequence_insert_sorted(rangeset->seq, item, compare64, NULL);
285 const BlueSkyRangesetItem *bluesky_rangeset_lookup(BlueSkyRangeset *rangeset,
289 i = g_sequence_search(rangeset->seq, &offset, compare64, NULL);
290 i = g_sequence_iter_prev(i);
291 if (g_sequence_iter_is_end(i))
294 BlueSkyRangesetItem *item = (BlueSkyRangesetItem *)g_sequence_get(i);
295 if (offset >= item->start && offset < item->start + item->length)
301 /* Look up the first rangeset item starting at or following the given address.
302 * Can be used to iterate through a rangeset. */
303 const BlueSkyRangesetItem *bluesky_rangeset_lookup_next(BlueSkyRangeset *rangeset, uint64_t offset)
306 i = g_sequence_search(rangeset->seq, &offset, compare64, NULL);
307 i = g_sequence_iter_prev(i);
308 if (g_sequence_iter_is_end(i))
310 BlueSkyRangesetItem *item = (BlueSkyRangesetItem *)g_sequence_get(i);
311 if (item->start < offset) {
312 i = g_sequence_iter_next(i);
313 if (g_sequence_iter_is_end(i))
316 item = (BlueSkyRangesetItem *)g_sequence_get(i);
321 /* Determine the full extent of a rangeset--the smallest offset covered and the
322 * length needed to extend to the end of the last item. */
323 void bluesky_rangeset_get_extents(BlueSkyRangeset *rangeset,
324 uint64_t *start, uint64_t *length)
327 BlueSkyRangesetItem *item;
329 i = g_sequence_get_begin_iter(rangeset->seq);
330 if (g_sequence_iter_is_end(i)) {
336 item = (BlueSkyRangesetItem *)g_sequence_get(i);
337 *start = item->start;
339 i = g_sequence_get_end_iter(rangeset->seq);
340 i = g_sequence_iter_prev(i);
341 item = (BlueSkyRangesetItem *)g_sequence_get(i);
342 *length = (item->start + item->length) - *start;
345 /**** Request response-time tracking. ****/
349 bluesky_time_hires timestamp;
353 /* To catch attempts to access to invalid profile structures. */
354 #define PROFILE_MAGIC 0x439929d8
356 static FILE *profiling_file = NULL;
358 BlueSkyProfile *bluesky_profile_new()
360 BlueSkyProfile *profile = g_new0(BlueSkyProfile, 1);
361 profile->lock = g_mutex_new();
362 profile->magic = PROFILE_MAGIC;
366 void bluesky_profile_free(BlueSkyProfile *profile)
368 if (profile->magic != PROFILE_MAGIC) {
369 g_warning("Access to invalid BlueSkyProfile object!");
372 while (profile->events != NULL) {
373 RTEvent *event = (RTEvent *)profile->events->data;
374 g_free(event->message);
376 profile->events = g_list_delete_link(profile->events, profile->events);
379 g_mutex_free(profile->lock);
380 g_free(profile->description);
384 void bluesky_profile_add_event(BlueSkyProfile *profile, char *message)
386 if (profiling_file == NULL)
389 g_return_if_fail(profile != NULL);
391 if (profile->magic != PROFILE_MAGIC) {
392 g_warning("Access to invalid BlueSkyProfile object!");
395 g_mutex_lock(profile->lock);
396 RTEvent *event = g_new(RTEvent, 1);
397 event->timestamp = bluesky_now_hires();
398 /* FIXME: Non-portable */
399 event->tid = syscall(SYS_gettid);
400 event->message = message;
401 profile->events = g_list_prepend(profile->events, event);
402 g_mutex_unlock(profile->lock);
405 static GStaticMutex profiling_print_lock = G_STATIC_MUTEX_INIT;
407 void bluesky_profile_set_output(FILE *stream)
409 profiling_file = stream;
412 void bluesky_profile_print(BlueSkyProfile *profile)
414 FILE *stream = profiling_file;
418 g_return_if_fail(profile != NULL);
420 if (profile->magic != PROFILE_MAGIC) {
421 g_warning("Access to invalid BlueSkyProfile object!");
425 g_mutex_lock(profile->lock);
426 g_static_mutex_lock(&profiling_print_lock);
427 fprintf(stream, "Event Timeline: %s\n", profile->description);
428 GList *link = g_list_last(profile->events);
429 bluesky_time_hires last_time = 0;
430 while (link != NULL) {
431 RTEvent *event = (RTEvent *)link->data;
432 fprintf(stream, " [%d] [%"PRIi64" ns]: %s\n",
433 event->tid, event->timestamp - last_time, event->message);
434 last_time = event->timestamp;
437 g_static_mutex_unlock(&profiling_print_lock);
438 g_mutex_unlock(profile->lock);
441 static GStaticPrivate per_thread_profile = G_STATIC_PRIVATE_INIT;
443 BlueSkyProfile *bluesky_profile_get()
445 return (BlueSkyProfile *)g_static_private_get(&per_thread_profile);
448 void bluesky_profile_set(BlueSkyProfile *profile)
450 g_static_private_set(&per_thread_profile, profile, NULL);