1 /* Blue Sky: File Systems in the Cloud
3 * Copyright (C) 2009 The Regents of the University of California
4 * Written by Michael Vrable <mvrable@cs.ucsd.edu>
16 #include <sys/syscall.h>
17 #include <sys/types.h>
19 #include "bluesky-private.h"
21 /* Miscellaneous useful functions that don't really fit anywhere else. */
23 bluesky_time_hires bluesky_now_hires()
27 if (clock_gettime(CLOCK_REALTIME, &time) != 0) {
28 perror("clock_gettime");
32 return (int64_t)(time.tv_sec) * 1000000000 + time.tv_nsec;
35 /* Convert a UTF-8 string to lowercase. This can be used to implement
36 * case-insensitive lookups and comparisons, by normalizing all values to
37 * lowercase first. Returns a newly-allocated string as a result. */
38 gchar *bluesky_lowercase(const gchar *s)
40 /* TODO: Unicode handling; for now just do ASCII. */
41 return g_ascii_strdown(s, -1);
44 gboolean bluesky_inode_is_ready(BlueSkyInode *inode)
49 g_mutex_lock(inode->lock);
50 gboolean valid = (inode->type != BLUESKY_PENDING
51 && inode->type != BLUESKY_INVALID);
53 g_mutex_unlock(inode->lock);
58 /**** Reference-counted strings. ****/
60 /* Create and return a new reference-counted string. The reference count is
61 * initially one. The newly-returned string takes ownership of the memory
62 * pointed at by data, and will call g_free on it when the reference count
64 BlueSkyRCStr *bluesky_string_new(gpointer data, gsize len)
66 BlueSkyRCStr *string = g_new(BlueSkyRCStr, 1);
70 g_atomic_int_set(&string->refcount, 1);
74 /* Create a new BlueSkyRCStr from a GString. The GString is destroyed. */
75 BlueSkyRCStr *bluesky_string_new_from_gstring(GString *s)
78 return bluesky_string_new(g_string_free(s, FALSE), len);
81 /* Create a new BlueSkyRCStr from a memory-mapped buffer. */
82 BlueSkyRCStr *bluesky_string_new_from_mmap(BlueSkyCacheFile *mmap,
83 int offset, gsize len)
85 g_assert(offset + len <= mmap->len);
87 BlueSkyRCStr *string = g_new(BlueSkyRCStr, 1);
89 g_atomic_int_inc(&mmap->mapcount);
90 string->data = (char *)mmap->addr + offset;
92 g_atomic_int_set(&string->refcount, 1);
96 void bluesky_string_ref(BlueSkyRCStr *string)
101 g_atomic_int_inc(&string->refcount);
104 void bluesky_string_unref(BlueSkyRCStr *string)
109 if (g_atomic_int_dec_and_test(&string->refcount)) {
110 if (string->mmap == NULL) {
111 g_free(string->data);
113 bluesky_mmap_unref(string->mmap);
119 /* Duplicate and return a new reference-counted string, containing a copy of
120 * the original data, with a reference count of 1. As an optimization, if the
121 * passed-in string already has a reference count of 1, the original is
122 * returned. Can be used to make a mutable copy of a shared string. For this
123 * to truly be safe, it is probably needed that there be some type of lock
124 * protecting access to the string. */
125 BlueSkyRCStr *bluesky_string_dup(BlueSkyRCStr *string)
130 if (string->mmap != NULL) {
132 s = bluesky_string_new(g_memdup(string->data, string->len),
134 bluesky_string_unref(string);
138 if (g_atomic_int_dec_and_test(&string->refcount)) {
139 /* There are no other shared copies, so return this one. */
140 g_atomic_int_inc(&string->refcount);
143 return bluesky_string_new(g_memdup(string->data, string->len),
148 /* Resize the data block used by a BlueSkyRCStr. The data pointer might change
149 * after making this call, so it should not be cached across calls to this
150 * function. To avoid confusing any other users, the caller probably ought to
151 * hold the only reference to the string (by calling bluesky_string_dup first
153 void bluesky_string_resize(BlueSkyRCStr *string, gsize len)
155 g_assert(string->mmap == NULL);
157 if (string->len == len)
160 g_warn_if_fail(string->refcount == 1);
162 string->data = g_realloc(string->data, len);
166 /* Cache LRU list management functions. These manage the doubly-linked list of
167 * inodes sorted by accessed/modified time. The FS lock should be held while
170 * _remove will unlink an inode from the linked list.
172 * _prepend and _append insert an inode at the head or tail of the linked list,
173 * and return a pointer to the linked list element (which should be stored in
174 * the inode); the inode should not already be in the list.
176 * _head and _tail simply return the first or last item inode in the list. */
177 void bluesky_list_unlink(GList *head, GList *item)
182 if (head->prev == item)
183 head->prev = item->prev;
184 head->next = g_list_delete_link(head->next, item);
187 GList *bluesky_list_prepend(GList *head, BlueSkyInode *inode)
189 head->next = g_list_prepend(head->next, inode);
190 if (head->prev == NULL)
191 head->prev = g_list_last(head->next);
195 GList *bluesky_list_append(GList *head, BlueSkyInode *inode)
197 if (head->next == NULL)
198 return bluesky_list_prepend(head, inode);
200 g_assert(head->prev != NULL && head->prev->next == NULL);
202 GList *link = g_list_alloc();
205 link->prev = head->prev;
206 head->prev->next = link;
211 BlueSkyInode *bluesky_list_head(GList *head)
213 if (head->next == NULL)
216 return (BlueSkyInode *)head->next->data;
219 BlueSkyInode *bluesky_list_tail(GList *head)
221 if (head->prev == NULL)
224 return (BlueSkyInode *)head->prev->data;
227 /**** Range sets. ****/
229 /* These are a data structure which can track a set of discontiguous integer
230 * ranges--such as the partitioning of the inode number space or the bytes in a
231 * log file into objects. This current prototype implementation just tracks
232 * the starting offset with a hash table and doesn't track the length, but
233 * should be extended later to track properly. */
235 struct BlueSkyRangeset {
239 static gint compare64(gconstpointer a, gconstpointer b, gpointer user_data)
241 uint64_t x = *(uint64_t *)a;
242 uint64_t y = *(uint64_t *)b;
252 BlueSkyRangeset *bluesky_rangeset_new()
254 BlueSkyRangeset *rangeset = g_new(BlueSkyRangeset, 1);
255 rangeset->seq = g_sequence_new(g_free);
259 void bluesky_rangeset_free(BlueSkyRangeset *rangeset)
261 g_sequence_free(rangeset->seq);
265 gboolean bluesky_rangeset_insert(BlueSkyRangeset *rangeset,
266 uint64_t start, uint64_t length,
270 i = g_sequence_search(rangeset->seq, &start, compare64, NULL);
271 i = g_sequence_iter_prev(i);
273 /* TODO: Checks that no other item overlaps. */
275 BlueSkyRangesetItem *item = g_new(BlueSkyRangesetItem, 1);
277 item->length = length;
279 g_sequence_insert_sorted(rangeset->seq, item, compare64, NULL);
284 const BlueSkyRangesetItem *bluesky_rangeset_lookup(BlueSkyRangeset *rangeset,
288 i = g_sequence_search(rangeset->seq, &offset, compare64, NULL);
289 i = g_sequence_iter_prev(i);
290 if (g_sequence_iter_is_end(i))
293 BlueSkyRangesetItem *item = (BlueSkyRangesetItem *)g_sequence_get(i);
294 if (offset >= item->start && offset < item->start + item->length)
300 /* Look up the first rangeset item starting at or following the given address.
301 * Can be used to iterate through a rangeset. */
302 const BlueSkyRangesetItem *bluesky_rangeset_lookup_next(BlueSkyRangeset *rangeset, uint64_t offset)
305 i = g_sequence_search(rangeset->seq, &offset, compare64, NULL);
306 i = g_sequence_iter_prev(i);
307 if (g_sequence_iter_is_end(i))
309 BlueSkyRangesetItem *item = (BlueSkyRangesetItem *)g_sequence_get(i);
310 if (item->start < offset) {
311 i = g_sequence_iter_next(i);
312 if (g_sequence_iter_is_end(i))
315 item = (BlueSkyRangesetItem *)g_sequence_get(i);
320 /* Determine the full extent of a rangeset--the smallest offset covered and the
321 * length needed to extend to the end of the last item. */
322 void bluesky_rangeset_get_extents(BlueSkyRangeset *rangeset,
323 uint64_t *start, uint64_t *length)
326 BlueSkyRangesetItem *item;
328 i = g_sequence_get_begin_iter(rangeset->seq);
329 if (g_sequence_iter_is_end(i)) {
335 item = (BlueSkyRangesetItem *)g_sequence_get(i);
336 *start = item->start;
338 i = g_sequence_get_end_iter(rangeset->seq);
339 i = g_sequence_iter_prev(i);
340 item = (BlueSkyRangesetItem *)g_sequence_get(i);
341 *length = (item->start + item->length) - *start;
344 /**** Request response-time tracking. ****/
348 bluesky_time_hires timestamp;
352 /* To catch attempts to access to invalid profile structures. */
353 #define PROFILE_MAGIC 0x439929d8
355 BlueSkyProfile *bluesky_profile_new()
357 BlueSkyProfile *profile = g_new0(BlueSkyProfile, 1);
358 profile->lock = g_mutex_new();
359 profile->magic = PROFILE_MAGIC;
363 void bluesky_profile_free(BlueSkyProfile *profile)
365 if (profile->magic != PROFILE_MAGIC) {
366 g_warning("Access to invalid BlueSkyProfile object!");
369 while (profile->events != NULL) {
370 RTEvent *event = (RTEvent *)profile->events->data;
371 g_free(event->message);
373 profile->events = g_list_delete_link(profile->events, profile->events);
376 g_mutex_free(profile->lock);
377 g_free(profile->description);
381 void bluesky_profile_add_event(BlueSkyProfile *profile, char *message)
383 g_return_if_fail(profile != NULL);
385 if (profile->magic != PROFILE_MAGIC) {
386 g_warning("Access to invalid BlueSkyProfile object!");
389 g_mutex_lock(profile->lock);
390 RTEvent *event = g_new(RTEvent, 1);
391 event->timestamp = bluesky_now_hires();
392 /* FIXME: Non-portable */
393 event->tid = syscall(SYS_gettid);
394 event->message = message;
395 profile->events = g_list_prepend(profile->events, event);
396 g_mutex_unlock(profile->lock);
399 static FILE *profiling_file = NULL;
400 static GStaticMutex profiling_print_lock = G_STATIC_MUTEX_INIT;
402 void bluesky_profile_set_output(FILE *stream)
404 profiling_file = stream;
407 void bluesky_profile_print(BlueSkyProfile *profile)
409 if (profiling_file == NULL)
411 FILE *stream = profiling_file;
413 g_return_if_fail(profile != NULL);
415 if (profile->magic != PROFILE_MAGIC) {
416 g_warning("Access to invalid BlueSkyProfile object!");
420 g_mutex_lock(profile->lock);
421 g_static_mutex_lock(&profiling_print_lock);
422 fprintf(stream, "Event Timeline: %s\n", profile->description);
423 GList *link = g_list_last(profile->events);
424 bluesky_time_hires last_time = 0;
425 while (link != NULL) {
426 RTEvent *event = (RTEvent *)link->data;
427 fprintf(stream, " [%d] [%"PRIi64" ns]: %s\n",
428 event->tid, event->timestamp - last_time, event->message);
429 last_time = event->timestamp;
432 g_static_mutex_unlock(&profiling_print_lock);
433 g_mutex_unlock(profile->lock);
436 static GStaticPrivate per_thread_profile = G_STATIC_PRIVATE_INIT;
438 BlueSkyProfile *bluesky_profile_get()
440 return (BlueSkyProfile *)g_static_private_get(&per_thread_profile);
443 void bluesky_profile_set(BlueSkyProfile *profile)
445 g_static_private_set(&per_thread_profile, profile, NULL);