X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=bluesky%2Fbluesky.h;h=7b8aad2073cfb787462f388995965320b75064fa;hb=e6ca13c07e0cb5dd05d48a12cd50a2cf6179ce1a;hp=afb2304c88b74209764dc671de06c3d1af79e52e;hpb=c83d8b650786b8e7d6a9d41c9449c203929c7215;p=bluesky.git diff --git a/bluesky/bluesky.h b/bluesky/bluesky.h index afb2304..7b8aad2 100644 --- a/bluesky/bluesky.h +++ b/bluesky/bluesky.h @@ -10,13 +10,44 @@ #define _BLUESKY_H #include +#include #include #ifdef __cplusplus extern "C" { #endif -struct S3Store; +/* Various options to tweak for performance benchmarking purposes. */ +typedef struct { + /* Perform all get/put operations synchronously. */ + int synchronous_stores; + + /* Write data in cache immediately after file is modified. */ + int writethrough_cache; + + /* Can inodes be fetched asynchronously? (Inode object is initially + * created in a pending state, and not unlocked until the data is actually + * available.) */ + int sync_inode_fetches; + + /* Should frontends handle requests serially or allow operations to proceed + * in parallel? */ + int sync_frontends; +} BlueSkyOptions; + +extern BlueSkyOptions bluesky_options; + +/* BlueSky status and error codes. Various frontends should translate these to + * the appropriate error code for whatever protocol they implement. */ +typedef enum { + BSTATUS_OK = 0, /* No error */ + BSTATUS_IOERR, /* I/O error of some form */ + BSTATUS_NOENT, /* File does not exist */ +} BlueSkyStatus; + +void bluesky_init(void); + +gchar *bluesky_lowercase(const gchar *s); /* Reference-counted blocks of memory, used for passing data in and out of * storage backends and in other places. */ @@ -27,14 +58,33 @@ typedef struct { } BlueSkyRCStr; BlueSkyRCStr *bluesky_string_new(gpointer data, gsize len); +BlueSkyRCStr *bluesky_string_new_from_gstring(GString *s); void bluesky_string_ref(BlueSkyRCStr *string); void bluesky_string_unref(BlueSkyRCStr *string); BlueSkyRCStr *bluesky_string_dup(BlueSkyRCStr *string); +void bluesky_string_resize(BlueSkyRCStr *string, gsize len); /* Cryptographic operations. */ +#define CRYPTO_BLOCK_SIZE 16 /* 128-bit AES */ +#define CRYPTO_KEY_SIZE 16 + void bluesky_crypt_init(); +void bluesky_crypt_hash_key(const char *keystr, uint8_t *out); void bluesky_crypt_random_bytes(guchar *buf, gint len); BlueSkyRCStr *bluesky_crypt_encrypt(BlueSkyRCStr *in, const uint8_t *key); +BlueSkyRCStr *bluesky_crypt_decrypt(BlueSkyRCStr *in, const uint8_t *key); + +/* Storage interface. This presents a key-value store abstraction, and can + * have multiple implementations: in-memory, on-disk, in-cloud. */ +struct _BlueSkyStore; +typedef struct _BlueSkyStore BlueSkyStore; + +void bluesky_store_init(); +BlueSkyStore *bluesky_store_new(const gchar *type); +void bluesky_store_free(BlueSkyStore *store); +BlueSkyRCStr *bluesky_store_get(BlueSkyStore *store, const gchar *key); +void bluesky_store_put(BlueSkyStore *store, + const gchar *key, BlueSkyRCStr *val); /* File types. The numeric values are chosen to match with those used in * NFSv3. */ @@ -46,6 +96,10 @@ typedef enum { BLUESKY_SYMLINK = 5, BLUESKY_SOCKET = 6, BLUESKY_FIFO = 7, + + /* Special types used only internally. */ + BLUESKY_PENDING = 0, /* Inode being loaded; type not yet determined */ + BLUESKY_INVALID = -1, /* Inode is invalid (failed to load) */ } BlueSkyFileType; /* Filesystem state. Each filesystem which is exported is represented by a @@ -57,9 +111,7 @@ typedef struct { GHashTable *inodes; /* Cached inodes */ uint64_t next_inum; /* Next available inode for allocation */ - struct S3Store *store; - - uint8_t *encryption_key; + BlueSkyStore *store; } BlueSkyFS; /* Inode number of the root directory. */ @@ -68,6 +120,10 @@ typedef struct { /* Timestamp, measured in microseconds since the Unix epoch. */ typedef int64_t bluesky_time; +/* High-resolution timer, measured in nanoseconds. */ +typedef int64_t bluesky_time_hires; +bluesky_time_hires bluesky_now_hires(); + /* In-memory representation of an inode within a Blue Sky server. This * corresponds roughly with information that is committed to persistent * storage. Locking/refcounting rules: @@ -78,6 +134,13 @@ typedef int64_t bluesky_time; * dropping from the cache. * - Any pending operations should hold extra references to the inode as * appropriate to keep it available until the operation completes. + * - Locking dependency order is, when multiple locks are to be acquired, to + * acquire locks on parents in the filesystem tree before children. + * (TODO: What about rename when we acquire locks in unrelated parts of the + * filesystem?) + * - An inode should not be locked while the filesystem lock is already held, + * since some code may do an inode lookup (which acquires the filesystem + * lock) while a different inode is locked. * */ typedef struct { GMutex *lock; @@ -95,7 +158,20 @@ typedef struct { * that we don't exhaust the identifier space. */ uint64_t inum; - uint64_t change_count; /* Incremented each with each change made */ + /* change_count is increased with every operation which modifies the inode, + * and can be used to determine if cached data is still valid. + * change_commit is the value of change_count when the inode was last + * committed to stable storage. */ + uint64_t change_count, change_commit; + + /* Timestamp for controlling when modified data is flushed to stable + * storage. When an inode is first modified from a clean state, this is + * set to the current time. If the inode is clean, it is set to zero. */ + int64_t change_time; + + /* Additional state for tracking cache writeback status. */ + uint64_t change_pending; /* change_count version currently being committed to storage */ + int64_t atime; /* Microseconds since the Unix epoch */ int64_t ctime; int64_t mtime; @@ -108,7 +184,11 @@ typedef struct { /* Directory-specific fields */ GSequence *dirents; /* List of entries for READDIR */ GHashTable *dirhash; /* Hash table by name for LOOKUP */ + GHashTable *dirhash_folded; /* As above, but case-folded */ uint64_t parent_inum; /* inode for ".."; 0 if the root directory */ + + /* Symlink-specific fields */ + gchar *symlink_contents; } BlueSkyInode; /* A directory entry. The name is UTF-8 and is a freshly-allocated string. @@ -118,6 +198,7 @@ typedef struct { * is used to provide a stable key for restarting a READDIR call. */ typedef struct { gchar *name; + gchar *name_folded; /* Name, folded for case-insensitive lookup */ uint32_t cookie; uint64_t inum; } BlueSkyDirent; @@ -141,34 +222,46 @@ typedef struct { BlueSkyRCStr *data; /* Pointer to data in memory if cached */ } BlueSkyBlock; -BlueSkyFS *bluesky_new_fs(gchar *name); +BlueSkyFS *bluesky_init_fs(gchar *name, BlueSkyStore *store); +void bluesky_superblock_flush(BlueSkyFS *fs); + int64_t bluesky_get_current_time(); void bluesky_inode_update_ctime(BlueSkyInode *inode, gboolean update_mtime); uint64_t bluesky_fs_alloc_inode(BlueSkyFS *fs); +void bluesky_init_inode(BlueSkyInode *i, BlueSkyFileType type); BlueSkyInode *bluesky_new_inode(uint64_t inum, BlueSkyFS *fs, BlueSkyFileType type); BlueSkyInode *bluesky_get_inode(BlueSkyFS *fs, uint64_t inum); +void bluesky_inode_ref(BlueSkyInode *inode); +void bluesky_inode_unref(BlueSkyInode *inode); void bluesky_insert_inode(BlueSkyFS *fs, BlueSkyInode *inode); void bluesky_dirent_destroy(gpointer dirent); -uint64_t bluesky_directory_hash(gchar *name); uint64_t bluesky_directory_lookup(BlueSkyInode *inode, gchar *name); -gboolean bluesky_directory_insert(BlueSkyInode *dir, gchar *name, +uint64_t bluesky_directory_ilookup(BlueSkyInode *inode, gchar *name); +BlueSkyDirent *bluesky_directory_read(BlueSkyInode *dir, uint32_t cookie); +gboolean bluesky_directory_insert(BlueSkyInode *dir, const gchar *name, uint64_t inum); void bluesky_directory_dump(BlueSkyInode *dir); void bluesky_block_touch(BlueSkyInode *inode, uint64_t i); void bluesky_block_fetch(BlueSkyFS *fs, BlueSkyBlock *block); -void bluesky_block_flush(BlueSkyFS *fs, BlueSkyBlock *block); void bluesky_file_truncate(BlueSkyInode *inode, uint64_t size); void bluesky_file_write(BlueSkyInode *inode, uint64_t offset, const char *data, gint len); void bluesky_file_read(BlueSkyInode *inode, uint64_t offset, char *buf, gint len); -struct S3Store *s3store_new(); -BlueSkyRCStr *s3store_get(struct S3Store *store, const gchar *key); -void s3store_put(struct S3Store *store, const gchar *key, BlueSkyRCStr *val); +void bluesky_inode_flush(BlueSkyFS *fs, BlueSkyInode *inode); +void bluesky_inode_fetch(BlueSkyFS *fs, uint64_t inum); + +gint bluesky_dirent_compare(gconstpointer a, gconstpointer b, + gpointer unused); + +void bluesky_flushd_invoke(BlueSkyFS *fs); +void bluesky_inode_do_sync(BlueSkyInode *inode); + +void bluesky_debug_dump(BlueSkyFS *fs); #ifdef __cplusplus }