A new microbenchmark tool to figure out what format to use for logs.
author Michael Vrable <mvrable@cs.ucsd.edu>
Wed, 14 Jul 2010 00:30:50 +0000 (17:30 -0700)
committer Michael Vrable <mvrable@cs.ucsd.edu>
Wed, 14 Jul 2010 00:30:50 +0000 (17:30 -0700)
We want to log filesystem operations to disk so they are persistent across
proxy crashes, but should do so in a manner that is relatively high
performance...  Try to figure out what that should be.

CMakeLists.txt
bluesky/CMakeLists.txt
logbench/CMakeLists.txt [new file with mode: 0644]
logbench/logbench.c [new file with mode: 0644]

index 9384e61..9fcfeca 100644 (file)
@@ -13,3 +13,4 @@ add_subdirectory(bluesky)
 add_subdirectory(nfs3)
 add_subdirectory(microbench)
 add_subdirectory(cloudbench)
+add_subdirectory(logbench)
index 70ebf30..f7efc46 100644 (file)
@@ -11,7 +11,7 @@ set(CMAKE_C_FLAGS "-Wall -std=gnu99 ${CMAKE_C_FLAGS}")
 set(INSTALL_RPATH_USE_LINK_PATH 1)
 
 include_directories(${GLIB_INCLUDE_DIRS})
-target_link_libraries(bluesky ${GLIB_LIBRARIES} gcrypt s3 kvclient)
+target_link_libraries(bluesky ${GLIB_LIBRARIES} db gcrypt s3 kvclient)
 target_link_libraries(bluesky-test bluesky ${GLIB_LIBRARIES})
 
 #set_target_properties(bluesky PROPERTIES LINK_INTERFACE_LIBRARIES "")
diff --git a/logbench/CMakeLists.txt b/logbench/CMakeLists.txt
new file mode 100644 (file)
index 0000000..c581a15
--- /dev/null
@@ -0,0 +1,6 @@
+add_executable(logbench logbench.c)
+
+set(CMAKE_C_FLAGS "-Wall -std=gnu99 ${CMAKE_C_FLAGS}")
+
+include_directories(${GLIB_INCLUDE_DIRS})
+target_link_libraries(logbench ${GLIB_LIBRARIES} db)
diff --git a/logbench/logbench.c b/logbench/logbench.c
new file mode 100644 (file)
index 0000000..d811dc9
--- /dev/null
@@ -0,0 +1,235 @@
+/* A simple tool for benchmarking various logging strategies.
+ *
+ * We want to log a series of key/value pairs.  Approaches that we try include:
+ *  - Data written directly into the filesystem.
+ *  - Data is written to a Berkeley DB.
+ *  - Data is appended to a log file.
+ * In all cases we want to ensure that data is persistent on disk so it could
+ * be used for crash recovery.  We measure how many log records we can write
+ * per second to gauge performance. */
+
+#define _GNU_SOURCE
+#define _ATFILE_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <db.h>
+#include <glib.h>
+
+/* One log record passed through the work queue to a writer thread. */
+struct item {
+    char *key;      /* NUL-terminated record name; released with g_free() */
+    char *data;     /* payload buffer holding len bytes; released with g_free() */
+    size_t len;     /* number of payload bytes in data */
+};
+
+/* Upper bound on outstanding items; the producer in main() waits while the
+ * count is at or above this value. */
+const int queue_capacity = 1024;
+/* Size in bytes of the payload allocated for each record. */
+const int item_size = 1024;
+
+/* Work queue carrying struct item pointers to the writer thread(s). */
+GAsyncQueue *queue;
+/* Number of items pushed but not yet passed to finish_item(); guarded by
+ * lock. */
+int outstanding = 0;
+GMutex *lock;
+/* cond_empty is signalled when outstanding reaches zero; cond_full is
+ * signalled whenever outstanding is below queue_capacity. */
+GCond *cond_empty, *cond_full;
+
+/* Block until a record is available on the work queue and return it.  The
+ * caller is expected to hand the record to finish_item() when done. */
+struct item *get_item()
+{
+    gpointer next = g_async_queue_pop(queue);
+    return next;
+}
+
+/* Free a record and account for its completion.  Wakes a waiter on
+ * cond_empty once nothing is outstanding, and a waiter on cond_full whenever
+ * there is room below queue_capacity. */
+void finish_item(struct item *item)
+{
+    g_free(item->key);
+    g_free(item->data);
+    g_free(item);
+
+    g_mutex_lock(lock);
+    int remaining = --outstanding;
+    if (remaining == 0)
+        g_cond_signal(cond_empty);
+    if (remaining < queue_capacity)
+        g_cond_signal(cond_full);
+    g_mutex_unlock(lock);
+}
+
+/* Write all len bytes of buf to fd, retrying after short writes and after
+ * EINTR.  Any other write() failure trips an assertion. */
+void writebuf(int fd, const char *buf, size_t len)
+{
+    const char *cursor = buf;
+    size_t remaining = len;
+
+    while (remaining != 0) {
+        ssize_t n = write(fd, cursor, remaining);
+        if (n < 0) {
+            /* Interrupted before anything was written: just try again. */
+            if (errno == EINTR)
+                continue;
+        }
+        g_assert(n >= 0);
+        cursor += n;
+        remaining -= n;
+    }
+}
+
+/************************ Direct-to-filesystem logging ***********************/
+/* Descriptor for the log directory; opened by launch_fslog(). */
+static int dirfd = -1;
+
+/* Writer thread: stores each record as its own file, named by the record's
+ * key, inside the log directory.  The thread argument d is unused.  The loop
+ * never exits, so the trailing return is not reached. */
+gpointer fslog_thread(gpointer d)
+{
+    g_print("Launching filesystem writer thread...\n");
+
+    while (TRUE) {
+        struct item *item = get_item();
+
+        int fd = openat(dirfd, item->key, O_CREAT|O_WRONLY|O_TRUNC, 0666);
+        g_assert(fd >= 0);
+
+        writebuf(fd, item->data, item->len);
+
+        /* Flush the file contents and then the directory before the record
+         * is reported as complete; otherwise the producer could observe an
+         * empty queue and exit while this record is still not on disk. */
+        fsync(fd);
+        fsync(dirfd);
+        close(fd);
+
+        finish_item(item);
+    }
+
+    return NULL;
+}
+
+/* Open the "logdir" directory and start the filesystem writer thread(s). */
+void launch_fslog()
+{
+    const int writer_threads = 1;
+
+    dirfd = open("logdir", O_DIRECTORY);
+    g_assert(dirfd >= 0);
+
+    int started = 0;
+    while (started < writer_threads) {
+        g_thread_create(fslog_thread, NULL, FALSE, NULL);
+        started++;
+    }
+}
+
+/****************************** Single-File Log ******************************/
+/* Writer thread: appends every record to the single file "logfile" as the
+ * key (including its terminating NUL), the raw bytes of the size_t length,
+ * and then the payload.  fdatasync() is issued once every 256 records.  The
+ * thread argument d is unused and the loop never exits. */
+gpointer flatlog_thread(gpointer d)
+{
+    enum { SYNC_INTERVAL = 1 << 8 };
+    int records = 0;
+
+    g_print("Launching flat log writer thread...\n");
+
+    int fd = open("logfile", O_CREAT|O_WRONLY|O_TRUNC, 0666);
+    g_assert(fd >= 0);
+
+    for (;;) {
+        struct item *rec = get_item();
+        size_t key_bytes = strlen(rec->key) + 1;
+
+        writebuf(fd, rec->key, key_bytes);
+        writebuf(fd, (char *)&rec->len, sizeof(rec->len));
+        writebuf(fd, rec->data, rec->len);
+
+        records += 1;
+        if (records % SYNC_INTERVAL == 0)
+            fdatasync(fd);
+
+        finish_item(rec);
+    }
+
+    return NULL;
+}
+
+/* Start the single-file log writer as a non-joinable thread. */
+void launch_flatlog()
+{
+    g_thread_create(flatlog_thread, NULL, FALSE, NULL);
+}
+
+/************************* Transactional Berkeley DB *************************/
+/* Writer thread: stores every record in a transactional Berkeley DB btree
+ * ("log.db" inside the "bdb" environment directory), committing once every
+ * 256 records.  The thread argument d is unused and the loop never exits. */
+gpointer bdb_thread(gpointer d)
+{
+    g_print("Launching BDB log writer thread...\n");
+
+    int res;
+    DB_ENV *env;
+    DB *db;
+    DB_TXN *txn = NULL;
+    int count = 0;
+
+    res = db_env_create(&env, 0);
+    g_assert(res == 0);
+
+    res = env->open(env, "bdb",
+                    DB_CREATE | DB_RECOVER | DB_INIT_LOCK | DB_INIT_LOG
+                     | DB_INIT_MPOOL | DB_INIT_TXN | DB_THREAD, 0644);
+    g_assert(res == 0);
+
+    res = db_create(&db, env, 0);
+    g_assert(res == 0);
+
+    res = db->open(db, NULL, "log.db", "log", DB_BTREE,
+                   DB_CREATE | DB_THREAD | DB_AUTO_COMMIT, 0644);
+    g_assert(res == 0);
+
+    while (TRUE) {
+        /* Start a new batch transaction if the previous one was committed. */
+        if (txn == NULL) {
+            res = env->txn_begin(env, NULL, &txn, 0);
+            g_assert(res == 0);
+        }
+
+        struct item *item = get_item();
+
+        DBT key, value;
+        memset(&key, 0, sizeof(key));
+        memset(&value, 0, sizeof(value));
+
+        /* The key is stored without its terminating NUL. */
+        key.data = item->key;
+        key.size = strlen(item->key);
+
+        value.data = item->data;
+        value.size = item->len;
+
+        /* Pass the open transaction so the write belongs to the current
+         * batch; with a NULL handle the put would not take part in txn and
+         * the periodic commit below would cover nothing. */
+        res = db->put(db, txn, &key, &value, 0);
+        g_assert(res == 0);
+
+        count++;
+        if (count % (1 << 8) == 0) {
+            /* The handle is invalid after commit whether or not it succeeds,
+             * so clear it before checking the result. */
+            res = txn->commit(txn, 0);
+            txn = NULL;
+            g_assert(res == 0);
+        }
+
+        finish_item(item);
+    }
+
+    return NULL;
+}
+
+/* Start the Berkeley DB log writer as a non-joinable thread. */
+void launch_bdb()
+{
+    g_thread_create(bdb_thread, NULL, FALSE, NULL);
+}
+
+/* Benchmark driver: starts one logging backend, feeds it 4096 records of
+ * item_size bytes each (never letting more than queue_capacity be
+ * outstanding), then waits until every record has been finished.
+ * Command-line arguments are ignored.  Returns 0. */
+int main(int argc, char *argv[])
+{
+    g_thread_init(NULL);
+    queue = g_async_queue_new();
+    lock = g_mutex_new();
+    cond_empty = g_cond_new();
+    cond_full = g_cond_new();
+
+    /* Exactly one backend is enabled at a time; swap the call to compare. */
+    launch_fslog();
+    // launch_flatlog();
+    // launch_bdb();
+
+    for (int i = 0; i < (1 << 12); i++) {
+        struct item *item = g_new(struct item, 1);
+        item->key = g_strdup_printf("item-%06d", i);
+        /* Zero-fill the payload so the log never receives uninitialised
+         * heap contents. */
+        item->data = g_malloc0(item_size);
+        item->len = item_size;
+
+        /* Wait for room, then enqueue and count the item under the lock so
+         * finish_item() sees a consistent outstanding count. */
+        g_mutex_lock(lock);
+        while (outstanding >= queue_capacity)
+            g_cond_wait(cond_full, lock);
+        g_async_queue_push(queue, item);
+        outstanding++;
+        g_mutex_unlock(lock);
+    }
+
+    /* Drain: block until the writer has finished every queued item. */
+    g_mutex_lock(lock);
+    while (outstanding > 0)
+        g_cond_wait(cond_empty, lock);
+    g_mutex_unlock(lock);
+
+    return 0;
+}