--- /dev/null
+/* A simple tool for benchmarking various logging strategies.
+ *
+ * We want to log a series of key/value pairs. Approaches that we try include:
+ * - Data is written directly into the filesystem, one file per key.
+ * - Data is written to a Berkeley DB.
+ * - Data is appended to a log file.
+ * In all cases we want to ensure that data is persistent on disk so it could
+ * be used for crash recovery. We measure how many log records we can write
+ * per second to gauge performance. */
+
+#define _GNU_SOURCE
+#define _ATFILE_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <db.h>
+#include <glib.h>
+
+struct item { /* one log record handed from main() to a writer thread */
+ char *key; /* NUL-terminated record name; released with g_free() in finish_item() */
+ char *data; /* payload buffer; released with g_free() in finish_item() */
+ size_t len; /* number of bytes of data that the writer threads write out */
+};
+
+const int queue_capacity = 1024; /* main() stops pushing while outstanding >= this */
+const int item_size = 1024; /* bytes allocated for each item's data in main() */
+
+GAsyncQueue *queue; /* items pushed by main() and popped by get_item() */
+int outstanding = 0; /* items pushed but not yet passed to finish_item(); guarded by lock */
+GMutex *lock; /* protects outstanding; paired with both conditions below */
+GCond *cond_empty, *cond_full; /* signalled when outstanding hits 0 / is below queue_capacity */
+
+/* Block until an item is available on the shared queue and return it.
+ * The caller takes ownership and must eventually pass it to finish_item(). */
+struct item *get_item()
+{
+    gpointer popped = g_async_queue_pop(queue);
+    struct item *next = (struct item *)popped;
+
+    return next;
+}
+
+/* Release an item and account for its completion.
+ *
+ * Frees the key, the payload and the item itself, then decrements the
+ * outstanding counter under the lock. Signals cond_empty when the counter
+ * reaches zero and cond_full whenever the counter is below queue_capacity. */
+void finish_item(struct item *item)
+{
+    char *key = item->key;
+    char *payload = item->data;
+
+    g_free(key);
+    g_free(payload);
+    g_free(item);
+
+    g_mutex_lock(lock);
+    outstanding -= 1;
+    if (outstanding == 0) {
+        /* Nothing left in flight: wake whoever waits for completion. */
+        g_cond_signal(cond_empty);
+    }
+    if (outstanding < queue_capacity) {
+        /* There is room again: wake a producer waiting to push. */
+        g_cond_signal(cond_full);
+    }
+    g_mutex_unlock(lock);
+}
+
+/* Write exactly len bytes from buf to fd.
+ *
+ * Short writes are continued from where they stopped and writes interrupted
+ * by a signal (EINTR) are retried; any other write error trips g_assert. */
+void writebuf(int fd, const char *buf, size_t len)
+{
+    const char *cursor = buf;
+    size_t remaining = len;
+
+    while (remaining != 0) {
+        ssize_t n;
+
+        /* Retry the same chunk for as long as we are merely interrupted. */
+        do {
+            n = write(fd, cursor, remaining);
+        } while (n < 0 && errno == EINTR);
+
+        g_assert(n >= 0);
+        cursor += n;
+        remaining -= n;
+    }
+}
+
+/************************ Direct-to-filesystem logging ***********************/
+static int dirfd = -1; /* fd of "logdir"; opened by launch_fslog(), used by fslog_thread() */
+
+/* Writer thread for the direct-to-filesystem strategy.
+ *
+ * Pops items from the shared queue forever. Each item is stored as its own
+ * file, named by the item's key, relative to the directory open on dirfd.
+ * The file and then the directory are fsync'ed, and only after both succeed
+ * is the item handed to finish_item(). An item is therefore never reported
+ * complete while its data has not yet been flushed with fsync.
+ *
+ * d: unused thread argument.
+ * The loop never terminates; the trailing return only satisfies the thread
+ * function signature. */
+gpointer fslog_thread(gpointer d)
+{
+    g_print("Launching filesystem writer thread...\n");
+
+    while (TRUE) {
+        struct item *item = get_item();
+        int res;
+
+        int fd = openat(dirfd, item->key, O_CREAT|O_WRONLY|O_TRUNC, 0666);
+        g_assert(fd >= 0);
+
+        writebuf(fd, item->data, item->len);
+
+        /* Flush the file contents first, then the directory so the new
+         * directory entry itself is persistent. */
+        res = fsync(fd);
+        g_assert(res == 0);
+        res = fsync(dirfd);
+        g_assert(res == 0);
+        close(fd);
+
+        /* Acknowledge (and free) the item only once it is durable. */
+        finish_item(item);
+    }
+
+    return NULL;
+}
+
+/* Start the direct-to-filesystem writer.
+ *
+ * Opens the "logdir" directory (it must already exist), stores its
+ * descriptor in dirfd, and spawns the detached writer thread(s). Failure to
+ * open the directory or to create a thread trips g_assert, because without
+ * a consumer main() would wait forever for the queue to drain. */
+void launch_fslog()
+{
+    enum { FSLOG_THREADS = 1 };
+
+    /* POSIX requires an explicit access mode; O_DIRECTORY alone is not one. */
+    dirfd = open("logdir", O_RDONLY | O_DIRECTORY);
+    g_assert(dirfd >= 0);
+
+    for (int i = 0; i < FSLOG_THREADS; i++) {
+        GThread *thread = g_thread_create(fslog_thread, NULL, FALSE, NULL);
+        g_assert(thread != NULL);
+    }
+}
+
+/****************************** Single-File Log ******************************/
+/* Writer thread for the single append-only log file strategy.
+ *
+ * Creates/truncates "logfile" and appends one record per item: the key
+ * including its terminating NUL, the raw bytes of the size_t length, then
+ * the payload. Records are group-committed: written items are held in a
+ * local batch and only passed to finish_item() after fdatasync() succeeds.
+ * A sync happens when the batch reaches SYNC_BATCH items or when the queue
+ * runs dry while items are pending, so a final partial batch is also made
+ * durable and acknowledged.
+ *
+ * d: unused thread argument.
+ * The loop never terminates; the trailing return only satisfies the thread
+ * function signature. */
+gpointer flatlog_thread(gpointer d)
+{
+    enum { SYNC_BATCH = 1 << 8 };
+
+    g_print("Launching flat log writer thread...\n");
+
+    int fd = open("logfile", O_CREAT|O_WRONLY|O_TRUNC, 0666);
+    g_assert(fd >= 0);
+
+    struct item *pending[SYNC_BATCH];
+    int npending = 0;
+
+    while (TRUE) {
+        struct item *item;
+
+        /* Never block while holding unsynced items: if the queue is empty
+         * we fall through and flush what we have instead. */
+        if (npending > 0)
+            item = g_async_queue_try_pop(queue);
+        else
+            item = get_item();
+
+        if (item != NULL) {
+            writebuf(fd, item->key, strlen(item->key) + 1);
+            writebuf(fd, (char *)&item->len, sizeof(item->len));
+            writebuf(fd, item->data, item->len);
+            pending[npending++] = item;
+        }
+
+        if (item == NULL || npending == SYNC_BATCH) {
+            int res = fdatasync(fd);
+            g_assert(res == 0);
+
+            /* The whole batch is durable now; acknowledge every item. */
+            for (int i = 0; i < npending; i++)
+                finish_item(pending[i]);
+            npending = 0;
+        }
+    }
+
+    return NULL;
+}
+
+/* Start the flat-log writer as a single detached thread.
+ * Thread creation failure trips g_assert, because without a consumer
+ * main() would wait forever for the queue to drain. */
+void launch_flatlog()
+{
+    GThread *thread = g_thread_create(flatlog_thread, NULL, FALSE, NULL);
+    g_assert(thread != NULL);
+}
+
+/************************* Transactional Berkeley DB *************************/
+/* Writer thread for the transactional Berkeley DB strategy.
+ *
+ * Opens (with recovery) a transactional environment in the "bdb" directory
+ * and a btree database "log" inside "log.db", then stores every queued item
+ * as a key/value pair. Puts are grouped into explicit transactions: each
+ * put runs inside the current transaction, and the transaction is committed
+ * when it holds COMMIT_BATCH items or when the queue runs dry while items
+ * are pending. Items are passed to finish_item() only after their
+ * transaction has committed successfully.
+ *
+ * d: unused thread argument.
+ * The loop never terminates; the trailing return only satisfies the thread
+ * function signature. */
+gpointer bdb_thread(gpointer d)
+{
+    enum { COMMIT_BATCH = 1 << 8 };
+
+    g_print("Launching BDB log writer thread...\n");
+
+    int res;
+    DB_ENV *env;
+    DB *db;
+    DB_TXN *txn = NULL;
+    struct item *pending[COMMIT_BATCH];
+    int npending = 0;
+
+    res = db_env_create(&env, 0);
+    g_assert(res == 0);
+
+    res = env->open(env, "bdb",
+                    DB_CREATE | DB_RECOVER | DB_INIT_LOCK | DB_INIT_LOG
+                    | DB_INIT_MPOOL | DB_INIT_TXN | DB_THREAD, 0644);
+    g_assert(res == 0);
+
+    res = db_create(&db, env, 0);
+    g_assert(res == 0);
+
+    res = db->open(db, NULL, "log.db", "log", DB_BTREE,
+                   DB_CREATE | DB_THREAD | DB_AUTO_COMMIT, 0644);
+    g_assert(res == 0);
+
+    while (TRUE) {
+        struct item *item;
+
+        /* Never block while a transaction holds uncommitted items: if the
+         * queue is empty we fall through and commit what we have. */
+        if (npending > 0)
+            item = g_async_queue_try_pop(queue);
+        else
+            item = get_item();
+
+        if (item != NULL) {
+            if (txn == NULL) {
+                res = env->txn_begin(env, NULL, &txn, 0);
+                g_assert(res == 0);
+            }
+
+            DBT key, value;
+            memset(&key, 0, sizeof(key));
+            memset(&value, 0, sizeof(value));
+
+            key.data = item->key;
+            key.size = strlen(item->key);
+
+            value.data = item->data;
+            value.size = item->len;
+
+            /* Pass the explicit transaction; a NULL txn here would make
+             * every put auto-commit on its own and defeat the batching. */
+            res = db->put(db, txn, &key, &value, 0);
+            g_assert(res == 0);
+
+            pending[npending++] = item;
+        }
+
+        if (item == NULL || npending == COMMIT_BATCH) {
+            /* npending > 0 on both paths, so txn is non-NULL here. */
+            res = txn->commit(txn, 0);
+            g_assert(res == 0);
+            txn = NULL;
+
+            /* The batch is committed; acknowledge every item in it. */
+            for (int i = 0; i < npending; i++)
+                finish_item(pending[i]);
+            npending = 0;
+        }
+    }
+
+    return NULL;
+}
+
+/* Start the Berkeley DB writer as a single detached thread.
+ * Thread creation failure trips g_assert, because without a consumer
+ * main() would wait forever for the queue to drain. */
+void launch_bdb()
+{
+    GThread *thread = g_thread_create(bdb_thread, NULL, FALSE, NULL);
+    g_assert(thread != NULL);
+}
+
+/* Benchmark driver.
+ *
+ * Initialises threading and the shared queue state, launches one writer
+ * strategy, then produces 1 << 12 items keyed "item-NNNNNN", each with an
+ * item_size-byte payload. The producer waits on cond_full whenever
+ * queue_capacity items are already outstanding, and after producing
+ * everything waits on cond_empty until every item has been finished.
+ *
+ * argc, argv: unused.
+ * Returns 0 once all items have been finished. */
+int main(int argc, char *argv[])
+{
+    g_thread_init(NULL);
+    queue = g_async_queue_new();
+    lock = g_mutex_new();
+    cond_empty = g_cond_new();
+    cond_full = g_cond_new();
+
+    launch_fslog();
+    // launch_flatlog();
+    // launch_bdb();
+
+    for (int i = 0; i < (1 << 12); i++) {
+        struct item *item = g_new(struct item, 1);
+        item->key = g_strdup_printf("item-%06d", i);
+        /* Zero-filled so the bytes written to disk are well defined rather
+         * than whatever happened to be on the heap. */
+        item->data = g_malloc0(item_size);
+        item->len = item_size;
+
+        g_mutex_lock(lock);
+        /* Back-pressure: keep at most queue_capacity items in flight. */
+        while (outstanding >= queue_capacity)
+            g_cond_wait(cond_full, lock);
+        g_async_queue_push(queue, item);
+        outstanding++;
+        g_mutex_unlock(lock);
+    }
+
+    /* Wait for the writer to finish every item before exiting. */
+    g_mutex_lock(lock);
+    while (outstanding > 0)
+        g_cond_wait(cond_empty, lock);
+    g_mutex_unlock(lock);
+
+    return 0;
+}