1 /* A simple tool for benchmarking various logging strategies.
3 * We want to log a series of key/value pairs. Approaches that we try include:
4 * - Data is written directly into the filesystem.
5 * - Data is written to a Berkeley DB.
6 * - Data is appended to a log file.
7 * In all cases we want to ensure that data is persistent on disk so it could
8 * be used for crash recovery. We measure how many log records we can write
9 * per second to gauge performance. */
12 #define _ATFILE_SOURCE
19 #include <sys/types.h>
/* Threshold consulted by finish_item: cond_full is signalled only while
 * outstanding is below this value. */
33 int queue_capacity = 1024;
/* Batch size; main overwrites it from the command line. The flat-log and
 * BDB workers act whenever count % opt_batchsize == 0, and main compares
 * outstanding against it before waiting on cond_empty. */
36 int opt_batchsize = 1;
/* Number of items main enqueues; the default is 4096. */
37 int opt_writes = (1 << 12);
/* When TRUE, bdb_thread begins no explicit transaction and hands a NULL
 * transaction handle to db->put. */
38 int opt_bdb_async = FALSE;
/* Both are created in main. finish_item signals cond_empty and,
 * conditionally, cond_full; main waits on cond_empty. */
43 GCond *cond_empty, *cond_full;
/* Sample the monotonic clock into ts. The result of clock_gettime is not
 * inspected on this line. */
48 clock_gettime(CLOCK_MONOTONIC, &ts);
/* Collapse seconds and nanoseconds into one nanosecond count; the LL
 * suffix makes the multiplication happen in long long. */
50 return ts.tv_sec * 1000000000LL + ts.tv_nsec;
/* Take the next work item off the shared queue and return it as a
 * struct item pointer (g_async_queue_pop yields an untyped pointer, hence
 * the cast).
 * NOTE(review): per the GLib documentation the pop blocks while the queue
 * is empty -- confirm that is the intended behavior for the workers. */
53 struct item *get_item()
55 return (struct item *)g_async_queue_pop(queue);
/* Receives an item a worker is handing back. Only the signalling part of
 * the body is visible here.
 * NOTE(review): presumably the elided lines take the lock, decrement
 * outstanding and release the item -- confirm. */
58 void finish_item(struct item *item)
/* Wake a waiter on cond_empty unconditionally. */
67 g_cond_signal(cond_empty);
/* Wake a waiter on cond_full only while outstanding is below
 * queue_capacity. */
68 if (outstanding < queue_capacity)
69 g_cond_signal(cond_full);
/* Write len bytes starting at buf to the descriptor fd.
 * NOTE(review): only part of the body is visible; presumably it loops
 * until the whole buffer has been written -- confirm. */
73 void writebuf(int fd, const char *buf, size_t len)
77 written = write(fd, buf, len);
/* A write interrupted by a signal (EINTR) is singled out from other
 * failures; the statement this condition guards is not visible here. */
78 if (written < 0 && errno == EINTR)
/* A negative result that reaches this point trips the assertion. */
80 g_assert(written >= 0);
86 /************************ Direct-to-filesystem logging ***********************/
/* Descriptor of the directory in which per-item files are created. It is
 * -1 until the launcher for this backend opens ".". */
87 static int dirfd = -1;
/* Worker for the filesystem backend: every item is written to a file of
 * its own. */
89 gpointer fslog_thread(gpointer d)
92 struct item *item = get_item();
/* The item's key is the file name, resolved relative to dirfd. The file is
 * created if missing and truncated if it already exists. */
94 int fd = openat(dirfd, item->key, O_CREAT|O_WRONLY|O_TRUNC, 0666);
/* NOTE(review): no check of fd is visible before it is used -- confirm
 * one exists on the elided lines. */
97 writebuf(fd, item->data, item->len);
/* Open the current directory so that fslog_thread can create files
 * relative to it; failure to open trips the assertion. */
111 dirfd = open(".", O_DIRECTORY);
112 g_assert(dirfd >= 0);
/* The loop bound is the literal 1, so exactly one worker is started.
 * NOTE(review): the opt_threads value parsed in main is not consulted
 * here -- confirm whether that is intentional.
 * The FALSE argument asks g_thread_create for a non-joinable thread. */
114 for (int i = 0; i < 1; i++)
115 g_thread_create(fslog_thread, NULL, FALSE, NULL);
118 /****************************** Single-File Log ******************************/
/* Worker for the single-file backend: every item is written, one after
 * another, to the file "logfile". */
119 gpointer flatlog_thread(gpointer d)
/* The log is created if missing and truncated if it already exists. */
121 int fd = open("logfile", O_CREAT|O_WRONLY|O_TRUNC, 0666);
127 struct item *item = get_item();
/* Record layout, in write order: the key including its NUL terminator,
 * the raw in-memory bytes of item->len (so width and byte order are those
 * of the host), then item->len bytes of payload. */
129 writebuf(fd, item->key, strlen(item->key) + 1);
130 writebuf(fd, (char *)&item->len, sizeof(item->len));
131 writebuf(fd, item->data, item->len);
/* Every opt_batchsize-th record triggers the batch action on the elided
 * lines (presumably a sync to disk -- confirm).
 * NOTE(review): opt_batchsize is set with atoi in main; a value of 0 would
 * make this modulo a division by zero. */
134 if (count % opt_batchsize == 0)
/* Start the flat-log worker. One thread is created, and the FALSE argument
 * asks g_thread_create for a non-joinable thread. */
143 void launch_flatlog()
145 g_thread_create(flatlog_thread, NULL, FALSE, NULL);
148 /************************* Transactional Berkeley DB *************************/
/* Worker for the Berkeley DB backend: each item is stored as one key/value
 * pair in a B-tree database. */
149 gpointer bdb_thread(gpointer d)
157 res = db_env_create(&env, 0);
/* The environment lives in the current directory and is opened with the
 * locking, logging, memory-pool and transaction subsystems, with recovery
 * requested and free-threaded handles. */
160 res = env->open(env, ".",
161 DB_CREATE | DB_RECOVER | DB_INIT_LOCK | DB_INIT_LOG
162 | DB_INIT_MPOOL | DB_INIT_TXN | DB_THREAD, 0644);
/* Turn on DB_TXN_WRITE_NOSYNC for the environment.
 * NOTE(review): presumably guarded by opt_bdb_async on an elided line --
 * confirm. */
166 res = env->set_flags(env, DB_TXN_WRITE_NOSYNC, 1);
170 res = db_create(&db, env, 0);
/* Database "log" inside the file "log.db", B-tree access method, opened
 * with auto-commit. */
173 res = db->open(db, NULL, "log.db", "log", DB_BTREE,
174 DB_CREATE | DB_THREAD | DB_AUTO_COMMIT, 0644);
/* In the non-async mode, start a transaction whenever none is currently
 * open. */
178 if (txn == NULL && !opt_bdb_async) {
179 res = env->txn_begin(env, NULL, &txn, 0);
183 struct item *item = get_item();
/* The DBT structures are zeroed before their fields are filled in. */
186 memset(&key, 0, sizeof(key));
187 memset(&value, 0, sizeof(value));
/* The key length excludes the NUL terminator here, whereas the flat-log
 * backend writes strlen + 1 bytes. */
189 key.data = item->key;
190 key.size = strlen(item->key);
192 value.data = item->data;
193 value.size = item->len;
/* In async mode no transaction handle is supplied to the put. */
195 res = db->put(db, opt_bdb_async ? NULL : txn, &key, &value, 0);
/* Batch boundary, reached every opt_batchsize-th item.
 * NOTE(review): opt_batchsize of 0 would make this modulo a division by
 * zero; the return value of txn_checkpoint is discarded, unlike the other
 * Berkeley DB calls, which store theirs in res. */
199 if (count % opt_batchsize == 0) {
201 env->txn_checkpoint(env, 0, 0, 0);
/* Start one Berkeley DB worker; the FALSE argument asks g_thread_create
 * for a non-joinable thread. */
216 g_thread_create(bdb_thread, NULL, FALSE, NULL);
/* Benchmark driver: parses options, enqueues opt_writes items for the
 * selected backend, waits until none are outstanding, then reports the
 * elapsed time and throughput on stdout and in a results file. */
219 int main(int argc, char *argv[])
221 int64_t time_start, time_end;
// Shared queue, lock and condition variables used by the workers.
224 queue = g_async_queue_new();
225 lock = g_mutex_new();
226 cond_empty = g_cond_new();
227 cond_full = g_cond_new();
// In the option string, t, s, b and n take an argument; a, B, F and D are
// plain flags.
// NOTE(review): the atoi conversions below give no indication of
// malformed input, so non-numeric arguments silently become 0.
231 while ((opt = getopt(argc, argv, "at:s:b:n:BFD")) != -1) {
234 // Make BDB log writes more asynchronous
235 opt_bdb_async = TRUE;
238 // Set number of log worker threads
239 opt_threads = atoi(optarg);
242 // Set item size (in bytes)
243 item_size = atoi(optarg);
// Set batch size (presumably the -b option; the case label is not
// visible here).
247 opt_batchsize = atoi(optarg);
// Set the number of writes (presumably the -n option; the case label is
// not visible here).
251 opt_writes = atoi(optarg);
254 // Select BDB backend
258 // Select flat file backend
262 // Select file system directory backend
// NOTE(review): the usage text names only -t and the backend flags, while
// the option string above also accepts -a, -s, -b and -n.
266 fprintf(stderr, "Usage: %s [-t threads] {-B|-F|-D}\n",
283 fprintf(stderr, "Backend not selected!\n");
// Timed section: produce opt_writes items.
287 time_start = get_ns();
288 for (int i = 0; i < opt_writes; i++) {
289 struct item *item = g_new(struct item, 1);
// Keys have the form item-000000, item-000001, ...
290 item->key = g_strdup_printf("item-%06d", i);
// The payload buffer is allocated with g_malloc; no visible line fills it
// in.
291 item->data = g_malloc(item_size);
292 item->len = item_size;
295 g_async_queue_push(queue, item);
// Pause the producer once opt_batchsize items are outstanding.
// NOTE(review): this is a single if, not a loop like the drain below, so
// the condition is not re-tested after the wait returns. The matching
// lock acquisition is presumably on an elided line -- confirm.
297 if (outstanding == opt_batchsize)
298 g_cond_wait(cond_empty, lock);
299 g_mutex_unlock(lock);
// Drain: wait until no items remain outstanding.
303 while (outstanding > 0)
304 g_cond_wait(cond_empty, lock);
305 g_mutex_unlock(lock);
// Nanoseconds to seconds. The assignment of time_end is not visible here.
308 double elapsed = (time_end - time_start) / 1e9;
// The MiB/s figure is items per second times item_size, divided by 2^20.
309 printf("Elapsed: %f s\nThroughput: %f txn/s, %f MiB/s\n",
310 elapsed, opt_writes / elapsed,
311 opt_writes / elapsed * item_size / (1 << 20));
// Special case for the BDB backend in async mode; the statement guarded by
// this condition is not visible here.
313 if (backend == 'b' && opt_bdb_async)
// Append one tab-separated result row to ../logbench.data.
// NOTE(review): no check of f is visible before it is used -- confirm one
// exists on the elided line.
316 FILE *f = fopen("../logbench.data", "a");
// Columns: backend, item size, writes, batch size, elapsed seconds,
// transactions per second, MiB per second.
318 fprintf(f, "%c\t%d\t%d\t%d\t%f\t%f\t%f\n",
319 backend, item_size, opt_writes, opt_batchsize,
320 elapsed, opt_writes / elapsed,
321 opt_writes / elapsed * item_size / (1 << 20));