Make cache size run-time configurable.
[bluesky.git] / bluesky / log.c
index a904fe1..1fe6176 100644 (file)
 // no absolute guarantees on the size of a log segment.
 #define LOG_SEGMENT_SIZE (1 << 22)
 
-// Target amount of disk space to use for the journal and cache files, in
-// kilobytes.
-#define DISK_CACHE_SIZE_TARGET (64 * 1024)
-
 #define HEADER_MAGIC 0x676f4c0a
 #define FOOTER_MAGIC 0x2e435243
 
@@ -217,15 +213,6 @@ static gpointer log_thread(gpointer d)
 
         offset += sizeof(header) + sizeof(footer) + item->data->len;
 
-        /* Since we have just written a new dirty object to the journal,
-         * increment the count of live dirty objects in that journal file.  The
-         * count will be decremented when objects are deleted or written to the
-         * cloud. */
-        if (!(item->location_flags & CLOUDLOG_CLOUD)) {
-            g_atomic_int_add(&log->current_log->dirty_refs, 1);
-            item->dirty_journal = log->current_log;
-        }
-
         /* Replace the log item's string data with a memory-mapped copy of the
          * data, now that it has been written to the log file.  (Even if it
          * isn't yet on disk, it should at least be in the page cache and so
@@ -522,18 +509,31 @@ void bluesky_cachefile_gc(BlueSkyFS *fs)
          * then we'll just skip the file on this pass. */
         if (g_mutex_trylock(cachefile->lock)) {
             int64_t age = bluesky_get_current_time() - cachefile->atime;
-            g_print("%s addr=%p mapcount=%d refcount=%d dirty=%d atime_age=%f",
+            g_print("%s addr=%p mapcount=%d refcount=%d atime_age=%f",
                     cachefile->filename, cachefile->addr, cachefile->mapcount,
-                    cachefile->refcount, cachefile->dirty_refs, age / 1e6);
+                    cachefile->refcount, age / 1e6);
             if (cachefile->fetching)
                 g_print(" (fetching)");
             g_print("\n");
 
-            if (g_atomic_int_get(&fs->log->disk_used) > DISK_CACHE_SIZE_TARGET
+            gboolean deletion_candidate = FALSE;
+            if (g_atomic_int_get(&fs->log->disk_used)
+                    > bluesky_options.cache_size
                 && g_atomic_int_get(&cachefile->refcount) == 0
-                && g_atomic_int_get(&cachefile->mapcount) == 0
-                && g_atomic_int_get(&cachefile->dirty_refs) == 0)
+                && g_atomic_int_get(&cachefile->mapcount) == 0)
+            {
+                deletion_candidate = TRUE;
+            }
+
+            /* Don't allow journal files to be reclaimed until all data is
+             * known to be durably stored in the cloud. */
+            if (cachefile->type == CLOUDLOG_JOURNAL
+                && cachefile->log_seq >= fs->log->journal_watermark)
             {
+                deletion_candidate = FALSE;
+            }
+
+            if (deletion_candidate) {
                 g_print("   ...deleting\n");
                 if (unlinkat(fs->log->dirfd, cachefile->filename, 0) < 0) {
                     fprintf(stderr, "Unable to unlink journal %s: %m\n",