+ if (g_atomic_int_dec_and_test(&mmap->mapcount)) {
+ g_mutex_lock(mmap->lock);
+ if (g_atomic_int_get(&mmap->mapcount) == 0) {
+ g_print("Unmapped log segment %d...\n", mmap->log_seq);
+ munmap((void *)mmap->addr, mmap->len);
+ mmap->addr = NULL;
+ g_atomic_int_add(&mmap->refcount, -1);
+ }
+ g_mutex_unlock(mmap->lock);
+ }
+}
+
+ /* Scan through all currently-stored files in the journal/cache and garbage
+  * collect old unused ones, if needed.
+  *
+  * gather_cachefiles() is the per-entry callback handed to
+  * g_hash_table_foreach(): user_data points at the head pointer of a GList,
+  * and each table value is pushed onto the front of that list.  The key is
+  * not used. */
+ static void gather_cachefiles(gpointer key, gpointer value, gpointer user_data)
+ {
+     GList **list_head = user_data;
+
+     *list_head = g_list_prepend(*list_head, value);
+ }
+
+ /* Three-way comparison of two BlueSkyCacheFile records by their atime field:
+  * returns -1 when a's atime is smaller than b's, 1 when it is larger, and 0
+  * when both are equal. */
+ static gint compare_cachefiles(gconstpointer a, gconstpointer b)
+ {
+     const BlueSkyCacheFile *lhs = a;
+     const BlueSkyCacheFile *rhs = b;
+     int64_t lhs_time = lhs->atime;
+     int64_t rhs_time = rhs->atime;
+
+     /* Each comparison is 0 or 1, so the difference is exactly -1, 0 or 1. */
+     return (lhs_time > rhs_time) - (lhs_time < rhs_time);
+ }
+
+ /* Walk every entry of fs->log->mmap_cache, oldest atime first, printing a
+  * status line for each file that can be locked and deleting those that are
+  * eligible.
+  *
+  * A file is deleted only when all of the following hold at the time it is
+  * examined:
+  *   - fs->log->disk_used exceeds bluesky_options.cache_size,
+  *   - its refcount and mapcount are both zero,
+  *   - it is not a journal file (type CLOUDLOG_JOURNAL) whose log_seq is at
+  *     or beyond fs->log->journal_watermark.
+  *
+  * fs->log->mmap_lock is held for the whole scan.  Per-file locks are only
+  * try-locked; a file whose lock is busy is skipped on this pass. */
+ void bluesky_cachefile_gc(BlueSkyFS *fs)
+ {
+     GList *pending = NULL;
+
+     g_mutex_lock(fs->log->mmap_lock);
+     g_hash_table_foreach(fs->log->mmap_cache, gather_cachefiles, &pending);
+
+     /* Order by atime, oldest first.  The atime should be stable since it
+      * shouldn't be updated except by threads which can grab the mmap_lock,
+      * which we already hold. */
+     pending = g_list_sort(pending, compare_cachefiles);
+
+     g_print("\nScanning cache: (total size = %d kB)\n", fs->log->disk_used);
+
+     /* The loop step pops the head link, so every path through the body
+      * (including each "continue") consumes exactly one list entry. */
+     for (; pending != NULL; pending = g_list_delete_link(pending, pending)) {
+         BlueSkyCacheFile *cf = pending->data;
+
+         /* Lock held by another thread: leave this file for a later pass. */
+         if (!g_mutex_trylock(cf->lock))
+             continue;
+
+         int64_t age = bluesky_get_current_time() - cf->atime;
+         g_print("%s addr=%p mapcount=%d refcount=%d atime_age=%f",
+                 cf->filename, cf->addr, cf->mapcount,
+                 cf->refcount, age / 1e6);
+         if (cf->fetching)
+             g_print(" (fetching)");
+         g_print("\n");
+
+         /* Over the configured cache size and nobody references or maps
+          * the file. */
+         gboolean reclaimable =
+             g_atomic_int_get(&fs->log->disk_used) > bluesky_options.cache_size
+             && g_atomic_int_get(&cf->refcount) == 0
+             && g_atomic_int_get(&cf->mapcount) == 0;
+
+         /* Don't allow journal files to be reclaimed until all data is
+          * known to be durably stored in the cloud. */
+         gboolean pinned_journal =
+             cf->type == CLOUDLOG_JOURNAL
+             && cf->log_seq >= fs->log->journal_watermark;
+
+         if (!reclaimable || pinned_journal) {
+             g_mutex_unlock(cf->lock);
+             continue;
+         }
+
+         g_print(" ...deleting\n");
+         if (unlinkat(fs->log->dirfd, cf->filename, 0) < 0) {
+             fprintf(stderr, "Unable to unlink journal %s: %m\n",
+                     cf->filename);
+         }
+
+         /* Accounting is in kB; drop the entry from the table before its
+          * filename (used as the lookup key here) is freed. */
+         g_atomic_int_add(&fs->log->disk_used, -(cf->len / 1024));
+         g_hash_table_remove(fs->log->mmap_cache, cf->filename);
+         g_mutex_unlock(cf->lock);
+         g_mutex_free(cf->lock);
+         g_cond_free(cf->cond);
+         g_free(cf->filename);
+         g_free(cf);
+     }
+     g_list_free(pending);
+
+     g_mutex_unlock(fs->log->mmap_lock);
+ }
+
+/******************************* JOURNAL REPLAY *******************************
+ * The journal replay code is used to recover filesystem state after a
+ * filesystem restart. We first look for the most recent commit record in the
+ * journal, which indicates the point before which all data in the journal has
+ * also been committed to the cloud. Then, we read in all data in the log past
+ * that point.
+ */
+ /* GCompareFunc adapter: order two NUL-terminated strings with strcmp().
+  * Passing strcmp itself through a function-pointer cast would call it via an
+  * incompatible type, so a correctly-typed wrapper is used instead. */
+ static gint journal_name_compare(gconstpointer a, gconstpointer b)
+ {
+     return strcmp((const char *)a, (const char *)b);
+ }
+
+ /* List the entries of dirname whose names begin with "journal-".
+  *
+  * Returns a GList of g_strdup()'d names sorted in strcmp() order, or NULL
+  * when the directory cannot be opened (a warning is logged) or contains no
+  * matching entries. */
+ static GList *directory_contents(const char *dirname)
+ {
+     GList *contents = NULL;
+     GError *error = NULL;
+     GDir *dir = g_dir_open(dirname, 0, &error);
+     if (dir == NULL) {
+         g_warning("Unable to open journal directory: %s: %s", dirname,
+                   error != NULL ? error->message : "unknown error");
+         g_clear_error(&error);
+         return NULL;
+     }
+
+     const gchar *file;
+     while ((file = g_dir_read_name(dir)) != NULL) {
+         if (g_str_has_prefix(file, "journal-"))
+             contents = g_list_prepend(contents, g_strdup(file));
+     }
+     g_dir_close(dir);
+
+     contents = g_list_sort(contents, journal_name_compare);
+
+     return contents;
+ }
+
+ /* Check whether a well-formed journal item starts at buf + offset.
+  *
+  * buf/len describe the journal segment contents; offset is the candidate
+  * item position inside it.  Returns TRUE only when:
+  *   - a header and footer fit between offset and len,
+  *   - the header magic is HEADER_MAGIC and its recorded offset equals offset,
+  *   - the payload (size1 + size2 + size3 bytes) plus footer fit in the buffer,
+  *   - the footer magic is FOOTER_MAGIC, and
+  *   - the CRC over header + payload + footer equals BLUESKY_CRC32C_VALIDATOR
+  *     (a warning is logged when only this last check fails).
+  *
+  * All bounds checks are written as subtractions from the remaining length so
+  * that oversized size fields or a bad offset cannot wrap the arithmetic. */
+ static gboolean validate_journal_item(const char *buf, size_t len, off_t offset)
+ {
+     const struct log_header *header;
+     const struct log_footer *footer;
+     const size_t framing = sizeof(struct log_header)
+                            + sizeof(struct log_footer);
+
+     /* Reject offsets outside the buffer before converting to size_t. */
+     if (offset < 0 || (uint64_t)offset > (uint64_t)len)
+         return FALSE;
+     size_t remaining = len - (size_t)offset;
+     if (remaining < framing)
+         return FALSE;
+
+     header = (const struct log_header *)(buf + offset);
+     if (GUINT32_FROM_LE(header->magic) != HEADER_MAGIC)
+         return FALSE;
+     if (GUINT32_FROM_LE(header->offset) != offset)
+         return FALSE;
+     size_t size = GUINT32_FROM_LE(header->size1)
+                   + GUINT32_FROM_LE(header->size2)
+                   + GUINT32_FROM_LE(header->size3);
+
+     /* The payload must fit in what is left after header and footer. */
+     if (size > remaining - framing)
+         return FALSE;
+     off_t footer_offset = offset + sizeof(struct log_header) + size;
+     footer = (const struct log_footer *)(buf + footer_offset);
+
+     if (GUINT32_FROM_LE(footer->magic) != FOOTER_MAGIC)
+         return FALSE;
+
+     uint32_t crc = crc32c(BLUESKY_CRC32C_SEED, buf + offset, framing + size);
+     if (crc != BLUESKY_CRC32C_VALIDATOR) {
+         g_warning("Journal entry failed to validate: CRC %08x != %08x",
+                   crc, BLUESKY_CRC32C_VALIDATOR);
+         return FALSE;
+     }
+
+     return TRUE;
+ }
+
+ /* Scan through a journal segment to extract correctly-written items (those
+  * that pass sanity checks and have a valid checksum).
+  *
+  * Items are visited back to back from offset 0 until validate_journal_item()
+  * rejects one.  For every item whose type is LOGTYPE_JOURNAL_CHECKPOINT, the
+  * first two little-endian 32-bit words following the header are stored into
+  * *seq and *start_offset, so after the scan they hold the values from the
+  * last checkpoint seen.  If the segment contains no checkpoint, *seq and
+  * *start_offset are left untouched. */
+ static void bluesky_replay_scan_journal(const char *buf, size_t len,
+                                         uint32_t *seq, uint32_t *start_offset)
+ {
+     const struct log_header *header;
+     off_t offset = 0;
+
+     while (validate_journal_item(buf, len, offset)) {
+         header = (const struct log_header *)(buf + offset);
+         size_t size = GUINT32_FROM_LE(header->size1)
+                       + GUINT32_FROM_LE(header->size2)
+                       + GUINT32_FROM_LE(header->size3);
+
+         /* Only trust a checkpoint whose payload is large enough to hold the
+          * two words read below; a shorter record would make us read past
+          * the payload and record garbage. */
+         if (header->type - '0' == LOGTYPE_JOURNAL_CHECKPOINT
+             && size >= 2 * sizeof(uint32_t))
+         {
+             const uint32_t *data = (const uint32_t *)((const char *)header
+                                        + sizeof(struct log_header));
+             *seq = GUINT32_FROM_LE(data[0]);
+             *start_offset = GUINT32_FROM_LE(data[1]);
+         }
+
+         offset += sizeof(struct log_header) + size + sizeof(struct log_footer);
+     }
+ }
+
+static void reload_item(BlueSkyCloudLog *log_item,
+ const char *data,
+ size_t len1, size_t len2, size_t len3)
+{
+ BlueSkyFS *fs = log_item->fs;
+ /*const char *data1 = data;*/
+ const BlueSkyCloudID *data2
+ = (const BlueSkyCloudID *)(data + len1);
+ /*const BlueSkyCloudPointer *data3
+ = (const BlueSkyCloudPointer *)(data + len1 + len2);*/
+
+ bluesky_string_unref(log_item->data);
+ log_item->data = NULL;
+ log_item->location_flags = CLOUDLOG_JOURNAL;
+
+ BlueSkyCloudID id0;
+ memset(&id0, 0, sizeof(id0));
+
+ int link_count = len2 / sizeof(BlueSkyCloudID);
+ GArray *new_links = g_array_new(FALSE, TRUE, sizeof(BlueSkyCloudLog *));
+ for (int i = 0; i < link_count; i++) {
+ BlueSkyCloudID id = data2[i];
+ BlueSkyCloudLog *ref = NULL;
+ if (memcmp(&id, &id0, sizeof(BlueSkyCloudID)) != 0) {
+ g_mutex_lock(fs->lock);
+ ref = g_hash_table_lookup(fs->locations, &id);
+ if (ref != NULL) {
+ bluesky_cloudlog_ref(ref);
+ }
+ g_mutex_unlock(fs->lock);