+ /* File is not mapped in memory. Map the entire file in, then return a
+ * pointer to just the required data. */
+ if (cachefile->addr == NULL) {
+ cachefile->addr = (const char *)mmap(NULL, length, PROT_READ,
+ MAP_SHARED, fd, 0);
+ cachefile->len = length;
+ g_atomic_int_inc(&cachefile->refcount);
+
+ close(fd);
+ return bluesky_string_new_from_mmap(cachefile, offset, size);
+ }
+
+ /* Otherwise, the file was mapped in but doesn't cover the data we need.
+ * This shouldn't happen much, if at all, but if it does just read the data
+ * we need directly from the file. We lose memory-management benefits of
+ * using mmapped data, but otherwise this works. */
+ char *buf = g_malloc(size);
+ size_t actual_size = readbuf(fd, buf, size);
+ close(fd);
+ if (actual_size != size) {
+ g_free(buf);
+ return NULL;
+ } else {
+ return bluesky_string_new(buf, size);
+ }
+}
+
+/* The arguments are mostly straightforward. log_dir is -1 for access from the
+ * journal, and non-negative for access to a cloud log segment. map_data
+ * should be TRUE when we are mapping just the data of an item whose headers
+ * we have already parsed; this suppresses the error when the access is not to
+ * the first bytes of the item. */
+BlueSkyRCStr *bluesky_log_map_object(BlueSkyCloudLog *item, gboolean map_data)
+{
+ BlueSkyFS *fs = item->fs;
+ BlueSkyCacheFile *map = NULL;
+ BlueSkyRCStr *str = NULL;
+ int location = 0;
+ size_t file_offset = 0, file_size = 0;
+ gboolean range_request = bluesky_options.full_segment_fetches
+ ? FALSE : TRUE;
+
+ if (page_size == 0) {
+ page_size = getpagesize();
+ }
+
+ bluesky_cloudlog_stats_update(item, -1);
+
+ /* First, check to see if the journal still contains a copy of the item and
+ * if so use that. */
+ if ((item->location_flags | item->pending_write) & CLOUDLOG_JOURNAL) {
+ map = bluesky_cachefile_lookup(fs, -1, item->log_seq, TRUE);
+ if (map != NULL) {
+ location = CLOUDLOG_JOURNAL;
+ file_offset = item->log_offset;
+ file_size = item->log_size;
+ }
+ }
+
+ if (location == 0 && (item->location_flags & CLOUDLOG_CLOUD)) {
+ item->location_flags &= ~CLOUDLOG_JOURNAL;
+ map = bluesky_cachefile_lookup(fs,
+ item->location.directory,
+ item->location.sequence,
+ !range_request);
+ if (map == NULL) {
+ g_warning("Unable to remap cloud log segment!");
+ goto exit1;
+ }
+ location = CLOUDLOG_CLOUD;
+ file_offset = item->location.offset;
+ file_size = item->location.size;
+ }
+
+ /* Log segments fetched from the cloud might only be partially-fetched.
+ * Check whether the object we are interested in is available. */
+ if (location == CLOUDLOG_CLOUD) {
+ while (TRUE) {
+ const BlueSkyRangesetItem *rangeitem;
+ rangeitem = bluesky_rangeset_lookup(map->items, file_offset);
+ if (rangeitem != NULL && (rangeitem->start != file_offset
+ || rangeitem->length != file_size)) {
+ g_warning("log-%d: Item offset %zd seems to be invalid!",
+ (int)item->location.sequence, file_offset);
+ goto exit2;
+ }
+ if (rangeitem == NULL) {
+ if (bluesky_verbose) {
+ g_print("Item at offset 0x%zx not available, need to fetch.\n",
+ file_offset);
+ }
+ if (range_request) {
+ uint64_t start = file_offset, length = file_size, end;
+ if (map->prefetches != NULL)
+ bluesky_rangeset_get_extents(map->prefetches,
+ &start, &length);
+ start = MIN(start, file_offset);
+ end = MAX(start + length, file_offset + file_size);
+ length = end - start;
+ cloudlog_partial_fetch_start(map, start, length);
+ if (map->prefetches != NULL) {
+ bluesky_rangeset_free(map->prefetches);
+ map->prefetches = NULL;
+ }
+ }
+ g_cond_wait(map->cond, map->lock);
+ } else if (rangeitem->start == file_offset
+ && rangeitem->length == file_size) {
+ if (bluesky_verbose)
+ g_print("Item %zd now available.\n", file_offset);
+ break;
+ }
+ }
+ }
+
+ if (map_data) {
+ if (location == CLOUDLOG_JOURNAL)
+ file_offset += sizeof(struct log_header);
+ else
+ file_offset += sizeof(struct cloudlog_header);