X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=store.h;h=5d877c66c4a11619a4c8f403022d74856b1750e9;hb=15f7cef45e82e54a039e5486f08134c4f9a05471;hp=d4847806c26a851cf0eb66d1afa9ef884a703957;hpb=38c66f088ed65d2f42264c92add6e0b33eac2bfc;p=cumulus.git diff --git a/store.h b/store.h index d484780..5d877c6 100644 --- a/store.h +++ b/store.h @@ -5,15 +5,33 @@ * into segments for storage purposes. This file provides interfaces for * reading and writing objects and segments. */ +#ifndef _LBS_STORE_H +#define _LBS_STORE_H + #include #include #include #include #include +#include + +#include "sha1.h" +/* In memory datatype to represent key/value pairs of information, such as file + * metadata. Currently implemented as map. */ typedef std::map dictionary; +/* In-memory representation of a UUID (Universally-Unique Identifier), which is + * used to name a segment. */ +struct uuid { + uint8_t bytes[16]; +}; + +/* IOException will be thrown if an error occurs while reading or writing in + * one of the I/O wrappers. Depending upon the context; this may be fatal or + * not--typically, errors reading/writing the store will be serious, but errors + * reading an individual file are less so. */ class IOException : public std::exception { private: std::string error; @@ -23,13 +41,22 @@ public: std::string getError() const { return error; } }; +/* OutputStream is an abstract interface for writing data without seeking. + * Output could be to a file, to an object within a segment, or even to a + * memory buffer to help serialize data. */ class OutputStream { public: + OutputStream(); virtual ~OutputStream() { } - virtual void write(const void *data, size_t len) = 0; - /* Convenience functions for writing other data types. Values are always - * written out in little-endian order. 
 */ + // Write the given data buffer + void write(const void *data, size_t len); + + // Return the total number of bytes written so far + int64_t get_pos() const { return bytes_written; } + + // Convenience functions for writing other data types. Values are always + // written out in little-endian order. void write_u8(uint8_t val); void write_u16(uint16_t val); void write_u32(uint32_t val); @@ -40,30 +67,161 @@ public: void write_varint(uint64_t val); + void write_uuid(const struct uuid &u); void write_string(const std::string &s); void write_dictionary(const dictionary &d); + +protected: + // Function which actually causes a write: must be overridden by + // implementation. + virtual void write_internal(const void *data, size_t len) = 0; + +private: + int64_t bytes_written; }; +/* An OutputStream implementation which writes data to memory and returns the + * result as a string. */ class StringOutputStream : public OutputStream { -private: - std::stringstream buf; public: StringOutputStream(); - - virtual void write(const void *data, size_t len); std::string contents() const { return buf.str(); } + +protected: + virtual void write_internal(const void *data, size_t len); + +private: + std::stringstream buf; }; +/* An OutputStream implementation which writes data via the C stdio layer. */ class FileOutputStream : public OutputStream { -private: - FILE *f; public: explicit FileOutputStream(FILE *file); virtual ~FileOutputStream(); - virtual void write(const void *data, size_t len); +protected: + virtual void write_internal(const void *data, size_t len); + +private: + FILE *f; }; +/* An OutputStream which simply sends writes to another OutputStream, but + * does provide separate tracking of bytes written. 
 */ +class WrapperOutputStream : public OutputStream { +public: + explicit WrapperOutputStream(OutputStream &o); + virtual ~WrapperOutputStream() { } + +protected: + virtual void write_internal(const void *data, size_t len); + +private: + OutputStream &real; +}; + +/* Like WrapperOutputStream, but additionally computes a checksum of data as it + * is written. */ +class ChecksumOutputStream : public OutputStream { +public: + explicit ChecksumOutputStream(OutputStream &o); + virtual ~ChecksumOutputStream() { } + + /* Once a checksum is computed, no further data should be written to the + * stream. */ + const uint8_t *finish_and_checksum(); + size_t checksum_size() const { return csum.checksum_size(); } + +protected: + virtual void write_internal(const void *data, size_t len); + +private: + OutputStream &real; + SHA1Checksum csum; +}; + +/* Simple wrappers that encode integers using a StringOutputStream and return + * the encoded result. */ std::string encode_u16(uint16_t val); std::string encode_u32(uint32_t val); std::string encode_u64(uint64_t val); +std::string encode_objref(const struct uuid &segment, uint32_t object); + +/* A class which is used to pack multiple objects into a single segment, with a + * lookup table to quickly locate each object. Call new_object() to get an + * OutputStream to which a new object may be written, and optionally + * finish_object() when finished writing the current object. Only one object + * may be written to a segment at a time; if multiple objects must be written + * concurrently, they must be to different segments. */ +class SegmentWriter { +public: + SegmentWriter(OutputStream *output, struct uuid u); + ~SegmentWriter(); + + struct uuid get_uuid() const { return id; } + + // Start writing out a new object to this segment. + OutputStream *new_object(int *id, const char *type); + void finish_object(); + + // Determine size of segment data written out so far. 
+ size_t get_size() const { return raw_out->get_pos(); } + + // Utility functions for generating and formatting UUIDs for display. + static struct uuid generate_uuid(); + static std::string format_uuid(const struct uuid u); + +private: + struct index_info { + int64_t offset; // File offset at which object starts + int64_t size; // Size of object in bytes + char type[4]; // Object type code + }; + + typedef std::vector object_table; + + ChecksumOutputStream *out; // Output stream with checksumming enabled + OutputStream *raw_out; // Raw output stream, without checksumming + struct uuid id; + + OutputStream *object_stream; + + object_table objects; +}; + +/* A SegmentStore, as the name suggests, is used to store the contents of many + * segments. The SegmentStore internally tracks where data should be placed + * (such as a local directory or remote storage), and allows new segments to be + * easily created as needed. */ +class SegmentStore { +public: + // New segments will be stored in the given directory. + SegmentStore(const std::string &path); + + SegmentWriter *new_segment(); + +private: + std::string directory; +}; + +/* A SegmentPartitioner helps to divide objects up among a collection of + * segments to meet a rough size limit per segment. Like a SegmentWriter, only + * one object should be written at a time; however, multiple + * SegmentPartitioners can be created using the same base SegmentStore. */ +class SegmentPartitioner { +public: + explicit SegmentPartitioner(SegmentStore *s); + ~SegmentPartitioner(); + + OutputStream *new_object(struct uuid *uuid, int *id, const char *type); + +private: + size_t target_size; + + SegmentStore *store; + SegmentWriter *segment; + OutputStream *object; +}; + +#endif // _LBS_STORE_H