X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=store.h;h=e1244f0ecd7731bfdf8cec7851bcb767b143245e;hb=7680a5bda8a87e1f5d02ea07ef767878e3bb143c;hp=795029753070ff11beaca09345cb6f5b6907d7af;hpb=948051f02c9bc3285ce1ea187dbae27120dbf7aa;p=cumulus.git diff --git a/store.h b/store.h index 7950297..e1244f0 100644 --- a/store.h +++ b/store.h @@ -14,9 +14,24 @@ #include #include #include +#include +#include "sha1.h" + +/* In memory datatype to represent key/value pairs of information, such as file + * metadata. Currently implemented as map. */ typedef std::map dictionary; +/* In-memory representation of a UUID (Universally-Unique Identifier), which is + * used to name a segment. */ +struct uuid { + uint8_t bytes[16]; +}; + +/* IOException will be thrown if an error occurs while reading or writing in + * one of the I/O wrappers. Depending upon the context, this may be fatal or + * not--typically, errors reading/writing the store will be serious, but errors + * reading an individual file are less so. */ class IOException : public std::exception { private: std::string error; @@ -26,13 +41,22 @@ public: std::string getError() const { return error; } }; +/* OutputStream is an abstract interface for writing data without seeking. + * Output could be to a file, to an object within a segment, or even to a + * memory buffer to help serialize data. */ class OutputStream { public: + OutputStream(); virtual ~OutputStream() { } - virtual void write(const void *data, size_t len) = 0; - /* Convenience functions for writing other data types. Values are always - * written out in little-endian order. */ + // Write the given data buffer + void write(const void *data, size_t len); + + // Return the total number of bytes written so far + int64_t get_pos() const { return bytes_written; } + + // Convenience functions for writing other data types. Values are always + // written out in little-endian order. 
void write_u8(uint8_t val); void write_u16(uint16_t val); void write_u32(uint32_t val); @@ -43,32 +67,156 @@ public: void write_varint(uint64_t val); + void write_uuid(const struct uuid &u); void write_string(const std::string &s); void write_dictionary(const dictionary &d); + +protected: + // Function which actually causes a write: must be overridden by + // implementation. + virtual void write_internal(const void *data, size_t len) = 0; + +private: + int64_t bytes_written; }; +/* An OutputStream implementation which writes data to memory and returns the + * result as a string. */ class StringOutputStream : public OutputStream { -private: - std::stringstream buf; public: StringOutputStream(); - - virtual void write(const void *data, size_t len); std::string contents() const { return buf.str(); } + +protected: + virtual void write_internal(const void *data, size_t len); + +private: + std::stringstream buf; }; +/* An OutputStream implementation which writes data via the C stdio layer. */ class FileOutputStream : public OutputStream { -private: - FILE *f; public: explicit FileOutputStream(FILE *file); virtual ~FileOutputStream(); - virtual void write(const void *data, size_t len); +protected: + virtual void write_internal(const void *data, size_t len); + +private: + FILE *f; +}; + +/* An OutputStream which simply sends writes to another OutputStream, but + * does provide separate tracking of bytes written. */ +class WrapperOutputStream : public OutputStream { +public: + explicit WrapperOutputStream(OutputStream &o); + virtual ~WrapperOutputStream() { } + +protected: + virtual void write_internal(const void *data, size_t len); + +private: + OutputStream &real; }; +/* Like WrapperOutputStream, but additionally computes a checksum of data as it + * is written. 
*/ +class ChecksumOutputStream : public OutputStream { +public: + explicit ChecksumOutputStream(OutputStream &o); + virtual ~ChecksumOutputStream() { } + + /* Once a checksum is computed, no further data should be written to the + * stream. */ + const uint8_t *finish_and_checksum(); + size_t checksum_size() const { return csum.checksum_size(); } + +protected: + virtual void write_internal(const void *data, size_t len); + +private: + OutputStream &real; + SHA1Checksum csum; +}; + +/* Simple wrappers that encode integers using a StringOutputStream and return + * the encoded result. */ std::string encode_u16(uint16_t val); std::string encode_u32(uint32_t val); std::string encode_u64(uint64_t val); +std::string encode_objref(const struct uuid &segment, uint32_t object); + +/* A class which is used to pack multiple objects into a single segment, with a + * lookup table to quickly locate each object. Call new_object() to get an + * OutputStream to which a new object may be written, and optionally + * finish_object() when finished writing the current object. Only one object + * may be written to a segment at a time; if multiple objects must be written + * concurrently, they must be to different segments. */ +class SegmentWriter { +public: + SegmentWriter(OutputStream *output, struct uuid u); + ~SegmentWriter(); + + struct uuid get_uuid() const { return id; } + + // Start writing out a new object to this segment. + OutputStream *new_object(int *id); + void finish_object(); + + // Determine size of segment data written out so far. + size_t get_size() const { return raw_out->get_pos(); } + + // Utility functions for generating and formatting UUIDs for display. 
+ static struct uuid generate_uuid(); + static std::string format_uuid(const struct uuid u); + +private: + typedef std::vector > object_table; + + ChecksumOutputStream *out; // Output stream with checksumming enabled + OutputStream *raw_out; // Raw output stream, without checksumming + struct uuid id; + + int64_t object_start_offset; + OutputStream *object_stream; + + object_table objects; +}; + +/* A SegmentStore, as the name suggests, is used to store the contents of many + * segments. The SegmentStore internally tracks where data should be placed + * (such as a local directory or remote storage), and allows new segments to be + * easily created as needed. */ +class SegmentStore { +public: + // New segments will be stored in the given directory. + SegmentStore(const std::string &path); + + SegmentWriter *new_segment(); + +private: + std::string directory; +}; + +/* A SegmentPartitioner helps to divide objects up among a collection of + * segments to meet a rough size limit per segment. Like a SegmentWriter, only + * one object should be written at a time; however, multiple + * SegmentPartitioners can be created using the same base SegmentStore. */ +class SegmentPartitioner { +public: + explicit SegmentPartitioner(SegmentStore *s); + ~SegmentPartitioner(); + + OutputStream *new_object(struct uuid *uuid, int *id); + +private: + size_t target_size; + + SegmentStore *store; + SegmentWriter *segment; + OutputStream *object; +}; #endif // _LBS_STORE_H