From: Michael Vrable Date: Thu, 3 May 2007 22:03:19 +0000 (-0700) Subject: Remove old store implementation. X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=commitdiff_plain;h=13431fb44e1be1bdb3397685970011404fa49be8 Remove old store implementation. --- diff --git a/Makefile b/Makefile index b71e37b..f8c9ec7 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CXXFLAGS=-O -Wall -D_FILE_OFFSET_BITS=64 -g -pg \ `pkg-config --cflags $(PACKAGES)` LDFLAGS=-g -pg -ltar `pkg-config --libs $(PACKAGES)` -SRCS=format.cc scandir.cc sha1.cc store.cc tarstore.cc +SRCS=format.cc scandir.cc sha1.cc tarstore.cc OBJS=$(SRCS:.cc=.o) scandir : $(OBJS) diff --git a/scandir.cc b/scandir.cc index 1616187..dc10771 100644 --- a/scandir.cc +++ b/scandir.cc @@ -18,7 +18,6 @@ #include #include "format.h" -#include "store.h" #include "tarstore.h" #include "sha1.h" diff --git a/store.cc b/store.cc deleted file mode 100644 index 0a833ab..0000000 --- a/store.cc +++ /dev/null @@ -1,342 +0,0 @@ -/* LBS: An LFS-inspired filesystem backup system - * Copyright (C) 2006 Michael Vrable - * - * Backup data is stored in a collection of objects, which are grouped together - * into segments for storage purposes. This file provides interfaces for - * reading and writing objects and segments. 
*/ - -#include -#include - -#include "store.h" - -using std::string; - -OutputStream::OutputStream() - : bytes_written(0) -{ -} - -void OutputStream::write(const void *data, size_t len) -{ - write_internal(data, len); - bytes_written += len; -} - -void OutputStream::write_u8(uint8_t val) -{ - write(&val, 1); -} - -void OutputStream::write_u16(uint16_t val) -{ - unsigned char buf[2]; - - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - write(buf, 2); -} - -void OutputStream::write_u32(uint32_t val) -{ - unsigned char buf[4]; - - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - buf[2] = (val >> 16) & 0xff; - buf[3] = (val >> 24) & 0xff; - write(buf, 4); -} - -void OutputStream::write_u64(uint64_t val) -{ - unsigned char buf[8]; - - buf[0] = val & 0xff; - buf[1] = (val >> 8) & 0xff; - buf[2] = (val >> 16) & 0xff; - buf[3] = (val >> 24) & 0xff; - buf[4] = (val >> 32) & 0xff; - buf[5] = (val >> 40) & 0xff; - buf[6] = (val >> 48) & 0xff; - buf[7] = (val >> 56) & 0xff; - write(buf, 8); -} - -/* Writes an integer to an output stream using a variable-sized representation: - * seven bits are written at a time (little-endian), and the eigth bit of each - * byte is set if more data follows. */ -void OutputStream::write_varint(uint64_t val) -{ - do { - uint8_t remainder = (val & 0x7f); - val >>= 7; - if (val) - remainder |= 0x80; - write_u8(remainder); - } while (val); -} - -void OutputStream::write_uuid(const struct uuid &u) -{ - write(u.bytes, 16); -} - -/* Write an arbitrary string by first writing out the length, followed by the - * data itself. 
*/ -void OutputStream::write_string(const string &s) -{ - size_t len = s.length(); - write_varint(len); - write(s.data(), len); -} - -void OutputStream::write_dictionary(const dictionary &d) -{ - size_t size = d.size(); - size_t written = 0; - - write_varint(size); - - for (dictionary::const_iterator i = d.begin(); i != d.end(); ++i) { - write_string(i->first); - write_string(i->second); - written++; - } - - assert(written == size); -} - -StringOutputStream::StringOutputStream() - : buf(std::ios_base::out) -{ -} - -void StringOutputStream::write_internal(const void *data, size_t len) -{ - buf.write((const char *)data, len); - if (!buf.good()) - throw IOException("error writing to StringOutputStream"); -} - -FileOutputStream::FileOutputStream(FILE *file) -{ - f = file; -} - -FileOutputStream::~FileOutputStream() -{ - fclose(f); -} - -void FileOutputStream::write_internal(const void *data, size_t len) -{ - size_t res; - - res = fwrite(data, 1, len, f); - if (res != len) { - throw IOException("write error"); - } -} - -WrapperOutputStream::WrapperOutputStream(OutputStream &o) - : real(o) -{ -} - -void WrapperOutputStream::write_internal(const void *data, size_t len) -{ - real.write(data, len); -} - -/* Provide checksumming of a data stream. */ -ChecksumOutputStream::ChecksumOutputStream(OutputStream &o) - : real(o) -{ -} - -void ChecksumOutputStream::write_internal(const void *data, size_t len) -{ - real.write(data, len); - csum.process(data, len); -} - -const uint8_t *ChecksumOutputStream::finish_and_checksum() -{ - return csum.checksum(); -} - -/* Utility functions, for encoding data types to strings. 
*/ -string encode_u16(uint16_t val) -{ - StringOutputStream s; - s.write_u16(val); - return s.contents(); -} - -string encode_u32(uint32_t val) -{ - StringOutputStream s; - s.write_u32(val); - return s.contents(); -} - -string encode_u64(uint64_t val) -{ - StringOutputStream s; - s.write_u64(val); - return s.contents(); -} - -string encode_objref(const struct uuid &segment, uint32_t object) -{ - StringOutputStream s; - s.write_uuid(segment); - s.write_u32(object); - return s.contents(); -} - -SegmentWriter::SegmentWriter(OutputStream *output, struct uuid u) - : raw_out(output), - id(u), - object_stream(NULL) -{ - /* All output data will be checksummed except the very last few bytes, - * which are the checksum itself. */ - out = new ChecksumOutputStream(*raw_out); - - /* Write out the segment header first. */ - static const char signature[] = "LBSSEG0\n"; - out->write(signature, strlen(signature)); - out->write_uuid(id); -} - -SegmentWriter::~SegmentWriter() -{ - if (object_stream) - finish_object(); - - // Write out the object table which gives the sizes and locations of all - // objects, and then add the trailing signature, which indicates the end of - // the segment and gives the offset of the object table. - int64_t index_offset = out->get_pos(); - - for (object_table::const_iterator i = objects.begin(); - i != objects.end(); ++i) { - out->write_s64(i->offset); - out->write_s64(i->size); - out->write(i->type, sizeof(i->type)); - } - - static const char signature2[] = "LBSEND"; - out->write(signature2, strlen(signature2)); - out->write_s64(index_offset); - out->write_u32(objects.size()); - - /* Finally, append a checksum to the end of the file, so that its integrity - * (against accidental, not malicious, corruption) can be verified. 
*/ - const uint8_t *csum = out->finish_and_checksum(); - raw_out->write(csum, out->checksum_size()); - - /* The SegmentWriter takes ownership of the OutputStream it is writing to, - * and destroys it automatically when done with the segment. */ - delete out; - delete raw_out; -} - -OutputStream *SegmentWriter::new_object(int *id, const char *type) -{ - if (object_stream) - finish_object(); - - if (id != NULL) - *id = objects.size(); - - struct index_info info; - info.offset = out->get_pos(); - info.size = -1; // Will be filled in when object is finished - strncpy(info.type, type, sizeof(info.type)); - objects.push_back(info); - - object_stream = new WrapperOutputStream(*out); - return object_stream; -} - -void SegmentWriter::finish_object() -{ - assert(object_stream != NULL); - - // Fill in object size, which could not be stored at start - objects.back().size = object_stream->get_pos(); - - delete object_stream; - object_stream = NULL; -} - -struct uuid SegmentWriter::generate_uuid() -{ - struct uuid u; - - uuid_generate(u.bytes); - - return u; -} - -string SegmentWriter::format_uuid(const struct uuid u) -{ - // A UUID only takes 36 bytes, plus the trailing '\0', so this is safe. 
- char buf[40]; - - uuid_unparse_lower(u.bytes, buf); - - return string(buf); -} - -SegmentStore::SegmentStore(const string &path) - : directory(path) -{ -} - -SegmentWriter *SegmentStore::new_segment() -{ - struct uuid id = SegmentWriter::generate_uuid(); - string filename = directory + "/" + SegmentWriter::format_uuid(id); - - FILE *f = fopen(filename.c_str(), "wb"); - if (f == NULL) - throw IOException("Unable to open new segment"); - - return new SegmentWriter(new FileOutputStream(f), id); -} - -SegmentPartitioner::SegmentPartitioner(SegmentStore *s) - : store(s), - segment(NULL), - object(NULL) -{ - // Default target size is around 1 MB - target_size = 1024 * 1024; -} - -SegmentPartitioner::~SegmentPartitioner() -{ - if (segment) - delete segment; -} - -OutputStream *SegmentPartitioner::new_object(struct uuid *uuid, int *id, - const char *type) -{ - if (segment != NULL && segment->get_size() > target_size) { - delete segment; - segment = NULL; - } - - if (segment == NULL) - segment = store->new_segment(); - - if (uuid != NULL) - *uuid = segment->get_uuid(); - - return segment->new_object(id, type); -} diff --git a/store.h b/store.h deleted file mode 100644 index 5d877c6..0000000 --- a/store.h +++ /dev/null @@ -1,227 +0,0 @@ -/* LBS: An LFS-inspired filesystem backup system - * Copyright (C) 2006 Michael Vrable - * - * Backup data is stored in a collection of objects, which are grouped together - * into segments for storage purposes. This file provides interfaces for - * reading and writing objects and segments. */ - -#ifndef _LBS_STORE_H -#define _LBS_STORE_H - -#include - -#include -#include -#include -#include -#include - -#include "sha1.h" - -/* In memory datatype to represent key/value pairs of information, such as file - * metadata. Currently implemented as map. */ -typedef std::map<std::string, std::string> dictionary; - -/* In-memory representation of a UUID (Universally-Unique Identifier), which is - * used to name a segment. 
*/ -struct uuid { - uint8_t bytes[16]; -}; - -/* IOException will be thrown if an error occurs while reading or writing in - * one of the I/O wrappers. Depending upon the context; this may be fatal or - * not--typically, errors reading/writing the store will be serious, but errors - * reading an individual file are less so. */ -class IOException : public std::exception { -private: - std::string error; -public: - explicit IOException(const std::string &err) { error = err; } - virtual ~IOException() throw () { } - std::string getError() const { return error; } -}; - -/* OutputStream is an abstract interface for writing data without seeking. - * Output could be to a file, to an object within a segment, or even to a - * memory buffer to help serialize data. */ -class OutputStream { -public: - OutputStream(); - virtual ~OutputStream() { } - - // Write the given data buffer - void write(const void *data, size_t len); - - // Return the total number of bytes written so far - int64_t get_pos() const { return bytes_written; } - - // Convenience functions for writing other data types. Values are always - // written out in little-endian order. - void write_u8(uint8_t val); - void write_u16(uint16_t val); - void write_u32(uint32_t val); - void write_u64(uint64_t val); - - void write_s32(int32_t val) { write_u32((uint32_t)val); } - void write_s64(int64_t val) { write_u64((uint64_t)val); } - - void write_varint(uint64_t val); - - void write_uuid(const struct uuid &u); - void write_string(const std::string &s); - void write_dictionary(const dictionary &d); - -protected: - // Function which actually causes a write: must be overridden by - // implementation. - virtual void write_internal(const void *data, size_t len) = 0; - -private: - int64_t bytes_written; -}; - -/* An OutputStream implementation which writes data to memory and returns the - * result as a string. 
*/ -class StringOutputStream : public OutputStream { -public: - StringOutputStream(); - std::string contents() const { return buf.str(); } - -protected: - virtual void write_internal(const void *data, size_t len); - -private: - std::stringstream buf; -}; - -/* An OutputStream implementation which writes data via the C stdio layer. */ -class FileOutputStream : public OutputStream { -public: - explicit FileOutputStream(FILE *file); - virtual ~FileOutputStream(); - -protected: - virtual void write_internal(const void *data, size_t len); - -private: - FILE *f; -}; - -/* An OutputStream which is simply sends writes to another OutputStream, but - * does provide separate tracking of bytes written. */ -class WrapperOutputStream : public OutputStream { -public: - explicit WrapperOutputStream(OutputStream &o); - virtual ~WrapperOutputStream() { } - -protected: - virtual void write_internal(const void *data, size_t len); - -private: - OutputStream &real; -}; - -/* Like WrapperOutputStream, but additionally computes a checksum of data as it - * is written. */ -class ChecksumOutputStream : public OutputStream { -public: - explicit ChecksumOutputStream(OutputStream &o); - virtual ~ChecksumOutputStream() { } - - /* Once a checksum is computed, no further data should be written to the - * stream. */ - const uint8_t *finish_and_checksum(); - size_t checksum_size() const { return csum.checksum_size(); } - -protected: - virtual void write_internal(const void *data, size_t len); - -private: - OutputStream &real; - SHA1Checksum csum; -}; - -/* Simple wrappers that encode integers using a StringOutputStream and return - * the encoded result. */ -std::string encode_u16(uint16_t val); -std::string encode_u32(uint32_t val); -std::string encode_u64(uint64_t val); -std::string encode_objref(const struct uuid &segment, uint32_t object); - -/* A class which is used to pack multiple objects into a single segment, with a - * lookup table to quickly locate each object. 
Call new_object() to get an - * OutputStream to which a new object may be written, and optionally - * finish_object() when finished writing the current object. Only one object - * may be written to a segment at a time; if multiple objects must be written - * concurrently, they must be to different segments. */ -class SegmentWriter { -public: - SegmentWriter(OutputStream *output, struct uuid u); - ~SegmentWriter(); - - struct uuid get_uuid() const { return id; } - - // Start writing out a new object to this segment. - OutputStream *new_object(int *id, const char *type); - void finish_object(); - - // Determine size of segment data written out so far. - size_t get_size() const { return raw_out->get_pos(); } - - // Utility functions for generating and formatting UUIDs for display. - static struct uuid generate_uuid(); - static std::string format_uuid(const struct uuid u); - -private: - struct index_info { - int64_t offset; // File offset at which object starts - int64_t size; // Size of object in bytes - char type[4]; // Object type code - }; - - typedef std::vector<struct index_info> object_table; - - ChecksumOutputStream *out; // Output stream with checksumming enabled - OutputStream *raw_out; // Raw output stream, without checksumming - struct uuid id; - - OutputStream *object_stream; - - object_table objects; -}; - -/* A SegmentStore, as the name suggests, is used to store the contents of many - * segments. The SegmentStore internally tracks where data should be placed - * (such as a local directory or remote storage), and allows new segments to be - * easily created as needed. */ -class SegmentStore { -public: - // New segments will be stored in the given directory. - SegmentStore(const std::string &path); - - SegmentWriter *new_segment(); - -private: - std::string directory; -}; - -/* A SegmentPartitioner helps to divide objects up among a collection of - * segments to meet a rough size limit per segment. 
Like a SegmentWriter, only - * one object should be written at a time; however, multiple - * SegmentPartitioners can be created using the same base SegmentStore. */ -class SegmentPartitioner { -public: - explicit SegmentPartitioner(SegmentStore *s); - ~SegmentPartitioner(); - - OutputStream *new_object(struct uuid *uuid, int *id, const char *type); - -private: - size_t target_size; - - SegmentStore *store; - SegmentWriter *segment; - OutputStream *object; -}; - -#endif // _LBS_STORE_H diff --git a/tarstore.h b/tarstore.h index a657ec9..6acee7f 100644 --- a/tarstore.h +++ b/tarstore.h @@ -13,12 +13,30 @@ #include #include +#include #include #include #include #include -#include "store.h" +#include "sha1.h" + +/* In memory datatype to represent key/value pairs of information, such as file + * metadata. Currently implemented as map. */ +typedef std::map<std::string, std::string> dictionary; + +/* IOException will be thrown if an error occurs while reading or writing in + * one of the I/O wrappers. Depending upon the context; this may be fatal or + * not--typically, errors reading/writing the store will be serious, but errors + * reading an individual file are less so. */ +class IOException : public std::exception { +private: + std::string error; +public: + explicit IOException(const std::string &err) { error = err; } + virtual ~IOException() throw () { } + std::string getError() const { return error; } +}; /* A simple wrapper around a single TAR file to represent a segment. Objects * may only be written out all at once, since the tar header must be written