From: Michael Vrable Date: Thu, 3 May 2007 22:13:21 +0000 (-0700) Subject: Rename tarstore -> store, since it is the only implementation now. X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=commitdiff_plain;h=d5afe2c08724b4b263a76f187e39861c1e6e0cf5 Rename tarstore -> store, since it is the only implementation now. --- diff --git a/Makefile b/Makefile index f8c9ec7..ffa28a3 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CXXFLAGS=-O -Wall -D_FILE_OFFSET_BITS=64 -g -pg \ `pkg-config --cflags $(PACKAGES)` LDFLAGS=-g -pg -ltar `pkg-config --libs $(PACKAGES)` -SRCS=format.cc scandir.cc sha1.cc tarstore.cc +SRCS=format.cc scandir.cc sha1.cc store.cc OBJS=$(SRCS:.cc=.o) scandir : $(OBJS) diff --git a/scandir.cc b/scandir.cc index dc10771..f3bf302 100644 --- a/scandir.cc +++ b/scandir.cc @@ -18,7 +18,7 @@ #include #include "format.h" -#include "tarstore.h" +#include "store.h" #include "sha1.h" using std::list; diff --git a/store.cc b/store.cc new file mode 100644 index 0000000..00fd26c --- /dev/null +++ b/store.cc @@ -0,0 +1,185 @@ +/* LBS: An LFS-inspired filesystem backup system + * Copyright (C) 2007 Michael Vrable + * + * Backup data is stored in a collection of objects, which are grouped together + * into segments for storage purposes. This implementation of the object store + * is built on top of libtar, and represents segments as TAR files and objects + * as files within them. */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "store.h" + +using std::list; +using std::set; +using std::string; + +list TarSegmentStore::norefs; + +Tarfile::Tarfile(const string &path, const string &segment) + : size(0), + segment_name(segment) +{ + if (tar_open(&t, (char *)path.c_str(), NULL, O_WRONLY | O_CREAT, 0600, + TAR_VERBOSE | TAR_GNU) == -1) + throw IOException("Error opening Tarfile"); +} + +Tarfile::~Tarfile() +{ + string checksum_list = checksums.str(); + internal_write_object(segment_name + "/checksums", + checksum_list.data(), checksum_list.size()); + tar_append_eof(t); + + if (tar_close(t) != 0) + throw IOException("Error closing Tarfile"); +} + +void Tarfile::write_object(int id, const char *data, size_t len) +{ + char buf[64]; + sprintf(buf, "%08x", id); + string path = segment_name + "/" + buf; + + internal_write_object(path, data, len); + + // Compute a checksum for the data block, which will be stored at the end + // of the TAR file. + SHA1Checksum hash; + hash.process(data, len); + sprintf(buf, "%08x", id); + checksums << buf << " " << hash.checksum_str() << "\n"; +} + +void Tarfile::internal_write_object(const string &path, + const char *data, size_t len) +{ + memset(&t->th_buf, 0, sizeof(struct tar_header)); + + th_set_type(t, S_IFREG | 0600); + th_set_user(t, 0); + th_set_group(t, 0); + th_set_mode(t, 0600); + th_set_size(t, len); + th_set_mtime(t, time(NULL)); + th_set_path(t, const_cast(path.c_str())); + th_finish(t); + + if (th_write(t) != 0) + throw IOException("Error writing tar header"); + + size += T_BLOCKSIZE; + + if (len == 0) + return; + + size_t blocks = (len + T_BLOCKSIZE - 1) / T_BLOCKSIZE; + size_t padding = blocks * T_BLOCKSIZE - len; + + for (size_t i = 0; i < blocks - 1; i++) { + if (tar_block_write(t, &data[i * T_BLOCKSIZE]) == -1) + throw IOException("Error writing tar block"); + } + + char block[T_BLOCKSIZE]; + memset(block, 0, sizeof(block)); + memcpy(block, &data[T_BLOCKSIZE * (blocks - 1)], T_BLOCKSIZE - padding); + if (tar_block_write(t, block) == -1) + throw IOException("Error writing final tar block"); + + size += blocks * T_BLOCKSIZE; +} + +static const size_t SEGMENT_SIZE = 4 * 1024 * 1024; + +string TarSegmentStore::write_object(const char *data, size_t len, const + std::string &group, + const std::list &refs) +{ + struct segment_info *segment; + + // Find the segment into which the object should be written, looking up by + // group. If no segment exists yet, create one. + if (segments.find(group) == segments.end()) { + segment = new segment_info; + + uint8_t uuid[16]; + char uuid_buf[40]; + uuid_generate(uuid); + uuid_unparse_lower(uuid, uuid_buf); + segment->name = uuid_buf; + + string filename = path + "/" + segment->name + ".tar"; + segment->file = new Tarfile(filename, segment->name); + + segment->count = 0; + + segments[group] = segment; + } else { + segment = segments[group]; + } + + int id = segment->count; + char id_buf[64]; + sprintf(id_buf, "%08x", id); + + segment->file->write_object(id, data, len); + segment->count++; + + string full_name = segment->name + "/" + id_buf; + + // Store any dependencies this object has on other segments, so they can be + // written when the segment is closed. + for (list::const_iterator i = refs.begin(); i != refs.end(); ++i) { + segment->refs.insert(*i); + } + + // If this segment meets or exceeds the size target, close it so that + // future objects will go into a new segment. + if (segment->file->size_estimate() >= SEGMENT_SIZE) + close_segment(group); + + return full_name; +} + +void TarSegmentStore::sync() +{ + while (!segments.empty()) + close_segment(segments.begin()->first); +} + +void TarSegmentStore::close_segment(const string &group) +{ + struct segment_info *segment = segments[group]; + fprintf(stderr, "Closing segment group %s (%s)\n", + group.c_str(), segment->name.c_str()); + + string reflist; + for (set::iterator i = segment->refs.begin(); + i != segment->refs.end(); ++i) { + reflist += *i + "\n"; + } + segment->file->internal_write_object(segment->name + "/references", + reflist.data(), reflist.size()); + + delete segment->file; + segments.erase(segments.find(group)); + delete segment; +} + +string TarSegmentStore::object_reference_to_segment(const string &object) +{ + return object; +} diff --git a/store.h b/store.h new file mode 100644 index 0000000..6acee7f --- /dev/null +++ b/store.h @@ -0,0 +1,104 @@ +/* LBS: An LFS-inspired filesystem backup system + * Copyright (C) 2006 Michael Vrable + * + * Backup data is stored in a collection of objects, which are grouped together + * into segments for storage purposes. This implementation of the object store + * is built on top of libtar, and represents segments as TAR files and objects + * as files within them. */ + +#ifndef _LBS_TARSTORE_H +#define _LBS_TARSTORE_H + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "sha1.h" + +/* In memory datatype to represent key/value pairs of information, such as file + * metadata. Currently implemented as map. */ +typedef std::map dictionary; + +/* IOException will be thrown if an error occurs while reading or writing in + * one of the I/O wrappers. Depending upon the context; this may be fatal or + * not--typically, errors reading/writing the store will be serious, but errors + * reading an individual file are less so. */ +class IOException : public std::exception { +private: + std::string error; +public: + explicit IOException(const std::string &err) { error = err; } + virtual ~IOException() throw () { } + std::string getError() const { return error; } +}; + +/* A simple wrapper around a single TAR file to represent a segment. Objects + * may only be written out all at once, since the tar header must be written + * first; incremental writing is not supported. */ +class Tarfile { +public: + Tarfile(const std::string &path, const std::string &segment); + ~Tarfile(); + + void write_object(int id, const char *data, size_t len); + + // Return an estimate of the size of the file. + size_t size_estimate() { return size; } + + void internal_write_object(const std::string &path, + const char *data, size_t len); + +private: + size_t size; + std::string segment_name; + std::ostringstream checksums; + TAR *t; +}; + +class TarSegmentStore { +public: + // New segments will be stored in the given directory. + TarSegmentStore(const std::string &path) { this->path = path; } + ~TarSegmentStore() { sync(); } + + // Writes an object to segment in the store, and returns the name + // (segment/object) to refer to it. The optional parameter group can be + // used to control object placement; objects with different group + // parameters are kept in separate segments. + std::string write_object(const char *data, size_t len, + const std::string &group = "", + const std::list &refs = norefs); + + // Ensure all segments have been fully written. + void sync(); + +private: + struct segment_info { + Tarfile *file; + std::string name; // UUID + std::set refs; // Other segments this one refers to + int count; // Objects written to this segment + }; + + std::string path; + std::map segments; + + // An empty list which can be used as an argument to write_object to + // indicate that this object depends on no others. + static std::list norefs; + + // Ensure that all segments in the given group have been fully written. + void close_segment(const std::string &group); + + // Parse an object reference string and return just the segment name + // portion. + std::string object_reference_to_segment(const std::string &object); +}; + +#endif // _LBS_TARSTORE_H diff --git a/tarstore.cc b/tarstore.cc deleted file mode 100644 index 5133c66..0000000 --- a/tarstore.cc +++ /dev/null @@ -1,185 +0,0 @@ -/* LBS: An LFS-inspired filesystem backup system - * Copyright (C) 2007 Michael Vrable - * - * Backup data is stored in a collection of objects, which are grouped together - * into segments for storage purposes. This implementation of the object store - * is built on top of libtar, and represents segments as TAR files and objects - * as files within them. */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "tarstore.h" - -using std::list; -using std::set; -using std::string; - -list TarSegmentStore::norefs; - -Tarfile::Tarfile(const string &path, const string &segment) - : size(0), - segment_name(segment) -{ - if (tar_open(&t, (char *)path.c_str(), NULL, O_WRONLY | O_CREAT, 0600, - TAR_VERBOSE | TAR_GNU) == -1) - throw IOException("Error opening Tarfile"); -} - -Tarfile::~Tarfile() -{ - string checksum_list = checksums.str(); - internal_write_object(segment_name + "/checksums", - checksum_list.data(), checksum_list.size()); - tar_append_eof(t); - - if (tar_close(t) != 0) - throw IOException("Error closing Tarfile"); -} - -void Tarfile::write_object(int id, const char *data, size_t len) -{ - char buf[64]; - sprintf(buf, "%08x", id); - string path = segment_name + "/" + buf; - - internal_write_object(path, data, len); - - // Compute a checksum for the data block, which will be stored at the end - // of the TAR file. - SHA1Checksum hash; - hash.process(data, len); - sprintf(buf, "%08x", id); - checksums << buf << " " << hash.checksum_str() << "\n"; -} - -void Tarfile::internal_write_object(const string &path, - const char *data, size_t len) -{ - memset(&t->th_buf, 0, sizeof(struct tar_header)); - - th_set_type(t, S_IFREG | 0600); - th_set_user(t, 0); - th_set_group(t, 0); - th_set_mode(t, 0600); - th_set_size(t, len); - th_set_mtime(t, time(NULL)); - th_set_path(t, const_cast(path.c_str())); - th_finish(t); - - if (th_write(t) != 0) - throw IOException("Error writing tar header"); - - size += T_BLOCKSIZE; - - if (len == 0) - return; - - size_t blocks = (len + T_BLOCKSIZE - 1) / T_BLOCKSIZE; - size_t padding = blocks * T_BLOCKSIZE - len; - - for (size_t i = 0; i < blocks - 1; i++) { - if (tar_block_write(t, &data[i * T_BLOCKSIZE]) == -1) - throw IOException("Error writing tar block"); - } - - char block[T_BLOCKSIZE]; - memset(block, 0, sizeof(block)); - memcpy(block, &data[T_BLOCKSIZE * (blocks - 1)], T_BLOCKSIZE - padding); - if (tar_block_write(t, block) == -1) - throw IOException("Error writing final tar block"); - - size += blocks * T_BLOCKSIZE; -} - -static const size_t SEGMENT_SIZE = 4 * 1024 * 1024; - -string TarSegmentStore::write_object(const char *data, size_t len, const - std::string &group, - const std::list &refs) -{ - struct segment_info *segment; - - // Find the segment into which the object should be written, looking up by - // group. If no segment exists yet, create one. - if (segments.find(group) == segments.end()) { - segment = new segment_info; - - uint8_t uuid[16]; - char uuid_buf[40]; - uuid_generate(uuid); - uuid_unparse_lower(uuid, uuid_buf); - segment->name = uuid_buf; - - string filename = path + "/" + segment->name + ".tar"; - segment->file = new Tarfile(filename, segment->name); - - segment->count = 0; - - segments[group] = segment; - } else { - segment = segments[group]; - } - - int id = segment->count; - char id_buf[64]; - sprintf(id_buf, "%08x", id); - - segment->file->write_object(id, data, len); - segment->count++; - - string full_name = segment->name + "/" + id_buf; - - // Store any dependencies this object has on other segments, so they can be - // written when the segment is closed. - for (list::const_iterator i = refs.begin(); i != refs.end(); ++i) { - segment->refs.insert(*i); - } - - // If this segment meets or exceeds the size target, close it so that - // future objects will go into a new segment. - if (segment->file->size_estimate() >= SEGMENT_SIZE) - close_segment(group); - - return full_name; -} - -void TarSegmentStore::sync() -{ - while (!segments.empty()) - close_segment(segments.begin()->first); -} - -void TarSegmentStore::close_segment(const string &group) -{ - struct segment_info *segment = segments[group]; - fprintf(stderr, "Closing segment group %s (%s)\n", - group.c_str(), segment->name.c_str()); - - string reflist; - for (set::iterator i = segment->refs.begin(); - i != segment->refs.end(); ++i) { - reflist += *i + "\n"; - } - segment->file->internal_write_object(segment->name + "/references", - reflist.data(), reflist.size()); - - delete segment->file; - segments.erase(segments.find(group)); - delete segment; -} - -string TarSegmentStore::object_reference_to_segment(const string &object) -{ - return object; -} diff --git a/tarstore.h b/tarstore.h deleted file mode 100644 index 6acee7f..0000000 --- a/tarstore.h +++ /dev/null @@ -1,104 +0,0 @@ -/* LBS: An LFS-inspired filesystem backup system - * Copyright (C) 2006 Michael Vrable - * - * Backup data is stored in a collection of objects, which are grouped together - * into segments for storage purposes. This implementation of the object store - * is built on top of libtar, and represents segments as TAR files and objects - * as files within them. */ - -#ifndef _LBS_TARSTORE_H -#define _LBS_TARSTORE_H - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include "sha1.h" - -/* In memory datatype to represent key/value pairs of information, such as file - * metadata. Currently implemented as map. */ -typedef std::map dictionary; - -/* IOException will be thrown if an error occurs while reading or writing in - * one of the I/O wrappers. Depending upon the context; this may be fatal or - * not--typically, errors reading/writing the store will be serious, but errors - * reading an individual file are less so. */ -class IOException : public std::exception { -private: - std::string error; -public: - explicit IOException(const std::string &err) { error = err; } - virtual ~IOException() throw () { } - std::string getError() const { return error; } -}; - -/* A simple wrapper around a single TAR file to represent a segment. Objects - * may only be written out all at once, since the tar header must be written - * first; incremental writing is not supported. */ -class Tarfile { -public: - Tarfile(const std::string &path, const std::string &segment); - ~Tarfile(); - - void write_object(int id, const char *data, size_t len); - - // Return an estimate of the size of the file. - size_t size_estimate() { return size; } - - void internal_write_object(const std::string &path, - const char *data, size_t len); - -private: - size_t size; - std::string segment_name; - std::ostringstream checksums; - TAR *t; -}; - -class TarSegmentStore { -public: - // New segments will be stored in the given directory. - TarSegmentStore(const std::string &path) { this->path = path; } - ~TarSegmentStore() { sync(); } - - // Writes an object to segment in the store, and returns the name - // (segment/object) to refer to it. The optional parameter group can be - // used to control object placement; objects with different group - // parameters are kept in separate segments. - std::string write_object(const char *data, size_t len, - const std::string &group = "", - const std::list &refs = norefs); - - // Ensure all segments have been fully written. - void sync(); - -private: - struct segment_info { - Tarfile *file; - std::string name; // UUID - std::set refs; // Other segments this one refers to - int count; // Objects written to this segment - }; - - std::string path; - std::map segments; - - // An empty list which can be used as an argument to write_object to - // indicate that this object depends on no others. - static std::list norefs; - - // Ensure that all segments in the given group have been fully written. - void close_segment(const std::string &group); - - // Parse an object reference string and return just the segment name - // portion. - std::string object_reference_to_segment(const std::string &object); -}; - -#endif // _LBS_TARSTORE_H