From 6bb973b965a30832c3e2d9f6a24e80d3309ef89e Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Thu, 15 Nov 2007 22:29:19 -0800 Subject: [PATCH] Initial refactoring of metadata logging. Move the writing of entries in the metadata log to a separate class in a separate file (MetadataWriter in metadata.cc). This is the first step of the changes, which moves the existing code but does not significantly change it. It is preparation for more significant changes to metadata writing. --- Makefile | 3 +- metadata.cc | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++ metadata.h | 33 ++++++++++++++++++++++ scandir.cc | 63 +++++++++-------------------------------- statcache.cc | 4 +-- util.cc | 17 ++++++++--- util.h | 4 ++- 7 files changed, 147 insertions(+), 57 deletions(-) create mode 100644 metadata.cc create mode 100644 metadata.h diff --git a/Makefile b/Makefile index 251db04..17a4a24 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,8 @@ CXXFLAGS=-O -Wall -D_FILE_OFFSET_BITS=64 $(DEBUG) \ `pkg-config --cflags $(PACKAGES)` -DLBS_VERSION=`cat version` LDFLAGS=$(DEBUG) `pkg-config --libs $(PACKAGES)` -SRCS=localdb.cc ref.cc scandir.cc sha1.cc statcache.cc store.cc util.cc +SRCS=localdb.cc metadata.cc ref.cc scandir.cc sha1.cc statcache.cc store.cc \ + util.cc OBJS=$(SRCS:.cc=.o) lbs : $(OBJS) diff --git a/metadata.cc b/metadata.cc new file mode 100644 index 0000000..5d30baf --- /dev/null +++ b/metadata.cc @@ -0,0 +1,80 @@ +/* LBS: An LFS-inspired filesystem backup system + * Copyright (C) 2007 Michael Vrable + * + * Handling of metadata written to backup snapshots. This manages the writing + * of file metadata into new backup snapshots, including breaking the metadata + * log apart across separate objects. Eventually this should include unified + * handling of the statcache, and re-use of metadata between snapshots. 
+ */
+
+#include
+#include
+
+#include "metadata.h"
+#include "store.h"
+#include "statcache.h"
+#include "util.h"
+
+using std::string;
+using std::ostream;
+
+static const size_t LBS_METADATA_BLOCK_SIZE = 65536;
+
+/* TODO: Move to header file */
+void add_segment(const string& segment);
+
+MetadataWriter::MetadataWriter(TarSegmentStore *store)
+{
+    this->store = store;
+}
+
+/* Ensure contents of metadata are flushed to an object. */
+void MetadataWriter::metadata_flush()
+{
+    string m = metadata.str();
+    if (m.size() == 0)
+        return;
+
+    /* Write current metadata information to a new object. */
+    LbsObject *meta = new LbsObject;
+    meta->set_group("metadata");
+    meta->set_data(m.data(), m.size());
+    meta->write(store);
+    meta->checksum();
+
+    /* Write a reference to this block in the root. */
+    ObjectReference ref = meta->get_ref();
+    metadata_root << "@" << ref.to_string() << "\n";
+    add_segment(ref.get_segment());
+
+    delete meta;
+
+    metadata.str("");
+}
+
+/* Log one file. The key stays "name:" so the output format is unchanged. */
+void MetadataWriter::add(const string& path, dictionary info)
+{
+    metadata << "name: " << uri_encode(path) << "\n"
+             << encode_dict(info) << "\n";
+    /* Break apart metadata listing if it becomes too large. */
+    if (metadata.str().size() > LBS_METADATA_BLOCK_SIZE)
+        metadata_flush();
+}
+
+/* Flush pending entries and write the root object; returns its reference. */
+ObjectReference MetadataWriter::close()
+{
+    metadata_flush();
+    const string root_data = metadata_root.str();
+
+    LbsObject *root = new LbsObject;
+    root->set_group("metadata");
+    root->set_data(root_data.data(), root_data.size());
+    root->write(store);
+    root->checksum();
+    add_segment(root->get_ref().get_segment());
+    ObjectReference ref = root->get_ref();
+    delete root;
+    return ref;
+}
diff --git a/metadata.h b/metadata.h
new file mode 100644
index 0000000..7a09c74
--- /dev/null
+++ b/metadata.h
@@ -0,0 +1,33 @@
+/* LBS: An LFS-inspired filesystem backup system
+ * Copyright (C) 2007 Michael Vrable
+ *
+ * Handling of metadata written to backup snapshots.
This manages the writing
+ * of file metadata into new backup snapshots, including breaking the metadata
+ * log apart across separate objects. Eventually this should include unified
+ * handling of the statcache, and re-use of metadata between snapshots.
+ */
+
+#ifndef _LBS_METADATA_H
+#define _LBS_METADATA_H
+
+#include
+#include
+
+#include "store.h"
+#include "ref.h"
+#include "util.h"
+
+/* Buffers per-file metadata entries and writes them to a segment store. */
+class MetadataWriter {
+public:
+    explicit MetadataWriter(TarSegmentStore *store);    // store is not owned
+    void add(const std::string& path, dictionary info); // append one entry
+    ObjectReference close();        // flush everything, return root reference
+
+private:
+    void metadata_flush();          // write buffered entries out as one object
+    TarSegmentStore *store;
+    std::ostringstream metadata, metadata_root; // current block; root listing
+};
+
+#endif // _LBS_METADATA_H
diff --git a/scandir.cc b/scandir.cc index a165f4e..49d8c32 100644 --- a/scandir.cc +++ b/scandir.cc @@ -25,6 +25,7 @@ #include #include "localdb.h" +#include "metadata.h" #include "store.h" #include "sha1.h" #include "statcache.h" @@ -44,13 +45,12 @@ using std::ostream; static const char lbs_version[] = LBS_STRINGIFY(LBS_VERSION); static TarSegmentStore *tss = NULL; +static MetadataWriter *metawriter = NULL; /* Buffer for holding a single block of data read from a file. */ static const size_t LBS_BLOCK_SIZE = 1024 * 1024; static char *block_buf; -static const size_t LBS_METADATA_BLOCK_SIZE = 65536; - /* Local database, which tracks objects written in this and previous * invocations to help in creating incremental snapshots. */ LocalDb *db; @@ -59,13 +59,6 @@ LocalDb *db; * skipping files which have not changed. */ StatCache *statcache; -/* Contents of the root object. This will contain a set of indirect links to - * the metadata objects. */ -std::ostringstream metadata_root; - -/* Buffer for building up metadata. */ -std::ostringstream metadata; - /* Keep track of all segments which are needed to reconstruct the snapshot.
*/ std::set segment_list; @@ -78,28 +71,11 @@ std::list searches; // Directories we don't want to save, but bool relative_paths = true; -/* Ensure contents of metadata are flushed to an object. */ -void metadata_flush() +/* Ensure that the given segment is listed as a dependency of the current + * snapshot. segment_list is a std::set, so a repeated segment is stored once. */ +void add_segment(const string& segment) { - string m = metadata.str(); - if (m.size() == 0) - return; - - /* Write current metadata information to a new object. */ - LbsObject *meta = new LbsObject; - meta->set_group("metadata"); - meta->set_data(m.data(), m.size()); - meta->write(tss); - meta->checksum(); - - /* Write a reference to this block in the root. */ - ObjectReference ref = meta->get_ref(); - metadata_root << "@" << ref.to_string() << "\n"; - segment_list.insert(ref.get_segment()); - - delete meta; - - metadata.str(""); + segment_list.insert(segment); } /* Read data from a file descriptor and return the amount of data read. A @@ -381,13 +357,7 @@ void dump_inode(const string& path, // Path within snapshot file_info["type"] = string(1, inode_type); - metadata << "name: " << uri_encode(path) << "\n"; - dict_output(metadata, file_info); - metadata << "\n"; - - // Break apart metadata listing if it becomes too large. - if (metadata.str().size() > LBS_METADATA_BLOCK_SIZE) - metadata_flush(); + metawriter->add(path, file_info); } void scanfile(const string& path, bool include) @@ -713,6 +683,8 @@ int main(int argc, char *argv[]) tss = new TarSegmentStore(backup_dest, db); + metawriter = new MetadataWriter(tss); + /* Initialize the stat cache, for skipping over unchanged files.
*/ statcache = new StatCache; statcache->Open(localdb_dir.c_str(), desc_buf, @@ -720,22 +692,15 @@ int main(int argc, char *argv[]) scanfile(".", false); - metadata_flush(); - const string md = metadata_root.str(); - - LbsObject *root = new LbsObject; - root->set_group("metadata"); - root->set_data(md.data(), md.size()); - root->write(tss); - root->checksum(); - segment_list.insert(root->get_ref().get_segment()); - - string backup_root = root->get_ref().to_string(); - delete root; + ObjectReference root_ref = metawriter->close(); + add_segment(root_ref.get_segment()); + string backup_root = root_ref.to_string(); statcache->Close(); delete statcache; + delete metawriter; + tss->sync(); tss->dump_stats(); delete tss; diff --git a/statcache.cc b/statcache.cc index 099eb70..3594ba4 100644 --- a/statcache.cc +++ b/statcache.cc @@ -1,5 +1,5 @@ -/* LBS: An LFS-inspired filesystem backup system Copyright (C) 2007 Michael - * Vrable +/* LBS: An LFS-inspired filesystem backup system + * Copyright (C) 2007 Michael Vrable * * To speed backups, we maintain a "stat cache" containing selected information * about all regular files, including modification times and the list of blocks diff --git a/util.cc b/util.cc index a51fa3d..1f922df 100644 --- a/util.cc +++ b/util.cc @@ -99,12 +99,21 @@ long long parse_int(const string &s) return strtoll(s.c_str(), NULL, 0); } -/* Output a dictionary of string key/value pairs to the given output stream. - * The format is a sequence of lines of the form "key: value". */ -void dict_output(ostream &o, map dict) +/* Encode a dictionary of string key/value pairs into a sequence of lines of + * the form "key: value". Keys and values are emitted unescaped. */ +string encode_dict(const map& dict) { + string result; for (map::const_iterator i = dict.begin(); i != dict.end(); ++i) { - o << i->first << ": " << i->second << "\n"; + result += i->first + ": " + i->second + "\n"; } + return result; +} + +/* Output a dictionary of string key/value pairs to the given output stream.
+ * The format is that returned by encode_dict: lines of the form "key: value". */ +void dict_output(ostream &o, const map& dict) +{ + o << encode_dict(dict); } diff --git a/util.h b/util.h index 89ceac7..bb36333 100644 --- a/util.h +++ b/util.h @@ -15,7 +15,9 @@ std::string uri_encode(const std::string &in); std::string uri_decode(const std::string &in); std::string encode_int(long long n, int base=10); -void dict_output(std::ostream &o, std::map dict); +std::string encode_dict(const std::map& dict); +void dict_output(std::ostream &o, + const std::map& dict); long long parse_int(const std::string &s); -- 2.20.1