From a4cf5f4d8df46fa00992a210d587cd824cedcb08 Mon Sep 17 00:00:00 2001
From: Michael Vrable
Date: Thu, 9 Aug 2007 19:23:20 -0700
Subject: [PATCH] Compute checksums of segments and store them in the local database.

When a segment is fully written out, compute a checksum of the file
actually written (post-filtering). Store this in the local database so
that it will be possible to write out, at the end of a backup, a file
containing the checksums of all segments used for the snapshot
(including old ones not written out in this execution).
---
 localdb.cc | 23 +++++++++++++++++++++++
 localdb.h  |  3 +++
 scandir.cc |  7 ++++---
 schema.sql |  4 +++-
 sha1.cc    | 23 +++++++++++++++++++++++
 sha1.h     |  1 +
 store.cc   | 18 +++++++++++++-----
 store.h    |  8 +++++++-
 8 files changed, 77 insertions(+), 10 deletions(-)

diff --git a/localdb.cc b/localdb.cc
index 9ec0c96..5c1f7e5 100644
--- a/localdb.cc
+++ b/localdb.cc
@@ -285,3 +285,26 @@ void LocalDb::UseObject(const ObjectReference& ref)
 
     sqlite3_finalize(stmt);
 }
+
+void LocalDb::SetSegmentChecksum(const std::string &segment,  // segment name; converted to a row id by SegmentToId()
+                                 const std::string &path,  // text stored in the "path" column
+                                 const std::string &checksum)  // text stored in the "checksum" column
+{
+    int rc;
+    sqlite3_stmt *stmt;
+
+    stmt = Prepare("update segments set path = ?, checksum = ? "
+                   "where segmentid = ?");  // NOTE(review): an UPDATE only touches an existing row; presumably SegmentToId() guarantees one - confirm
+    sqlite3_bind_text(stmt, 1, path.c_str(), path.size(),
+                      SQLITE_TRANSIENT);  // SQLITE_TRANSIENT: SQLite takes its own copy of the bytes
+    sqlite3_bind_text(stmt, 2, checksum.c_str(), checksum.size(),
+                      SQLITE_TRANSIENT);
+    sqlite3_bind_int64(stmt, 3, SegmentToId(segment));  // third parameter selects the row to update
+
+    rc = sqlite3_step(stmt);
+    if (rc != SQLITE_DONE) {  // failure is only reported on stderr; the caller gets no indication
+        fprintf(stderr, "Could not update segment checksum in database!\n");
+    }
+
+    sqlite3_finalize(stmt);  // statement is released on both the success and the failure path
+}
diff --git a/localdb.h b/localdb.h
index 0fce30d..45bb132 100644
--- a/localdb.h
+++ b/localdb.h
@@ -30,6 +30,9 @@ public:
     bool IsOldObject(const std::string &checksum, int64_t size, double *age);
     bool IsAvailable(const ObjectReference &ref);
     void UseObject(const ObjectReference& ref);
+
+    void SetSegmentChecksum(const std::string &segment, const std::string &path,
+                            const std::string &checksum);  // writes path and checksum into the segments row for the named segment
 private:
     sqlite3 *db;
     int64_t snapshotid;
diff --git a/scandir.cc b/scandir.cc
index 1db5ec3..82160a1 100644
--- a/scandir.cc
+++ b/scandir.cc
@@ -675,7 +675,6 @@ int main(int argc, char *argv[])
         printf(" %s\n", i->c_str());
     }
 
-    tss = new TarSegmentStore(backup_dest);
     block_buf = new char[LBS_BLOCK_SIZE];
 
     /* Store the time when the backup started, so it can be included in the
@@ -695,6 +694,8 @@ int main(int argc, char *argv[])
     db->Open(database_path.c_str(), desc_buf,
              backup_scheme.size() ? backup_scheme.c_str() : NULL);
 
+    tss = new TarSegmentStore(backup_dest, db);  // now created after db->Open() so the store can be handed the database
+
     /* Initialize the stat cache, for skipping over unchanged files. */
     statcache = new StatCache;
     statcache->Open(localdb_dir.c_str(), desc_buf,
@@ -715,8 +716,6 @@ int main(int argc, char *argv[])
     string backup_root = root->get_ref().to_string();
     delete root;
 
-    db->Close();
-
     statcache->Close();
     delete statcache;
 
@@ -724,6 +723,8 @@ int main(int argc, char *argv[])
     tss->dump_stats();
     delete tss;
 
+    db->Close();  // moved after "delete tss"; presumably so segments closed during store teardown can still record checksums - confirm
+
     /* Write a backup descriptor file, which says which segments are needed and
      * where to start to restore this snapshot. The filename is based on the
      * current time. */
diff --git a/schema.sql b/schema.sql
index 6d89b4b..a37b501 100644
--- a/schema.sql
+++ b/schema.sql
@@ -14,7 +14,9 @@ create table snapshots (
 -- List of segments which have been created.
 create table segments (
     segmentid integer primary key,
-    segment text unique not null
+    segment text unique not null,
+    path text,
+    checksum text
 );
 
 -- Index of all blocks which have been stored in a snapshot, by checksum.
diff --git a/sha1.cc b/sha1.cc
index c7b7112..3218075 100644
--- a/sha1.cc
+++ b/sha1.cc
@@ -30,6 +30,7 @@
 #include "sha1.h"
 
 #include
+#include  // NOTE(review): header name is missing in this copy of the patch; process_file() uses FILE/fopen/fread, so presumably <stdio.h> - confirm
 #include
 #include
 
@@ -348,6 +349,28 @@ void SHA1Checksum::process(const void *data, size_t len)
     sha1_process_bytes(data, len, &ctx);
 }
 
+bool SHA1Checksum::process_file(const char *filename)  // feeds the file's bytes to process(); false if it cannot be opened or read
+{
+    FILE *f = fopen(filename, "rb");  // binary mode: bytes are hashed exactly as stored
+    if (f == NULL)
+        return false;
+
+    while (!feof(f)) {  // loop until a read has hit end-of-file
+        char buf[4096];
+        size_t bytes = fread(buf, 1, sizeof(buf), f);  // short (possibly 0) on the final read
+
+        if (ferror(f)) {  // read error: data already passed to process() remains in the hash state
+            fclose(f);
+            return false;
+        }
+
+        process(buf, bytes);  // only the bytes actually read are hashed
+    }
+
+    fclose(f);
+    return true;
+}
+
 const uint8_t *SHA1Checksum::checksum()
 {
     sha1_finish_ctx(&ctx, resbuf);
diff --git a/sha1.h b/sha1.h
index 1ede8b3..2d7e161 100644
--- a/sha1.h
+++ b/sha1.h
@@ -88,6 +88,7 @@ public:
     ~SHA1Checksum();
 
     void process(const void *data, size_t len);
+    bool process_file(const char *filename);  // hashes the contents of the named file; false on open or read failure
     const uint8_t *checksum();
     size_t checksum_size() const { return 20; }
     std::string checksum_str();
diff --git a/store.cc b/store.cc
index 9d4d1bd..930794d 100644
--- a/store.cc
+++ b/store.cc
@@ -230,11 +230,10 @@ ObjectReference TarSegmentStore::write_object(const char *data, size_t len,
         segment = new segment_info;
 
         segment->name = generate_uuid();
-
-        string filename = path + "/" + segment->name + ".tar";
-        filename += filter_extension;
-        segment->file = new Tarfile(filename, segment->name);
-
+        segment->basename = segment->name + ".tar";  // file name only: segment name, ".tar", then the filter extension
+        segment->basename += filter_extension;
+        segment->fullname = path + "/" + segment->basename;  // same file, prefixed with the store directory
+        segment->file = new Tarfile(segment->fullname, segment->name);  // both names are kept so close_segment() can re-read the file
         segment->count = 0;
 
         segments[group] = segment;
@@ -281,6 +280,15 @@ void TarSegmentStore::close_segment(const string &group)
     struct segment_info *segment = segments[group];
 
     delete segment->file;
+
+    if (db != NULL) {  // the database is optional (constructor default is NULL)
+        SHA1Checksum segment_checksum;
+        if (segment_checksum.process_file(segment->fullname.c_str())) {  // hash the file on disk, after segment->file was deleted above
+            string checksum = segment_checksum.checksum_str();
+            db->SetSegmentChecksum(segment->name, segment->basename, checksum);  // stores the basename, not the full path
+        }  // when hashing fails nothing is recorded and no error is reported
+    }
+
     segments.erase(segments.find(group));
     delete segment;
 }
diff --git a/store.h b/store.h
index 75c2008..a959cce 100644
--- a/store.h
+++ b/store.h
@@ -17,6 +17,7 @@
 #include
 #include
 
+#include "localdb.h"
 #include "sha1.h"
 #include "ref.h"
 
@@ -95,7 +96,9 @@ private:
 class TarSegmentStore {
 public:
     // New segments will be stored in the given directory.
-    TarSegmentStore(const std::string &path) { this->path = path; }
+    TarSegmentStore(const std::string &path,
+                    LocalDb *db = NULL)  // db may be NULL; close_segment() then skips checksum recording
+        { this->path = path; this->db = db; }
     ~TarSegmentStore() { sync(); }
 
     // Writes an object to segment in the store, and returns the name
@@ -116,10 +119,13 @@ private:
         Tarfile *file;
         std::string name;           // UUID
         int count;                  // Objects written to this segment
+        std::string basename;       // Name of segment without directory
+        std::string fullname;       // Full path to stored segment
     };
 
     std::string path;
     std::map segments;
+    LocalDb *db;  // may be NULL; used by close_segment() to record segment checksums
 
     // Ensure that all segments in the given group have been fully written.
     void close_segment(const std::string &group);
-- 
2.20.1