From ed5a047c1b8a8f5c43b0192e8e774b91497c9706 Mon Sep 17 00:00:00 2001 From: Michael Vrable Date: Mon, 14 May 2007 21:57:18 -0700 Subject: [PATCH] Keep an index of old stored blocks, using sqlite3. Link sqlite3 in with the snapshot program, and start to write a wrapper around a "local database" which tracks previously-backed-up data to make incremental backups possible. At the moment, blocks are indexed as they are stored, but we never read from the index, so blocks are not yet reused. --- Makefile | 4 +-- localdb.cc | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ localdb.h | 32 +++++++++++++++++++++ scandir.cc | 19 ++++++++++++- schema.sql | 14 +++++++++ 5 files changed, 149 insertions(+), 3 deletions(-) create mode 100644 localdb.cc create mode 100644 localdb.h create mode 100644 schema.sql diff --git a/Makefile b/Makefile index f29d2a7..33aa0c3 100644 --- a/Makefile +++ b/Makefile @@ -1,10 +1,10 @@ -PACKAGES=uuid +PACKAGES=sqlite3 uuid DEBUG=-g #-pg CXXFLAGS=-O -Wall -D_FILE_OFFSET_BITS=64 $(DEBUG) \ `pkg-config --cflags $(PACKAGES)` LDFLAGS=$(DEBUG) -ltar `pkg-config --libs $(PACKAGES)` -SRCS=format.cc ref.cc scandir.cc sha1.cc store.cc +SRCS=format.cc localdb.cc ref.cc scandir.cc sha1.cc store.cc OBJS=$(SRCS:.cc=.o) scandir : $(OBJS) diff --git a/localdb.cc b/localdb.cc new file mode 100644 index 0000000..63a9f83 --- /dev/null +++ b/localdb.cc @@ -0,0 +1,83 @@ +/* LBS: An LFS-inspired filesystem backup system + * Copyright (C) 2007 Michael Vrable + * + * When creating backup snapshots, maintain a local database of data blocks and + * checksums, in addition to the data contents (which may be stored remotely). + * This database is consulted when attempting to build incremental snapshots, + * as it says which objects can be reused. + * + * The database is implemented as an SQLite3 database, but this implementation + * detail is kept internal to this file, so that the storage format may be + * changed later. 
*/
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include "localdb.h"
+#include "store.h"
+
+using std::string;
+
+/* Open the SQLite database stored at path and begin a transaction on it.  The
+ * transaction is left open; Close() commits it.
+ *
+ * If either step fails, the SQLite error text is printed to stderr, the
+ * connection is closed again, and an IOException is thrown. */
+void LocalDb::Open(const char *path)
+{
+    int rc;
+
+    rc = sqlite3_open(path, &db);
+    if (rc) {
+        fprintf(stderr, "Can't open database: %s\n", sqlite3_errmsg(db));
+        sqlite3_close(db);
+        /* Do not leave a pointer to the closed connection behind. */
+        db = NULL;
+        throw IOException("Error opening local database");
+    }
+
+    rc = sqlite3_exec(db, "begin", NULL, NULL, NULL);
+    if (rc) {
+        /* The database itself opened fine; say what actually went wrong. */
+        fprintf(stderr, "Can't begin transaction: %s\n", sqlite3_errmsg(db));
+        sqlite3_close(db);
+        db = NULL;
+        throw IOException("Error starting transaction");
+    }
+}
+
+/* Commit the transaction started by Open() and close the database.  A failed
+ * commit is reported on stderr; the connection is closed regardless. */
+void LocalDb::Close()
+{
+    int rc;
+    rc = sqlite3_exec(db, "commit", NULL, NULL, NULL);
+    if (rc != SQLITE_OK) {
+        fprintf(stderr, "Can't commit database!\n");
+    }
+    sqlite3_close(db);
+    /* Forget the handle so the closed connection is never handed to SQLite
+     * again by a later call. */
+    db = NULL;
+}
+
+/* Record one stored object in the block_index table.
+ *
+ *   ref       supplies the segment and sequence strings identifying the object
+ *   checksum  checksum string stored alongside the object
+ *   size      size value stored alongside the object
+ *
+ * SQLite failures (prepare, bind or step) are reported on stderr and the row
+ * is simply not written; nothing is thrown for them. */
+void LocalDb::StoreObject(const ObjectReference& ref,
+                          const string &checksum, int64_t size)
+{
+    int rc;
+    sqlite3_stmt *stmt;
+    static const char s[] =
+        "insert into block_index(segment, object, checksum, size) "
+        "values (?, ?, ?, ?)";
+
+    /* sizeof(s) counts the terminating NUL, which is the byte count SQLite
+     * asks for when the text is known to be NUL-terminated.  The statement
+     * tail is not needed, so no out-pointer is passed for it. */
+    rc = sqlite3_prepare_v2(db, s, sizeof(s), &stmt, NULL);
+    if (rc != SQLITE_OK) {
+        fprintf(stderr, "Could not prepare INSERT statement: %s\n",
+                sqlite3_errmsg(db));
+        return;
+    }
+
+    string seg = ref.get_segment();
+    string obj = ref.get_sequence();
+
+    /* Bind the four parameters in order, stopping at the first failure. */
+    rc = sqlite3_bind_text(stmt, 1, seg.c_str(), seg.size(), SQLITE_TRANSIENT);
+    if (rc == SQLITE_OK) {
+        rc = sqlite3_bind_text(stmt, 2, obj.c_str(), obj.size(),
+                               SQLITE_TRANSIENT);
+    }
+    if (rc == SQLITE_OK) {
+        rc = sqlite3_bind_text(stmt, 3, checksum.c_str(), checksum.size(),
+                               SQLITE_TRANSIENT);
+    }
+    if (rc == SQLITE_OK) {
+        rc = sqlite3_bind_int64(stmt, 4, size);
+    }
+
+    if (rc != SQLITE_OK) {
+        fprintf(stderr, "Could not bind INSERT parameters: %s\n",
+                sqlite3_errmsg(db));
+    } else {
+        rc = sqlite3_step(stmt);
+        if (rc != SQLITE_DONE) {
+            fprintf(stderr, "Could not execute INSERT statement!\n");
+        }
+    }
+
+    /* Always release the statement, whether or not the insert ran. */
+    sqlite3_finalize(stmt);
+}
diff --git a/localdb.h b/localdb.h
new file mode 100644
index 0000000..8a45e0e
--- /dev/null
+++ b/localdb.h
@@ -0,0 +1,32 @@
+/* LBS: An LFS-inspired filesystem backup system
+ * Copyright (C) 2007  Michael Vrable
+ *
+ * When creating backup snapshots, maintain a local database of data blocks and
+ * checksums, in addition to the data contents (which may be stored
remotely).
+ * This database is consulted when attempting to build incremental snapshots,
+ * as it says which objects can be reused.
+ *
+ * The database is implemented as an SQLite3 database, but this implementation
+ * detail is kept internal to this file, so that the storage format may be
+ * changed later. */
+
+#ifndef _LBS_LOCALDB_H
+#define _LBS_LOCALDB_H
+
+#include
+
+#include
+
+#include "ref.h"
+
+/* Wrapper around the local block index.  Call Open() first, StoreObject() for
+ * each object to be indexed, and Close() when finished. */
+class LocalDb {
+public:
+    /* Start out with no connection, so that the handle never holds an
+     * indeterminate value before Open() has been called. */
+    LocalDb() : db(0) { }
+
+    /* Open the database file at path and begin a transaction; throws
+     * IOException if either step fails. */
+    void Open(const char *path);
+
+    /* Commit the transaction begun by Open() and close the database. */
+    void Close();
+
+    /* Insert a row into block_index holding the segment and sequence taken
+     * from ref, together with the given checksum and size. */
+    void StoreObject(const ObjectReference& ref,
+                     const std::string &checksum, int64_t size);
+private:
+    sqlite3 *db;        // SQLite connection handle, filled in by Open()
+};
+
+#endif // _LBS_LOCALDB_H
diff --git a/scandir.cc b/scandir.cc
index a227bb3..cfa29ca 100644
--- a/scandir.cc
+++ b/scandir.cc
@@ -22,6 +22,7 @@
 #include
 
 #include "format.h"
+#include "localdb.h"
 #include "store.h"
 #include "sha1.h"
 
@@ -38,6 +39,10 @@ static char *block_buf;
 
 static const size_t LBS_METADATA_BLOCK_SIZE = 65536;
 
+/* Local database, which tracks objects written in this and previous
+ * invocations to help in creating incremental snapshots. */
+LocalDb *db;
+
 /* Contents of the root object.  This will contain a set of indirect links to
  * the metadata objects.
*/
 std::ostringstream metadata_root;
@@ -133,9 +138,15 @@ int64_t dumpfile(int fd, dictionary &file_info)
             o->write(tss);
             object_list.push_back(o->get_name());
             segment_list.insert(o->get_ref().get_segment());
-            delete o;
+
+            // Index this block so it can be used by future snapshots
+            SHA1Checksum block_hash;
+            block_hash.process(block_buf, bytes);
+            db->StoreObject(o->get_ref(), block_hash.checksum_str(), bytes);
 
             size += bytes;
+
+            delete o;
         }
 
         file_info["checksum"] = hash.checksum_str();
@@ -340,6 +351,10 @@ int main(int argc, char *argv[])
 
     tss = new TarSegmentStore(backup_dest);
 
+    string database_path = backup_dest + "/localdb.sqlite";
+    db = new LocalDb;
+    db->Open(database_path.c_str());
+
     /* Write a backup descriptor file, which says which segments are needed and
      * where to start to restore this snapshot.  The filename is based on the
      * current time. */
@@ -380,6 +395,8 @@ int main(int argc, char *argv[])
         descriptor << " " << *i << "\n";
     }
 
+    db->Close();
+
     tss->sync();
     delete tss;
 
diff --git a/schema.sql b/schema.sql
new file mode 100644
index 0000000..114670c
--- /dev/null
+++ b/schema.sql
@@ -0,0 +1,14 @@
+-- We maintain a local index of data blocks that have been previously stored
+-- for constructing incremental snapshots.
+--
+-- The index is stored in an SQLite3 database.  This is its schema.
+
+-- Index of all blocks which have been stored in one snapshot, by checksum.
+create table block_index (
+    blockid integer primary key,    -- row identifier; not supplied by LocalDb::StoreObject
+    segment text,                   -- segment string of the object reference
+    object text,                    -- sequence string of the object reference
+    checksum text,                  -- checksum string passed to StoreObject (dumpfile passes the block's SHA1Checksum string)
+    size integer                    -- size passed to StoreObject (dumpfile passes the block's byte count)
+);
+create index block_content_index on block_index(checksum);    -- index on the checksum column
-- 
2.20.1