From: Michael Vrable Date: Fri, 30 May 2008 22:12:12 +0000 (-0700) Subject: Initial support for efficient sub-file incrementals. X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=26f68e9df784020f16bbc295342123b0eca7de9b;p=cumulus.git Initial support for efficient sub-file incrementals. This is a cleaned-up version of the code written for the OSDI'08 submission to implement sub-file incremental deltas. Large files are broken into small chunks (in a content-sensitive manner with Rabin fingerprints, as in LBFS). Hash values are computed for the chunks and stored in a new database table in the local database. On subsequent backups, when a file has changed, search for chunks that are identical, so that portions of old blocks may be re-used even if the entire blocks cannot be. --- diff --git a/Makefile b/Makefile index 06cbc30..de3e9f9 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,8 @@ CXXFLAGS=-O -Wall -D_FILE_OFFSET_BITS=64 $(DEBUG) \ `pkg-config --cflags $(PACKAGES)` -DLBS_VERSION=`cat version` LDFLAGS=$(DEBUG) `pkg-config --libs $(PACKAGES)` -SRCS=localdb.cc metadata.cc ref.cc remote.cc scandir.cc sha1.cc store.cc util.cc +SRCS=chunk.cc localdb.cc metadata.cc ref.cc remote.cc scandir.cc sha1.cc \ + store.cc subfile.cc util.cc OBJS=$(SRCS:.cc=.o) lbs : $(OBJS) diff --git a/chunk.cc b/chunk.cc new file mode 100644 index 0000000..f1f60e8 --- /dev/null +++ b/chunk.cc @@ -0,0 +1,317 @@ +/* Cumulus: Smart Filesystem Backup to Dumb Servers + * + * Copyright (C) 2006-2008 The Regents of the University of California + * Written by Michael Vrable + * + * Much of the code in this file is taken from LBFS, which is + * Copyright (C) 1998, 1999 David Mazieres (dm@uun.org) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Compute incremental backups at a sub-file level by chopping files up into + * blocks in a content-sensitive manner (using Rabin fingerprints). This code + * is largely taken from LBFS, primarily the files: + * liblbfs/fingerprint.C (fingerprint.C,v 1.1 2001/01/29 22:49:13 benjie Exp) + * liblbfs/rabinpoly.h (rabinpoly.h,v 1.4 2002/01/07 21:30:21 athicha Exp) + * liblbfs/rabinpoly.C (rabinpoly.C,v 1.1 2001/01/29 22:49:13 benjie Exp) + * async/msb.h (msb.h,v 1.6 1998/12/26 18:21:51 dm Exp) + * async/msb.C (msb.C,v 1.4 1998/12/26 18:21:51 dm Exp) + * but adapted and slimmed down to fit within Cumulus. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "chunk.h" + +using std::string; + +// Functions/data only needed internally go in a separate namespace. Public +// interfaces (at the end of the file) are in the global namespace. 
+namespace { + +#define FINGERPRINT_PT 0xbfe6b8a5bf378d83LL +#define BREAKMARK_VALUE 0x78 +#define MIN_CHUNK_SIZE 2048 +#define MAX_CHUNK_SIZE 65535 +#define TARGET_CHUNK_SIZE 4096 + +#define SFS_DEV_RANDOM "/dev/random" + +#define INT64(n) n##LL +#define MSB64 INT64(0x8000000000000000) + +template inline R +implicit_cast (R r) +{ + return r; +} + +/* Highest bit set in a byte */ +static const char bytemsb[0x100] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, +}; + +/* Find last set (most significant bit) */ +static inline u_int fls32 (uint32_t) __attribute__ ((const)); +static inline u_int +fls32 (u_int32_t v) +{ + if (v & 0xffff0000) { + if (v & 0xff000000) + return 24 + bytemsb[v>>24]; + else + return 16 + bytemsb[v>>16]; + } + if (v & 0x0000ff00) + return 8 + bytemsb[v>>8]; + else + return bytemsb[v]; +} + +static inline u_int fls64 (u_int64_t) __attribute__ ((const)); +static inline u_int +fls64 (u_int64_t v) +{ + u_int32_t h; + if ((h = v >> 32)) + return 32 + fls32 (h); + else + return fls32 ((u_int32_t) v); +} + +static uint64_t +polymod (uint64_t nh, uint64_t nl, uint64_t d) +{ + assert (d); + int k = fls64 (d) - 1; + d <<= 63 - k; + + if (nh) { + if (nh & MSB64) + nh ^= d; + for (int i = 62; i >= 0; i--) + if (nh & INT64 (1) << i) { + nh ^= d >> 63 - i; + nl ^= d << i + 1; + } + } + for (int i = 63; i >= k; i--) + if (nl & INT64 (1) << i) + nl ^= d >> 63 - i; + return nl; +} + +static void +polymult (uint64_t *php, uint64_t *plp, uint64_t x, uint64_t y) +{ + uint64_t ph = 0, pl = 0; + if (x & 1) + pl = y; + for (int i = 1; i < 64; i++) + if (x & (INT64 (1) << i)) { + ph ^= y >> (64 - i); + pl ^= y << i; + } + if (php) + *php = ph; + if (plp) + *plp = pl; +} + +static uint64_t +polymmult (uint64_t x, uint64_t y, uint64_t d) +{ + uint64_t h, l; + polymult (&h, &l, x, y); + return polymod (h, l, d); +} + +#if 0 +static uint64_t +polygcd (uint64_t x, uint64_t y) +{ + for (;;) { + if (!y) + return x; + x = polymod (0, x, y); + if (!x) + return y; + y = polymod (0, y, x); + } +} + +static bool +polyirreducible (uint64_t f) +{ + uint64_t u = 2; + int m = (fls64 (f) - 1) >> 1; + for (int i = 0; i < m; i++) { + u = polymmult (u, u, f); + if (polygcd (f, u ^ 2) != 1) + return false; + } + return true; +} + +static uint64_t +polygen (u_int degree) +{ + assert (degree > 0 && degree < 64); + uint64_t msb = INT64 (1) << degree; + uint64_t mask = msb - 1; + uint64_t f; + int rfd = open (SFS_DEV_RANDOM, O_RDONLY); + if (rfd < 0) { + fprintf (stderr, "%s: %m\n", SFS_DEV_RANDOM); + exit(1); + } + do { + if (read (rfd, &f, sizeof (f)) != implicit_cast (sizeof (f))) { + fprintf (stderr, "%s: read failed\n", SFS_DEV_RANDOM); + exit(1); + } + f = (f & mask) | msb; + } while (!polyirreducible (f)); + close (rfd); + return f; +} +#endif + +class rabinpoly { + int shift; 
+ uint64_t T[256]; // Lookup table for mod + void calcT (); +public: + const uint64_t poly; // Actual polynomial + + explicit rabinpoly (uint64_t poly); + uint64_t append8 (uint64_t p, uint8_t m) const + { return ((p << 8) | m) ^ T[p >> shift]; } +}; + +void +rabinpoly::calcT () +{ + assert (poly >= 0x100); + int xshift = fls64 (poly) - 1; + shift = xshift - 8; + uint64_t T1 = polymod (0, INT64 (1) << xshift, poly); + for (int j = 0; j < 256; j++) + T[j] = polymmult (j, T1, poly) | ((uint64_t) j << xshift); +} + +rabinpoly::rabinpoly (uint64_t p) + : poly (p) +{ + calcT (); +} + +class window : public rabinpoly { +public: + enum {size = 48}; + //enum {size = 24}; +private: + uint64_t fingerprint; + int bufpos; + uint64_t U[256]; + uint8_t buf[size]; + +public: + window (uint64_t poly); + uint64_t slide8 (uint8_t m) { + if (++bufpos >= size) + bufpos = 0; + uint8_t om = buf[bufpos]; + buf[bufpos] = m; + return fingerprint = append8 (fingerprint ^ U[om], m); + } + void reset () { + fingerprint = 0; + bzero (buf, sizeof (buf)); + } +}; + +window::window (uint64_t poly) + : rabinpoly (poly), fingerprint (0), bufpos (-1) +{ + uint64_t sizeshift = 1; + for (int i = 1; i < size; i++) + sizeshift = append8 (sizeshift, 0); + for (int i = 0; i < 256; i++) + U[i] = polymmult (i, sizeshift, poly); + bzero (buf, sizeof (buf)); +} + +} // end anonymous namespace + +/* Public interface to this module. */ +int chunk_compute_max_num_breaks(size_t buflen) +{ + return (buflen / MIN_CHUNK_SIZE) + 1; +} + +int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints) +{ + size_t start, pos; + window w(FINGERPRINT_PT); + + int i = 0; + start = 0; + for (pos = 0; pos < len; pos++) { + uint64_t sig = w.slide8(buf[pos]); + size_t block_len = pos - start + 1; + if ((sig % TARGET_CHUNK_SIZE == BREAKMARK_VALUE + && block_len >= MIN_CHUNK_SIZE) || block_len >= MAX_CHUNK_SIZE) { + breakpoints[i] = pos; + start = pos + 1; + i++; + w.reset(); + } + } + + if (start < len) { + breakpoints[i] = len - 1; + i++; + } + + return i; +} + +string chunk_algorithm_name() +{ + char buf[64]; + sprintf(buf, "%s-%d", "lbfs", TARGET_CHUNK_SIZE); + return buf; +} diff --git a/chunk.h b/chunk.h new file mode 100644 index 0000000..6cc7392 --- /dev/null +++ b/chunk.h @@ -0,0 +1,41 @@ +/* Cumulus: Smart Filesystem Backup to Dumb Servers + * + * Copyright (C) 2006-2008 The Regents of the University of California + * Written by Michael Vrable + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Compute incremental backups at a sub-file level by chopping files up into + * blocks in a content-sensitive manner (using Rabin fingerprints). */ + +#ifndef _LBS_CHUNK_H +#define _LBS_CHUNK_H + +#include +#include + +/* Block breakpoints can only be computed for a single block of memory, all + * loaded at once. 
compute_breaks will, given a block of memory, compute the + * offsets at which successive blocks should end. These will be stored into + * the provided memory at breakpoints. The maximum possible number of blocks + * (given the block size constaints) can be computed by compute_max_num_breaks + * so that the breakpoints array can be properly sized. The actual number of + * blocks is returned by the compute_breaks function. */ +int chunk_compute_max_num_breaks(size_t buflen); +int chunk_compute_breaks(const char *buf, size_t len, size_t *breakpoints); +std::string chunk_algorithm_name(); + +#endif // _LBS_CHUNK_H diff --git a/localdb.cc b/localdb.cc index 58fe87a..c74e27d 100644 --- a/localdb.cc +++ b/localdb.cc @@ -479,3 +479,88 @@ bool LocalDb::GetSegmentChecksum(const string &segment, return found; } + +/* Look up and return the packed representation of the subblock chunk + * signatures. Returns true if signatures were found for the specified object, + * and if so sets *buf to point at a buffer of memory (allocated with malloc; + * the caller should free it), and *len to the length of the buffer. */ +bool LocalDb::LoadChunkSignatures(ObjectReference ref, + void **buf, size_t *len, + string *algorithm) +{ + int rc; + sqlite3_stmt *stmt; + int found = false; + + stmt = Prepare("select algorithm, signatures from subblock_signatures " + "where blockid = (select blockid from block_index " + " where segmentid = ? and object = ?)"); + sqlite3_bind_int64(stmt, 1, SegmentToId(ref.get_segment())); + string obj = ref.get_sequence(); + sqlite3_bind_text(stmt, 2, obj.c_str(), obj.size(), SQLITE_TRANSIENT); + + rc = sqlite3_step(stmt); + if (rc == SQLITE_DONE) { + } else if (rc == SQLITE_ROW) { + const void *data = sqlite3_column_blob(stmt, 0); + *len = sqlite3_column_bytes(stmt, 0); + + if (*len > 0) { + *buf = malloc(*len); + if (*buf != NULL) { + memcpy(*buf, data, *len); + *algorithm = (const char *)sqlite3_column_text(stmt, 1); + found = true; + } + } + } else { + fprintf(stderr, "Could not execute SELECT statement!\n"); + ReportError(rc); + } + + sqlite3_finalize(stmt); + + return found; +} + +/* Store the subblock chunk signatures for a specified object. The object + * itself must have already been indexed in the database. */ +void LocalDb::StoreChunkSignatures(ObjectReference ref, + const void *buf, size_t len, + const string& algorithm) +{ + int rc; + sqlite3_stmt *stmt; + + stmt = Prepare("select blockid from block_index " + "where segmentid = ? 
and object = ?"); + sqlite3_bind_int64(stmt, 1, SegmentToId(ref.get_segment())); + string obj = ref.get_sequence(); + sqlite3_bind_text(stmt, 2, obj.c_str(), obj.size(), SQLITE_TRANSIENT); + + rc = sqlite3_step(stmt); + if (rc != SQLITE_ROW) { + fprintf(stderr, + "Could not determine blockid in StoreChunkSignatures!\n"); + ReportError(rc); + throw IOException("Error getting blockid"); + } + int64_t blockid = sqlite3_column_int64(stmt, 0); + sqlite3_finalize(stmt); + + stmt = Prepare("insert or replace " + "into subblock_signatures(blockid, algorithm, signatures) " + "values (?, ?, ?)"); + sqlite3_bind_int64(stmt, 1, blockid); + sqlite3_bind_text(stmt, 2, algorithm.c_str(), algorithm.size(), + SQLITE_TRANSIENT); + sqlite3_bind_blob(stmt, 3, buf, len, SQLITE_TRANSIENT); + + rc = sqlite3_step(stmt); + if (rc != SQLITE_DONE) { + fprintf(stderr, "Could not insert sub-block checksums!\n"); + ReportError(rc); + } + + sqlite3_finalize(stmt); +} diff --git a/localdb.h b/localdb.h index 33a0029..b2a90fd 100644 --- a/localdb.h +++ b/localdb.h @@ -54,6 +54,13 @@ public: const std::string &checksum, int size); bool GetSegmentChecksum(const std::string &segment, std::string *seg_path, std::string *seg_checksum); + + bool LoadChunkSignatures(ObjectReference ref, + void **buf, size_t *len, + std::string *algorithm); + void StoreChunkSignatures(ObjectReference ref, + const void *buf, size_t len, + const std::string &algorithm); private: sqlite3 *db; int64_t snapshotid; diff --git a/ref.h b/ref.h index deae39c..d1a0e0c 100644 --- a/ref.h +++ b/ref.h @@ -97,6 +97,7 @@ public: std::string get_segment() const { return segment; } std::string get_sequence() const { return object; } std::string get_basename() const { return segment + "/" + object; } + ObjectReference base() const { return ObjectReference(segment, object); } bool has_checksum() const { return checksum_valid; } std::string get_checksum() const { return checksum; } @@ -113,6 +114,12 @@ public: bool merge(ObjectReference ref); + // Maybe provide non-string implementations? + bool operator==(const ObjectReference &x) const + { return to_string() == x.to_string(); } + bool operator<(const ObjectReference &x) const + { return to_string() < x.to_string(); } + private: RefType type; std::string segment, object, checksum; diff --git a/scandir.cc b/scandir.cc index a005d41..d50867a 100644 --- a/scandir.cc +++ b/scandir.cc @@ -50,6 +50,7 @@ #include "remote.h" #include "store.h" #include "sha1.h" +#include "subfile.h" #include "util.h" using std::list; @@ -139,17 +140,23 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, /* Look up this file in the old stat cache, if we can. If the stat * information indicates that the file has not changed, do not bother - * re-reading the entire contents. */ + * re-reading the entire contents. Even if the information has been + * changed, we can use the list of old blocks in the search for a sub-block + * incremental representation. */ bool cached = false; + list old_blocks; - if (metawriter->find(path) && metawriter->is_unchanged(&stat_buf)) { + bool found = metawriter->find(path); + if (found) + old_blocks = metawriter->get_blocks(); + + if (found && metawriter->is_unchanged(&stat_buf)) { cached = true; - list blocks = metawriter->get_blocks(); /* If any of the blocks in the object have been expired, then we should * fall back to fully reading in the file. 
*/ - for (list::const_iterator i = blocks.begin(); - i != blocks.end(); ++i) { + for (list::const_iterator i = old_blocks.begin(); + i != old_blocks.end(); ++i) { const ObjectReference &ref = *i; if (!db->IsAvailable(ref)) { cached = false; @@ -161,8 +168,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, /* If everything looks okay, use the cached information */ if (cached) { file_info["checksum"] = metawriter->get_checksum(); - for (list::const_iterator i = blocks.begin(); - i != blocks.end(); ++i) { + for (list::const_iterator i = old_blocks.begin(); + i != old_blocks.end(); ++i) { const ObjectReference &ref = *i; object_list.push_back(ref.to_string()); if (ref.is_normal()) @@ -177,6 +184,9 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, * time. */ if (!cached) { SHA1Checksum hash; + Subfile subfile(db); + subfile.load_old_blocks(old_blocks); + while (true) { ssize_t bytes = file_read(fd, block_buf, LBS_BLOCK_SIZE); if (bytes == 0) @@ -215,6 +225,8 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, ref = db->FindObject(block_csum, bytes); } + list refs; + // Store a copy of the object if one does not yet exist if (ref.is_null()) { LbsObject *o = new LbsObject; @@ -247,18 +259,19 @@ int64_t dumpfile(int fd, dictionary &file_info, const string &path, status = "new"; } - o->set_data(block_buf, bytes); - o->write(tss); - ref = o->get_ref(); - db->StoreObject(ref, block_csum, bytes, block_age); - ref.set_range(0, bytes); - delete o; + subfile.analyze_new_block(block_buf, bytes); + refs = subfile.create_incremental(tss, o, block_age); + } else { + refs.push_back(ref); } - object_list.push_back(ref.to_string()); - if (ref.is_normal()) - add_segment(ref.get_segment()); - db->UseObject(ref); + while (!refs.empty()) { + ref = refs.front(); refs.pop_front(); + object_list.push_back(ref.to_string()); + if (ref.is_normal()) + add_segment(ref.get_segment()); + db->UseObject(ref); + } size += bytes; if (status == NULL) diff --git a/schema.sql b/schema.sql index 850e6a9..2851f0f 100644 --- a/schema.sql +++ b/schema.sql @@ -36,6 +36,21 @@ create table block_index ( create index block_content_index on block_index(checksum); create unique index block_name_index on block_index(segmentid, object); +-- Checksums for the decomposition of blocks into even smaller chunks +-- (variable-sized, but generally ~8 kB, and maximum 64 kB). Chunk boundaries +-- are determined based on the contents using Rabin fingerprints. These +-- checksums can be used for computing sub-file incrementals. +-- +-- Each block stored in block_index may have an entry in the +-- subblock_signatures table. The hash_data field is a binary blob consisting +-- of a packed sequence of (chunk length [16-bit unsigned, big-endian], +-- checksum [20 bytes for SHA-1]) tuples that should cover the entire block. +create table subblock_signatures ( + blockid integer primary key, + algorithm text not null, + signatures blob not null +); + -- Summary of segment utilization for each snapshots. 
create table segments_used ( snapshotid integer not null, diff --git a/subfile.cc b/subfile.cc new file mode 100644 index 0000000..f365c4b --- /dev/null +++ b/subfile.cc @@ -0,0 +1,337 @@ +/* Cumulus: Smart Filesystem Backup to Dumb Servers + * + * Copyright (C) 2008 The Regents of the University of California + * Written by Michael Vrable + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Allow for sub-file incremental backups: if only a portion of a file changes, + * allow the new data to be written out, and the old data to simply be + * referenced from the new metadata log. */ + +#include +#include +#include + +#include "subfile.h" +#include "chunk.h" +#include "sha1.h" + +using std::list; +using std::map; +using std::set; +using std::string; +using std::vector; +using std::pair; +using std::make_pair; + +Subfile::Subfile(LocalDb *localdb) + : db(localdb), checksums_loaded(false), new_block_summary_valid(false) +{ +} + +Subfile::~Subfile() +{ + for (size_t i = 0; i < block_list.size(); i++) { + delete[] block_list[i].chunks; + } + + free_analysis(); +} + +void Subfile::free_analysis() +{ + if (new_block_summary_valid) + delete[] new_block_summary.chunks; + + new_block_summary_valid = false; +} + +void Subfile::load_old_blocks(const list &blocks) +{ + for (list::const_iterator i = blocks.begin(); + i != blocks.end(); ++i) { + if (!i->is_normal()) + continue; + + ObjectReference base = i->base(); + if (old_blocks.find(base) == old_blocks.end()) { + old_blocks.insert(base); + if (checksums_loaded) + index_chunks(base); + } + } +} + +/* Actually load chunk signatures from the database, and index them in memory. + * This should only be called once per segment. */ +void Subfile::index_chunks(ObjectReference ref) +{ + string refstr = ref.to_string(); + + if (!db->IsAvailable(ref)) + return; + + /* Look for checksums for this block in the database. They may not exist, + * in which case simply return without error. 
*/ + char *packed_sigs; + size_t len; + string algorithm; + if (!db->LoadChunkSignatures(ref, (void **)&packed_sigs, &len, &algorithm)) + return; + if (algorithm != get_algorithm()) { + free(packed_sigs); + return; + } + + int block_id = block_list.size(); + + block_summary summary; + summary.ref = ref.base(); + summary.num_chunks = len / (2 + HASH_SIZE); + summary.chunks = new chunk_info[summary.num_chunks]; + + int block_start = 0; + for (int i = 0; i < summary.num_chunks; i++) { + char *packed_info = &packed_sigs[i * (2 + HASH_SIZE)]; + memcpy(summary.chunks[i].hash, &packed_info[2], HASH_SIZE); + + uint16_t chunk_len; + memcpy(&chunk_len, &packed_info[0], 2); + summary.chunks[i].len = ntohs(chunk_len); + summary.chunks[i].offset = block_start; + block_start += summary.chunks[i].len; + + chunk_index[string(summary.chunks[i].hash, HASH_SIZE)] + = make_pair(block_id, i); + } + + block_list.push_back(summary); + free(packed_sigs); +} + +/* Signatures can be loaded lazily; this method should be called before any + * actual access to the chunk signatures is required, to ensure the data has + * actually been loaded. */ +void Subfile::ensure_signatures_loaded() +{ + if (checksums_loaded) + return; + + for (set::iterator i = old_blocks.begin(); + i != old_blocks.end(); ++i) { + index_chunks(*i); + } + + checksums_loaded = true; +} + +void Subfile::analyze_new_block(const char *buf, size_t len) +{ + analyzed_buf = buf; + analyzed_len = len; + int max_chunks = chunk_compute_max_num_breaks(len); + + free_analysis(); + + size_t *breakpoints = new size_t[max_chunks]; + int num_breakpoints = chunk_compute_breaks(buf, len, breakpoints); + + if (num_breakpoints == 0) { + delete[] breakpoints; + return; + } + + new_block_summary.num_chunks = num_breakpoints; + new_block_summary.chunks = new chunk_info[num_breakpoints]; + + int block_start = 0; + for (int i = 0; i < num_breakpoints; i++) { + new_block_summary.chunks[i].offset = block_start; + new_block_summary.chunks[i].len = breakpoints[i] - block_start + 1; + block_start = breakpoints[i] + 1; + + SHA1Checksum hash; + hash.process(&buf[new_block_summary.chunks[i].offset], + new_block_summary.chunks[i].len); + assert(hash.checksum_size() == (size_t)HASH_SIZE); + memcpy(new_block_summary.chunks[i].hash, hash.checksum(), HASH_SIZE); + } + + new_block_summary_valid = true; + delete[] breakpoints; +} + +void Subfile::store_block_signatures(ObjectReference ref, block_summary summary) +{ + int n = summary.num_chunks; + char *packed = (char *)malloc(n * (2 + HASH_SIZE)); + + for (int i = 0; i < n; i++) { + assert(summary.chunks[i].len >= 0 && summary.chunks[i].len <= 0xffff); + uint16_t len = htons(summary.chunks[i].len); + char *packed_info = &packed[i * (2 + HASH_SIZE)]; + memcpy(&packed_info[0], &len, 2); + memcpy(&packed_info[2], summary.chunks[i].hash, HASH_SIZE); + } + + db->StoreChunkSignatures(ref, packed, n * (2 + HASH_SIZE), + get_algorithm()); + + free(packed); +} + +/* Compute an incremental representation of the most recent block analyzed. */ +enum subfile_item_type { SUBFILE_COPY, SUBFILE_NEW }; + +struct subfile_item { + subfile_item_type type; + + // For type SUBFILE_COPY + ObjectReference ref; + + // For type SUBFILE_NEW + int src_offset, dst_offset; + int len; + char hash[Subfile::HASH_SIZE]; +}; + +/* Compute an incremental representation of the data last analyzed. A list of + * references will be returned corresponding to the data. 
If new data must be + * written out to the backup, it will be written out via the LbsObject + * provided, to the provided TarSegmentStore. */ +list Subfile::create_incremental(TarSegmentStore *tss, + LbsObject *o, + double block_age) +{ + assert(new_block_summary_valid); + bool matched_old = false; + size_t new_data = 0; + + list items; + list refs; + + ensure_signatures_loaded(); + + assert(new_block_summary.num_chunks > 0); + + for (int i = 0; i < new_block_summary.num_chunks; i++) { + map >::iterator m + = chunk_index.find(string(new_block_summary.chunks[i].hash, + HASH_SIZE)); + + struct subfile_item item; + if (m == chunk_index.end()) { + item.type = SUBFILE_NEW; + item.src_offset = new_block_summary.chunks[i].offset; + item.dst_offset = new_data; + item.len = new_block_summary.chunks[i].len; + memcpy(item.hash, new_block_summary.chunks[i].hash, HASH_SIZE); + new_data += item.len; + } else { + struct chunk_info &old_chunk + = block_list[m->second.first].chunks[m->second.second]; + item.type = SUBFILE_COPY; + item.ref = block_list[m->second.first].ref; + item.ref.set_range(old_chunk.offset, old_chunk.len); + matched_old = true; + } + + items.push_back(item); + } + + // No data was matched. The entire block can be written out as is into a + // new object, and the new_block_summary used to save chunk signatures. + if (!matched_old) { + SHA1Checksum block_hash; + block_hash.process(analyzed_buf, analyzed_len); + string block_csum = block_hash.checksum_str(); + + o->set_data(analyzed_buf, analyzed_len); + o->write(tss); + ObjectReference ref = o->get_ref(); + db->StoreObject(ref, block_csum, analyzed_len, block_age); + store_block_signatures(ref, new_block_summary); + refs.push_back(ref); + delete o; + return refs; + } + + // Otherwise, construct a new block containing all literal data needed (if + // any exists), write it out, and construct the appropriate list of + // references. 
+ list::iterator i; + if (new_data > 0) { + char *literal_buf = new char[new_data]; + for (i = items.begin(); i != items.end(); ++i) { + if (i->type == SUBFILE_NEW) { + memcpy(&literal_buf[i->dst_offset], + &analyzed_buf[i->src_offset], i->len); + } + } + + SHA1Checksum block_hash; + block_hash.process(literal_buf, new_data); + string block_csum = block_hash.checksum_str(); + + o->set_group("data"); + o->set_data(literal_buf, new_data); + o->write(tss); + ObjectReference ref = o->get_ref(); + for (i = items.begin(); i != items.end(); ++i) { + if (i->type == SUBFILE_NEW) { + i->ref = ref; + i->ref.set_range(i->dst_offset, i->len); + } + } + + db->StoreObject(ref, block_csum, new_data, 0.0); + + block_summary summary; + summary.ref = ref; + summary.num_chunks = 0; + summary.chunks = new chunk_info[items.size()]; + for (i = items.begin(); i != items.end(); ++i) { + if (i->type == SUBFILE_NEW) { + chunk_info &info = summary.chunks[summary.num_chunks]; + memcpy(info.hash, i->hash, HASH_SIZE); + info.offset = i->dst_offset; + info.len = i->len; + summary.num_chunks++; + } + } + + store_block_signatures(ref, summary); + + delete[] summary.chunks; + delete[] literal_buf; + } + + delete o; + + ObjectReference ref; + for (i = items.begin(); i != items.end(); ++i) { + string refstr = i->ref.to_string(); + if (!ref.merge(i->ref)) { + refs.push_back(ref); + ref = i->ref; + } + } + assert(!ref.is_null()); + refs.push_back(ref); + + return refs; +} diff --git a/subfile.h b/subfile.h new file mode 100644 index 0000000..e80f76c --- /dev/null +++ b/subfile.h @@ -0,0 +1,94 @@ +/* Cumulus: Smart Filesystem Backup to Dumb Servers + * + * Copyright (C) 2008 The Regents of the University of California + * Written by Michael Vrable + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +/* Allow for sub-file incremental backups: if only a portion of a file changes, + * allow the new data to be written out, and the old data to simply be + * referenced from the new metadata log. */ + +#ifndef _LBS_SUBFILE_H +#define _LBS_SUBFILE_H + +#include +#include +#include +#include +#include + +#include "chunk.h" +#include "localdb.h" +#include "ref.h" +#include "store.h" + +class Subfile { +public: + Subfile(LocalDb *localdb); + ~Subfile(); + + // Prepare to compute a subfile incremental by loading signatures for data + // in the old file. + void load_old_blocks(const std::list &blocks); + + // Break a new block of data into small chunks, and compute checksums of + // the chunks. After doing so, a delta can be computed, or the signatures + // can be written out to the database. The caller must not modify the + // buffer until all operations referring to it are finished. 
+ void analyze_new_block(const char *buf, size_t len); + + std::list create_incremental(TarSegmentStore *tss, + LbsObject *o, + double block_age); + + static const int HASH_SIZE = 20; + +private: + struct chunk_info { + char hash[HASH_SIZE]; + int offset, len; + }; + + struct block_summary { + ObjectReference ref; + int num_chunks; + struct chunk_info *chunks; + }; + + LocalDb *db; + bool checksums_loaded; + std::set old_blocks; + std::vector block_list; + std::map > chunk_index; + + bool new_block_summary_valid; + block_summary new_block_summary; + + const char *analyzed_buf; + size_t analyzed_len; + + void ensure_signatures_loaded(); + void index_chunks(ObjectReference ref); + void free_analysis(); + void store_block_signatures(ObjectReference ref, block_summary summary); + + std::string get_algorithm() { + return chunk_algorithm_name() + "/sha1"; + } +}; + +#endif // _LBS_SUBFILE_H
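
Usage note (illustrative only, not part of the patch): the sketch below shows how a caller can drive the new chunking interface declared in chunk.h. The helper name show_chunks is hypothetical; the three chunk_* functions and their signatures are the ones added by this commit, and each breakpoint returned by chunk_compute_breaks is the offset of the last byte of a chunk.

#include <cstdio>
#include <string>
#include <vector>

#include "chunk.h"

/* Print the chunk boundaries the Rabin-fingerprint chunker picks for a
 * buffer.  breaks[i] is the offset of the last byte of chunk i, so chunk i
 * covers [start, breaks[i]] and the next chunk begins at breaks[i] + 1. */
void show_chunks(const char *buf, size_t len)
{
    std::vector<size_t> breaks(chunk_compute_max_num_breaks(len));
    int n = chunk_compute_breaks(buf, len, &breaks[0]);

    size_t start = 0;
    for (int i = 0; i < n; i++) {
        size_t chunk_len = breaks[i] - start + 1;
        printf("chunk %d: offset=%zu len=%zu\n", i, start, chunk_len);
        start = breaks[i] + 1;
    }
    printf("algorithm: %s\n", chunk_algorithm_name().c_str());
}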
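
The signatures column in the new subblock_signatures table is, per the comment in schema.sql, a packed sequence of (chunk length, checksum) tuples: a 16-bit big-endian length followed by a 20-byte SHA-1 digest, repeated once per chunk, so a block that splits into three chunks is stored as a 66-byte blob (3 * (2 + 20) bytes). Below is a hedged sketch of a standalone decoder for that layout; decoded_chunk and decode_signatures are hypothetical names, and the logic mirrors what Subfile::index_chunks and Subfile::store_block_signatures do internally.

#include <arpa/inet.h>
#include <stdint.h>
#include <string.h>

#include <string>
#include <vector>

/* One decoded (length, SHA-1) record from a signatures blob. */
struct decoded_chunk {
    size_t offset;      // byte offset of the chunk within its parent block
    size_t len;         // chunk length in bytes
    std::string sha1;   // raw 20-byte SHA-1 digest of the chunk
};

/* Unpack a signatures blob into per-chunk records, accumulating offsets so
 * that the records cover the original block from start to end. */
std::vector<decoded_chunk> decode_signatures(const char *blob, size_t blob_len)
{
    const size_t HASH_SIZE = 20;                 // SHA-1 digest size
    const size_t RECORD_SIZE = 2 + HASH_SIZE;    // length field + digest

    std::vector<decoded_chunk> chunks;
    size_t offset = 0;
    for (size_t pos = 0; pos + RECORD_SIZE <= blob_len; pos += RECORD_SIZE) {
        uint16_t netlen;
        memcpy(&netlen, &blob[pos], 2);

        decoded_chunk c;
        c.offset = offset;
        c.len = ntohs(netlen);                   // lengths are stored big-endian
        c.sha1.assign(&blob[pos + 2], HASH_SIZE);
        chunks.push_back(c);

        offset += c.len;
    }
    return chunks;
}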