- Switch to a hierarchical file layout.
- Remove old references to the "LBS" name.
that, see design.txt.
+BACKUP REPOSITORY LAYOUT
+========================
+
+Cumulus backups are stored using a relatively simple layout. Data files
+described below are written into one of several directories on the
+backup server, depending on their purpose:
+  snapshots/
+      Snapshot descriptor files, which quickly summarize each backup
+      snapshot stored.
+  segments0/
+  segments1/
+      Storage of the bulk of the backup data, in compressed/encrypted
+      form. Technically any segment could be stored in either
+      directory (both directories will be searched when looking for a
+      segment). However, data in segments0 might be faster to access
+      (but more expensive) depending on the storage backend. The
+      intent is that segments0 can store filesystem tree metadata and
+      segments1 can store file contents.
+  meta/
+      Snapshot-specific metadata that is not core to the backup. This
+      can include checksums of segments, some data for rebuilding
+      local database contents, etc.
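+
+For example (file names are illustrative only), a repository for a
+backup scheme named "test", using sha1 checksums, might contain:
+
+    snapshots/snapshot-test-20120101T000000.cumulus
+    segments0/a704eeae-97f2-4f30-91a4-d4473956366b.tar.bz2
+    segments1/f9ba2b49-a8e5-4dd5-a0d5-e7e176c9e0e7.tar.bz2
+    meta/snapshot-test-20120101T000000.sha1sums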
+
+
DATA CHECKSUMS
==============
This segment could be stored in the filesystem as a file
    a704eeae-97f2-4f30-91a4-d4473956366b.tar
The UUID used to name a segment is assigned when the segment is created.
+These files are stored in either the segments0 or segments1 directories
+on the backup server.
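+
+As an illustrative sketch (the helper below is hypothetical, not part
+of Cumulus), a reader of the repository could locate a segment by
+checking both directories for a file whose name begins with the
+segment UUID:
+
+    import os
+
+    def find_segment(repo, uuid):
+        # Search both segment directories; the stored file may carry
+        # filter extensions (such as .tar.bz2) after the UUID.
+        for subdir in ("segments0", "segments1"):
+            d = os.path.join(repo, subdir)
+            for name in os.listdir(d):
+                if name == uuid or name.startswith(uuid + "."):
+                    return os.path.join(d, name)
+        return None
+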
Filters can be layered on top of the segment storage to provide
compression, encryption, or other features. For example, the example
NOTE: When naming an object, the segment portion consists of the UUID
only. Any extensions appended to the segment when storing it as a file
-in the filesystem (for example, .tar.bz2) are _not_ part of the name of
-the object.
+in the filesystem (for example, .tar.bz2) and path information (for
+example, segments0) are _not_ part of the name of the object.
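+
+For example, the object name
+
+    a704eeae-97f2-4f30-91a4-d4473956366b/000001ad
+
+(the sequence number here is illustrative) refers to the same object
+whether the segment is stored as
+segments0/a704eeae-97f2-4f30-91a4-d4473956366b.tar or as
+segments1/a704eeae-97f2-4f30-91a4-d4473956366b.tar.bz2.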
There are two additional components which may appear in an object name;
both are optional.
    }
    sqlite3_finalize(stmt);
-
-    if (age != 0.0) {
-        stmt = Prepare("update segments "
-                       "set mtime = coalesce(max(mtime, ?), ?) "
-                       "where segmentid = ?");
-        sqlite3_bind_double(stmt, 1, age);
-        sqlite3_bind_double(stmt, 2, age);
-        sqlite3_bind_int64(stmt, 3, SegmentToId(ref.get_segment()));
-        rc = sqlite3_step(stmt);
-        sqlite3_finalize(stmt);
-    }
}
ObjectReference LocalDb::FindObject(const string &checksum, int64_t size)
}
}
-void LocalDb::SetSegmentChecksum(const std::string &segment,
+void LocalDb::SetSegmentMetadata(const std::string &segment,
                                  const std::string &path,
                                  const std::string &checksum,
+                                 const std::string &type,
                                  int data_size, int disk_size)
{
    int rc;
    sqlite3_stmt *stmt;
    stmt = Prepare("update segments set path = ?, checksum = ?, "
-                   "data_size = ?, disk_size = ?, "
+                   "type = ?, data_size = ?, disk_size = ?, "
                   "mtime = coalesce(mtime, julianday('now')) "
                   "where segmentid = ?");
    sqlite3_bind_text(stmt, 1, path.c_str(), path.size(),
                      SQLITE_TRANSIENT);
    sqlite3_bind_text(stmt, 2, checksum.c_str(), checksum.size(),
                      SQLITE_TRANSIENT);
-    sqlite3_bind_int64(stmt, 3, data_size);
-    sqlite3_bind_int64(stmt, 4, disk_size);
-    sqlite3_bind_int64(stmt, 5, SegmentToId(segment));
+    sqlite3_bind_text(stmt, 3, type.c_str(), type.size(),
+                      SQLITE_TRANSIENT);
+    sqlite3_bind_int64(stmt, 4, data_size);
+    sqlite3_bind_int64(stmt, 5, disk_size);
+    sqlite3_bind_int64(stmt, 6, SegmentToId(segment));
    rc = sqlite3_step(stmt);
    if (rc != SQLITE_DONE) {
        sqlite3_finalize(stmt);
    }
-bool LocalDb::GetSegmentChecksum(const string &segment,
+bool LocalDb::GetSegmentMetadata(const string &segment,
                                 string *seg_path,
                                 string *seg_checksum)
{
    void UseObject(const ObjectReference& ref);
    std::set<std::string> GetUsedSegments();
-    void SetSegmentChecksum(const std::string &segment, const std::string &path,
+    void SetSegmentMetadata(const std::string &segment, const std::string &path,
                            const std::string &checksum,
-                            int data_size, int disk_size);
-    bool GetSegmentChecksum(const std::string &segment,
+                            const std::string &type, int data_size,
+                            int disk_size);
+    bool GetSegmentMetadata(const std::string &segment,
                            std::string *seg_path, std::string *seg_checksum);
    bool LoadChunkSignatures(ObjectReference ref,
     * a temporary directory for staging files. Otherwise, write backups
     * directly to the destination directory. */
    if (backup_script != "") {
-        tmp_dir = tmp_dir + "/lbs." + generate_uuid();
+        tmp_dir = tmp_dir + "/cumulus." + generate_uuid();
        if (mkdir(tmp_dir.c_str(), 0700) < 0) {
            fprintf(stderr, "Cannot create temporary directory %s: %m\n",
                    tmp_dir.c_str());
        checksum_filename += backup_scheme + "-";
    checksum_filename = checksum_filename + desc_buf + "." + csum_type + "sums";
    RemoteFile *checksum_file = remote->alloc_file(checksum_filename,
-                                                  "checksums");
+                                                  "meta");
    FILE *checksums = fdopen(checksum_file->get_fd(), "w");
    std::set<string> segment_list = db->GetUsedSegments();
    for (std::set<string>::iterator i = segment_list.begin();
         i != segment_list.end(); ++i) {
        string seg_path, seg_csum;
-        if (db->GetSegmentChecksum(*i, &seg_path, &seg_csum)) {
+        if (db->GetSegmentMetadata(*i, &seg_path, &seg_csum)) {
            const char *raw_checksum = NULL;
            if (strncmp(seg_csum.c_str(), csum_type,
                        strlen(csum_type)) == 0) {
    string desc_filename = "snapshot-";
    if (backup_scheme.size() > 0)
        desc_filename += backup_scheme + "-";
-    desc_filename = desc_filename + desc_buf + ".lbs";
+    desc_filename = desc_filename + desc_buf + ".cumulus";
    RemoteFile *descriptor_file = remote->alloc_file(desc_filename,
                                                     "snapshots");
"""Return a file-like object for reading data from the given file."""
(type, filename) = self._classify(filename)
- return self.store.get(type, filename)
+ return self.store.get(type + "/" + filename)
def lowlevel_stat(self, filename):
"""Return a dictionary of information about the given file.
"""
(type, filename) = self._classify(filename)
- return self.store.stat(type, filename)
+ return self.store.stat(type + "/" + filename)
# Slightly higher-level list methods.
def list_snapshots(self):
            yield (path[1], data_obj.read())

    def load_snapshot(self, snapshot):
-        file = self.store.lowlevel_open("snapshot-" + snapshot + ".lbs")
+        file = self.store.lowlevel_open("snapshot-" + snapshot + ".cumulus")
        return file.read().splitlines(True)

    def extract_segment(self, segment):
type_patterns = {
    'checksums': re.compile(r"^snapshot-(.*)\.(\w+)sums$"),
    'segments': re.compile(r"^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(\.\S+)?$"),
-    'snapshots': re.compile(r"^snapshot-(.*)\.lbs$")
+    'snapshots': re.compile(r"^snapshot-(.*)\.(cumulus|lbs)$")
}
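+
+# Illustrative classifications (file names hypothetical):
+#   "snapshot-test-20120101T000000.cumulus" matches the 'snapshots'
+#   pattern; "a704eeae-97f2-4f30-91a4-d4473956366b.tar.bz2" matches
+#   'segments'; "snapshot-test-20120101T000000.sha1sums" matches
+#   'checksums'.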
class NotFoundError(exceptions.KeyError):
        self.prefix = self.path.rstrip("/")

    def _get_path(self, type, name):
-        return "%s/%s" % (self.prefix, name)
+        return os.path.join(self.prefix, type, name)

-    def list(self, type):
-        files = os.listdir(self.prefix)
-        return (f for f in files if type_patterns[type].match(f))
+    def list(self, subdir):
+        return os.listdir(os.path.join(self.prefix, subdir))

-    def get(self, type, name):
-        k = self._get_path(type, name)
-        return open(k, 'rb')
+    def get(self, path):
+        return open(os.path.join(self.prefix, path), 'rb')
-    def put(self, type, name, fp):
-        k = self._get_path(type, name)
-        out = open(k, 'wb')
-        buf = fp.read(4096)
-        out.write(buf)
-        buf = fp.read(4096)
+    def put(self, path, fp):
+        out = open(os.path.join(self.prefix, path), 'wb')
+        buf = fp.read(4096)
+        while len(buf) > 0:
+            out.write(buf)
+            buf = fp.read(4096)
-    def delete(self, type, name):
-        k = self._get_path(type, name)
-        os.unlink(k)
+    def delete(self, path):
+        os.unlink(os.path.join(self.prefix, path))

-    def stat(self, type, name):
+    def stat(self, path):
        try:
-            stat = os.stat(self._get_path(type, name))
+            stat = os.stat(os.path.join(self.prefix, path))
            return {'size': stat.st_size}
        except OSError:
-            raise cumulus.store.NotFoundError, (type, name)
+            raise cumulus.store.NotFoundError, path
Store = FileStore
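+
+# Illustrative use of the path-based interface (store root and file
+# names hypothetical):
+#     store = Store("/backups")
+#     fp = store.get("snapshots/snapshot-test-20120101T000000.cumulus")
+#     info = store.stat("segments0/a704eeae-97f2-4f30-91a4-d4473956366b.tar")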
* scripts that are called when a file is to be transferred. */
#include <assert.h>
+#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
using std::string;
+static const char *backup_directories[] = {
+ "meta",
+ "segments0",
+ "segments1",
+ "snapshots",
+ NULL
+};
+
RemoteStore::RemoteStore(const string &stagedir, const string &script)
{
    staging_dir = stagedir;
    backup_script = script;
+    /* Ensure all necessary directories exist for each type of backup file. */
+    for (size_t i = 0; backup_directories[i]; i++) {
+        string path = stagedir + "/" + backup_directories[i];
+        if (mkdir(path.c_str(), 0777) < 0) {
+            /* Ignore errors for already-existing directories. */
+            if (errno != EEXIST) {
+                fprintf(stderr,
+                        "Warning: Cannot create backup directory %s: %m\n",
+                        path.c_str());
+            }
+        }
+    }
+
/* A background thread is created for each RemoteStore to manage the actual
* transfers to a remote server. The main program thread can enqueue
* RemoteFile objects to be transferred asynchronously. */
    pthread_mutex_lock(&lock);
    files_outstanding++;
    pthread_mutex_unlock(&lock);

-    return new RemoteFile(this, name, type, staging_dir + "/" + name);
+    return new RemoteFile(this, name, type,
+                          staging_dir + "/" + type + "/" + name);
}
/* Request that a file be transferred to the remote server. The actual
    remote_store = remote;
    this->type = type;
    this->local_path = local_path;
-    this->remote_path = name;
+    this->remote_path = type + "/" + name;

    fd = open(local_path.c_str(), O_WRONLY | O_CREAT, 0666);
    if (fd < 0)
create table segments (
    segmentid integer primary key,
    segment text unique not null,
+    mtime real,                 -- timestamp when segment was created
    path text,
    checksum text,
-    mtime real,
    data_size integer,          -- sum of bytes in all objects in the segment
-    disk_size integer           -- size of segment on disk, after compression
-    -- TODO: group? metadata vs. non-metadata?
+    disk_size integer,          -- size of segment on disk, after compression
+    type text                   -- segment group (e.g. "metadata")
);
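+
+-- Example query (the group names are those used by the backup code,
+-- e.g. "metadata"): list metadata segments with their on-disk sizes:
+--     select segment, disk_size from segments where type = 'metadata';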
-- Index of all data blocks in stored segments. This is indexed by content
        segment->basename += filter_extension;
        segment->count = 0;
        segment->data_size = 0;
-        segment->rf = remote->alloc_file(segment->basename, "segments");
+        segment->rf = remote->alloc_file(segment->basename,
+                                         group == "metadata" ? "segments0"
+                                                             : "segments1");
        segment->file = new Tarfile(segment->rf, segment->name);

        segments[group] = segment;
        checksum = segment_checksum.checksum_str();
    }

-    db->SetSegmentChecksum(segment->name, segment->basename, checksum,
-                           segment->data_size, disk_size);
+    db->SetSegmentMetadata(segment->name, segment->basename, checksum,
+                           group, segment->data_size, disk_size);
}

    segment->rf->send();
    if include_mode:
        metadata.append("mode=%o" % st.st_mode)
    if include_mtime:
-        metadata.append("size=%d" % st.st_mtime)
+        metadata.append("mtime=%d" % st.st_mtime)

    if stat.S_ISREG(st.st_mode):
        digest = hashlib.sha256()
        BUF_SIZE = 1 << 16
log_action "Modifying files..."
rm "$TREE/"*.h
cp -a "$BIN_DIR/third_party" "$TREE"
+"$TEST_DIR"/digest_tree "$TREE" >"$TMP_DIR/digest.2"
log_action "Running second backup..."
sleep 5
mkdir "$BACKUP_DIR"
"$BIN_DIR"/cumulus --dest="$BACKUP_DIR" --localdb="$LOCALDB" \
    --scheme=test -v "$TREE"
+
+log_action "Restoring snapshots"
+export LBS_GPG_PASSPHRASE=""
+snapshots=$("$BIN_DIR"/cumulus-util --store="$BACKUP_DIR" list-snapshots)
+echo "Available snapshots:" $snapshots
+i=0
+for s in $snapshots; do
+    i=$((i + 1))
+    dest="$TMP_DIR/restore-$i"
+    mkdir -p "$dest"
+    "$BIN_DIR"/cumulus-util --store="$BACKUP_DIR" restore-snapshot "$s" "$dest"
+done
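+
+# Possible further check (sketch only; assumes a digest.1 for the first
+# backup was captured the same way digest.2 was above, and that
+# digest_tree output is comparable across identical trees):
+#     i=0
+#     for s in $snapshots; do
+#         i=$((i + 1))
+#         "$TEST_DIR"/digest_tree "$TMP_DIR/restore-$i" >"$TMP_DIR/digest.restore-$i"
+#         diff "$TMP_DIR/digest.$i" "$TMP_DIR/digest.restore-$i"
+#     done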