File reorganization: move non-essential binaries to contrib/.

author Michael Vrable <mvrable@cs.ucsd.edu>

Thu, 20 Sep 2007 23:26:21 +0000 (16:26 -0700)

committer Michael Vrable <mvrable@turin.ucsd.edu>

Thu, 20 Sep 2007 23:26:21 +0000 (16:26 -0700)
author Michael Vrable <mvrable@cs.ucsd.edu>
Thu, 20 Sep 2007 23:26:21 +0000 (16:26 -0700)
committer Michael Vrable <mvrable@turin.ucsd.edu>
Thu, 20 Sep 2007 23:26:21 +0000 (16:26 -0700)
diff --git a/clean-segments.pl b/clean-segments.pl

deleted file mode 100755 (executable)

index 5af3ae3..0000000
--- a/clean-segments.pl
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/usr/bin/perl -w
-#
-# Garbage collect segments in LBS snapshot directories.
-#
-# Find all segments which are not referenced by any current snapshots and print
-# out a listing of them so that they can be deleted.
-#
-# Takes no command-line arguments, and expects to be invoked from the directory
-# containing the snapshots.
-#
-# Copyright (C) 2007  Michael Vrable
-
-use strict;
-
-my $SEGMENT_PATTERN
-    = '[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}';
-
-# Set of all segments which are used by current snapshots.  Value is ignored.
-my %segments_used = ();
-
-# Iterate through all snapshots.  Snapshot descriptors should end with a ".lbs"
-# extension.  Find all segments which are used.
-foreach (glob "*.lbs") {
-    open DESCRIPTOR, "<", $_
-        or die "Cannot open backup descriptor file $_: $!";
-
-    # Parse the backup descriptor file.  We might not need the full parser, but
-    # it shouldn't hurt.
-    my %descriptor = ();
-    my ($line, $last_key);
-    while (defined($line = <DESCRIPTOR>)) {
-        chomp $line;
-
-        if ($line =~ m/^(\w+):\s*(.*)$/) {
-            $descriptor{$1} = $2;
-            $last_key = $1;
-        } elsif ($line =~/^\s/ && defined $last_key) {
-            $descriptor{$last_key} .= $line;
-        } else {
-            undef $last_key;
-            print STDERR "Ignoring line in backup descriptor: $line\n";
-        }
-    }
-
-    # Extract the list of segments from the parsed descriptor file.
-    foreach (split /\s+/, $descriptor{Segments}) {
-        next unless $_;
-        if (m/^$SEGMENT_PATTERN$/) {
-            $segments_used{$_} = 1;
-        } else {
-            warn "Invalid segment name: '$_'\n";
-        }
-    }
-
-    close DESCRIPTOR;
-}
-
-# Look for all segments in this directory, and match them against the list
-# generated above of segments which are used.  Pring out any segments which are
-# not used.
-my %segments_found = ();
-
-foreach (glob "*") {
-    if (m/^($SEGMENT_PATTERN)(\.\S+)?$/) {
-        $segments_found{$1} = 1;
-        if (!exists $segments_used{$1}) {
-            print $_, "\n";
-        }
-    }
-}
-
-# Perform a consistency check: were any segments referenced by snapshot but not
-# found in the directory?
-foreach (sort keys %segments_used) {
-    if (!exists $segments_found{$_}) {
-        print STDERR "Warning: Segment $_ not found\n";
-    }
-}
-
diff --git a/contrib/clean-segments.pl b/contrib/clean-segments.pl

new file mode 100755 (executable)

index 0000000..5af3ae3
--- /dev/null
+++ b/contrib/clean-segments.pl
@@ -0,0 +1,79 @@
+#!/usr/bin/perl -w
+#
+# Garbage collect segments in LBS snapshot directories.
+#
+# Find all segments which are not referenced by any current snapshots and print
+# out a listing of them so that they can be deleted.
+#
+# Takes no command-line arguments, and expects to be invoked from the directory
+# containing the snapshots.
+#
+# Copyright (C) 2007  Michael Vrable
+
+use strict;
+
+my $SEGMENT_PATTERN
+    = '[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}';
+
+# Set of all segments which are used by current snapshots.  Value is ignored.
+my %segments_used = ();
+
+# Iterate through all snapshots.  Snapshot descriptors should end with a ".lbs"
+# extension.  Find all segments which are used.
+foreach (glob "*.lbs") {
+    open DESCRIPTOR, "<", $_
+        or die "Cannot open backup descriptor file $_: $!";
+
+    # Parse the backup descriptor file.  We might not need the full parser, but
+    # it shouldn't hurt.
+    my %descriptor = ();
+    my ($line, $last_key);
+    while (defined($line = <DESCRIPTOR>)) {
+        chomp $line;
+
+        if ($line =~ m/^(\w+):\s*(.*)$/) {
+            $descriptor{$1} = $2;
+            $last_key = $1;
+        } elsif ($line =~/^\s/ && defined $last_key) {
+            $descriptor{$last_key} .= $line;
+        } else {
+            undef $last_key;
+            print STDERR "Ignoring line in backup descriptor: $line\n";
+        }
+    }
+
+    # Extract the list of segments from the parsed descriptor file.
+    foreach (split /\s+/, $descriptor{Segments}) {
+        next unless $_;
+        if (m/^$SEGMENT_PATTERN$/) {
+            $segments_used{$_} = 1;
+        } else {
+            warn "Invalid segment name: '$_'\n";
+        }
+    }
+
+    close DESCRIPTOR;
+}
+
+# Look for all segments in this directory, and match them against the list
+# generated above of segments which are used.  Pring out any segments which are
+# not used.
+my %segments_found = ();
+
+foreach (glob "*") {
+    if (m/^($SEGMENT_PATTERN)(\.\S+)?$/) {
+        $segments_found{$1} = 1;
+        if (!exists $segments_used{$1}) {
+            print $_, "\n";
+        }
+    }
+}
+
+# Perform a consistency check: were any segments referenced by snapshot but not
+# found in the directory?
+foreach (sort keys %segments_used) {
+    if (!exists $segments_found{$_}) {
+        print STDERR "Warning: Segment $_ not found\n";
+    }
+}
+
diff --git a/contrib/lbs-filter-gpg b/contrib/lbs-filter-gpg

new file mode 100755 (executable)

index 0000000..010c05f
--- /dev/null
+++ b/contrib/lbs-filter-gpg
@@ -0,0 +1,59 @@
+#!/bin/bash
+#
+# Filter for encrypting/decrypting/signing LBS archives using gpg.
+#
+# This takes input on stdin and produces output to stdout.  It can operate in
+# one of several modes, depending upon the command-line argument supplied:
+#   --encrypt           Encrypt the data stream
+#   --decrypt           Decrypt the supplied data
+#   --clearsign         Enclose a text file with a signature
+# Options are controlled by various environment variables:
+#   LBS_GPG_HOME        set the gpg home directory (containing keyrings)
+#   LBS_GPG_ENC_KEY     key ID to use encryption
+#   LBS_GPG_SIGN_KEY    key ID to use for signing
+#   LBS_GPG_PASSPHRASE  passphrase to supply to gpg, if needed
+
+declare -a gpg_options
+gpg_options=(--quiet --batch)
+
+if [ -n "$LBS_GPG_HOME" ]; then
+    gpg_options=("${gpg_options[@]}" --homedir "$LBS_GPG_HOME")
+fi
+
+# Run gpg with the options in $gpg_options and any arguments supplied to this
+# function.  If LBS_GPG_PASSPHRASE is set, it will arrange redirections so that
+# the passphrase is supplied to gpg on a file descriptor.
+run_gpg () {
+    if [ -n "$LBS_GPG_PASSPHRASE" ]; then
+        exec 4<&0
+        echo "$LBS_GPG_PASSPHRASE" |
+            gpg "${gpg_options[@]}" --passphrase-fd=3 "$@" 3<&0 <&4
+    else
+        gpg "${gpg_options[@]}" "$@"
+    fi
+}
+
+case "$1" in
+    --encrypt)
+        if [ -n "$LBS_GPG_ENC_KEY" ]; then
+            gpg_options=("${gpg_options[@]}" --recipient "$LBS_GPG_ENC_KEY")
+        fi
+        run_gpg --encrypt
+        ;;
+
+    --decrypt)
+        run_gpg
+        ;;
+
+    --clearsign)
+        if [ -n "$LBS_GPG_SIGN_KEY" ]; then
+            gpg_options=("${gpg_options[@]}" --local-user "$LBS_GPG_SIGN_KEY")
+        fi
+        run_gpg --clearsign
+        ;;
+
+    *)
+        echo "$0: Unknown command or command not specified: $1" 1>&2
+        exit 1
+        ;;
+esac
diff --git a/contrib/restore.pl b/contrib/restore.pl

new file mode 100755 (executable)

index 0000000..ad1f594
--- /dev/null
+++ b/contrib/restore.pl
@@ -0,0 +1,429 @@
+#!/usr/bin/perl -w
+#
+# Proof-of-concept/reference decoder for LBS-format backup snapshots.
+#
+# This decoder aims to decompress an LBS snapshot.  It is not meant to be
+# particularly efficient, but should be a small and portable tool for doing so
+# (important for recovering from data loss).  It is also meant to serve as a
+# check on the snapshot tool and data format itself, and serve as documentation
+# for the format.
+#
+# This decoder does not understand TAR archives; it assumes that all segments
+# in the snapshot have already been decompressed, and that objects are
+# available simply as files in the filesystem.  This simplifies the design.
+#
+# Limitations: Since this code is probably using 32-bit arithmetic, files
+# larger than 2-4 GB may not be properly handled.
+#
+# Copyright (C) 2007  Michael Vrable
+
+use strict;
+use Digest::SHA1;
+use File::Basename;
+
+my $OBJECT_DIR;                 # Where are the unpacked objects available?
+my $DEST_DIR = ".";             # Where should restored files should be placed?
+my $RECURSION_LIMIT = 3;        # Bound on recursive object references
+
+my $VERBOSE = 0;                # Set to 1 to enable debugging messages
+
+############################ CHECKSUM VERIFICATION ############################
+# A very simple layer for verifying checksums.  Checksums may be used on object
+# references directly, and can also be used to verify entire reconstructed
+# files.
+#
+# A checksum to verify is given in the form "algorithm=hexdigest".  Given such
+# a string, we can construct a "verifier" object.  Bytes can be incrementally
+# added to the verifier, and at the end a test can be made to see if the
+# checksum matches.  The caller need not know what algorithm is used.  However,
+# at the moment we only support SHA-1 for computing digest (algorith name
+# "sha1").
+sub verifier_create {
+    my $checksum = shift;
+
+    if ($checksum !~ m/^(\w+)=([0-9a-f]+)$/) {
+        die "Malformed checksum: $checksum";
+    }
+    my ($algorithm, $hash) = ($1, $2);
+    if ($algorithm ne 'sha1') {
+        die "Unsupported checksum algorithm: $algorithm";
+    }
+
+    my %verifier = (
+        ALGORITHM => $algorithm,
+        HASH => $hash,
+        DIGESTER => new Digest::SHA1
+    );
+
+    return \%verifier;
+}
+
+sub verifier_add_bytes {
+    my $verifier = shift;
+    my $digester = $verifier->{DIGESTER};
+    my $data = shift;
+
+    $digester->add($data);
+}
+
+sub verifier_check {
+    my $verifier = shift;
+    my $digester = $verifier->{DIGESTER};
+
+    my $newhash = $digester->hexdigest();
+    if ($VERBOSE && $verifier->{HASH} ne $newhash) {
+        print STDERR "Verification failure: ",
+            $newhash, " != ", $verifier->{HASH}, "\n";
+    }
+    return ($verifier->{HASH} eq $newhash);
+}
+
+################################ OBJECT ACCESS ################################
+# The base of the decompressor is the object reference layer.  See ref.h for a
+# description of the format for object references.  These functions will parse
+# an object reference, locate the object data from the filesystem, perform any
+# necessary integrity checks (if a checksum is included), and return the object
+# data.
+sub load_ref {
+    # First, try to parse the object reference string into constituent pieces.
+    # The format is segment/object(checksum)[range].  Both the checksum and
+    # range are optional.
+    my $ref_str = shift;
+
+    if ($ref_str !~ m/^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[\S+\])?$/) {
+        die "Malformed object reference: $ref_str";
+    }
+
+    my ($segment, $object, $checksum, $range) = ($1, $2, $3, $4);
+
+    # Next, use the segment/object components to locate and read the object
+    # contents from disk.
+    open OBJECT, "<", "$OBJECT_DIR/$segment/$object"
+        or die "Unable to open object $OBJECT_DIR/$segment/$object: $!";
+    my $contents = join '', <OBJECT>;
+    close OBJECT;
+
+    # If a checksum was specified in the object reference, verify the object
+    # integrity by computing a checksum of the read data and comparing.
+    if ($checksum) {
+        $checksum =~ m/^\((\S+)\)$/;
+        my $verifier = verifier_create($1);
+        verifier_add_bytes($verifier, $contents);
+        if (!verifier_check($verifier)) {
+            die "Integrity check for object $ref_str failed";
+        }
+    }
+
+    # If a range was specified, then only a subset of the bytes of the object
+    # are desired.  Extract just the desired bytes.
+    if ($range) {
+        if ($range !~ m/^\[(\d+)\+(\d+)\]$/) {
+            die "Malformed object range: $range";
+        }
+
+        my $object_size = length $contents;
+        my ($start, $length) = ($1 + 0, $2 + 0);
+        if ($start >= $object_size || $start + $length > $object_size) {
+            die "Object range $range falls outside object bounds "
+                . "(actual size $object_size)";
+        }
+
+        $contents = substr $contents, $start, $length;
+    }
+
+    return $contents;
+}
+
+############################### FILE PROCESSING ###############################
+# Process the metadata for a single file.  process_file is the main entry
+# point; it should be given a list of file metadata key/value pairs.
+# iterate_objects is a helper function used to iterate over the set of object
+# references that contain the file data for a regular file.
+
+sub parse_int {
+    my $str = shift;
+    if ($str =~ /^0/) {
+        return oct($str);
+    } else {
+        return $str + 0;
+    }
+}
+
+sub uri_decode {
+    my $str = shift;
+    $str =~ s/%([0-9a-f]{2})/chr(hex($1))/ge;
+    return $str;
+}
+
+sub iterate_objects {
+    my $callback = shift;       # Function to be called for each reference
+    my $arg = shift;            # Argument passed to callback
+    my $text = shift;           # Whitespace-separate list of object references
+
+    # Simple limit to guard against cycles in the object references
+    my $recursion_level = shift || 0;
+    if ($recursion_level >= $RECURSION_LIMIT) {
+        die "Recursion limit reached";
+    }
+
+    # Split the provided text at whitespace boundaries to produce the list of
+    # object references.  If any of these start with "@", then we have an
+    # indirect reference, and must look up that object and call iterate_objects
+    # with the contents.
+    my $obj;
+    foreach $obj (split /\s+/, $text) {
+        next if $obj eq "";
+        if ($obj =~ /^@(\S+)$/) {
+            my $indirect = load_ref($1);
+            iterate_objects($callback, $arg, $indirect, $recursion_level + 1);
+        } else {
+            &$callback($arg, $obj);
+        }
+    }
+}
+
+sub obj_callback {
+    my $state = shift;
+    my $obj = shift;
+    my $data = load_ref($obj);
+    print FILE $data
+        or die "Error writing file data: $!";
+    verifier_add_bytes($state->{VERIFIER}, $data);
+    $state->{BYTES} += length($data);
+}
+
+# Extract the contents of a regular file by concatenating all the objects that
+# comprise it.
+sub unpack_file {
+    my $name = shift;
+    my %info = @_;
+    my %state = ();
+
+    if (!defined $info{data}) {
+        die "File contents not specified for $name";
+    }
+    if (!defined $info{checksum} || !defined $info{size}) {
+        die "File $name is missing checksum or size";
+    }
+
+    $info{size} = parse_int($info{size});
+
+    # Open the file to be recreated.  The data will be written out by the call
+    # to iterate_objects.
+    open FILE, ">", "$DEST_DIR/$name"
+        or die "Cannot write file $name: $!";
+
+    # Set up state so that we can incrementally compute the checksum and length
+    # of the reconstructed data.  Then iterate over all objects in the file.
+    $state{VERIFIER} = verifier_create($info{checksum});
+    $state{BYTES} = 0;
+    iterate_objects(\&obj_callback, \%state, $info{data});
+
+    close FILE;
+
+    # Verify that the reconstructed object matches the size/checksum we were
+    # given.
+    if (!verifier_check($state{VERIFIER}) || $state{BYTES} != $info{size}) {
+        die "File reconstruction failed for $name: size or checksum differs";
+    }
+}
+
+sub process_file {
+    my %info = @_;
+
+    if (!defined($info{name})) {
+        die "Filename not specified in metadata block";
+    }
+
+    my $type = $info{type};
+
+    my $filename = uri_decode($info{name});
+    print "$filename\n" if $VERBOSE;
+
+    # Restore the specified file.  How to do so depends upon the file type, so
+    # dispatch based on that.
+    my $dest = "$DEST_DIR/$filename";
+    if ($type eq '-') {
+        # Regular file
+        unpack_file($filename, %info);
+    } elsif ($type eq 'd') {
+        # Directory
+        if ($filename ne '.') {
+            mkdir $dest or die "Cannot create directory $filename: $!";
+        }
+    } elsif ($type eq 'l') {
+        # Symlink
+        if (!defined($info{contents})) {
+            die "Symlink $filename has no value specified";
+        }
+        my $contents = uri_decode($info{contents});
+        symlink $contents, $dest
+            or die "Cannot create symlink $filename: $!";
+
+        # TODO: We can't properly restore all metadata for symbolic links
+        # (attempts to do so below will change metadata for the pointed-to
+        # file).  This should be later fixed, but for now we simply return
+        # before getting to the restore metadata step below.
+        return;
+    } elsif ($type eq 'p' || $type eq 's' || $type eq 'c' || $type eq 'b') {
+        # Pipe, socket, character device, block device.
+        # TODO: Handle these cases.
+        print STDERR "Ignoring special file $filename of type $type\n";
+        return;
+    } else {
+        die "Unknown file type '$type' for file $filename";
+    }
+
+    # Restore mode, ownership, and any other metadata for the file.  This is
+    # split out from the code above since the code is the same regardless of
+    # file type.
+    my $mtime = $info{mtime} || time();
+    utime time(), $mtime, $dest
+        or warn "Unable to update mtime for $dest";
+
+    my $uid = -1;
+    my $gid = -1;
+    if (defined $info{user}) {
+        my @items = split /\s/, $info{user};
+        $uid = parse_int($items[0]) if exists $items[0];
+    }
+    if (defined $info{group}) {
+        my @items = split /\s/, $info{group};
+        $gid = parse_int($items[0]) if exists $items[0];
+    }
+    chown $uid, $gid, $dest
+        or warn "Unable to change ownership for $dest";
+
+    if (defined $info{mode}) {
+        my $mode = parse_int($info{mode});
+        chmod $mode, $dest
+            or warn "Unable to change permissions for $dest";
+    }
+}
+
+########################### METADATA LIST PROCESSING ##########################
+# Process the file metadata listing provided, and as information for each file
+# is extracted, pass it to process_file.  This will recursively follow indirect
+# references to other metadata objects.
+sub process_metadata {
+    my ($metadata, $recursion_level) = @_;
+
+    # Check recursion; this will prevent us from infinitely recursing on an
+    # indirect reference which loops back to itself.
+    $recursion_level ||= 0;
+    if ($recursion_level >= $RECURSION_LIMIT) {
+        die "Recursion limit reached";
+    }
+
+    # Split the metadata into lines, then start processing each line.  There
+    # are two primary cases:
+    #   - Lines starting with "@" are indirect references to other metadata
+    #     objects.  Recursively process that object before continuing.
+    #   - Other lines should come in groups separated by a blank line; these
+    #     contain metadata for a single file that should be passed to
+    #     process_file.
+    # Note that blocks of metadata about a file cannot span a boundary between
+    # metadata objects.
+    my %info = ();
+    my $line;
+    my $last_key;
+    foreach $line (split /\n/, $metadata) {
+        # If we find a blank line or a reference to another block, process any
+        # data for the previous file first.
+        if ($line eq '' || $line =~ m/^@/) {
+            process_file(%info) if %info;
+            %info = ();
+            undef $last_key;
+            next if $line eq '';
+        }
+
+        # Recursively handle indirect metadata blocks.
+        if ($line =~ m/^@(\S+)$/) {
+            print "Indirect: $1\n" if $VERBOSE;
+            my $indirect = load_ref($1);
+            process_metadata($indirect, $recursion_level + 1);
+            next;
+        }
+
+        # Try to parse the data as "key: value" pairs of file metadata.  Also
+        # handle continuation lines, which start with whitespace and continue
+        # the previous "key: value" pair.
+        if ($line =~ m/^(\w+):\s*(.*)$/) {
+            $info{$1} = $2;
+            $last_key = $1;
+        } elsif ($line =~/^\s/ && defined $last_key) {
+            $info{$last_key} .= $line;
+        } else {
+            print STDERR "Junk in file metadata section: $line\n";
+        }
+    }
+
+    # Process any last file metadata which has not already been processed.
+    process_file(%info) if %info;
+}
+
+############################### MAIN ENTRY POINT ##############################
+# Program start.  We expect to be called with a single argument, which is the
+# name of the backup descriptor file written by a backup pass.  This will name
+# the root object in the snapshot, from which we can reach all other data we
+# need.
+
+# Parse command-line arguments.  The first (required) is the name of the
+# snapshot descriptor file.  The backup objects are assumed to be stored in the
+# same directory as the descriptor.  The second (optional) argument is the
+# directory where the restored files should be written; it defaults to ".";
+my $descriptor = $ARGV[0];
+unless (defined($descriptor) && -r $descriptor) {
+    print STDERR "Usage: $0 <snapshot file>\n";
+    exit 1;
+}
+
+if (defined($ARGV[1])) {
+    $DEST_DIR = $ARGV[1];
+}
+
+$OBJECT_DIR = dirname($descriptor);
+print "Source directory: $OBJECT_DIR\n" if $VERBOSE;
+
+# Read the snapshot descriptor to find the root object.  Parse it to get a set
+# of key/value pairs.
+open DESCRIPTOR, "<", $descriptor
+    or die "Cannot open backup descriptor file $descriptor: $!";
+my %descriptor = ();
+my ($line, $last_key);
+while (defined($line = <DESCRIPTOR>)) {
+    # Any lines of the form "key: value" should be inserted into the
+    # %descriptor dictionary.  Any continuation line (a line starting with
+    # whitespace) will append text to the previous key's value.  Ignore other
+    # lines.
+    chomp $line;
+
+    if ($line =~ m/^(\w+):\s*(.*)$/) {
+        $descriptor{$1} = $2;
+        $last_key = $1;
+    } elsif ($line =~/^\s/ && defined $last_key) {
+        $descriptor{$last_key} .= $line;
+    } else {
+        undef $last_key;
+        print STDERR "Ignoring line in backup descriptor: $line\n";
+    }
+}
+
+# A valid backup descriptor should at the very least specify the root metadata
+# object.
+if (!exists $descriptor{Root}) {
+    die "Expected 'Root:' specification in backup descriptor file";
+}
+my $root = $descriptor{Root};
+close DESCRIPTOR;
+
+# Set the umask to something restrictive.  As we unpack files, we'll originally
+# write the files/directories without setting the permissions, so be
+# conservative and ensure that they can't be read.  Afterwards, we'll properly
+# fix up permissions.
+umask 077;
+
+# Start processing metadata stored in the root to recreate the files.
+print "Root object: $root\n" if $VERBOSE;
+my $contents = load_ref($root);
+process_metadata($contents);
diff --git a/lbs-filter-gpg b/lbs-filter-gpg

deleted file mode 100755 (executable)

index 010c05f..0000000
--- a/lbs-filter-gpg
+++ /dev/null
@@ -1,59 +0,0 @@
-#!/bin/bash
-#
-# Filter for encrypting/decrypting/signing LBS archives using gpg.
-#
-# This takes input on stdin and produces output to stdout.  It can operate in
-# one of several modes, depending upon the command-line argument supplied:
-#   --encrypt           Encrypt the data stream
-#   --decrypt           Decrypt the supplied data
-#   --clearsign         Enclose a text file with a signature
-# Options are controlled by various environment variables:
-#   LBS_GPG_HOME        set the gpg home directory (containing keyrings)
-#   LBS_GPG_ENC_KEY     key ID to use encryption
-#   LBS_GPG_SIGN_KEY    key ID to use for signing
-#   LBS_GPG_PASSPHRASE  passphrase to supply to gpg, if needed
-
-declare -a gpg_options
-gpg_options=(--quiet --batch)
-
-if [ -n "$LBS_GPG_HOME" ]; then
-    gpg_options=("${gpg_options[@]}" --homedir "$LBS_GPG_HOME")
-fi
-
-# Run gpg with the options in $gpg_options and any arguments supplied to this
-# function.  If LBS_GPG_PASSPHRASE is set, it will arrange redirections so that
-# the passphrase is supplied to gpg on a file descriptor.
-run_gpg () {
-    if [ -n "$LBS_GPG_PASSPHRASE" ]; then
-        exec 4<&0
-        echo "$LBS_GPG_PASSPHRASE" |
-            gpg "${gpg_options[@]}" --passphrase-fd=3 "$@" 3<&0 <&4
-    else
-        gpg "${gpg_options[@]}" "$@"
-    fi
-}
-
-case "$1" in
-    --encrypt)
-        if [ -n "$LBS_GPG_ENC_KEY" ]; then
-            gpg_options=("${gpg_options[@]}" --recipient "$LBS_GPG_ENC_KEY")
-        fi
-        run_gpg --encrypt
-        ;;
-
-    --decrypt)
-        run_gpg
-        ;;
-
-    --clearsign)
-        if [ -n "$LBS_GPG_SIGN_KEY" ]; then
-            gpg_options=("${gpg_options[@]}" --local-user "$LBS_GPG_SIGN_KEY")
-        fi
-        run_gpg --clearsign
-        ;;
-
-    *)
-        echo "$0: Unknown command or command not specified: $1" 1>&2
-        exit 1
-        ;;
-esac
diff --git a/restore.pl b/restore.pl

deleted file mode 100755 (executable)

index ad1f594..0000000
--- a/restore.pl
+++ /dev/null
@@ -1,429 +0,0 @@
-#!/usr/bin/perl -w
-#
-# Proof-of-concept/reference decoder for LBS-format backup snapshots.
-#
-# This decoder aims to decompress an LBS snapshot.  It is not meant to be
-# particularly efficient, but should be a small and portable tool for doing so
-# (important for recovering from data loss).  It is also meant to serve as a
-# check on the snapshot tool and data format itself, and serve as documentation
-# for the format.
-#
-# This decoder does not understand TAR archives; it assumes that all segments
-# in the snapshot have already been decompressed, and that objects are
-# available simply as files in the filesystem.  This simplifies the design.
-#
-# Limitations: Since this code is probably using 32-bit arithmetic, files
-# larger than 2-4 GB may not be properly handled.
-#
-# Copyright (C) 2007  Michael Vrable
-
-use strict;
-use Digest::SHA1;
-use File::Basename;
-
-my $OBJECT_DIR;                 # Where are the unpacked objects available?
-my $DEST_DIR = ".";             # Where should restored files should be placed?
-my $RECURSION_LIMIT = 3;        # Bound on recursive object references
-
-my $VERBOSE = 0;                # Set to 1 to enable debugging messages
-
-############################ CHECKSUM VERIFICATION ############################
-# A very simple layer for verifying checksums.  Checksums may be used on object
-# references directly, and can also be used to verify entire reconstructed
-# files.
-#
-# A checksum to verify is given in the form "algorithm=hexdigest".  Given such
-# a string, we can construct a "verifier" object.  Bytes can be incrementally
-# added to the verifier, and at the end a test can be made to see if the
-# checksum matches.  The caller need not know what algorithm is used.  However,
-# at the moment we only support SHA-1 for computing digest (algorith name
-# "sha1").
-sub verifier_create {
-    my $checksum = shift;
-
-    if ($checksum !~ m/^(\w+)=([0-9a-f]+)$/) {
-        die "Malformed checksum: $checksum";
-    }
-    my ($algorithm, $hash) = ($1, $2);
-    if ($algorithm ne 'sha1') {
-        die "Unsupported checksum algorithm: $algorithm";
-    }
-
-    my %verifier = (
-        ALGORITHM => $algorithm,
-        HASH => $hash,
-        DIGESTER => new Digest::SHA1
-    );
-
-    return \%verifier;
-}
-
-sub verifier_add_bytes {
-    my $verifier = shift;
-    my $digester = $verifier->{DIGESTER};
-    my $data = shift;
-
-    $digester->add($data);
-}
-
-sub verifier_check {
-    my $verifier = shift;
-    my $digester = $verifier->{DIGESTER};
-
-    my $newhash = $digester->hexdigest();
-    if ($VERBOSE && $verifier->{HASH} ne $newhash) {
-        print STDERR "Verification failure: ",
-            $newhash, " != ", $verifier->{HASH}, "\n";
-    }
-    return ($verifier->{HASH} eq $newhash);
-}
-
-################################ OBJECT ACCESS ################################
-# The base of the decompressor is the object reference layer.  See ref.h for a
-# description of the format for object references.  These functions will parse
-# an object reference, locate the object data from the filesystem, perform any
-# necessary integrity checks (if a checksum is included), and return the object
-# data.
-sub load_ref {
-    # First, try to parse the object reference string into constituent pieces.
-    # The format is segment/object(checksum)[range].  Both the checksum and
-    # range are optional.
-    my $ref_str = shift;
-
-    if ($ref_str !~ m/^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[\S+\])?$/) {
-        die "Malformed object reference: $ref_str";
-    }
-
-    my ($segment, $object, $checksum, $range) = ($1, $2, $3, $4);
-
-    # Next, use the segment/object components to locate and read the object
-    # contents from disk.
-    open OBJECT, "<", "$OBJECT_DIR/$segment/$object"
-        or die "Unable to open object $OBJECT_DIR/$segment/$object: $!";
-    my $contents = join '', <OBJECT>;
-    close OBJECT;
-
-    # If a checksum was specified in the object reference, verify the object
-    # integrity by computing a checksum of the read data and comparing.
-    if ($checksum) {
-        $checksum =~ m/^\((\S+)\)$/;
-        my $verifier = verifier_create($1);
-        verifier_add_bytes($verifier, $contents);
-        if (!verifier_check($verifier)) {
-            die "Integrity check for object $ref_str failed";
-        }
-    }
-
-    # If a range was specified, then only a subset of the bytes of the object
-    # are desired.  Extract just the desired bytes.
-    if ($range) {
-        if ($range !~ m/^\[(\d+)\+(\d+)\]$/) {
-            die "Malformed object range: $range";
-        }
-
-        my $object_size = length $contents;
-        my ($start, $length) = ($1 + 0, $2 + 0);
-        if ($start >= $object_size || $start + $length > $object_size) {
-            die "Object range $range falls outside object bounds "
-                . "(actual size $object_size)";
-        }
-
-        $contents = substr $contents, $start, $length;
-    }
-
-    return $contents;
-}
-
-############################### FILE PROCESSING ###############################
-# Process the metadata for a single file.  process_file is the main entry
-# point; it should be given a list of file metadata key/value pairs.
-# iterate_objects is a helper function used to iterate over the set of object
-# references that contain the file data for a regular file.
-
-sub parse_int {
-    my $str = shift;
-    if ($str =~ /^0/) {
-        return oct($str);
-    } else {
-        return $str + 0;
-    }
-}
-
-sub uri_decode {
-    my $str = shift;
-    $str =~ s/%([0-9a-f]{2})/chr(hex($1))/ge;
-    return $str;
-}
-
-sub iterate_objects {
-    my $callback = shift;       # Function to be called for each reference
-    my $arg = shift;            # Argument passed to callback
-    my $text = shift;           # Whitespace-separate list of object references
-
-    # Simple limit to guard against cycles in the object references
-    my $recursion_level = shift || 0;
-    if ($recursion_level >= $RECURSION_LIMIT) {
-        die "Recursion limit reached";
-    }
-
-    # Split the provided text at whitespace boundaries to produce the list of
-    # object references.  If any of these start with "@", then we have an
-    # indirect reference, and must look up that object and call iterate_objects
-    # with the contents.
-    my $obj;
-    foreach $obj (split /\s+/, $text) {
-        next if $obj eq "";
-        if ($obj =~ /^@(\S+)$/) {
-            my $indirect = load_ref($1);
-            iterate_objects($callback, $arg, $indirect, $recursion_level + 1);
-        } else {
-            &$callback($arg, $obj);
-        }
-    }
-}
-
-sub obj_callback {
-    my $state = shift;
-    my $obj = shift;
-    my $data = load_ref($obj);
-    print FILE $data
-        or die "Error writing file data: $!";
-    verifier_add_bytes($state->{VERIFIER}, $data);
-    $state->{BYTES} += length($data);
-}
-
-# Extract the contents of a regular file by concatenating all the objects that
-# comprise it.
-sub unpack_file {
-    my $name = shift;
-    my %info = @_;
-    my %state = ();
-
-    if (!defined $info{data}) {
-        die "File contents not specified for $name";
-    }
-    if (!defined $info{checksum} || !defined $info{size}) {
-        die "File $name is missing checksum or size";
-    }
-
-    $info{size} = parse_int($info{size});
-
-    # Open the file to be recreated.  The data will be written out by the call
-    # to iterate_objects.
-    open FILE, ">", "$DEST_DIR/$name"
-        or die "Cannot write file $name: $!";
-
-    # Set up state so that we can incrementally compute the checksum and length
-    # of the reconstructed data.  Then iterate over all objects in the file.
-    $state{VERIFIER} = verifier_create($info{checksum});
-    $state{BYTES} = 0;
-    iterate_objects(\&obj_callback, \%state, $info{data});
-
-    close FILE;
-
-    # Verify that the reconstructed object matches the size/checksum we were
-    # given.
-    if (!verifier_check($state{VERIFIER}) || $state{BYTES} != $info{size}) {
-        die "File reconstruction failed for $name: size or checksum differs";
-    }
-}
-
-sub process_file {
-    my %info = @_;
-
-    if (!defined($info{name})) {
-        die "Filename not specified in metadata block";
-    }
-
-    my $type = $info{type};
-
-    my $filename = uri_decode($info{name});
-    print "$filename\n" if $VERBOSE;
-
-    # Restore the specified file.  How to do so depends upon the file type, so
-    # dispatch based on that.
-    my $dest = "$DEST_DIR/$filename";
-    if ($type eq '-') {
-        # Regular file
-        unpack_file($filename, %info);
-    } elsif ($type eq 'd') {
-        # Directory
-        if ($filename ne '.') {
-            mkdir $dest or die "Cannot create directory $filename: $!";
-        }
-    } elsif ($type eq 'l') {
-        # Symlink
-        if (!defined($info{contents})) {
-            die "Symlink $filename has no value specified";
-        }
-        my $contents = uri_decode($info{contents});
-        symlink $contents, $dest
-            or die "Cannot create symlink $filename: $!";
-
-        # TODO: We can't properly restore all metadata for symbolic links
-        # (attempts to do so below will change metadata for the pointed-to
-        # file).  This should be later fixed, but for now we simply return
-        # before getting to the restore metadata step below.
-        return;
-    } elsif ($type eq 'p' || $type eq 's' || $type eq 'c' || $type eq 'b') {
-        # Pipe, socket, character device, block device.
-        # TODO: Handle these cases.
-        print STDERR "Ignoring special file $filename of type $type\n";
-        return;
-    } else {
-        die "Unknown file type '$type' for file $filename";
-    }
-
-    # Restore mode, ownership, and any other metadata for the file.  This is
-    # split out from the code above since the code is the same regardless of
-    # file type.
-    my $mtime = $info{mtime} || time();
-    utime time(), $mtime, $dest
-        or warn "Unable to update mtime for $dest";
-
-    my $uid = -1;
-    my $gid = -1;
-    if (defined $info{user}) {
-        my @items = split /\s/, $info{user};
-        $uid = parse_int($items[0]) if exists $items[0];
-    }
-    if (defined $info{group}) {
-        my @items = split /\s/, $info{group};
-        $gid = parse_int($items[0]) if exists $items[0];
-    }
-    chown $uid, $gid, $dest
-        or warn "Unable to change ownership for $dest";
-
-    if (defined $info{mode}) {
-        my $mode = parse_int($info{mode});
-        chmod $mode, $dest
-            or warn "Unable to change permissions for $dest";
-    }
-}
-
-########################### METADATA LIST PROCESSING ##########################
-# Process the file metadata listing provided, and as information for each file
-# is extracted, pass it to process_file.  This will recursively follow indirect
-# references to other metadata objects.
-sub process_metadata {
-    my ($metadata, $recursion_level) = @_;
-
-    # Check recursion; this will prevent us from infinitely recursing on an
-    # indirect reference which loops back to itself.
-    $recursion_level ||= 0;
-    if ($recursion_level >= $RECURSION_LIMIT) {
-        die "Recursion limit reached";
-    }
-
-    # Split the metadata into lines, then start processing each line.  There
-    # are two primary cases:
-    #   - Lines starting with "@" are indirect references to other metadata
-    #     objects.  Recursively process that object before continuing.
-    #   - Other lines should come in groups separated by a blank line; these
-    #     contain metadata for a single file that should be passed to
-    #     process_file.
-    # Note that blocks of metadata about a file cannot span a boundary between
-    # metadata objects.
-    my %info = ();
-    my $line;
-    my $last_key;
-    foreach $line (split /\n/, $metadata) {
-        # If we find a blank line or a reference to another block, process any
-        # data for the previous file first.
-        if ($line eq '' || $line =~ m/^@/) {
-            process_file(%info) if %info;
-            %info = ();
-            undef $last_key;
-            next if $line eq '';
-        }
-
-        # Recursively handle indirect metadata blocks.
-        if ($line =~ m/^@(\S+)$/) {
-            print "Indirect: $1\n" if $VERBOSE;
-            my $indirect = load_ref($1);
-            process_metadata($indirect, $recursion_level + 1);
-            next;
-        }
-
-        # Try to parse the data as "key: value" pairs of file metadata.  Also
-        # handle continuation lines, which start with whitespace and continue
-        # the previous "key: value" pair.
-        if ($line =~ m/^(\w+):\s*(.*)$/) {
-            $info{$1} = $2;
-            $last_key = $1;
-        } elsif ($line =~/^\s/ && defined $last_key) {
-            $info{$last_key} .= $line;
-        } else {
-            print STDERR "Junk in file metadata section: $line\n";
-        }
-    }
-
-    # Process any last file metadata which has not already been processed.
-    process_file(%info) if %info;
-}
-
-############################### MAIN ENTRY POINT ##############################
-# Program start.  We expect to be called with a single argument, which is the
-# name of the backup descriptor file written by a backup pass.  This will name
-# the root object in the snapshot, from which we can reach all other data we
-# need.
-
-# Parse command-line arguments.  The first (required) is the name of the
-# snapshot descriptor file.  The backup objects are assumed to be stored in the
-# same directory as the descriptor.  The second (optional) argument is the
-# directory where the restored files should be written; it defaults to ".";
-my $descriptor = $ARGV[0];
-unless (defined($descriptor) && -r $descriptor) {
-    print STDERR "Usage: $0 <snapshot file>\n";
-    exit 1;
-}
-
-if (defined($ARGV[1])) {
-    $DEST_DIR = $ARGV[1];
-}
-
-$OBJECT_DIR = dirname($descriptor);
-print "Source directory: $OBJECT_DIR\n" if $VERBOSE;
-
-# Read the snapshot descriptor to find the root object.  Parse it to get a set
-# of key/value pairs.
-open DESCRIPTOR, "<", $descriptor
-    or die "Cannot open backup descriptor file $descriptor: $!";
-my %descriptor = ();
-my ($line, $last_key);
-while (defined($line = <DESCRIPTOR>)) {
-    # Any lines of the form "key: value" should be inserted into the
-    # %descriptor dictionary.  Any continuation line (a line starting with
-    # whitespace) will append text to the previous key's value.  Ignore other
-    # lines.
-    chomp $line;
-
-    if ($line =~ m/^(\w+):\s*(.*)$/) {
-        $descriptor{$1} = $2;
-        $last_key = $1;
-    } elsif ($line =~/^\s/ && defined $last_key) {
-        $descriptor{$last_key} .= $line;
-    } else {
-        undef $last_key;
-        print STDERR "Ignoring line in backup descriptor: $line\n";
-    }
-}
-
-# A valid backup descriptor should at the very least specify the root metadata
-# object.
-if (!exists $descriptor{Root}) {
-    die "Expected 'Root:' specification in backup descriptor file";
-}
-my $root = $descriptor{Root};
-close DESCRIPTOR;
-
-# Set the umask to something restrictive.  As we unpack files, we'll originally
-# write the files/directories without setting the permissions, so be
-# conservative and ensure that they can't be read.  Afterwards, we'll properly
-# fix up permissions.
-umask 077;
-
-# Start processing metadata stored in the root to recreate the files.
-print "Root object: $root\n" if $VERBOSE;
-my $contents = load_ref($root);
-process_metadata($contents);
author	Michael Vrable <mvrable@cs.ucsd.edu>
	Thu, 20 Sep 2007 23:26:21 +0000 (16:26 -0700)
committer	Michael Vrable <mvrable@turin.ucsd.edu>
	Thu, 20 Sep 2007 23:26:21 +0000 (16:26 -0700)
clean-segments.pl	[deleted file]	patch \| blob \| history
contrib/clean-segments.pl	[new file with mode: 0755]	patch \| blob
contrib/lbs-filter-gpg	[new file with mode: 0755]	patch \| blob
contrib/restore.pl	[new file with mode: 0755]	patch \| blob
lbs-filter-gpg	[deleted file]	patch \| blob \| history
restore.pl	[deleted file]	patch \| blob \| history