3 # Proof-of-concept/reference decoder for LBS-format backup snapshots.
5 # This decoder aims to decompress an LBS snapshot. It is not meant to be
6 # particularly efficient, but should be a small and portable tool for doing so
7 # (important for recovering from data loss). It is also meant to serve as a
8 # check on the snapshot tool and data format itself, and serve as documentation
11 # This decoder does not understand TAR archives; it assumes that all segments
12 # in the snapshot have already been decompressed, and that objects are
13 # available simply as files in the filesystem. This simplifies the design.
15 # Copyright (C) 2007 Michael Vrable
# File-level settings.
#   $OBJECT_DIR       Directory where objects are unpacked.  Starts as the
#                     current directory; the main program replaces it with the
#                     directory that contains the descriptor file.
#   $RECURSION_LIMIT  Bound on recursive object references, checked by
#                     iterate_objects and process_metadata.
my ($OBJECT_DIR, $RECURSION_LIMIT) = (".", 3);
24 ############################ CHECKSUM VERIFICATION ############################
25 # A very simple layer for verifying checksums. Checksums may be used on object
26 # references directly, and can also be used to verify entire reconstructed
29 # A checksum to verify is given in the form "algorithm=hexdigest". Given such
30 # a string, we can construct a "verifier" object. Bytes can be incrementally
31 # added to the verifier, and at the end a test can be made to see if the
32 # checksum matches. The caller need not know what algorithm is used. However,
33 # at the moment we only support SHA-1 for computing digest (algorithm name
# Build a verifier from a checksum specification.
#
# Parameter:
#   $checksum - string of the form "algorithm=hexdigest" (lowercase hex).
# Returns:
#   A hash reference with three fields:
#     ALGORITHM - the algorithm name taken from the specification
#     HASH      - the expected hex digest taken from the specification
#     DIGESTER  - an incremental digest object (supports add/hexdigest)
# Dies:
#   "Malformed checksum: ..." if the string is undefined or does not have the
#   expected shape; "Unsupported checksum algorithm: ..." for anything other
#   than "sha1".
sub verifier_create {
    my $checksum = shift;

    if (!defined $checksum || $checksum !~ m/^(\w+)=([0-9a-f]+)$/) {
        die "Malformed checksum: "
            . (defined $checksum ? $checksum : "(undefined)");
    }
    my ($algorithm, $hash) = ($1, $2);
    if ($algorithm ne 'sha1') {
        die "Unsupported checksum algorithm: $algorithm";
    }

    # Digest::SHA is part of the core Perl distribution and computes the same
    # SHA-1 digest as the older Digest::SHA1 module, so the decoder does not
    # need a separately installed module for this.
    require Digest::SHA;

    return {
        ALGORITHM => $algorithm,
        HASH      => $hash,
        DIGESTER  => Digest::SHA->new(1),
    };
}
# Feed another chunk of data into a verifier.
#
# Parameters:
#   $verifier - hash reference holding a DIGESTER field (as built by
#               verifier_create)
#   $data     - string of bytes to add to the running digest
# Returns whatever the digest object's add() method returns; the callers in
# this file ignore it.
sub verifier_add_bytes {
    my ($verifier, $data) = @_;

    my $digester = $verifier->{DIGESTER};
    $digester->add($data);
}
# Test whether the bytes added to a verifier match the expected checksum.
#
# Parameter:
#   $verifier - hash reference with HASH (expected hex digest) and DIGESTER
#               (digest object) fields
# Returns a true value when the computed hex digest equals the expected one,
# and a false value otherwise.  On a mismatch a "Verification failure" line
# showing both digests is written to STDERR.
#
# NOTE(review): Digest-style objects normally reset their state once
# hexdigest() has been called, so this is presumably meant to be called once
# per verifier -- confirm before calling it repeatedly.
sub verifier_check {
    my $verifier = shift;
    my $digester = $verifier->{DIGESTER};

    my $newhash = $digester->hexdigest();
    my $matches = ($verifier->{HASH} eq $newhash);
    if (!$matches) {
        print STDERR "Verification failure: ",
            $newhash, " != ", $verifier->{HASH}, "\n";
    }
    return $matches;
}
75 ################################ OBJECT ACCESS ################################
76 # The base of the decompressor is the object reference layer. See ref.h for a
77 # description of the format for object references. These functions will parse
78 # an object reference, locate the object data from the filesystem, perform any
79 # necessary integrity checks (if a checksum is included), and return the object
# Load the data named by an object reference string.
#
# Parameter:
#   $ref_str - reference of the form segment/object(checksum)[range]; both
#              the checksum and the range are optional.
# Returns the object's bytes as a string (only the requested sub-range when a
# range is given).
# Dies if the reference or range is malformed, if the object file cannot be
# opened, if the checksum does not match, or if the range does not fit inside
# the object.
sub load_ref {
    my $ref_str = shift;

    # First, try to parse the object reference string into constituent
    # pieces.
    if ($ref_str !~ m/^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[\S+\])?$/) {
        die "Malformed object reference: $ref_str";
    }
    my ($segment, $object, $checksum, $range) = ($1, $2, $3, $4);

    # Next, use the segment/object components to locate and read the object
    # contents from disk.  A lexical handle keeps this sub from sharing a
    # global filehandle, and binmode keeps the bytes untranslated on
    # platforms that distinguish text from binary files.
    my $path = "$OBJECT_DIR/$segment/$object";
    open my $fh, "<", $path
        or die "Unable to open object $path: $!";
    binmode $fh;
    my $contents = do { local $/; <$fh> };
    close $fh;
    $contents = '' unless defined $contents;

    # If a checksum was specified in the object reference, verify the object
    # integrity by computing a checksum of the read data and comparing.
    if (defined $checksum) {
        # Strip the surrounding parentheses; the pattern above guarantees
        # that this match succeeds.
        my ($spec) = $checksum =~ m/^\((\S+)\)$/;
        my $verifier = verifier_create($spec);
        verifier_add_bytes($verifier, $contents);
        if (!verifier_check($verifier)) {
            die "Integrity check for object $ref_str failed";
        }
    }

    # If a range was specified, then only a subset of the bytes of the object
    # are desired.  Extract just the desired bytes.
    if (defined $range) {
        if ($range !~ m/^\[(\d+)\+(\d+)\]$/) {
            die "Malformed object range: $range";
        }

        my $object_size = length $contents;
        my ($start, $length) = ($1 + 0, $2 + 0);
        if ($start >= $object_size || $start + $length > $object_size) {
            die "Object range $range falls outside object bounds "
                . "(actual size $object_size)";
        }

        $contents = substr $contents, $start, $length;
    }

    return $contents;
}
131 ############################### FILE PROCESSING ###############################
132 # Process the metadata for a single file. process_file is the main entry
133 # point; it should be given a list of file metadata key/value pairs.
134 # iterate_objects is a helper function used to iterate over the set of object
135 # references that contain the file data for a regular file.
# Invoke a callback for every object reference in a list, expanding indirect
# references along the way.
#
# Parameters:
#   $callback        - code reference, called as $callback->($arg, $ref) for
#                      each direct object reference
#   $arg             - opaque argument passed through to the callback
#   $text            - whitespace-separated list of object references
#   $recursion_level - (optional) current nesting depth; defaults to 0
# Dies with "Recursion limit reached" once the nesting depth reaches
# $RECURSION_LIMIT, which guards against cycles in the object references.
sub iterate_objects {
    my $callback = shift;       # Function to be called for each reference
    my $arg = shift;            # Argument passed to callback
    my $text = shift;           # Whitespace-separated list of object references

    # Simple limit to guard against cycles in the object references
    my $recursion_level = shift || 0;
    if ($recursion_level >= $RECURSION_LIMIT) {
        die "Recursion limit reached";
    }

    # Split the provided text at whitespace boundaries to produce the list of
    # object references.  split ' ' also discards leading whitespace, so the
    # callback is never handed an empty reference.
    foreach my $obj (split ' ', $text) {
        if ($obj =~ /^@(\S+)$/) {
            # Indirect reference: the named object holds a further list of
            # references, so recurse on the loaded contents rather than on
            # the reference string itself.
            my $indirect = load_ref($1);
            iterate_objects($callback, $arg, $indirect, $recursion_level + 1);
        } else {
            $callback->($arg, $obj);
        }
    }
}
# Callback handed to iterate_objects by process_file: handles one object
# reference belonging to a file.
#
# Parameters:
#   $verifier - verifier accumulating the file's checksum
#   $obj      - object reference string
# Loads the referenced object, prints the reference and the size of the
# loaded data, and adds the data to the verifier.
sub obj_callback {
    my ($verifier, $obj) = @_;

    my $data = load_ref($obj);
    print " ", $obj, " (size ", length($data), ")\n";
    verifier_add_bytes($verifier, $data);
}
# Process the metadata for a single file.
#
# Parameters:
#   %info - flat list of metadata key/value pairs.  The keys read here are
#           "name", "data" (whitespace-separated object references) and
#           "checksum" ("algorithm=hexdigest").
# Prints the file name.  When a "data" entry is present, every referenced
# object is loaded through iterate_objects/obj_callback and the accumulated
# bytes are checked against the checksum, with the result printed as "pass"
# or "fail".
sub process_file {
    my %info = @_;

    print "process_file: ", $info{name}, "\n";

    if (defined $info{data}) {
        my $verifier = verifier_create($info{checksum});

        iterate_objects(\&obj_callback, $verifier, $info{data});

        print " checksum: ", (verifier_check($verifier) ? "pass" : "fail"),
            " ", $info{checksum}, "\n";
    }
}
188 ########################### METADATA LIST PROCESSING ##########################
189 # Process the file metadata listing provided, and as information for each file
190 # is extracted, pass it to process_file. This will recursively follow indirect
191 # references to other metadata objects.
# Walk a metadata listing and hand each file's metadata to process_file.
#
# Parameters:
#   $metadata        - text of the metadata listing
#   $recursion_level - (optional) current nesting depth; defaults to 0
# Dies with "Recursion limit reached" once the nesting depth reaches
# $RECURSION_LIMIT.  Lines that are neither blank, an "@reference", nor a
# "key: value" pair are reported on STDERR and otherwise ignored.
sub process_metadata {
    my ($metadata, $recursion_level) = @_;

    # Check recursion; this will prevent us from infinitely recursing on an
    # indirect reference which loops back to itself.
    $recursion_level ||= 0;
    if ($recursion_level >= $RECURSION_LIMIT) {
        die "Recursion limit reached";
    }

    # Split the metadata into lines, then start processing each line.  There
    # are two primary cases:
    #   - Lines starting with "@" are indirect references to other metadata
    #     objects.  Recursively process that object before continuing.
    #   - Other lines should come in groups separated by a blank line; these
    #     contain metadata for a single file that should be passed to
    #     process_file.
    my %info = ();
    foreach my $line (split /\n/, $metadata) {
        # If we find a blank line or a reference to another block, process any
        # data for the previous file first.
        if ($line eq '' || $line =~ m/^@/) {
            process_file(%info) if %info;
            %info = ();
            next if $line eq '';
        }

        # Recursively handle indirect metadata blocks.
        if ($line =~ m/^@(\S+)$/) {
            print "Indirect: $1\n";
            my $indirect = load_ref($1);
            process_metadata($indirect, $recursion_level + 1);
            next;
        }

        # Try to parse the data as "key: value" pairs of file metadata.  The
        # value is matched non-greedily so that the trailing \s* really does
        # strip trailing whitespace instead of leaving it in the value.
        if ($line =~ m/^(\w+):\s+(.*?)\s*$/) {
            $info{$1} = $2;
        } else {
            print STDERR "Junk in file metadata section: $line\n";
        }
    }

    # Process any last file metadata which has not already been processed.
    process_file(%info) if %info;
}
242 ############################### MAIN ENTRY POINT ##############################
243 # Program start. We expect to be called with a single argument, which is the
244 # name of the backup descriptor file written by a backup pass. This will name
245 # the root object in the snapshot, from which we can reach all other data we
# Main program.  The single command-line argument names the backup descriptor
# file; its first line must read "root: <object reference>".  The directory
# holding the descriptor becomes the object directory, the root object is
# loaded, and its contents are processed as the top-level metadata listing.
my $descriptor = $ARGV[0];
unless (defined($descriptor) && -r $descriptor) {
    print STDERR "Usage: $0 <snapshot file>\n";
    exit 1;
}

$OBJECT_DIR = dirname($descriptor);
print "Source directory: $OBJECT_DIR\n";

# Only the first line of the descriptor is needed, so read it and close the
# handle straight away.
open my $descriptor_fh, "<", $descriptor
    or die "Cannot open backup descriptor file $descriptor: $!";
my $line = <$descriptor_fh>;
close $descriptor_fh;

# An empty descriptor yields an undefined line; treat that the same as a
# missing "root:" specification.
if (!defined $line || $line !~ m/^root: (\S+)$/) {
    die "Expected 'root:' specification in backup descriptor file";
}
my $root = $1;
print "Root object: $root\n";

my $contents = load_ref($root);
process_metadata($contents);