3 # Proof-of-concept/reference decoder for LBS-format backup snapshots.
5 # This decoder aims to decompress an LBS snapshot. It is not meant to be
6 # particularly efficient, but should be a small and portable tool for doing so
7 # (important for recovering from data loss). It is also meant to serve as a
8 # check on the snapshot tool and data format itself, and serve as documentation for the format.
11 # This decoder does not understand TAR archives; it assumes that all segments
12 # in the snapshot have already been decompressed, and that objects are
13 # available simply as files in the filesystem. This simplifies the design.
15 # Limitations: Since this code is probably using 32-bit arithmetic, files
16 # larger than 2-4 GB may not be properly handled.
18 # Copyright (C) 2007 Michael Vrable
# Global settings.  $OBJECT_DIR and $DEST_DIR are assigned during
# command-line processing later in the file.
24 my $OBJECT_DIR; # Directory in which the unpacked objects are available
25 my $DEST_DIR = "."; # Directory where restored files should be placed
26 my $RECURSION_LIMIT = 3; # Bound on recursive object references
28 my $VERBOSE = 0; # Set to 1 to enable debugging messages
30 ############################ CHECKSUM VERIFICATION ############################
31 # A very simple layer for verifying checksums. Checksums may be used on object
32 # references directly, and can also be used to verify entire reconstructed files.
35 # A checksum to verify is given in the form "algorithm=hexdigest". Given such
36 # a string, we can construct a "verifier" object. Bytes can be incrementally
37 # added to the verifier, and at the end a test can be made to see if the
38 # checksum matches. The caller need not know what algorithm is used. However,
39 # at the moment we only support SHA-1 for computing digests (algorithm name "sha1").
# Build a verifier for a checksum given in the form "algorithm=hexdigest".
#
# Returns a hash reference with the keys ALGORITHM (the algorithm name), HASH
# (the expected digest, lowercase hex) and DIGESTER (a fresh Digest::SHA1
# object that accumulates the bytes to be checked).
#
# Dies if the string is malformed or names an algorithm other than "sha1".
sub verifier_create {
    my $checksum = shift;

    if ($checksum !~ m/^(\w+)=([0-9a-f]+)$/) {
        die "Malformed checksum: $checksum";
    }
    my ($algorithm, $hash) = ($1, $2);
    if ($algorithm ne 'sha1') {
        die "Unsupported checksum algorithm: $algorithm";
    }

    my %verifier = (
        ALGORITHM => $algorithm,
        HASH => $hash,
        # Explicit method call; the indirect-object form "new Digest::SHA1"
        # can be misparsed when a sub named "new" is in scope.
        DIGESTER => Digest::SHA1->new,
    );

    return \%verifier;
}
# Feed a chunk of data into the running digest of a verifier.
# NOTE(review): the statements that unpack $verifier and $data from the
# argument list are not visible in this listing; call sites pass the verifier
# first and the data second -- confirm against the full file.
61 sub verifier_add_bytes {
# The digest object lives under the DIGESTER key of the verifier hash.
63 my $digester = $verifier->{DIGESTER};
66 $digester->add($data);
# Finish the digest computation and compare it against the expected value.
# NOTE(review): the enclosing sub header and the unpacking of $verifier are
# not visible in this listing; callers use verifier_check($verifier) and test
# the result for truth -- confirm against the full file.
71 my $digester = $verifier->{DIGESTER};
# Hex form of the digest of everything added so far.
73 my $newhash = $digester->hexdigest();
# A mismatch is reported on STDERR only when debugging output is enabled; the
# caller learns of it through the return value either way.
74 if ($VERBOSE && $verifier->{HASH} ne $newhash) {
75 print STDERR "Verification failure: ",
76 $newhash, " != ", $verifier->{HASH}, "\n";
# True exactly when the computed digest equals the expected one.
78 return ($verifier->{HASH} eq $newhash);
81 ################################ OBJECT ACCESS ################################
82 # The base of the decompressor is the object reference layer. See ref.h for a
83 # description of the format for object references. These functions will parse
84 # an object reference, locate the object data from the filesystem, perform any
85 # necessary integrity checks (if a checksum is included), and return the object data.
# Load the data named by an object reference string and return it as a string
# of bytes.
#
# Accepted forms of $ref_str:
#   zero[LENGTH] or zero[START+LENGTH]
#       LENGTH zero bytes; nothing is read from disk.
#   segment/object(checksum)[range]
#       The file $OBJECT_DIR/segment/object.  The checksum and the range are
#       both optional.  The range is one of [=SIZE] (whole object, which must
#       be exactly SIZE bytes), [LENGTH] (first LENGTH bytes) or
#       [START+LENGTH].
#
# Dies if the reference or range is malformed, the object cannot be opened,
# the checksum does not match, or the range does not fit inside the object.
sub load_ref {
    my $ref_str = shift;

    # Check for special objects before attempting general parsing.
    if ($ref_str =~ m/^zero\[(?:\d+\+)?(\d+)\]$/) {
        return "\0" x ($1 + 0);
    }

    # Try to parse the object reference string into constituent pieces.  The
    # format is segment/object(checksum)[range].  Both the checksum and range
    # are optional.
    if ($ref_str !~ m/^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[\S+\])?$/) {
        die "Malformed object reference: $ref_str";
    }
    my ($segment, $object, $checksum, $range) = ($1, $2, $3, $4);

    # Next, use the segment/object components to locate and read the object
    # contents from disk.  Objects are arbitrary binary data, so read the file
    # in one piece with no line splitting and no I/O layer translation.
    my $path = "$OBJECT_DIR/$segment/$object";
    open my $fh, "<", $path
        or die "Unable to open object $path: $!";
    binmode $fh;
    my $contents = do { local $/; <$fh> };
    close $fh;
    $contents = '' unless defined $contents;

    # If a checksum was specified in the object reference, verify the object
    # integrity by computing a checksum of the read data and comparing.
    if (defined $checksum) {
        my ($spec) = $checksum =~ m/^\((\S+)\)$/;
        my $verifier = verifier_create($spec);
        verifier_add_bytes($verifier, $contents);
        if (!verifier_check($verifier)) {
            die "Integrity check for object $ref_str failed";
        }
    }

    # If a range was specified, then only a subset of the bytes of the object
    # are desired.  Extract just the desired bytes.
    if (defined $range) {
        my $object_size = length $contents;
        my ($start, $length);

        if ($range =~ m/^\[=(\d+)\]$/) {
            die "Object size incorrect (ref $ref_str, actual size $object_size)"
                if $object_size != $1;
            ($start, $length) = (0, $1 + 0);
        } elsif ($range =~ m/^\[(\d+)\]$/) {
            ($start, $length) = (0, $1 + 0);
        } elsif ($range =~ m/^\[(\d+)\+(\d+)\]$/) {
            ($start, $length) = ($1 + 0, $2 + 0);
        } else {
            die "Malformed object range: $range";
        }

        # Both values are non-negative, so this single test also bounds
        # $start.  A zero-length range ending exactly at the end of the object
        # (for example [=0] on an empty object) is valid and yields "".
        if ($start + $length > $object_size) {
            die "Object range $range falls outside object bounds "
                . "(actual size $object_size)";
        }

        $contents = substr $contents, $start, $length;
    }

    return $contents;
}
151 ############################### FILE PROCESSING ###############################
152 # Process the metadata for a single file. process_file is the main entry
153 # point; it should be given a list of file metadata key/value pairs.
154 # iterate_objects is a helper function used to iterate over the set of object
155 # references that contain the file data for a regular file.
# Replace every "%xx" escape in $str (a percent sign followed by two lowercase
# hex digits) with the character whose code is that hex value.  Escapes
# written with uppercase hex digits are not matched and stay unchanged.
# NOTE(review): the enclosing sub is not visible in this listing; presumably
# this is the body of uri_decode, which callers apply to file names and
# symlink targets -- confirm against the full file.
168 $str =~ s/%([0-9a-f]{2})/chr(hex($1))/ge;
# Call a function once for every object reference in a whitespace-separated
# list, expanding indirect references along the way.
#
# Arguments:
#   $callback  code reference, invoked as $callback->($arg, $reference)
#   $arg       opaque value handed through to the callback
#   $text      whitespace-separated list of object references
#   (optional) current recursion depth; used by the recursive calls
#
# A reference starting with "@" is indirect: the object it names is loaded
# with load_ref and its contents are processed as a further list of
# references.  Dies with "Recursion limit reached" once the nesting depth
# reaches $RECURSION_LIMIT.
sub iterate_objects {
    my $callback = shift;       # Function to be called for each reference
    my $arg = shift;            # Argument passed to callback
    my $text = shift;           # Whitespace-separated list of object references

    # Simple limit to guard against cycles in the object references
    my $recursion_level = shift || 0;
    if ($recursion_level >= $RECURSION_LIMIT) {
        die "Recursion limit reached";
    }

    # Split the provided text at whitespace boundaries to produce the list of
    # object references.  The special pattern ' ' also discards leading
    # whitespace, so no empty reference is ever produced.  If any of these
    # start with "@", then we have an indirect reference, and must look up
    # that object and call iterate_objects again.
    foreach my $obj (split ' ', $text) {
        if ($obj =~ /^@(\S+)$/) {
            my $indirect = load_ref($1);
            iterate_objects($callback, $arg, $indirect, $recursion_level + 1);
        } else {
            $callback->($arg, $obj);
        }
    }
}
# Per-object step of file reconstruction: load one object's data, then fold
# it into the running checksum and byte count kept in $state.
# NOTE(review): the sub header and the unpacking of $state and $obj are not
# visible in this listing; presumably this is obj_callback, which is handed to
# iterate_objects together with a state hash reference -- confirm.
202 my $data = load_ref($obj);
# NOTE(review): the statement this "or die" completes is not visible here;
# judging by the message it is the write of $data to the output file.
204 or die "Error writing file data: $!";
205 verifier_add_bytes($state->{VERIFIER}, $data);
206 $state->{BYTES} += length($data);
209 # Extract the contents of a regular file by concatenating all the objects that make up its data.
# NOTE(review): the sub header and the unpacking of $name, %info and %state
# are not visible in this listing; the caller invokes
# unpack_file($filename, %info) -- confirm against the full file.
#
# The metadata must supply the object list (data) and the expected checksum
# and size; anything less is a fatal error.
216 if (!defined $info{data}) {
217 die "File contents not specified for $name";
219 if (!defined $info{checksum} || !defined $info{size}) {
220 die "File $name is missing checksum or size";
# Convert the textual size to a number for the comparison at the end.
223 $info{size} = parse_int($info{size});
225 # Open the file to be recreated. The data will be written out by the call
226 # to iterate_objects.
227 open FILE, ">", "$DEST_DIR/$name"
228 or die "Cannot write file $name: $!";
230 # Set up state so that we can incrementally compute the checksum and length
231 # of the reconstructed data. Then iterate over all objects in the file.
232 $state{VERIFIER} = verifier_create($info{checksum});
234 iterate_objects(\&obj_callback, \%state, $info{data});
238 # Verify that the reconstructed object matches the size/checksum we were given.
240 if (!verifier_check($state{VERIFIER}) || $state{BYTES} != $info{size}) {
241 die "File reconstruction failed for $name: size or checksum differs";
# Restore a single file system entry described by one block of metadata.
#
# Takes the metadata as a flat list of key/value pairs.  Keys read here:
#   name             required; URI-encoded path, created below $DEST_DIR
#   type             "-" or "f" regular file, "d" directory, "l" symlink,
#                    "p"/"s"/"c"/"b" special files (skipped with a message)
#   target/contents  URI-encoded symlink target ("target" is preferred)
#   mtime, user, group, mode
#                    optional metadata applied after the entry is created
# Regular files are handed to unpack_file together with the full metadata.
#
# Dies if the name is missing, the type is unknown, or the entry cannot be
# created.  Failures to restore the timestamp, ownership or permissions only
# produce warnings.
sub process_file {
    my %info = @_;

    if (!defined($info{name})) {
        die "Filename not specified in metadata block";
    }

    my $type = $info{type};
    $type = '' unless defined $type;

    my $filename = uri_decode($info{name});
    print "$filename\n" if $VERBOSE;

    # Restore the specified file.  How to do so depends upon the file type, so
    # dispatch based on that.
    my $dest = "$DEST_DIR/$filename";
    if ($type eq '-' || $type eq 'f') {
        # Regular file
        unpack_file($filename, %info);
    } elsif ($type eq 'd') {
        # Directory
        if ($filename ne '.') {
            mkdir $dest or die "Cannot create directory $filename: $!";
        }
    } elsif ($type eq 'l') {
        # Symlink.  Choose the value by definedness and length rather than by
        # truth, so that a link whose target is the string "0" is not
        # mistaken for a missing value.
        my $target = $info{target};
        $target = $info{contents}
            unless defined $target && length $target;
        if (!defined($target)) {
            die "Symlink $filename has no value specified";
        }
        $target = uri_decode($target);
        symlink $target, $dest
            or die "Cannot create symlink $filename: $!";

        # TODO: We can't properly restore all metadata for symbolic links
        # (attempts to do so below will change metadata for the pointed-to
        # file).  This should be later fixed, but for now we simply return
        # before getting to the restore metadata step below.
        return;
    } elsif ($type eq 'p' || $type eq 's' || $type eq 'c' || $type eq 'b') {
        # Pipe, socket, character device, block device.
        # TODO: Handle these cases.
        print STDERR "Ignoring special file $filename of type $type\n";
        return;
    } else {
        die "Unknown file type '$type' for file $filename";
    }

    # Restore mode, ownership, and any other metadata for the file.  This is
    # split out from the code above since the code is the same regardless of
    # file type.  An mtime of 0 is a valid timestamp, so only fall back to the
    # current time when no mtime was recorded at all.
    my $mtime = (defined $info{mtime} && length $info{mtime})
        ? $info{mtime} : time();
    utime time(), $mtime, $dest
        or warn "Unable to update mtime for $dest";

    # Only the first whitespace-separated field of user/group is used as the
    # numeric id.  An id of -1 asks chown to leave that value unchanged.
    my $uid = -1;
    my $gid = -1;
    if (defined $info{user}) {
        my @items = split /\s/, $info{user};
        $uid = parse_int($items[0]) if @items;
    }
    if (defined $info{group}) {
        my @items = split /\s/, $info{group};
        $gid = parse_int($items[0]) if @items;
    }
    chown $uid, $gid, $dest
        or warn "Unable to change ownership for $dest";

    if (defined $info{mode}) {
        my $mode = parse_int($info{mode});
        chmod $mode, $dest
            or warn "Unable to change permissions for $dest";
    }
}
319 ########################### METADATA LIST PROCESSING ##########################
320 # Process the file metadata listing provided, and as information for each file
321 # is extracted, pass it to process_file. This will recursively follow indirect
322 # references to other metadata objects.
# Walk a metadata listing and restore every file it describes.
#
# Arguments:
#   $metadata         text of the metadata listing
#   $recursion_level  nesting depth of indirect references; optional,
#                     treated as 0 when not supplied
#
# Each group of "key: value" lines is collected into %info and handed to
# process_file.  Lines of the form "@reference" are loaded with load_ref and
# processed recursively.  Dies once the nesting depth reaches
# $RECURSION_LIMIT.
# NOTE(review): several statements of this sub (declarations of %info, $line
# and $last_key, the reset of %info after a block is flushed, and the storing
# of parsed key/value pairs) are not visible in this listing -- confirm
# against the full file.
323 sub process_metadata {
324 my ($metadata, $recursion_level) = @_;
326 # Check recursion; this will prevent us from infinitely recursing on an
327 # indirect reference which loops back to itself.
328 $recursion_level ||= 0;
329 if ($recursion_level >= $RECURSION_LIMIT) {
330 die "Recursion limit reached";
333 # Split the metadata into lines, then start processing each line. There
334 # are two primary cases:
335 # - Lines starting with "@" are indirect references to other metadata
336 # objects. Recursively process that object before continuing.
337 # - Other lines should come in groups separated by a blank line; these
338 # contain metadata for a single file that should be passed to process_file.
340 # Note that blocks of metadata about a file cannot span a boundary between metadata objects.
345 foreach $line (split /\n/, $metadata) {
346 # If we find a blank line or a reference to another block, process any
347 # data for the previous file first.
348 if ($line eq '' || $line =~ m/^@/) {
349 process_file(%info) if %info;
355 # Recursively handle indirect metadata blocks.
356 if ($line =~ m/^@(\S+)$/) {
357 print "Indirect: $1\n" if $VERBOSE;
358 my $indirect = load_ref($1);
359 process_metadata($indirect, $recursion_level + 1);
363 # Try to parse the data as "key: value" pairs of file metadata. Also
364 # handle continuation lines, which start with whitespace and continue
365 # the previous "key: value" pair.
366 if ($line =~ m/^([-\w]+):\s*(.*)$/) {
# A continuation line is appended verbatim, leading whitespace included.
369 } elsif ($line =~/^\s/ && defined $last_key) {
370 $info{$last_key} .= $line;
# Anything else is reported on STDERR.
372 print STDERR "Junk in file metadata section: $line\n";
376 # Process any last file metadata which has not already been processed.
377 process_file(%info) if %info;
380 ############################### MAIN ENTRY POINT ##############################
381 # Program start. We expect to be called with a single argument, which is the
382 # name of the backup descriptor file written by a backup pass. This will name
383 # the root object in the snapshot, from which we can reach all other data we need.
386 # Parse command-line arguments. The first (required) is the name of the
387 # snapshot descriptor file. The backup objects are assumed to be stored in the
388 # same directory as the descriptor. The second (optional) argument is the
389 # directory where the restored files should be written; it defaults to ".".
390 my $descriptor = $ARGV[0];
391 unless (defined($descriptor) && -r $descriptor) {
# NOTE(review): the usage text names only the snapshot file, although an
# optional second argument (the destination directory) is read just below.
392 print STDERR "Usage: $0 <snapshot file>\n";
396 if (defined($ARGV[1])) {
397 $DEST_DIR = $ARGV[1];
# Objects are looked up relative to the directory holding the descriptor.
400 $OBJECT_DIR = dirname($descriptor);
401 print "Source directory: $OBJECT_DIR\n" if $VERBOSE;
403 # Read the snapshot descriptor to find the root object. Parse it to get a set
404 # of key/value pairs.
405 open DESCRIPTOR, "<", $descriptor
406 or die "Cannot open backup descriptor file $descriptor: $!";
408 my ($line, $last_key);
409 while (defined($line = <DESCRIPTOR>)) {
410 # Any lines of the form "key: value" should be inserted into the
411 # %descriptor dictionary. Any continuation line (a line starting with
412 # whitespace) will append text to the previous key's value. Ignore other lines.
# NOTE(review): no chomp of $line is visible in this listing; if there is
# none, appended continuation text keeps its trailing newline -- confirm.
416 if ($line =~ m/^([-\w]+):\s*(.*)$/) {
417 $descriptor{$1} = $2;
419 } elsif ($line =~/^\s/ && defined $last_key) {
420 $descriptor{$last_key} .= $line;
423 print STDERR "Ignoring line in backup descriptor: $line\n";
427 # A valid backup descriptor should at the very least specify the root metadata object.
429 if (!exists $descriptor{Root}) {
430 die "Expected 'Root:' specification in backup descriptor file";
432 my $root = $descriptor{Root};
435 # Set the umask to something restrictive. As we unpack files, we'll originally
436 # write the files/directories without setting the permissions, so be
437 # conservative and ensure that they can't be read. Afterwards, we'll properly
438 # fix up permissions.
# NOTE(review): the umask call that this comment describes is not visible in
# this listing -- confirm it is present in the full file.
441 # Start processing metadata stored in the root to recreate the files.
442 print "Root object: $root\n" if $VERBOSE;
443 my $contents = load_ref($root);
444 process_metadata($contents);
444 process_metadata($contents);