From: Michael Vrable Date: Sat, 12 May 2007 06:18:43 +0000 (-0700) Subject: Continue work on reference decoder. X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=6ac0cb32a08c81ddb87cd3a1373e8e64e8757534;p=cumulus.git Continue work on reference decoder. - Add code to parse metadata sections (including following indirect pointers). - Add code to parse data block listings (including indirect pointers). - Verify checksums on all files. Primarily left: actually write restored file contents out to the filesystem. --- diff --git a/restore.pl b/restore.pl index 90ff3cc..ef7a3df 100755 --- a/restore.pl +++ b/restore.pl @@ -16,8 +16,10 @@ use strict; use Digest::SHA1; +use File::Basename; my $OBJECT_DIR = "."; # Directory where objects are unpacked +my $RECURSION_LIMIT = 3; # Bound on recursive object references ############################ CHECKSUM VERIFICATION ############################ # A very simple later for verifying checksums. Checksums may be used on object @@ -63,6 +65,10 @@ sub verifier_check { my $digester = $verifier->{DIGESTER}; my $newhash = $digester->hexdigest(); + if ($verifier->{HASH} ne $newhash) { + print STDERR "Verification failure: ", + $newhash, " != ", $verifier->{HASH}, "\n"; + } return ($verifier->{HASH} eq $newhash); } @@ -87,7 +93,7 @@ sub load_ref { # Next, use the segment/object components to locate and read the object # contents from disk. open OBJECT, "<", "$OBJECT_DIR/$segment/$object" - or die "Unable to open object: $OBJECT_DIR/$segment/$object"; + or die "Unable to open object $OBJECT_DIR/$segment/$object: $!"; my $contents = join '', ; close OBJECT; @@ -122,10 +128,142 @@ sub load_ref { return $contents; } +############################### FILE PROCESSING ############################### +# Process the metadata for a single file. process_file is the main entry +# point; it should be given a list of file metadata key/value pairs. +# iterate_objects is a helper function used to iterate over the set of object +# references that contain the file data for a regular file. + +sub iterate_objects { + my $callback = shift; # Function to be called for each reference + my $arg = shift; # Argument passed to callback + my $text = shift; # Whitespace-separate list of object references + + # Simple limit to guard against cycles in the object references + my $recursion_level = shift || 0; + if ($recursion_level >= $RECURSION_LIMIT) { + die "Recursion limit reached"; + } + + # Split the provided text at whitespace boundaries to produce the list of + # object references. If any of these start with "@", then we have an + # indirect reference, and must look up that object and call iterate_objects + # with the contents. + my $obj; + foreach $obj (split /\s+/, $text) { + next if $obj eq ""; + if ($obj =~ /^@(\S+)$/) { + my $indirect = load_ref($1); + iterate_objects($callback, $arg, $1, $recursion_level + 1); + } else { + &$callback($arg, $obj); + } + } +} + +sub obj_callback { + my $verifier = shift; + my $obj = shift; + my $data = load_ref($obj); + print " ", $obj, " (size ", length($data), ")\n"; + verifier_add_bytes($verifier, $data); +} + +sub process_file { + my %info = @_; + + # TODO + print "process_file: ", $info{name}, "\n"; + + if (defined $info{data}) { + my $verifier = verifier_create($info{checksum}); + + iterate_objects(\&obj_callback, $verifier, $info{data}); + + print " checksum: ", (verifier_check($verifier) ? "pass" : "fail"), + " ", $info{checksum}, "\n"; + } +} + +########################### METADATA LIST PROCESSING ########################## +# Process the file metadata listing provided, and as information for each file +# is extracted, pass it to process_file. This will recursively follow indirect +# references to other metadata objects. +sub process_metadata { + my ($metadata, $recursion_level) = @_; + + # Check recursion; this will prevent us from infinitely recursing on an + # indirect reference which loops back to itself. + $recursion_level ||= 0; + if ($recursion_level >= $RECURSION_LIMIT) { + die "Recursion limit reached"; + } + + # Split the metadata into lines, then start processing each line. There + # are two primary cases: + # - Lines starting with "@" are indirect references to other metadata + # objects. Recursively process that object before continuing. + # - Other lines should come in groups separated by a blank line; these + # contain metadata for a single file that should be passed to + # process_file. + # Note that blocks of metadata about a file cannot span a boundary between + # metadata objects. + my %info = (); + my $line; + foreach $line (split /\n/, $metadata) { + # If we find a blank line or a reference to another block, process any + # data for the previous file first. + if ($line eq '' || $line =~ m/^@/) { + process_file(%info) if %info; + %info = (); + next if $line eq ''; + } + + # Recursively handle indirect metadata blocks. + if ($line =~ m/^@(\S+)$/) { + print "Indirect: $1\n"; + my $indirect = load_ref($1); + process_metadata($indirect, $recursion_level + 1); + next; + } + + # Try to parse the data as "key: value" pairs of file metadata. + if ($line =~ m/^(\w+):\s+(.*)\s*$/) { + $info{$1} = $2; + } else { + print STDERR "Junk in file metadata section: $line\n"; + } + } + + # Process any last file metadata which has not already been processed. + process_file(%info) if %info; +} + ############################### MAIN ENTRY POINT ############################## -my $object = $ARGV[0]; +# Program start. We expect to be called with a single argument, which is the +# name of the backup descriptor file written by a backup pass. This will name +# the root object in the snapshot, from which we can reach all other data we +# need. + +my $descriptor = $ARGV[0]; +unless (defined($descriptor) && -r $descriptor) { + print STDERR "Usage: $0 \n"; + exit 1; +} + +$OBJECT_DIR = dirname($descriptor); +print "Source directory: $OBJECT_DIR\n"; + +open DESCRIPTOR, "<", $descriptor + or die "Cannot open backup descriptor file $descriptor: $!"; +my $line = ; +if ($line !~ m/^root: (\S+)$/) { + die "Expected 'root:' specification in backup descriptor file"; +} +my $root = $1; +close DESCRIPTOR; -#print "Object: $object\n\n"; +print "Root object: $root\n"; -my $contents = load_ref($object); -print $contents; +my $contents = load_ref($root); +process_metadata($contents);