+############################### FILE PROCESSING ###############################
+# Process the metadata for a single file. process_file is the main entry
+# point; it should be given a list of file metadata key/value pairs.
+# iterate_objects is a helper function used to iterate over the set of object
+# references that contain the file data for a regular file.
+
+sub parse_int {
+ my $str = shift;
+ if ($str =~ /^0/) {
+ return oct($str);
+ } else {
+ return $str + 0;
+ }
+}
+
+sub uri_decode {
+ my $str = shift;
+ $str =~ s/%([0-9a-f]{2})/chr(hex($1))/ge;
+ return $str;
+}
+
+sub iterate_objects {
+ my $callback = shift; # Function to be called for each reference
+ my $arg = shift; # Argument passed to callback
+ my $text = shift; # Whitespace-separate list of object references
+
+ # Simple limit to guard against cycles in the object references
+ my $recursion_level = shift || 0;
+ if ($recursion_level >= $RECURSION_LIMIT) {
+ die "Recursion limit reached";
+ }
+
+ # Split the provided text at whitespace boundaries to produce the list of
+ # object references. If any of these start with "@", then we have an
+ # indirect reference, and must look up that object and call iterate_objects
+ # with the contents.
+ my $obj;
+ foreach $obj (split /\s+/, $text) {
+ next if $obj eq "";
+ if ($obj =~ /^@(\S+)$/) {
+ my $indirect = load_ref($1);
+ iterate_objects($callback, $arg, $indirect, $recursion_level + 1);
+ } else {
+ &$callback($arg, $obj);
+ }
+ }
+}
+
+sub obj_callback {
+ my $state = shift;
+ my $obj = shift;
+ my $data = load_ref($obj);
+ print FILE $data
+ or die "Error writing file data: $!";
+ verifier_add_bytes($state->{VERIFIER}, $data);
+ $state->{BYTES} += length($data);
+}
+
+# Extract the contents of a regular file by concatenating all the objects that
+# comprise it.
+sub unpack_file {
+ my $name = shift;
+ my %info = @_;
+ my %state = ();
+
+ if (!defined $info{data}) {
+ die "File contents not specified for $name";
+ }
+ if (!defined $info{checksum} || !defined $info{size}) {
+ die "File $name is missing checksum or size";
+ }
+
+ $info{size} = parse_int($info{size});
+
+ # Open the file to be recreated. The data will be written out by the call
+ # to iterate_objects.
+ open FILE, ">", "$DEST_DIR/$name"
+ or die "Cannot write file $name: $!";
+
+ # Set up state so that we can incrementally compute the checksum and length
+ # of the reconstructed data. Then iterate over all objects in the file.
+ $state{VERIFIER} = verifier_create($info{checksum});
+ $state{BYTES} = 0;
+ iterate_objects(\&obj_callback, \%state, $info{data});
+
+ close FILE;
+
+ # Verify that the reconstructed object matches the size/checksum we were
+ # given.
+ if (!verifier_check($state{VERIFIER}) || $state{BYTES} != $info{size}) {
+ die "File reconstruction failed for $name: size or checksum differs";
+ }
+}
+
+sub process_file {
+ my %info = @_;
+
+ if (!defined($info{name})) {
+ die "Filename not specified in metadata block";
+ }
+
+ my $type = $info{type};
+
+ my $filename = uri_decode($info{name});
+ print "$filename\n" if $VERBOSE;
+
+ # Restore the specified file. How to do so depends upon the file type, so
+ # dispatch based on that.
+ my $dest = "$DEST_DIR/$filename";
+ if ($type eq '-') {
+ # Regular file
+ unpack_file($filename, %info);
+ } elsif ($type eq 'd') {
+ # Directory
+ if ($filename ne '.') {
+ mkdir $dest or die "Cannot create directory $filename: $!";
+ }
+ } elsif ($type eq 'l') {
+ # Symlink
+ if (!defined($info{contents})) {
+ die "Symlink $filename has no value specified";
+ }
+ my $contents = uri_decode($info{contents});
+ symlink $contents, $dest
+ or die "Cannot create symlink $filename: $!";
+
+ # TODO: We can't properly restore all metadata for symbolic links
+ # (attempts to do so below will change metadata for the pointed-to
+ # file). This should be later fixed, but for now we simply return
+ # before getting to the restore metadata step below.
+ return;
+ } elsif ($type eq 'p' || $type eq 's' || $type eq 'c' || $type eq 'b') {
+ # Pipe, socket, character device, block device.
+ # TODO: Handle these cases.
+ print STDERR "Ignoring special file $filename of type $type\n";
+ return;
+ } else {
+ die "Unknown file type '$type' for file $filename";
+ }
+
+ # Restore mode, ownership, and any other metadata for the file. This is
+ # split out from the code above since the code is the same regardless of
+ # file type.
+ my $mtime = $info{mtime} || time();
+ utime time(), $mtime, $dest
+ or warn "Unable to update mtime for $dest";
+
+ my $uid = -1;
+ my $gid = -1;
+ if (defined $info{user}) {
+ my @items = split /\s/, $info{user};
+ $uid = parse_int($items[0]) if exists $items[0];
+ }
+ if (defined $info{group}) {
+ my @items = split /\s/, $info{group};
+ $gid = parse_int($items[0]) if exists $items[0];
+ }
+ chown $uid, $gid, $dest
+ or warn "Unable to change ownership for $dest";
+
+ if (defined $info{mode}) {
+ my $mode = parse_int($info{mode});
+ chmod $mode, $dest
+ or warn "Unable to change permissions for $dest";
+ }
+}
+
+########################### METADATA LIST PROCESSING ##########################
+# Process the file metadata listing provided, and as information for each file
+# is extracted, pass it to process_file. This will recursively follow indirect
+# references to other metadata objects.
+sub process_metadata {
+ my ($metadata, $recursion_level) = @_;
+
+ # Check recursion; this will prevent us from infinitely recursing on an
+ # indirect reference which loops back to itself.
+ $recursion_level ||= 0;
+ if ($recursion_level >= $RECURSION_LIMIT) {
+ die "Recursion limit reached";
+ }
+
+ # Split the metadata into lines, then start processing each line. There
+ # are two primary cases:
+ # - Lines starting with "@" are indirect references to other metadata
+ # objects. Recursively process that object before continuing.
+ # - Other lines should come in groups separated by a blank line; these
+ # contain metadata for a single file that should be passed to
+ # process_file.
+ # Note that blocks of metadata about a file cannot span a boundary between
+ # metadata objects.
+ my %info = ();
+ my $line;
+ my $last_key;
+ foreach $line (split /\n/, $metadata) {
+ # If we find a blank line or a reference to another block, process any
+ # data for the previous file first.
+ if ($line eq '' || $line =~ m/^@/) {
+ process_file(%info) if %info;
+ %info = ();
+ undef $last_key;
+ next if $line eq '';
+ }
+
+ # Recursively handle indirect metadata blocks.
+ if ($line =~ m/^@(\S+)$/) {
+ print "Indirect: $1\n" if $VERBOSE;
+ my $indirect = load_ref($1);
+ process_metadata($indirect, $recursion_level + 1);
+ next;
+ }
+
+ # Try to parse the data as "key: value" pairs of file metadata. Also
+ # handle continuation lines, which start with whitespace and continue
+ # the previous "key: value" pair.
+ if ($line =~ m/^(\w+):\s*(.*)$/) {
+ $info{$1} = $2;
+ $last_key = $1;
+ } elsif ($line =~/^\s/ && defined $last_key) {
+ $info{$last_key} .= $line;
+ } else {
+ print STDERR "Junk in file metadata section: $line\n";
+ }
+ }
+
+ # Process any last file metadata which has not already been processed.
+ process_file(%info) if %info;
+}
+