3 LBS - Perl interface to Log-Structured Backup stores
12 our ($VERSION, @ISA, @EXPORT, @EXPORT_OK);
14 # Totally unstable API.
21 my $store = new LBS::Store;
29 @EXPORT_OK = qw(parse_headers);
33 This module makes it easy to write Perl scripts that work with backup snapshots
34 produced by LBS (the Log-Structured Backup System). Various LBS utilities will
39 use Carp qw(carp croak);
44 =head2 LBS::ChecksumVerifier
48 =item new LBS::ChecksumVerifier ( CHECKSUM )
50 Parse the checksum string C<CHECKSUM> and return an object which can be used to
51 verify the integrity of a piece of data. The data can be fed incrementally to
52 the returned object, and at the end a call can be made to see if the data
53 matches the originally-supplied checksum.
58 package LBS::ChecksumVerifier;
64 $self->{CHECKSUM} = shift;
66 if ($self->{CHECKSUM} !~ m/^(\w+)=([0-9a-f]+)$/) {
67 die "Malformed checksum: $self->{CHECKSUM}";
72 if ($algorithm ne 'sha1') {
73 die "Unsupported checksum algorithm: $algorithm";
76 $self->{DIGESTER} = new Digest::SHA1;
85 $self->{DIGESTER}->add($data);
90 my $newhash = $self->{DIGESTER}->hexdigest();
91 return ($self->{HASH} eq $newhash);
97 =item new LBS::Store ( DIRECTORY )
99 Construct a new Store object, which is used to get access to segments and
100 metadata files containing snapshots. The data may be stored in a local
101 directory, or might be fetched from a remote server. The constructor above is
102 for local access, and takes as a single argument the name of the directory
103 containing all files.
105 =item load_ref ( REFSTR )
107 Load the object contents referenced by the given C<REFSTR>. This will
108 automatically validate any object checksums.
114 use File::Temp qw(tempdir);
117 = '[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}';
122 $self->{DIR} = shift;
123 $self->{TMPDIR} = tempdir("lbs.XXXXXX", TMPDIR => 1);
125 print STDERR "### Tempdir is $self->{TMPDIR}\n";
127 $self->{EXTENSION} = ".bz2";
128 $self->{FILTER} = "bzip2 -dc";
130 $self->{CACHED} = [ ];
138 $self->_lru_clean(0);
139 print STDERR "### Cleaning temporary directory $self->{TMPDIR}\n";
140 system("rm", "-rf", $self->{TMPDIR});
146 my @cache = @{$self->{CACHED}};
147 @cache = grep { $_ ne $segment } @cache;
148 push @cache, $segment;
149 $self->{CACHED} = [ @cache ];
155 $limit = 16 unless defined($limit);
157 my @cache = @{$self->{CACHED}};
158 while (scalar @cache > $limit) {
159 my $segment = shift @cache;
160 my $dir = "$self->{TMPDIR}/$segment";
161 print STDERR "### Cleaning segment $segment\n";
162 system("rm", "-rf", $dir);
164 $self->{CACHED} = [ @cache ];
171 if (grep { $_ eq $segment } @{$self->{CACHED}}) {
172 $self->_lru_update($segment);
176 my $file = "$self->{DIR}/$segment.tar$self->{EXTENSION}";
177 die "Can't find segment $file" unless -f $file;
180 print STDERR "### Extracting segment $segment\n";
181 system("$self->{FILTER} <$file | tar -C $self->{TMPDIR} -xf -");
182 $self->_lru_update($segment);
185 # Load an object, without any support for object slicing or checksum
186 # verification. This method can be overridden by a subclass, and will be
187 # called by the full object reference parser below.
193 $self->_extract($segment);
194 my $file = "$self->{TMPDIR}/$segment/$object";
195 open OBJECT, "<", $file or die "Can't open file $file: $!";
196 my $contents = join '', <OBJECT>;
206 if ($ref !~ m/^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[\S+\])?$/) {
207 die "Malformed object reference: $ref";
210 my ($segment, $object, $checksum, $range) = ($1, $2, $3, $4);
212 my $contents = $self->load_object($segment, $object);
214 # If a checksum was specified in the object reference, verify the
215 # object integrity by computing a checksum of the read data and
218 $checksum =~ m/^\((\S+)\)$/;
219 my $verifier = new LBS::ChecksumVerifier($1);
220 $verifier->add($contents);
221 if (!$verifier->verify()) {
222 die "Integrity check for object $ref failed";
226 # If a range was specified, then only a subset of the bytes of the
227 # object are desired. Extract just the desired bytes.
229 if ($range !~ m/^\[(\d+)\+(\d+)\]$/) {
230 die "Malformed object range: $range";
233 my $object_size = length $contents;
234 my ($start, $length) = ($1 + 0, $2 + 0);
235 if ($start >= $object_size || $start + $length > $object_size) {
236 die "Object range $range falls outside object bounds "
237 . "(actual size $object_size)";
240 $contents = substr $contents, $start, $length;
249 opendir DIR, $self->{DIR};
250 foreach (readdir DIR) {
251 if (m/^($SEGMENT_PATTERN)(\.\S+)?$/) {
256 return sort keys %segments;
262 opendir DIR, $self->{DIR};
263 foreach (readdir DIR) {
264 if (m/^snapshot-(.*)\.lbs$/) {
269 return sort @snapshots;
274 my $snapshot = shift;
275 open SNAPSHOT, "$self->{DIR}/snapshot-$snapshot.lbs"
277 my $contents = join '', <SNAPSHOT>;
285 $self->_extract($segment);
286 opendir DIR, "$self->{TMPDIR}/$segment";
287 my @objects = grep { /[0-9a-f]{8}/ } readdir(DIR);
289 return sort @objects;
293 =head2 LBS::MetadataReader
295 =item new LBS::MetadataReader ( STORE, REF )
300 package LBS::MetadataReader;
306 $self->{STORE} = shift;
309 $self->{SPLIT_PATTERN} = $args{SPLIT} || '(?<=\n)';
313 if (exists $args{REF}) {
314 $self->{DATA} = [ $self->_read($args{REF}) ];
315 } elsif (exists $args{DATA}) {
316 my $pattern = $self->{SPLIT_PATTERN};
317 $self->{DATA} = [ split /$pattern/, $args{DATA} ];
319 die "Must specify REF or DATA argument!";
328 my $pattern = $self->{SPLIT_PATTERN};
329 print STDERR "### Reading from $ref\n";
330 my @pieces = split /$pattern/, $self->{STORE}->load_ref($ref);
334 # FIXME: Bound recursion.
339 if (!@{$self->{DATA}}) {
343 my $item = shift @{$self->{DATA}};
345 # Check for indirect references
346 if ($item =~ m/^@(\S*)/) {
347 unshift @{$self->{DATA}}, $self->_read($1);
355 =head2 LBS::MetadataParser
357 =item new LBS::MetadataParser ( STORE, REF )
362 package LBS::MetadataParser;
368 $self->{STORE} = shift;
370 $self->{READER} = new LBS::MetadataReader $self->{STORE}, REF => $ref;
382 $line = $self->{READER}->get();
383 chomp $line if defined $line;
384 while (defined($line) && $line ne "") {
385 if ($line =~ m/^(\w+):\s*(.*)$/) {
388 } elsif ($line =~/^\s/ && defined $last_key) {
389 $info{$last_key} .= $line;
391 print STDERR "Junk in file metadata section: $line\n";
394 $line = $self->{READER}->get();
398 # Perform a bit of post-processing on the "data" field, which might
399 # contain indirect references to blocks. Pull all the references
401 if (exists $info{data}) {
402 my $reader = new LBS::MetadataReader($self->{STORE},
406 while (($_ = $reader->get())) {
409 $info{data} = join " ", @blocks;
412 # Don't return an empty result unless we've hit end-of-file.
413 if (!%info && defined($line)) {
414 return $self->get_item();
421 # Parse an RFC822-style list of headers and return a dictionary with the
429 foreach $line (split /\n/, $data) {
430 if ($line =~ m/^(\w+):\s*(.*)$/) {
433 } elsif ($line =~/^\s/ && defined $last_key) {
434 $info{$last_key} .= $line;
437 print STDERR "Ignoring line in backup descriptor: $line\n";