-"""High-level interface for working with LBS archives.
+"""High-level interface for working with Cumulus archives.
This module provides an easy interface for reading from and manipulating
-various parts of an LBS archive:
+various parts of a Cumulus archive:
- listing the snapshots and segments present
- reading segment contents
- parsing snapshot descriptors and snapshot metadata logs
"""
from __future__ import division
-import os, re, sha, tarfile, tempfile, thread
+import hashlib, os, re, tarfile, tempfile, thread
from pysqlite2 import dbapi2 as sqlite3
import cumulus.store, cumulus.store.file
# The largest supported snapshot format that can be understood.
-FORMAT_VERSION = (0, 8) # LBS Snapshot v0.8
+FORMAT_VERSION = (0, 11) # Cumulus Snapshot v0.11
# Maximum number of nested indirect references allowed in a snapshot.
MAX_RECURSION_DEPTH = 3
# All segments which have been accessed this session.
accessed_segments = set()
+# Table of methods used to filter segments before storage, and corresponding
+# filename extensions. Each entry is (extension, command): the command is the
+# one that undoes the filter, run with the stored segment file on its standard
+# input while the recovered tar stream is read from its standard output.
+# These are listed in priority order (methods earlier in the list are tried
+# first).
+SEGMENT_FILTERS = [
+ (".gpg", "cumulus-filter-gpg --decrypt"),
+ (".gz", "gzip -dc"),
+ (".bz2", "bzip2 -dc"),
+]
+
+def uri_decode(s):
+    """Decode a URI-encoded (%xx escapes) string.
+
+    Each "%" followed by two hexadecimal digits is replaced by the character
+    with that code. Both lowercase and uppercase hex digits are accepted:
+    uri_encode only emits lowercase, but "%2f" and "%2F" denote the same
+    character. Text that does not form a complete escape is left unchanged.
+    """
+    def hex_decode(m):
+        return chr(int(m.group(1), 16))
+    return re.sub(r"%([0-9a-fA-F]{2})", hex_decode, s)
+def uri_encode(s):
+    """Encode a string to URI-encoded (%xx escapes) form.
+
+    Characters strictly between '+' and DEL (0x7f), other than '@', are
+    copied through unchanged. Every other character is replaced by "%"
+    followed by its code in lowercase hex, zero-padded to two digits.
+    """
+    def escape(ch):
+        # Anything outside the pass-through range, plus '@', gets escaped.
+        if ch <= '+' or ch >= '\x7f' or ch == '@':
+            return "%%%02x" % (ord(ch),)
+        return ch
+    return ''.join([escape(ch) for ch in s])
+
class Struct:
"""A class which merely acts as a data container.
return "<%s %s>" % (self.__class__, self.__dict__)
+# Maps a checksum algorithm name to the hashlib constructor implementing it.
CHECKSUM_ALGORITHMS = {
- 'sha1': sha.new
+ 'sha1': hashlib.sha1,
+ 'sha256': hashlib.sha256,
}
class ChecksumCreator:
- """Compute an LBS checksum for provided data.
+ """Compute a Cumulus checksum for provided data.
The algorithm used is selectable, but currently defaults to sha1.
"""
"""
def __init__(self, path):
- self.store = cumulus.store.file.FileStore(path)
+        # Three kinds of argument are accepted: an existing Store instance is
+        # used directly, a string containing ":" is handed to
+        # cumulus.store.open(), and anything else is passed to FileStore.
+        if isinstance(path, cumulus.store.Store):
+            backend = path
+        elif ":" in path:
+            backend = cumulus.store.open(path)
+        else:
+            backend = cumulus.store.file.FileStore(path)
+        self.store = backend
def _classify(self, filename):
for (t, r) in cumulus.store.type_patterns.items():
return (t, filename)
return (None, filename)
+ def scan(self):
+ """Delegate to the scan() method of the backing store."""
+ self.store.scan()
+
def lowlevel_open(self, filename):
"""Return a file-like object for reading data from the given file."""
def get_segment(self, segment):
accessed_segments.add(segment)
-raw = self.store.lowlevel_open(segment + ".tar.gpg")
-(input, output) = os.popen2("lbs-filter-gpg --decrypt")
-def copy_thread(src, dst):
- BLOCK_SIZE = 4096
- while True:
- block = src.read(BLOCK_SIZE)
- if len(block) == 0: break
- dst.write(block)
- dst.close()
+        # Locate the stored file for this segment by trying each known
+        # extension in priority order.  For the first one that opens, start
+        # the matching decoding command, feed it the raw file from a
+        # background thread, and return the command's standard output (a
+        # file-like object yielding the decoded segment).  If no extension
+        # works, cumulus.store.NotFoundError is raised.
+        for (extension, command) in SEGMENT_FILTERS:
+            try:
+                raw = self.store.lowlevel_open(segment + ".tar" + extension)
+
+                (child_stdin, child_stdout) = os.popen2(command)
+                def copy_thread(src, dst):
+                    BLOCK_SIZE = 4096
+                    try:
+                        while True:
+                            block = src.read(BLOCK_SIZE)
+                            if len(block) == 0: break
+                            dst.write(block)
+                    finally:
+                        # Always signal end-of-input to the filter, even if
+                        # the copy fails part-way, so whoever reads its
+                        # output is not left blocked forever.
+                        dst.close()
+
+                thread.start_new_thread(copy_thread, (raw, child_stdin))
+                return child_stdout
+            except Exception:
+                # Best effort: fall through to the next extension.  Unlike a
+                # bare "except:", this still lets KeyboardInterrupt and
+                # SystemExit propagate to the caller.
+                pass
-thread.start_new_thread(copy_thread, (raw, input))
-return output
+        raise cumulus.store.NotFoundError
def load_segment(self, segment):
seg = tarfile.open(segment, 'r|', self.get_segment(segment))
def parse_metadata_version(s):
"""Convert a string with the snapshot version format to a tuple."""
- m = re.match(r"^LBS Snapshot v(\d+(\.\d+)*)$", s)
+ m = re.match(r"^(?:Cumulus|LBS) Snapshot v(\d+(\.\d+)*)$", s)
if m is None:
return ()
else:
@staticmethod
def decode_str(s):
"""Decode a URI-encoded (%xx escapes) string."""
-def hex_decode(m): return chr(int(m.group(1), 16))
-return re.sub(r"%([0-9a-f]{2})", hex_decode, s)
+ # Thin wrapper: the decoding is done by the module-level uri_decode().
+ return uri_decode(s)
@staticmethod
def raw_str(s):
"""
# The expired column of the block_index table is used when generating a
- # new LBS snapshot. A null value indicates that an object may be
+ # new Cumulus snapshot. A null value indicates that an object may be
# re-used. Otherwise, an object must be written into a new segment if
# needed. Objects with distinct expired values will be written into
# distinct segments, to allow for some grouping by age. The value 0 is