X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=python%2Fcumulus%2F__init__.py;h=3f6b6f7104b6ba664e3236c6591e7ed0dd55e882;hb=e8f99f8ccafa3c0c08e370413f0e4d6142582fb4;hp=51d3ee82f49ec0e072714d343f1e9b38cd552ac3;hpb=2ee97034047db53780a52d803b1c577b4c23c303;p=cumulus.git diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py index 51d3ee8..3f6b6f7 100644 --- a/python/cumulus/__init__.py +++ b/python/cumulus/__init__.py @@ -1,7 +1,7 @@ -"""High-level interface for working with LBS archives. +"""High-level interface for working with Cumulus archives. This module provides an easy interface for reading from and manipulating -various parts of an LBS archive: +various parts of a Cumulus archive: - listing the snapshots and segments present - reading segment contents - parsing snapshot descriptors and snapshot metadata logs @@ -9,13 +9,13 @@ various parts of an LBS archive: """ from __future__ import division -import os, re, sha, tarfile, tempfile, thread +import hashlib, os, re, tarfile, tempfile, thread from pysqlite2 import dbapi2 as sqlite3 import cumulus.store, cumulus.store.file # The largest supported snapshot format that can be understood. -FORMAT_VERSION = (0, 8) # LBS Snapshot v0.8 +FORMAT_VERSION = (0, 11) # Cumulus Snapshot v0.11 # Maximum number of nested indirect references allowed in a snapshot. MAX_RECURSION_DEPTH = 3 @@ -23,6 +23,28 @@ MAX_RECURSION_DEPTH = 3 # All segments which have been accessed this session. accessed_segments = set() +# Table of methods used to filter segments before storage, and corresponding +# filename extensions. These are listed in priority order (methods earlier in +# the list are tried first). +SEGMENT_FILTERS = [ + (".gpg", "cumulus-filter-gpg --decrypt"), + (".gz", "gzip -dc"), + (".bz2", "bzip2 -dc"), +] + +def uri_decode(s): + """Decode a URI-encoded (%xx escapes) string.""" + def hex_decode(m): return chr(int(m.group(1), 16)) + return re.sub(r"%([0-9a-f]{2})", hex_decode, s) +def uri_encode(s): + """Encode a string to URI-encoded (%xx escapes) form.""" + def hex_encode(c): + if c > '+' and c < '\x7f' and c != '@': + return c + else: + return "%%%02x" % (ord(c),) + return ''.join(hex_encode(c) for c in s) + class Struct: """A class which merely acts as a data container. @@ -34,11 +56,12 @@ class Struct: return "<%s %s>" % (self.__class__, self.__dict__) CHECKSUM_ALGORITHMS = { - 'sha1': sha.new + 'sha1': hashlib.sha1, + 'sha256': hashlib.sha256, } class ChecksumCreator: - """Compute an LBS checksum for provided data. + """Compute a Cumulus checksum for provided data. The algorithm used is selectable, but currently defaults to sha1. """ @@ -84,7 +107,9 @@ class LowlevelDataStore: """ def __init__(self, path): - if path.find(":") >= 0: + if isinstance(path, cumulus.store.Store): + self.store = path + elif path.find(":") >= 0: self.store = cumulus.store.open(path) else: self.store = cumulus.store.file.FileStore(path) @@ -95,6 +120,9 @@ class LowlevelDataStore: return (t, filename) return (None, filename) + def scan(self): + self.store.scan() + def lowlevel_open(self, filename): """Return a file-like object for reading data from the given file.""" @@ -171,19 +199,26 @@ class ObjectStore: def get_segment(self, segment): accessed_segments.add(segment) - raw = self.store.lowlevel_open(segment + ".tar.gpg") - (input, output) = os.popen2("lbs-filter-gpg --decrypt") - def copy_thread(src, dst): - BLOCK_SIZE = 4096 - while True: - block = src.read(BLOCK_SIZE) - if len(block) == 0: break - dst.write(block) - dst.close() + for (extension, filter) in SEGMENT_FILTERS: + try: + raw = self.store.lowlevel_open(segment + ".tar" + extension) + + (input, output) = os.popen2(filter) + def copy_thread(src, dst): + BLOCK_SIZE = 4096 + while True: + block = src.read(BLOCK_SIZE) + if len(block) == 0: break + dst.write(block) + dst.close() + + thread.start_new_thread(copy_thread, (raw, input)) + return output + except: + pass - thread.start_new_thread(copy_thread, (raw, input)) - return output + raise cumulus.store.NotFoundError def load_segment(self, segment): seg = tarfile.open(segment, 'r|', self.get_segment(segment)) @@ -290,7 +325,7 @@ def parse_full(lines): def parse_metadata_version(s): """Convert a string with the snapshot version format to a tuple.""" - m = re.match(r"^LBS Snapshot v(\d+(\.\d+)*)$", s) + m = re.match(r"^(?:Cumulus|LBS) Snapshot v(\d+(\.\d+)*)$", s) if m is None: return () else: @@ -347,8 +382,7 @@ class MetadataItem: @staticmethod def decode_str(s): """Decode a URI-encoded (%xx escapes) string.""" - def hex_decode(m): return chr(int(m.group(1), 16)) - return re.sub(r"%([0-9a-f]{2})", hex_decode, s) + return uri_decode(s) @staticmethod def raw_str(s): @@ -649,7 +683,7 @@ class LocalDatabase: """ # The expired column of the block_index table is used when generating a - # new LBS snapshot. A null value indicates that an object may be + # new Cumulus snapshot. A null value indicates that an object may be # re-used. Otherwise, an object must be written into a new segment if # needed. Objects with distinct expired values will be written into # distinct segments, to allow for some grouping by age. The value 0 is