X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=python%2Fcumulus%2F__init__.py;h=c53a78aca32650ecabd23fa704cd17ac05829fe8;hb=refs%2Fheads%2Fmaster;hp=0e39d37006b88479824de84de4f4cf134d383d39;hpb=567bd6a883eaa54d10ca61d0b6083da09bf48085;p=cumulus.git diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py index 0e39d37..c53a78a 100644 --- a/python/cumulus/__init__.py +++ b/python/cumulus/__init__.py @@ -28,11 +28,15 @@ various parts of a Cumulus archive: from __future__ import division, print_function, unicode_literals +import codecs import hashlib import itertools import os +import posixpath import re +import six import sqlite3 +import subprocess import sys import tarfile import tempfile @@ -43,11 +47,7 @@ except ImportError: import cumulus.store import cumulus.store.file - -if sys.version < '3': - StringTypes = (str, unicode) -else: - StringTypes = (str,) +import cumulus.util # The largest supported snapshot format that can be understood. FORMAT_VERSION = (0, 11) # Cumulus Snapshot v0.11 @@ -68,18 +68,11 @@ SEGMENT_FILTERS = [ ("", None), ] -def uri_decode(s): - """Decode a URI-encoded (%xx escapes) string.""" - def hex_decode(m): return chr(int(m.group(1), 16)) - return re.sub(r"%([0-9a-f]{2})", hex_decode, s) -def uri_encode(s): - """Encode a string to URI-encoded (%xx escapes) form.""" - def hex_encode(c): - if c > '+' and c < '\x7f' and c != '@': - return c - else: - return "%%%02x" % (ord(c),) - return ''.join(hex_encode(c) for c in s) +def to_lines(data): + """Decode binary data from a file into a sequence of lines. + + Newline markers are retained.""" + return list(codecs.iterdecode(data.splitlines(True), "utf-8")) class Struct: """A class which merely acts as a data container. @@ -160,7 +153,7 @@ class SearchPathEntry(object): and context is any additional data associated with this search entry (if any). """ - return (os.path.join(self._directory_prefix, basename + self._suffix), + return (posixpath.join(self._directory_prefix, basename + self._suffix), self._context) class SearchPath(object): @@ -225,7 +218,7 @@ class SearchPath(object): for f in backend.list(d): success = True m = self.match(f) - if m: yield (os.path.join(d, f), m) + if m: yield (posixpath.join(d, f), m) except cumulus.store.NotFoundError: pass if not success: @@ -274,11 +267,8 @@ class BackendWrapper(object): store may either be a Store object or URL. """ - if type(backend) in StringTypes: - if backend.find(":") >= 0: - self._backend = cumulus.store.open(backend) - else: - self._backend = cumulus.store.file.FileStore(backend) + if isinstance(backend, six.string_types): + self._backend = cumulus.store.open(backend) else: self._backend = backend @@ -338,7 +328,7 @@ class CumulusStore: if m: return ("zero", None, None, (0, int(m.group(1)), False)) - m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(((\d+)\+)?(\d+)|=(\d+))\])?$", refstr) + m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(=?(\d+)|(\d+)\+(\d+))\])?$", refstr) if not m: return segment = m.group(1) @@ -350,12 +340,9 @@ class CumulusStore: checksum = checksum.lstrip("(").rstrip(")") if slice is not None: - if m.group(9) is not None: + if m.group(6) is not None: # Size-assertion slice - slice = (0, int(m.group(9)), True) - elif m.group(6) is None: - # Abbreviated slice - slice = (0, int(m.group(8)), False) + slice = (0, int(m.group(6)), True) else: slice = (int(m.group(7)), int(m.group(8)), False) @@ -369,13 +356,15 @@ class CumulusStore: def load_snapshot(self, snapshot): snapshot_file = self.backend.open_snapshot(snapshot)[0] - return snapshot_file.read().splitlines(True) + return to_lines(snapshot_file.read()) @staticmethod def filter_data(filehandle, filter_cmd): if filter_cmd is None: return filehandle - (input, output) = os.popen2(filter_cmd) + p = subprocess.Popen(filter_cmd, shell=True, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, close_fds=True) + input, output = p.stdin, p.stdout def copy_thread(src, dst): BLOCK_SIZE = 4096 while True: @@ -384,6 +373,7 @@ class CumulusStore: dst.write(block) src.close() dst.close() + p.wait() _thread.start_new_thread(copy_thread, (filehandle, input)) return output @@ -443,6 +433,9 @@ class CumulusStore: if slice is not None: (start, length, exact) = slice + # Note: The following assertion check may need to be commented out + # to restore from pre-v0.8 snapshots, as the syntax for + # size-assertion slices has changed. if exact and len(data) != length: raise ValueError data = data[start:start+length] if len(data) != length: raise IndexError @@ -464,30 +457,33 @@ def parse(lines, terminate=None): stop reading input lines. """ - dict = {} + result = {} last_key = None + def make_result(result): + return dict((k, "".join(v)) for (k, v) in result.items()) + for l in lines: # Strip off a trailing newline, if present if len(l) > 0 and l[-1] == "\n": l = l[:-1] if terminate is not None and terminate(l): - if len(dict) > 0: yield dict - dict = {} + if len(result) > 0: yield make_result(result) + result = {} last_key = None continue m = re.match(r"^([-\w]+):\s*(.*)$", l) if m: - dict[m.group(1)] = m.group(2) + result[m.group(1)] = [m.group(2)] last_key = m.group(1) elif len(l) > 0 and l[0].isspace() and last_key is not None: - dict[last_key] += l + result[last_key].append(l) else: last_key = None - if len(dict) > 0: yield dict + if len(result) > 0: yield make_result(result) def parse_full(lines): try: @@ -516,7 +512,7 @@ def read_metadata(object_store, root): def follow_ref(refstr): if len(stack) >= MAX_RECURSION_DEPTH: raise OverflowError - lines = object_store.get(refstr).splitlines(True) + lines = to_lines(object_store.get(refstr)) lines.reverse() stack.append(lines) @@ -555,7 +551,7 @@ class MetadataItem: @staticmethod def decode_str(s): """Decode a URI-encoded (%xx escapes) string.""" - return uri_decode(s) + return cumulus.util.uri_decode_pathname(s) @staticmethod def raw_str(s):