X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=python%2Fcumulus%2F__init__.py;h=c5961c71601a527d921b26230d98081d26f86a96;hb=e6a86b518f7336f2ef49a0fd22ca293b84c7fa00;hp=4b7bd6caf3ed40bdf89f9dc94c9db4e680bc8394;hpb=8637c84e5657ec8aa677091a1cde4c2fe666cbfb;p=cumulus.git diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py index 4b7bd6c..c5961c7 100644 --- a/python/cumulus/__init__.py +++ b/python/cumulus/__init__.py @@ -28,11 +28,13 @@ various parts of a Cumulus archive: from __future__ import division, print_function, unicode_literals +import codecs import hashlib import itertools import os import re import sqlite3 +import subprocess import sys import tarfile import tempfile @@ -44,7 +46,7 @@ except ImportError: import cumulus.store import cumulus.store.file -if sys.version < '3': +if sys.version < "3": StringTypes = (str, unicode) else: StringTypes = (str,) @@ -68,6 +70,12 @@ SEGMENT_FILTERS = [ ("", None), ] +def to_lines(data): + """Decode binary data from a file into a sequence of lines. + + Newline markers are retained.""" + return list(codecs.iterdecode(data.splitlines(True), "utf-8")) + def uri_decode(s): """Decode a URI-encoded (%xx escapes) string.""" def hex_decode(m): return chr(int(m.group(1), 16)) @@ -366,13 +374,15 @@ class CumulusStore: def load_snapshot(self, snapshot): snapshot_file = self.backend.open_snapshot(snapshot)[0] - return snapshot_file.read().splitlines(True) + return to_lines(snapshot_file.read()) @staticmethod def filter_data(filehandle, filter_cmd): if filter_cmd is None: return filehandle - (input, output) = os.popen2(filter_cmd) + p = subprocess.Popen(filter_cmd, shell=True, stdin=subprocess.PIPE, + stdout=subprocess.PIPE, close_fds=True) + input, output = p.stdin, p.stdout def copy_thread(src, dst): BLOCK_SIZE = 4096 while True: @@ -381,6 +391,7 @@ class CumulusStore: dst.write(block) src.close() dst.close() + p.wait() _thread.start_new_thread(copy_thread, (filehandle, input)) return output @@ -513,7 +524,7 @@ def read_metadata(object_store, root): def follow_ref(refstr): if len(stack) >= MAX_RECURSION_DEPTH: raise OverflowError - lines = object_store.get(refstr).splitlines(True) + lines = to_lines(object_store.get(refstr)) lines.reverse() stack.append(lines)