X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=python%2Fcumulus%2F__init__.py;h=b3516583d607b30ce20a2637ca421615b6d7007f;hb=3cf2be27c9e54eaae6737adfb41f1fc86528aa84;hp=8dc4c9875e0f77ef95232f866f37a28e7ae9949e;hpb=a5f66616b1ec0c38328ad5131bf1c889ccc43659;p=cumulus.git

diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py
index 8dc4c98..b351658 100644
--- a/python/cumulus/__init__.py
+++ b/python/cumulus/__init__.py
@@ -26,19 +26,32 @@ various parts of a Cumulus archive:
   - reading and maintaining the local object database
 """
 
+from __future__ import division, print_function, unicode_literals
 
+import codecs
 import hashlib
 import itertools
 import os
+import posixpath
 import re
 import sqlite3
+import subprocess
+import sys
 import tarfile
 import tempfile
-import _thread
+try:
+    import _thread
+except ImportError:
+    import thread as _thread
 
 import cumulus.store
 import cumulus.store.file
 
+if sys.version_info[0] < 3:  # numeric check; string compare misorders "10"
+    StringTypes = (str, unicode)
+else:
+    StringTypes = (str,)
+
 # The largest supported snapshot format that can be understood.
 FORMAT_VERSION = (0, 11)        # Cumulus Snapshot v0.11
 
@@ -58,6 +71,12 @@ SEGMENT_FILTERS = [
     ("", None),
 ]
 
+def to_lines(data):
+    """Split binary file data into lines, then decode the lines as UTF-8.
+
+    Returns a list of text lines; newline markers are retained."""
+    return list(codecs.iterdecode(data.splitlines(True), "utf-8"))
+
 def uri_decode(s):
     """Decode a URI-encoded (%xx escapes) string."""
     def hex_decode(m): return chr(int(m.group(1), 16))
@@ -150,7 +169,7 @@ class SearchPathEntry(object):
         and context is any additional data associated with this search entry
         (if any).
         """
-        return (os.path.join(self._directory_prefix, basename + self._suffix),
+        return (posixpath.join(self._directory_prefix, basename + self._suffix),
                 self._context)
 
 class SearchPath(object):
@@ -215,7 +234,7 @@ class SearchPath(object):
                 for f in backend.list(d):
                     success = True
                     m = self.match(f)
-                    if m: yield (os.path.join(d, f), m)
+                    if m: yield (posixpath.join(d, f), m)
             except cumulus.store.NotFoundError:
                 pass
         if not success:
@@ -264,11 +283,8 @@ class BackendWrapper(object):
 
         store may either be a Store object or URL.
         """
-        if type(backend) in (str, str):
-            if backend.find(":") >= 0:
-                self._backend = cumulus.store.open(backend)
-            else:
-                self._backend = cumulus.store.file.FileStore(backend)
+        if type(backend) in StringTypes:
+            self._backend = cumulus.store.open(backend)
         else:
             self._backend = backend
 
@@ -295,7 +311,7 @@ class BackendWrapper(object):
     def prefetch_generic(self):
         """Calls scan on directories to prefetch file metadata."""
         directories = set()
-        for typeinfo in list(SEARCH_PATHS.values()):
+        for typeinfo in SEARCH_PATHS.values():
             directories.update(typeinfo.directories())
         for d in directories:
             print("Prefetch", d)
@@ -328,7 +344,7 @@ class CumulusStore:
         if m:
             return ("zero", None, None, (0, int(m.group(1)), False))
 
-        m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(((\d+)\+)?(\d+)|=(\d+))\])?$", refstr)
+        m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(=?(\d+)|(\d+)\+(\d+))\])?$", refstr)
         if not m: return
 
         segment = m.group(1)
@@ -340,12 +356,9 @@ class CumulusStore:
             checksum = checksum.lstrip("(").rstrip(")")
 
         if slice is not None:
-            if m.group(9) is not None:
+            if m.group(6) is not None:
                 # Size-assertion slice
-                slice = (0, int(m.group(9)), True)
-            elif m.group(6) is None:
-                # Abbreviated slice
-                slice = (0, int(m.group(8)), False)
+                slice = (0, int(m.group(6)), True)
             else:
                 slice = (int(m.group(7)), int(m.group(8)), False)
 
@@ -359,13 +372,15 @@ class CumulusStore:
 
     def load_snapshot(self, snapshot):
         snapshot_file = self.backend.open_snapshot(snapshot)[0]
-        return snapshot_file.read().splitlines(True)
+        return to_lines(snapshot_file.read())
 
     @staticmethod
     def filter_data(filehandle, filter_cmd):
         if filter_cmd is None:
             return filehandle
-        (input, output) = os.popen2(filter_cmd)
+        p = subprocess.Popen(filter_cmd, shell=True, stdin=subprocess.PIPE,
+                             stdout=subprocess.PIPE, close_fds=True)
+        input, output = p.stdin, p.stdout
         def copy_thread(src, dst):
             BLOCK_SIZE = 4096
             while True:
@@ -374,6 +389,7 @@ class CumulusStore:
                 dst.write(block)
             src.close()
             dst.close()
+            p.wait()
         _thread.start_new_thread(copy_thread, (filehandle, input))
         return output
 
@@ -433,6 +449,9 @@ class CumulusStore:
 
         if slice is not None:
             (start, length, exact) = slice
+            # Note: The following assertion check may need to be commented out
+            # to restore from pre-v0.8 snapshots, as the syntax for
+            # size-assertion slices has changed.
             if exact and len(data) != length: raise ValueError
             data = data[start:start+length]
             if len(data) != length: raise IndexError
@@ -454,30 +473,33 @@ def parse(lines, terminate=None):
     stop reading input lines.
     """
 
-    dict = {}
+    result = {}
     last_key = None
 
+    def make_result(result):
+        return dict((k, "".join(v)) for (k, v) in result.items())
+
     for l in lines:
         # Strip off a trailing newline, if present
         if len(l) > 0 and l[-1] == "\n":
             l = l[:-1]
 
         if terminate is not None and terminate(l):
-            if len(dict) > 0: yield dict
-            dict = {}
+            if len(result) > 0: yield make_result(result)
+            result = {}
             last_key = None
             continue
 
         m = re.match(r"^([-\w]+):\s*(.*)$", l)
         if m:
-            dict[m.group(1)] = m.group(2)
+            result[m.group(1)] = [m.group(2)]
             last_key = m.group(1)
         elif len(l) > 0 and l[0].isspace() and last_key is not None:
-            dict[last_key] += l
+            result[last_key].append(l)
         else:
             last_key = None
 
-    if len(dict) > 0: yield dict
+    if len(result) > 0: yield make_result(result)
 
 def parse_full(lines):
     try:
@@ -506,7 +528,7 @@ def read_metadata(object_store, root):
 
     def follow_ref(refstr):
         if len(stack) >= MAX_RECURSION_DEPTH: raise OverflowError
-        lines = object_store.get(refstr).splitlines(True)
+        lines = to_lines(object_store.get(refstr))
         lines.reverse()
         stack.append(lines)
 
@@ -566,7 +588,7 @@ class MetadataItem:
     @staticmethod
     def decode_device(s):
         """Decode a device major/minor number."""
-        (major, minor) = list(map(MetadataItem.decode_int, s.split("/")))
+        (major, minor) = map(MetadataItem.decode_int, s.split("/"))
         return (major, minor)
 
     class Items: pass
@@ -578,7 +600,7 @@ class MetadataItem:
         self.object_store = object_store
         self.keys = []
         self.items = self.Items()
-        for (k, v) in list(fields.items()):
+        for (k, v) in fields.items():
             if k in self.field_types:
                 decoder = self.field_types[k]
                 setattr(self.items, k, decoder(v))