Improve parsing performance.

[cumulus.git] / python / cumulus / __init__.py
diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py

index c5961c7..b351658 100644 (file)
--- a/python/cumulus/__init__.py
+++ b/python/cumulus/__init__.py
@@ -32,6 +32,7 @@ import codecs
  import hashlib
  import itertools
  import os
+import posixpath
  import re
  import sqlite3
  import subprocess
@@ -168,7 +169,7 @@ class SearchPathEntry(object):
          and context is any additional data associated with this search entry
          (if any).
          """
-        return (os.path.join(self._directory_prefix, basename + self._suffix),
+        return (posixpath.join(self._directory_prefix, basename + self._suffix),
                  self._context)
  
  class SearchPath(object):
@@ -233,7 +234,7 @@ class SearchPath(object):
                  for f in backend.list(d):
                      success = True
                      m = self.match(f)
-                    if m: yield (os.path.join(d, f), m)
+                    if m: yield (posixpath.join(d, f), m)
              except cumulus.store.NotFoundError:
                  pass
          if not success:
@@ -343,7 +344,7 @@ class CumulusStore:
          if m:
              return ("zero", None, None, (0, int(m.group(1)), False))
  
-        m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(((\d+)\+)?(\d+)|=(\d+))\])?$", refstr)
+        m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(=?(\d+)|(\d+)\+(\d+))\])?$", refstr)
          if not m: return
  
          segment = m.group(1)
@@ -355,12 +356,9 @@ class CumulusStore:
              checksum = checksum.lstrip("(").rstrip(")")
  
          if slice is not None:
-            if m.group(9) is not None:
+            if m.group(6) is not None:
                  # Size-assertion slice
-                slice = (0, int(m.group(9)), True)
-            elif m.group(6) is None:
-                # Abbreviated slice
-                slice = (0, int(m.group(8)), False)
+                slice = (0, int(m.group(6)), True)
              else:
                  slice = (int(m.group(7)), int(m.group(8)), False)
  
@@ -451,6 +449,9 @@ class CumulusStore:
  
          if slice is not None:
              (start, length, exact) = slice
+            # Note: The "exact" check below may need to be commented out to
+            # restore pre-v0.8 snapshots: their abbreviated "[N]" slices now
+            # parse as size assertions and can fail here with ValueError.
              if exact and len(data) != length: raise ValueError
              data = data[start:start+length]
              if len(data) != length: raise IndexError
@@ -472,30 +473,33 @@ def parse(lines, terminate=None):
      stop reading input lines.
      """
  
-    dict = {}
+    result = {}
      last_key = None
  
+    def make_result(result):
+        return dict((k, "".join(v)) for (k, v) in result.items())
+
      for l in lines:
          # Strip off a trailing newline, if present
          if len(l) > 0 and l[-1] == "\n":
              l = l[:-1]
  
          if terminate is not None and terminate(l):
-            if len(dict) > 0: yield dict
-            dict = {}
+            if len(result) > 0: yield make_result(result)
+            result = {}
              last_key = None
              continue
  
          m = re.match(r"^([-\w]+):\s*(.*)$", l)
          if m:
-            dict[m.group(1)] = m.group(2)
+            result[m.group(1)] = [m.group(2)]
              last_key = m.group(1)
          elif len(l) > 0 and l[0].isspace() and last_key is not None:
-            dict[last_key] += l
+            result[last_key].append(l)
          else:
              last_key = None
  
-    if len(dict) > 0: yield dict
+    if len(result) > 0: yield make_result(result)
  
  def parse_full(lines):
      try: