projects
/
cumulus.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Improve parsing performance.
[cumulus.git]
/
python
/
cumulus
/
__init__.py
diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py
index 0e39d37..b351658 100644 (file)
--- a/python/cumulus/__init__.py
+++ b/python/cumulus/__init__.py
@@ -28,11 +28,14 @@ various parts of a Cumulus archive:
from __future__ import division, print_function, unicode_literals
from __future__ import division, print_function, unicode_literals
+import codecs
import hashlib
import itertools
import os
import hashlib
import itertools
import os
+import posixpath
import re
import sqlite3
import re
import sqlite3
+import subprocess
import sys
import tarfile
import tempfile
import sys
import tarfile
import tempfile
@@ -44,7 +47,7 @@ except ImportError:
import cumulus.store
import cumulus.store.file
import cumulus.store
import cumulus.store.file
-if sys.version < '3':
+if sys.version < "3":
StringTypes = (str, unicode)
else:
StringTypes = (str,)
StringTypes = (str, unicode)
else:
StringTypes = (str,)
@@ -68,6 +71,12 @@ SEGMENT_FILTERS = [
("", None),
]
("", None),
]
+def to_lines(data):
+ """Decode binary data from a file into a sequence of lines.
+
+ Newline markers are retained."""
+ return list(codecs.iterdecode(data.splitlines(True), "utf-8"))
+
def uri_decode(s):
"""Decode a URI-encoded (%xx escapes) string."""
def hex_decode(m): return chr(int(m.group(1), 16))
def uri_decode(s):
"""Decode a URI-encoded (%xx escapes) string."""
def hex_decode(m): return chr(int(m.group(1), 16))
@@ -160,7 +169,7 @@ class SearchPathEntry(object):
and context is any additional data associated with this search entry
(if any).
"""
and context is any additional data associated with this search entry
(if any).
"""
- return (os.path.join(self._directory_prefix, basename + self._suffix),
+ return (posixpath.join(self._directory_prefix, basename + self._suffix),
self._context)
class SearchPath(object):
self._context)
class SearchPath(object):
@@ -225,7 +234,7 @@ class SearchPath(object):
for f in backend.list(d):
success = True
m = self.match(f)
for f in backend.list(d):
success = True
m = self.match(f)
- if m: yield (os.path.join(d, f), m)
+ if m: yield (posixpath.join(d, f), m)
except cumulus.store.NotFoundError:
pass
if not success:
except cumulus.store.NotFoundError:
pass
if not success:
@@ -275,10 +284,7 @@ class BackendWrapper(object):
store may either be a Store object or URL.
"""
if type(backend) in StringTypes:
store may either be a Store object or URL.
"""
if type(backend) in StringTypes:
- if backend.find(":") >= 0:
- self._backend = cumulus.store.open(backend)
- else:
- self._backend = cumulus.store.file.FileStore(backend)
+ self._backend = cumulus.store.open(backend)
else:
self._backend = backend
else:
self._backend = backend
@@ -338,7 +344,7 @@ class CumulusStore:
if m:
return ("zero", None, None, (0, int(m.group(1)), False))
if m:
return ("zero", None, None, (0, int(m.group(1)), False))
- m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(((\d+)\+)?(\d+)|=(\d+))\])?$", refstr)
+ m = re.match(r"^([-0-9a-f]+)\/([0-9a-f]+)(\(\S+\))?(\[(=?(\d+)|(\d+)\+(\d+))\])?$", refstr)
if not m: return
segment = m.group(1)
if not m: return
segment = m.group(1)
@@ -350,12 +356,9 @@ class CumulusStore:
checksum = checksum.lstrip("(").rstrip(")")
if slice is not None:
checksum = checksum.lstrip("(").rstrip(")")
if slice is not None:
- if m.group(9) is not None:
+ if m.group(6) is not None:
# Size-assertion slice
# Size-assertion slice
- slice = (0, int(m.group(9)), True)
- elif m.group(6) is None:
- # Abbreviated slice
- slice = (0, int(m.group(8)), False)
+ slice = (0, int(m.group(6)), True)
else:
slice = (int(m.group(7)), int(m.group(8)), False)
else:
slice = (int(m.group(7)), int(m.group(8)), False)
@@ -369,13 +372,15 @@ class CumulusStore:
def load_snapshot(self, snapshot):
snapshot_file = self.backend.open_snapshot(snapshot)[0]
def load_snapshot(self, snapshot):
snapshot_file = self.backend.open_snapshot(snapshot)[0]
- return snapshot_file.read().splitlines(True)
+ return to_lines(snapshot_file.read())
@staticmethod
def filter_data(filehandle, filter_cmd):
if filter_cmd is None:
return filehandle
@staticmethod
def filter_data(filehandle, filter_cmd):
if filter_cmd is None:
return filehandle
- (input, output) = os.popen2(filter_cmd)
+ p = subprocess.Popen(filter_cmd, shell=True, stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE, close_fds=True)
+ input, output = p.stdin, p.stdout
def copy_thread(src, dst):
BLOCK_SIZE = 4096
while True:
def copy_thread(src, dst):
BLOCK_SIZE = 4096
while True:
@@ -384,6 +389,7 @@ class CumulusStore:
dst.write(block)
src.close()
dst.close()
dst.write(block)
src.close()
dst.close()
+ p.wait()
_thread.start_new_thread(copy_thread, (filehandle, input))
return output
_thread.start_new_thread(copy_thread, (filehandle, input))
return output
@@ -443,6 +449,9 @@ class CumulusStore:
if slice is not None:
(start, length, exact) = slice
if slice is not None:
(start, length, exact) = slice
+ # Note: The following assertion check may need to be commented out
+ # to restore from pre-v0.8 snapshots, as the syntax for
+ # size-assertion slices has changed.
if exact and len(data) != length: raise ValueError
data = data[start:start+length]
if len(data) != length: raise IndexError
if exact and len(data) != length: raise ValueError
data = data[start:start+length]
if len(data) != length: raise IndexError
@@ -464,30 +473,33 @@ def parse(lines, terminate=None):
stop reading input lines.
"""
stop reading input lines.
"""
- dict = {}
+ result = {}
last_key = None
last_key = None
+ def make_result(result):
+ return dict((k, "".join(v)) for (k, v) in result.items())
+
for l in lines:
# Strip off a trailing newline, if present
if len(l) > 0 and l[-1] == "\n":
l = l[:-1]
if terminate is not None and terminate(l):
for l in lines:
# Strip off a trailing newline, if present
if len(l) > 0 and l[-1] == "\n":
l = l[:-1]
if terminate is not None and terminate(l):
- if len(dict) > 0: yield dict
- dict = {}
+ if len(result) > 0: yield make_result(result)
+ result = {}
last_key = None
continue
m = re.match(r"^([-\w]+):\s*(.*)$", l)
if m:
last_key = None
continue
m = re.match(r"^([-\w]+):\s*(.*)$", l)
if m:
- dict[m.group(1)] = m.group(2)
+ result[m.group(1)] = [m.group(2)]
last_key = m.group(1)
elif len(l) > 0 and l[0].isspace() and last_key is not None:
last_key = m.group(1)
elif len(l) > 0 and l[0].isspace() and last_key is not None:
- dict[last_key] += l
+ result[last_key].append(l)
else:
last_key = None
else:
last_key = None
- if len(dict) > 0: yield dict
+ if len(result) > 0: yield make_result(result)
def parse_full(lines):
try:
def parse_full(lines):
try:
@@ -516,7 +528,7 @@ def read_metadata(object_store, root):
def follow_ref(refstr):
if len(stack) >= MAX_RECURSION_DEPTH: raise OverflowError
def follow_ref(refstr):
if len(stack) >= MAX_RECURSION_DEPTH: raise OverflowError
- lines = object_store.get(refstr).splitlines(True)
+ lines = to_lines(object_store.get(refstr))
lines.reverse()
stack.append(lines)
lines.reverse()
stack.append(lines)