X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=python%2Fcumulus%2F__init__.py;h=8dc4c9875e0f77ef95232f866f37a28e7ae9949e;hb=a5f66616b1ec0c38328ad5131bf1c889ccc43659;hp=e8fc538d93f16df245223601751aaa02d8215da4;hpb=5de8b48a12b5698dcc7a66ee459c2dca04a81b74;p=cumulus.git diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py index e8fc538..8dc4c98 100644 --- a/python/cumulus/__init__.py +++ b/python/cumulus/__init__.py @@ -26,15 +26,15 @@ various parts of a Cumulus archive: - reading and maintaining the local object database """ -from __future__ import division + import hashlib import itertools import os import re +import sqlite3 import tarfile import tempfile -import thread -from pysqlite2 import dbapi2 as sqlite3 +import _thread import cumulus.store import cumulus.store.file @@ -55,6 +55,7 @@ SEGMENT_FILTERS = [ (".gpg", "cumulus-filter-gpg --decrypt"), (".gz", "gzip -dc"), (".bz2", "bzip2 -dc"), + ("", None), ] def uri_decode(s): @@ -204,18 +205,21 @@ class SearchPath(object): continue raise cumulus.store.NotFoundError(basename) + def match(self, filename): + return self._regex.match(filename) + def list(self, backend): success = False for d in self.directories(): try: for f in backend.list(d): success = True - m = self._regex.match(f) + m = self.match(f) if m: yield (os.path.join(d, f), m) except cumulus.store.NotFoundError: pass if not success: - raise cumulus.store.NotFoundError(basename) + raise cumulus.store.NotFoundError(backend) def _build_segments_searchpath(prefix): for (extension, filter) in SEGMENT_FILTERS: @@ -227,9 +231,12 @@ SEARCH_PATHS = { [SearchPathEntry("meta", ".sha1sums"), SearchPathEntry("checksums", ".sha1sums"), SearchPathEntry("", ".sha1sums")]), + "meta": SearchPath( + r"^snapshot-(.*)\.meta(\.\S+)?$", + _build_segments_searchpath("meta")), "segments": SearchPath( (r"^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" - r"(\.\S+)?$"), + r"\.tar(\.\S+)?$"), itertools.chain( _build_segments_searchpath("segments0"), _build_segments_searchpath("segments1"), @@ -257,7 +264,7 @@ class BackendWrapper(object): store may either be a Store object or URL. """ - if type(backend) in (str, unicode): + if type(backend) in (str, str): if backend.find(":") >= 0: self._backend = cumulus.store.open(backend) else: @@ -285,6 +292,15 @@ class BackendWrapper(object): return ((x[1].group(1), x[0]) for x in SEARCH_PATHS[filetype].list(self._backend)) + def prefetch_generic(self): + """Calls scan on directories to prefetch file metadata.""" + directories = set() + for typeinfo in list(SEARCH_PATHS.values()): + directories.update(typeinfo.directories()) + for d in directories: + print("Prefetch", d) + self._backend.scan(d) + class CumulusStore: def __init__(self, backend): if isinstance(backend, BackendWrapper): @@ -345,12 +361,10 @@ class CumulusStore: snapshot_file = self.backend.open_snapshot(snapshot)[0] return snapshot_file.read().splitlines(True) - def get_segment(self, segment): - accessed_segments.add(segment) - - (segment_fp, path, filter_cmd) = self.backend.open_segment(segment) + @staticmethod + def filter_data(filehandle, filter_cmd): if filter_cmd is None: - return segment_fp + return filehandle (input, output) = os.popen2(filter_cmd) def copy_thread(src, dst): BLOCK_SIZE = 4096 @@ -360,9 +374,15 @@ class CumulusStore: dst.write(block) src.close() dst.close() - thread.start_new_thread(copy_thread, (segment_fp, input)) + _thread.start_new_thread(copy_thread, (filehandle, input)) return output + def get_segment(self, segment): + accessed_segments.add(segment) + + (segment_fp, path, filter_cmd) = self.backend.open_segment(segment) + return self.filter_data(segment_fp, filter_cmd) + def load_segment(self, segment): seg = tarfile.open(segment, 'r|', self.get_segment(segment)) for item in seg: @@ -419,6 +439,9 @@ class CumulusStore: return data + def prefetch(self): + self.backend.prefetch_generic() + def parse(lines, terminate=None): """Generic parser for RFC822-style "Key: Value" data streams. @@ -458,7 +481,7 @@ def parse(lines, terminate=None): def parse_full(lines): try: - return parse(lines).next() + return next(parse(lines)) except StopIteration: return {} @@ -543,7 +566,7 @@ class MetadataItem: @staticmethod def decode_device(s): """Decode a device major/minor number.""" - (major, minor) = map(MetadataItem.decode_int, s.split("/")) + (major, minor) = list(map(MetadataItem.decode_int, s.split("/"))) return (major, minor) class Items: pass @@ -555,7 +578,7 @@ class MetadataItem: self.object_store = object_store self.keys = [] self.items = self.Items() - for (k, v) in fields.items(): + for (k, v) in list(fields.items()): if k in self.field_types: decoder = self.field_types[k] setattr(self.items, k, decoder(v)) @@ -713,7 +736,7 @@ class LocalDatabase: can_delete = True if can_delete and not first: - print "Delete snapshot %d (%s)" % (id, name) + print("Delete snapshot %d (%s)" % (id, name)) cur.execute("delete from snapshots where snapshotid = ?", (id,)) first = False @@ -919,11 +942,11 @@ class LocalDatabase: target_size = max(2 * segment_size_estimate, total_bytes / target_buckets) - print "segment_size:", segment_size_estimate - print "distribution:", distribution - print "total_bytes:", total_bytes - print "target_buckets:", target_buckets - print "min, target size:", min_size, target_size + print("segment_size:", segment_size_estimate) + print("distribution:", distribution) + print("total_bytes:", total_bytes) + print("target_buckets:", target_buckets) + print("min, target size:", min_size, target_size) # Chosen cutoffs. Each bucket consists of objects with age greater # than one cutoff value, but not greater than the next largest cutoff. @@ -953,7 +976,7 @@ class LocalDatabase: cutoffs.append(-1) cutoffs.append(-1) - print "cutoffs:", cutoffs + print("cutoffs:", cutoffs) # Update the database to assign each object to the appropriate bucket. cutoffs.reverse()