From: Michael Vrable Date: Wed, 23 Oct 2013 20:30:12 +0000 (-0700) Subject: Update backend code and cumulus-sync for the new backup layout. X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=5b69b1ec0eeba473dfd2c40de41075a49a147e93;p=cumulus.git Update backend code and cumulus-sync for the new backup layout. --- diff --git a/README b/README index cb0a0a7..1eba0ce 100644 --- a/README +++ b/README @@ -6,7 +6,7 @@ How to Build Dependencies: - libuuid (sometimes part of e2fsprogs) - sqlite3 - - Python (2.5 or later) + - Python (2.6 or later) - boto, the python interface to Amazon's Web Services (for S3 storage) http://code.google.com/p/boto - paramiko, SSH2 protocol for python (for sftp storage) diff --git a/cumulus-sync b/cumulus-sync index 8a474a5..19d4aea 100755 --- a/cumulus-sync +++ b/cumulus-sync @@ -43,14 +43,14 @@ for s in snapshots: items_required.add(s) d = cumulus.parse_full(source.load_snapshot(s)) items_required.update(d['Segments'].split()) -print "Required:", items_required +print "Required:", len(items_required) files_present = set() for filetype in cumulus.SEARCH_PATHS: for (name, path) in store2.list_generic(filetype): items_required.discard(name) files_present.add(path) -print "Files already present:", sorted(files_present) +print "Files already present:", len(sorted(files_present)) files_required = [] items_found = set() @@ -61,9 +61,6 @@ for filetype in cumulus.SEARCH_PATHS: items_found.add(name) files_required.sort() -print "Missing:", items_required.difference(items_found) -print "Required files:", files_required - -for f in files_required: - print f +for i, f in enumerate(files_required): + print "[%d/%d] %s" % (i + 1, len(files_required), f) store2.raw_backend.put(f, store1.raw_backend.get(f)) diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py index ef35325..02f978e 100644 --- a/python/cumulus/__init__.py +++ b/python/cumulus/__init__.py @@ -219,7 +219,7 @@ class SearchPath(object): except cumulus.store.NotFoundError: 
pass if not success: - raise cumulus.store.NotFoundError(basename) + raise cumulus.store.NotFoundError(backend) def _build_segments_searchpath(prefix): for (extension, filter) in SEGMENT_FILTERS: @@ -231,6 +231,9 @@ SEARCH_PATHS = { [SearchPathEntry("meta", ".sha1sums"), SearchPathEntry("checksums", ".sha1sums"), SearchPathEntry("", ".sha1sums")]), + "meta": SearchPath( + r"^snapshot-(.*)\.meta(\.\S+)?$", + _build_segments_searchpath("meta")), "segments": SearchPath( (r"^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})" r"\.tar(\.\S+)?$"), @@ -289,6 +292,15 @@ class BackendWrapper(object): return ((x[1].group(1), x[0]) for x in SEARCH_PATHS[filetype].list(self._backend)) + def prefetch_generic(self): + """Calls scan on directories to prefetch file metadata.""" + directories = set() + for typeinfo in SEARCH_PATHS.values(): + directories.update(typeinfo.directories()) + for d in directories: + print "Prefetch", d + self._backend.scan(d) + class CumulusStore: def __init__(self, backend): if isinstance(backend, BackendWrapper): @@ -427,6 +439,9 @@ class CumulusStore: return data + def prefetch(self): + self.backend.prefetch_generic() + def parse(lines, terminate=None): """Generic parser for RFC822-style "Key: Value" data streams. diff --git a/python/cumulus/cmd_util.py b/python/cumulus/cmd_util.py index 9d97190..2e163cf 100644 --- a/python/cumulus/cmd_util.py +++ b/python/cumulus/cmd_util.py @@ -85,6 +85,7 @@ def cmd_list_snapshot_sizes(args): """ store = cumulus.CumulusStore(options.store) backend = store.backend + backend.prefetch_generic() previous = set() size = 0 def get_size(segment): diff --git a/python/cumulus/store/__init__.py b/python/cumulus/store/__init__.py index 0899d70..7488b2f 100644 --- a/python/cumulus/store/__init__.py +++ b/python/cumulus/store/__init__.py @@ -70,7 +70,7 @@ class Store (object): def stat(self, path): raise NotImplementedError - def scan(self): + def scan(self, path): """Cache file information stored in this backend. 
This might make subsequent list or stat calls more efficient, but this diff --git a/python/cumulus/store/file.py b/python/cumulus/store/file.py index 8304401..e2da34a 100644 --- a/python/cumulus/store/file.py +++ b/python/cumulus/store/file.py @@ -29,9 +29,6 @@ class FileStore(cumulus.store.Store): self.path = url self.prefix = self.path.rstrip("/") - def _get_path(self, type, name): - return os.path.join(self.prefix, type, name) - def list(self, subdir): try: return os.listdir(os.path.join(self.prefix, subdir)) diff --git a/python/cumulus/store/s3.py b/python/cumulus/store/s3.py index 4ad403c..7d8aaaf 100644 --- a/python/cumulus/store/s3.py +++ b/python/cumulus/store/s3.py @@ -20,11 +20,25 @@ import os, sys, tempfile import boto +from boto.exception import S3ResponseError from boto.s3.bucket import Bucket from boto.s3.key import Key import cumulus.store +def throw_notfound(method): + """Decorator to convert a 404 error into a cumulus.store.NotFoundError.""" + def f(*args, **kwargs): + try: + return method(*args, **kwargs) + except S3ResponseError as e: + if e.status == 404: + print "Got a 404:", e + raise cumulus.store.NotFoundError(e) + else: + raise + return f + class S3Store(cumulus.store.Store): def __init__(self, url, **kw): # Old versions of the Python urlparse library will take a URL like @@ -44,39 +58,44 @@ class S3Store(cumulus.store.Store): self.prefix = prefix.strip("/") self.scan_cache = {} - def _get_key(self, type, name): + def _get_key(self, path): k = Key(self.bucket) - k.key = "%s/%s/%s" % (self.prefix, type, name) + k.key = "%s/%s" % (self.prefix, path) return k - def scan(self): - prefix = "%s/" % (self.prefix,) + @throw_notfound + def scan(self, path): + prefix = "%s/%s/" % (self.prefix, path) for i in self.bucket.list(prefix): assert i.key.startswith(prefix) self.scan_cache[i.key] = i - def list(self, type): - prefix = "%s/%s/" % (self.prefix, type) + @throw_notfound + def list(self, path): + prefix = "%s/%s/" % (self.prefix, path) for i in 
self.bucket.list(prefix): assert i.key.startswith(prefix) yield i.key[len(prefix):] - def get(self, type, name): + @throw_notfound + def get(self, path): fp = tempfile.TemporaryFile() - k = self._get_key(type, name) + k = self._get_key(path) k.get_file(fp) fp.seek(0) return fp - def put(self, type, name, fp): - k = self._get_key(type, name) + @throw_notfound + def put(self, path, fp): + k = self._get_key(path) k.set_contents_from_file(fp) - def delete(self, type, name): - self.bucket.delete_key("%s/%s/%s" % (self.prefix, type, name)) + @throw_notfound + def delete(self, path): + self.bucket.delete_key("%s/%s" % (self.prefix, path)) - def stat(self, type, name): - path = "%s/%s/%s" % (self.prefix, type, name) + def stat(self, path): + path = "%s/%s" % (self.prefix, path) if path in self.scan_cache: k = self.scan_cache[path] else: