From 92488d46a0deca6d031d07852c3b79214280ab6d Mon Sep 17 00:00:00 2001
From: Michael Vrable
Date: Sat, 30 May 2009 23:19:10 -0700
Subject: [PATCH] Implement metadata caching for S3 backend.

Amazon S3 will return some limited object metadata when a list operation
is performed.  This is significantly cheaper than fetching the
information for objects one at a time.  In the S3 backend, implement a
scan() method that will list all objects and cache the metadata, then
return cached results when stat() is called.
---
 python/cumulus/__init__.py       |  3 +++
 python/cumulus/store/__init__.py | 11 +++++++++++
 python/cumulus/store/s3.py       | 14 +++++++++++++-
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/python/cumulus/__init__.py b/python/cumulus/__init__.py
index 46a7b5f..5418d3c 100644
--- a/python/cumulus/__init__.py
+++ b/python/cumulus/__init__.py
@@ -119,6 +119,9 @@ class LowlevelDataStore:
                 return (t, filename)
         return (None, filename)
 
+    def scan(self):
+        self.store.scan()
+
     def lowlevel_open(self, filename):
         """Return a file-like object for reading data from the given file."""
 
diff --git a/python/cumulus/store/__init__.py b/python/cumulus/store/__init__.py
index 7c43b69..35de0ee 100644
--- a/python/cumulus/store/__init__.py
+++ b/python/cumulus/store/__init__.py
@@ -26,6 +26,17 @@ class Store:
     def delete(self, type, name):
         raise NotImplementedException
 
+    def stat(self, type, name):
+        raise NotImplementedException
+
+    def scan(self):
+        """Cache file information stored in this backend.
+
+        This might make subsequent list or stat calls more efficient, but this
+        function is intended purely as a performance optimization."""
+
+        pass
+
 def open(url):
     (scheme, netloc, path, params, query, fragment) \
         = urlparse.urlparse(url)
diff --git a/python/cumulus/store/s3.py b/python/cumulus/store/s3.py
index 1e7257a..63efa17 100644
--- a/python/cumulus/store/s3.py
+++ b/python/cumulus/store/s3.py
@@ -11,12 +11,19 @@ class S3Store(cumulus.store.Store):
         self.bucket = self.conn.create_bucket(bucket)
         while prefix.endswith("/"): prefix = prefix[:-1]
         self.prefix = prefix
+        self.scan_cache = {}
 
     def _get_key(self, type, name):
         k = Key(self.bucket)
         k.key = "%s/%s/%s" % (self.prefix, type, name)
         return k
 
+    def scan(self):
+        prefix = "%s/" % (self.prefix,)
+        for i in self.bucket.list(prefix):
+            assert i.key.startswith(prefix)
+            self.scan_cache[i.key] = i
+
     def list(self, type):
         prefix = "%s/%s/" % (self.prefix, type)
         for i in self.bucket.list(prefix):
@@ -38,7 +45,12 @@
     def delete(self, type, name):
         self.bucket.delete_key("%s/%s/%s" % (self.prefix, type, name))
 
     def stat(self, type, name):
-        k = self.bucket.get_key("%s/%s/%s" % (self.prefix, type, name))
+        path = "%s/%s/%s" % (self.prefix, type, name)
+        if path in self.scan_cache:
+            k = self.scan_cache[path]
+        else:
+            k = self.bucket.get_key(path)
         if k is None:
             raise cumulus.store.NotFoundError
+        return {'size': int(k.size)}
-- 
2.20.1