Amazon S3 returns some limited object metadata when a list operation is
performed, which is significantly cheaper than fetching the metadata for
objects one at a time. In the S3 backend, implement a scan() method that
lists all objects and caches their metadata; stat() then returns the
cached results when they are available.
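To illustrate the intended effect, a minimal usage sketch (the s3:// URL
and the "segments" type name are illustrative only; this assumes the open()
function shown below returns a connected store and that list() yields
object names):

    import cumulus.store

    store = cumulus.store.open("s3://mybucket/backups")
    store.scan()        # one list operation caches metadata for all objects
    for name in store.list("segments"):
        info = store.stat("segments", name)   # served from scan_cache
        print("%s: %d bytes" % (name, info['size']))

With N objects this costs one list operation rather than N per-object
metadata requests.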
                return (t, filename)
        return (None, filename)

+    def scan(self):
+        self.store.scan()
+
    def lowlevel_open(self, filename):
        """Return a file-like object for reading data from the given file."""

    def delete(self, type, name):
        raise NotImplementedException

+    def stat(self, type, name):
+        raise NotImplementedException
+
+    def scan(self):
+        """Cache file information stored in this backend.
+
+        This might make subsequent list or stat calls more efficient, but
+        this function is intended purely as a performance optimization."""
+
+        pass
+
def open(url):
    (scheme, netloc, path, params, query, fragment) \
        = urlparse.urlparse(url)

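Any backend can satisfy the same contract. As a purely hypothetical
illustration (FileStore, its path layout, and the use of os.lstat are
invented here, not part of this change), a local-disk store might cache
sizes with a single directory walk:

    import os

    class FileStore:
        def __init__(self, root):
            self.root = root
            self.scan_cache = {}

        def scan(self):
            # One walk of the tree caches every file's size up front.
            for dirpath, dirnames, filenames in os.walk(self.root):
                for f in filenames:
                    p = os.path.join(dirpath, f)
                    self.scan_cache[p] = os.lstat(p).st_size

        def stat(self, type, name):
            p = os.path.join(self.root, type, name)
            if p in self.scan_cache:
                return {'size': self.scan_cache[p]}
            return {'size': os.lstat(p).st_size}
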
        self.bucket = self.conn.create_bucket(bucket)
        while prefix.endswith("/"): prefix = prefix[:-1]
        self.prefix = prefix
+        self.scan_cache = {}   # filled in by scan(), consulted by stat()

    def _get_key(self, type, name):
        k = Key(self.bucket)
        k.key = "%s/%s/%s" % (self.prefix, type, name)
        return k

+    def scan(self):
+        # One list operation retrieves limited metadata (including size)
+        # for every object under the prefix; stat() can then be answered
+        # from this cache without a per-object request.
+        prefix = "%s/" % (self.prefix,)
+        for i in self.bucket.list(prefix):
+            assert i.key.startswith(prefix)
+            self.scan_cache[i.key] = i
+
    def list(self, type):
        prefix = "%s/%s/" % (self.prefix, type)
        for i in self.bucket.list(prefix):

        self.bucket.delete_key("%s/%s/%s" % (self.prefix, type, name))

    def stat(self, type, name):
-        k = self.bucket.get_key("%s/%s/%s" % (self.prefix, type, name))
+        path = "%s/%s/%s" % (self.prefix, type, name)
+        if path in self.scan_cache:
+            # Metadata was already fetched by scan(); skip the extra request.
+            k = self.scan_cache[path]
+        else:
+            k = self.bucket.get_key(path)
        if k is None:
            raise cumulus.store.NotFoundError

        return {'size': int(k.size)}
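
A note on the design: stat() consults scan_cache first but still falls back
to bucket.get_key() for any path that was never scanned, so calling scan()
remains strictly optional, in keeping with the base-class docstring that
frames it purely as a performance optimization.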