Amazon S3 returns some limited object metadata when a list operation is
performed, which is significantly cheaper than fetching the metadata for
objects one at a time. In the S3 backend, implement a scan() method that
lists all objects and caches their metadata; stat() then returns the
cached results when they are available.
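To illustrate the intended effect, a minimal usage sketch (the s3:// URL
and the "segments" type name are illustrative only; this assumes the open()
function shown below returns a connected store and that list() yields
object names):

    import cumulus.store

    store = cumulus.store.open("s3://mybucket/backups")
    store.scan()        # one list operation caches metadata for all objects
    for name in store.list("segments"):
        info = store.stat("segments", name)   # served from scan_cache
        print("%s: %d bytes" % (name, info['size']))

With N objects this costs one list operation rather than N per-object
metadata requests.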
                return (t, filename)
        return (None, filename)

+    def scan(self):
+        self.store.scan()
+
    def lowlevel_open(self, filename):
        """Return a file-like object for reading data from the given file."""

    def delete(self, type, name):
        raise NotImplementedException

+    def stat(self, type, name):
+        raise NotImplementedException
+
+    def scan(self):
+        """Cache file information stored in this backend.
+
+        This might make subsequent list or stat calls more efficient, but
+        this function is intended purely as a performance optimization."""
+
+        pass
+
def open(url):
    (scheme, netloc, path, params, query, fragment) \
        = urlparse.urlparse(url)

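Any backend can satisfy the same contract. As a purely hypothetical
illustration (FileStore, its path layout, and the use of os.lstat are
invented here, not part of this change), a local-disk store might cache
sizes with a single directory walk:

    import os

    class FileStore:
        def __init__(self, root):
            self.root = root
            self.scan_cache = {}

        def scan(self):
            # One walk of the tree caches every file's size up front.
            for dirpath, dirnames, filenames in os.walk(self.root):
                for f in filenames:
                    p = os.path.join(dirpath, f)
                    self.scan_cache[p] = os.lstat(p).st_size

        def stat(self, type, name):
            p = os.path.join(self.root, type, name)
            if p in self.scan_cache:
                return {'size': self.scan_cache[p]}
            return {'size': os.lstat(p).st_size}
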
        self.bucket = self.conn.create_bucket(bucket)
        while prefix.endswith("/"): prefix = prefix[:-1]
        self.prefix = prefix
+        self.scan_cache = {}   # filled in by scan(), consulted by stat()

    def _get_key(self, type, name):
        k = Key(self.bucket)
        k.key = "%s/%s/%s" % (self.prefix, type, name)
        return k

+    def scan(self):
+        # One list operation retrieves limited metadata (including size)
+        # for every object under the prefix; stat() can then be answered
+        # from this cache without a per-object request.
+        prefix = "%s/" % (self.prefix,)
+        for i in self.bucket.list(prefix):
+            assert i.key.startswith(prefix)
+            self.scan_cache[i.key] = i
+
    def list(self, type):
        prefix = "%s/%s/" % (self.prefix, type)
        for i in self.bucket.list(prefix):

        self.bucket.delete_key("%s/%s/%s" % (self.prefix, type, name))

    def stat(self, type, name):
-        k = self.bucket.get_key("%s/%s/%s" % (self.prefix, type, name))
+        path = "%s/%s/%s" % (self.prefix, type, name)
+        if path in self.scan_cache:
+            # Metadata was already fetched by scan(); skip the extra request.
+            k = self.scan_cache[path]
+        else:
+            k = self.bucket.get_key(path)
        if k is None:
            raise cumulus.store.NotFoundError

        return {'size': int(k.size)}
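
A note on the design: stat() consults scan_cache first but still falls back
to bucket.get_key() for any path that was never scanned, so calling scan()
remains strictly optional, in keeping with the base-class docstring that
frames it purely as a performance optimization.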