Add very basic caching to the cleaner S3 backend.
author: Michael Vrable <mvrable@cs.ucsd.edu>
Sat, 11 Sep 2010 00:11:05 +0000 (17:11 -0700)
committer: Michael Vrable <mvrable@cs.ucsd.edu>
Sat, 11 Sep 2010 00:11:05 +0000 (17:11 -0700)
cleaner/cleaner

index 4267b6e..336537d 100755 (executable)
@@ -63,10 +63,12 @@ class FileBackend:
 class S3Backend:
     """An interface to BlueSky where the log segments are on in Amazon S3."""
 
-    def __init__(self, bucket, path='', cachedir=None):
+    def __init__(self, bucket, path='', cachedir="."):
         self.conn = boto.connect_s3(is_secure=False)
         self.bucket = self.conn.get_bucket(bucket)
         self.path = path
+        self.cachedir = cachedir
+        self.cache = {}
 
     def list(self):
         files = []
@@ -75,19 +77,32 @@ class S3Backend:
         return files
 
     def read(self, filename):
-        k = Key(self.bucket)
-        k.key = self.path + filename
-        return k.get_contents_as_string()
+        if filename in self.cache:
+            fp = open(os.path.join(self.cachedir, filename), 'rb')
+            return fp.read()
+        else:
+            k = Key(self.bucket)
+            k.key = self.path + filename
+            data = k.get_contents_as_string()
+            fp = open(os.path.join(self.cachedir, filename), 'wb')
+            fp.write(data)
+            fp.close()
+            self.cache[filename] = True
+            return data
 
     def write(self, filename, data):
         k = Key(self.bucket)
         k.key = self.path + filename
         k.set_contents_from_string(data)
+        if filename in self.cache:
+            del self.cache[filename]
 
     def delete(self, filename):
         k = Key(self.bucket)
         k.key = self.path + filename
         k.delete()
+        if filename in self.cache:
+            del self.cache[filename]
 
     def loc_to_name(self, location):
         return "log-%08d-%08d" % (location)