Improve cleaner performance.
author Michael Vrable <mvrable@cs.ucsd.edu>
Wed, 22 Sep 2010 18:47:17 +0000 (11:47 -0700)
committer Michael Vrable <mvrable@cs.ucsd.edu>
Wed, 22 Sep 2010 18:47:17 +0000 (11:47 -0700)
When reading an object in, seek to and read just the needed bytes instead
of the entire log segment.  Improves performance significantly.

cleaner/cleaner

index 58292e4..009ebe8 100755 (executable)
@@ -42,9 +42,14 @@ class FileBackend:
         return [(f, os.stat(os.path.join(self.path, f)).st_size)
                 for f in files]
 
-    def read(self, filename):
+    def read(self, filename, offset=0, length=None):
         fp = open(os.path.join(self.path, filename), 'rb')
-        return fp.read()
+        if offset > 0:
+            fp.seek(offset)
+        if length is None:
+            return fp.read()
+        else:
+            return fp.read(length)
 
     def write(self, filename, data):
         fp = open(os.path.join(self.path, filename), 'wb')
@@ -77,10 +82,15 @@ class S3Backend:
             files.append((k.key, k.size))
         return files
 
-    def read(self, filename):
+    def read(self, filename, offset=0, length=None):
         if filename in self.cache:
             fp = open(os.path.join(self.cachedir, filename), 'rb')
-            return fp.read()
+            if offset > 0:
+                fp.seek(offset)
+            if length is None:
+                return fp.read()
+            else:
+                return fp.read(length)
         else:
             k = Key(self.bucket)
             k.key = self.path + filename
@@ -89,6 +99,10 @@ class S3Backend:
             fp.write(data)
             fp.close()
             self.cache[filename] = True
+            if offset > 0:
+                data = data[offset:]
+            if length is not None:
+                data = data[0:length]
             return data
 
     def write(self, filename, data):
@@ -256,7 +270,7 @@ def load_item(backend, location):
     The elements of the tuple are (directory, sequence, offset, size)."""
 
     filename = backend.loc_to_name((location[0], location[1]))
-    data = backend.read(filename)[location[2] : location[2] + location[3]]
+    data = backend.read(filename, location[2], location[3])
     item = parse_item(data)
     item.location = location
     return item