From: Michael Vrable
Date: Wed, 22 Sep 2010 18:47:17 +0000 (-0700)
Subject: Improve cleaner performance.
X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=7daddca3715fee8a3bfff417e769ff08dd7f4cbf;p=bluesky.git

Improve cleaner performance.

When reading an object in, seek to and read just the needed bytes instead
of the entire log segment. Improves performance significantly.
---

diff --git a/cleaner/cleaner b/cleaner/cleaner
index 58292e4..009ebe8 100755
--- a/cleaner/cleaner
+++ b/cleaner/cleaner
@@ -42,9 +42,14 @@ class FileBackend:
         return [(f, os.stat(os.path.join(self.path, f)).st_size)
                 for f in files]
 
-    def read(self, filename):
+    def read(self, filename, offset=0, length=None):
         fp = open(os.path.join(self.path, filename), 'rb')
-        return fp.read()
+        if offset > 0:
+            fp.seek(offset)
+        if length is None:
+            return fp.read()
+        else:
+            return fp.read(length)
 
     def write(self, filename, data):
         fp = open(os.path.join(self.path, filename), 'wb')
@@ -77,10 +82,15 @@ class S3Backend:
             files.append((k.key, k.size))
         return files
 
-    def read(self, filename):
+    def read(self, filename, offset=0, length=None):
         if filename in self.cache:
             fp = open(os.path.join(self.cachedir, filename), 'rb')
-            return fp.read()
+            if offset > 0:
+                fp.seek(offset)
+            if length is None:
+                return fp.read()
+            else:
+                return fp.read(length)
         else:
             k = Key(self.bucket)
             k.key = self.path + filename
@@ -89,6 +99,10 @@ class S3Backend:
             fp.write(data)
             fp.close()
             self.cache[filename] = True
+            if offset > 0:
+                data = data[offset:]
+            if length is not None:
+                data = data[0:length]
             return data
 
     def write(self, filename, data):
@@ -256,7 +270,7 @@ def load_item(backend, location):
     The elements of the tuple are (directory, sequence, offset, size)."""
 
     filename = backend.loc_to_name((location[0], location[1]))
-    data = backend.read(filename)[location[2] : location[2] + location[3]]
+    data = backend.read(filename, location[2], location[3])
     item = parse_item(data)
     item.location = location
     return item