From: Michael Vrable
Date: Fri, 10 Sep 2010 22:58:28 +0000 (-0700)
Subject: Add S3 backend for the cleaner.
X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=a4406c5b577e751f0fc97aa1d3a1fe553ffff673;p=bluesky.git

Add S3 backend for the cleaner.

It does not yet cache files so performance is poor.
---

diff --git a/cleaner/cleaner b/cleaner/cleaner
index b018d0d..4267b6e 100755
--- a/cleaner/cleaner
+++ b/cleaner/cleaner
@@ -10,6 +10,7 @@
 
 import base64, os, re, struct, sys
 import boto
+from boto.s3.key import Key
 
 # The BlueSky 'struct cloudlog_header' data type.
 HEADER_FORMAT = '<4sb16sQIII'
@@ -59,6 +60,42 @@ class FileBackend:
         m = re.match(r"^log-(\d+)-(\d+)$", name)
         if m: return (int(m.group(1)), int(m.group(2)))
 
+class S3Backend:
+    """An interface to BlueSky where the log segments are on in Amazon S3."""
+
+    def __init__(self, bucket, path='', cachedir=None):
+        self.conn = boto.connect_s3(is_secure=False)
+        self.bucket = self.conn.get_bucket(bucket)
+        self.path = path
+
+    def list(self):
+        files = []
+        for k in self.bucket.list(self.path + 'log-'):
+            files.append((k.key, k.size))
+        return files
+
+    def read(self, filename):
+        k = Key(self.bucket)
+        k.key = self.path + filename
+        return k.get_contents_as_string()
+
+    def write(self, filename, data):
+        k = Key(self.bucket)
+        k.key = self.path + filename
+        k.set_contents_from_string(data)
+
+    def delete(self, filename):
+        k = Key(self.bucket)
+        k.key = self.path + filename
+        k.delete()
+
+    def loc_to_name(self, location):
+        return "log-%08d-%08d" % (location)
+
+    def name_to_loc(self, name):
+        m = re.match(r"^log-(\d+)-(\d+)$", name)
+        if m: return (int(m.group(1)), int(m.group(2)))
+
 class LogItem:
     """In-memory representation of a single item stored in a log file."""
 
@@ -232,6 +269,7 @@ def parse_log(data, location=None):
 def load_checkpoint_record(backend):
     for (log, size) in reversed(backend.list()):
         for item in reversed(list(parse_log(backend.read(log), log))):
+            print item
             if item.type == ITEM_TYPE.CHECKPOINT:
                 return item
 
@@ -346,7 +384,7 @@ def run_cleaner(backend, inode_map, log):
     # Determine which segments are poorly utilized and should be cleaned. We
     # need better heuristics here.
     for (s, u) in sorted(inode_map.util.segments.items()):
-        if float(u[1]) / u[0] < 0.6 and u[1] > 0:
+        if (float(u[1]) / u[0] < 0.6 or u[1] < 32768) and u[1] > 0:
             print "Should clean segment", s
             loc = backend.name_to_loc(s)
             if s: inode_map.obsolete_segments.add(loc)
@@ -380,8 +418,9 @@ def run_cleaner(backend, inode_map, log):
         rewrite_inode(backend, inode_map, i, log, i in dirty_inode_data)
 
 if __name__ == '__main__':
-    backend = FileBackend(".")
+    backend = S3Backend("mvrable-bluesky", cachedir=".")
     chkpt = load_checkpoint_record(backend)
+    print backend.list()
     imap = InodeMap()
     imap.build(backend, chkpt)
     print chkpt