From 8bff41ddef78fa851b09d141c93bdf387abc1dee Mon Sep 17 00:00:00 2001
From: Michael Vrable
Date: Wed, 6 Aug 2008 12:11:22 -0700
Subject: [PATCH 1/1] Begin new storage-abstraction layer.

Begin work on new Python code for providing uniform access to both local
filesystem and remote S3 storage. Convert the existing Python module to use
the new interface.
---
Review notes:
  * python/cumulus/store/__init__.py: the Store stubs now raise the built-in
    NotImplementedError. The previous name, NotImplementedException, is not
    defined in Python, so calling a stub would have failed with NameError.
  * NOTE(review): python/cumulus/__init__.py imports cumulus.store.file and
    calls self.store.stat(), but this patch adds neither store/file.py nor a
    stat() method on Store/S3Store -- confirm they arrive in a follow-up.

 cumulus-util                         |  5 +++-
 lbs.py => python/cumulus/__init__.py | 36 +++++++++++++-------------
 python/cumulus/store/__init__.py     | 22 ++++++++++++++++
 python/cumulus/store/s3.py           | 38 ++++++++++++++++++++++++++++
 4 files changed, 82 insertions(+), 19 deletions(-)
 rename lbs.py => python/cumulus/__init__.py (97%)
 create mode 100644 python/cumulus/store/__init__.py
 create mode 100644 python/cumulus/store/s3.py

diff --git a/cumulus-util b/cumulus-util
index 59c5fc7..f5d5fa3 100755
--- a/cumulus-util
+++ b/cumulus-util
@@ -4,7 +4,10 @@
 
 import getpass, os, stat, sys, time
 from optparse import OptionParser
-import lbs
+import cumulus
+
+# Compatibility
+lbs = cumulus
 
 # We support up to "LBS Snapshot v0.8" formats, but are also limited by the lbs
 # module.
diff --git a/lbs.py b/python/cumulus/__init__.py
similarity index 97%
rename from lbs.py
rename to python/cumulus/__init__.py
index b8de982..602e70e 100644
--- a/lbs.py
+++ b/python/cumulus/__init__.py
@@ -12,6 +12,8 @@ from __future__ import division
 import os, re, sha, tarfile, tempfile, thread
 from pysqlite2 import dbapi2 as sqlite3
 
+import cumulus.store, cumulus.store.file
+
 # The largest supported snapshot format that can be understood.
 FORMAT_VERSION = (0, 8)  # LBS Snapshot v0.8
 
@@ -82,19 +84,19 @@ class LowlevelDataStore:
     """
 
     def __init__(self, path):
-        self.path = path
-
-    # Low-level filesystem access. These methods could be overwritten to
-    # provide access to remote data stores.
-    def lowlevel_list(self):
-        """Get a listing of files stored."""
+        self.store = cumulus.store.file.FileStore(path)
 
-        return os.listdir(self.path)
+    def _classify(self, filename):
+        for (t, r) in cumulus.store.type_patterns.items():
+            if r.match(filename):
+                return (t, filename)
+        return (None, filename)
 
     def lowlevel_open(self, filename):
         """Return a file-like object for reading data from the given file."""
 
-        return open(os.path.join(self.path, filename), 'rb')
+        (type, filename) = self._classify(filename)
+        return self.store.get(type, filename)
 
     def lowlevel_stat(self, filename):
         """Return a dictionary of information about the given file.
@@ -103,21 +105,19 @@ class LowlevelDataStore:
         file in bytes.
         """
 
-        stat = os.stat(os.path.join(self.path, filename))
-        return {'size': stat.st_size}
+        (type, filename) = self._classify(filename)
+        return self.store.stat(type, filename)
 
     # Slightly higher-level list methods.
     def list_snapshots(self):
-        for f in self.lowlevel_list():
-            m = re.match(r"^snapshot-(.*)\.lbs$", f)
-            if m:
-                yield m.group(1)
+        for f in self.store.list('snapshots'):
+            m = cumulus.store.type_patterns['snapshots'].match(f)
+            if m: yield m.group(1)
 
     def list_segments(self):
-        for f in self.lowlevel_list():
-            m = re.match(r"^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(\.\S+)?$", f)
-            if m:
-                yield m.group(1)
+        for f in self.store.list('segments'):
+            m = cumulus.store.type_patterns['segments'].match(f)
+            if m: yield m.group(1)
 
 class ObjectStore:
     def __init__(self, data_store):
diff --git a/python/cumulus/store/__init__.py b/python/cumulus/store/__init__.py
new file mode 100644
index 0000000..5ebf0d7
--- /dev/null
+++ b/python/cumulus/store/__init__.py
@@ -0,0 +1,22 @@
+import re
+
+type_patterns = {
+    'checksums': re.compile(r"^snapshot-(.*)\.(\w+)sums$"),
+    'segments': re.compile(r"^([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})(\.\S+)?$"),
+    'snapshots': re.compile(r"^snapshot-(.*)\.lbs$")
+}
+
+class Store:
+    """Base class for all cumulus storage backends."""
+
+    def list(self, type):
+        raise NotImplementedError
+
+    def get(self, type, name):
+        raise NotImplementedError
+
+    def put(self, type, name, fp):
+        raise NotImplementedError
+
+    def delete(self, type, name):
+        raise NotImplementedError
diff --git a/python/cumulus/store/s3.py b/python/cumulus/store/s3.py
new file mode 100644
index 0000000..0453c21
--- /dev/null
+++ b/python/cumulus/store/s3.py
@@ -0,0 +1,38 @@
+import os, sys, tempfile
+import boto
+from boto.s3.bucket import Bucket
+from boto.s3.key import Key
+
+import cumulus.store
+
+class S3Store(cumulus.store.Store):
+    def __init__(self, bucket, prefix):
+        self.conn = boto.connect_s3(is_secure=False)
+        self.bucket = self.conn.create_bucket(bucket)
+        while prefix.endswith("/"): prefix = prefix[:-1]
+        self.prefix = prefix
+
+    def _get_key(self, type, name):
+        k = Key(self.bucket)
+        k.key = "%s/%s/%s" % (self.prefix, type, name)
+        return k
+
+    def list(self, type):
+        prefix = "%s/%s/" % (self.prefix, type)
+        for i in self.bucket.list(prefix):
+            assert i.key.startswith(prefix)
+            yield i.key[len(prefix):]
+
+    def get(self, type, name):
+        fp = tempfile.TemporaryFile()
+        k = self._get_key(type, name)
+        k.get_file(fp)
+        fp.seek(0)
+        return fp
+
+    def put(self, type, name, fp):
+        k = self._get_key(type, name)
+        k.send_file(fp)
+
+    def delete(self, type, name):
+        self.bucket.delete_key("%s/%s/%s" % (self.prefix, type, name))
-- 
2.20.1