X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=python%2Fcumulus%2Fstore%2Fs3.py;h=4ad403cc3bb6244c323150a5162e72da2bf81326;hb=c343597dac79f9edc63d95d881625a317fadb1d9;hp=6460fb60b90771be6925b298ce6efa20dafc521e;hpb=2ee97034047db53780a52d803b1c577b4c23c303;p=cumulus.git diff --git a/python/cumulus/store/s3.py b/python/cumulus/store/s3.py index 6460fb6..4ad403c 100644 --- a/python/cumulus/store/s3.py +++ b/python/cumulus/store/s3.py @@ -1,3 +1,23 @@ +# Cumulus: Efficient Filesystem Backup to the Cloud +# Copyright (C) 2008-2010 The Cumulus Developers +# See the AUTHORS file for a list of contributors. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +"""Amazon S3 storage backend. Uses a URL of the form s3://BUCKET/PATH/.""" + import os, sys, tempfile import boto from boto.s3.bucket import Bucket @@ -6,17 +26,35 @@ from boto.s3.key import Key import cumulus.store class S3Store(cumulus.store.Store): - def __init__(self, bucket, prefix): + def __init__(self, url, **kw): + # Old versions of the Python urlparse library will take a URL like + # s3://bucket/path/ and include the bucket with the path, while new + # versions (2.6 and later) treat it as the netloc (which seems more + # correct). + # + # But, so that we can work with either behavior, for now just combine + # the netloc and path together before we do any further processing + # (which will then split the combined path apart into a bucket and path + # again). If we didn't want to support Python 2.5, this would be + # easier as we could just use the netloc as the bucket directly. + path = self.netloc + '/' + self.path + (bucket, prefix) = path.lstrip("/").split("/", 1) self.conn = boto.connect_s3(is_secure=False) self.bucket = self.conn.create_bucket(bucket) - while prefix.endswith("/"): prefix = prefix[:-1] - self.prefix = prefix + self.prefix = prefix.strip("/") + self.scan_cache = {} def _get_key(self, type, name): k = Key(self.bucket) k.key = "%s/%s/%s" % (self.prefix, type, name) return k + def scan(self): + prefix = "%s/" % (self.prefix,) + for i in self.bucket.list(prefix): + assert i.key.startswith(prefix) + self.scan_cache[i.key] = i + def list(self, type): prefix = "%s/%s/" % (self.prefix, type) for i in self.bucket.list(prefix): @@ -32,11 +70,20 @@ class S3Store(cumulus.store.Store): def put(self, type, name, fp): k = self._get_key(type, name) - k.send_file(fp) + k.set_contents_from_file(fp) def delete(self, type, name): self.bucket.delete_key("%s/%s/%s" % (self.prefix, type, name)) def stat(self, type, name): - k = self.bucket.get_key("%s/%s/%s" % (self.prefix, type, name)) + path = "%s/%s/%s" % (self.prefix, type, name) + if path in self.scan_cache: + k = self.scan_cache[path] + else: + k = self.bucket.get_key(path) + if k is None: + raise cumulus.store.NotFoundError + return {'size': int(k.size)} + +Store = S3Store