python/cumulus/store/s3.py

   1 # Cumulus: Efficient Filesystem Backup to the Cloud
   2 # Copyright (C) 2008-2010 The Cumulus Developers
   3 # See the AUTHORS file for a list of contributors.
   4 #
   5 # This program is free software; you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 2 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License along
  16 # with this program; if not, write to the Free Software Foundation, Inc.,
  17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  18
  19 """Amazon S3 storage backend.  Uses a URL of the form s3://BUCKET/PATH/."""
  20
  21 import os, sys, tempfile
  22 import boto
  23 from boto.s3.bucket import Bucket
  24 from boto.s3.key import Key
  25
  26 import cumulus.store
  27
  28 class S3Store(cumulus.store.Store):
  29     def __init__(self, url, **kw):
  30         # Old versions of the Python urlparse library will take a URL like
  31         # s3://bucket/path/ and include the bucket with the path, while new
  32         # versions (2.6 and later) treat it as the netloc (which seems more
  33         # correct).
  34         #
  35         # But, so that we can work with either behavior, for now just combine
  36         # the netloc and path together before we do any further processing
  37         # (which will then split the combined path apart into a bucket and path
  38         # again).  If we didn't want to support Python 2.5, this would be
  39         # easier as we could just use the netloc as the bucket directly.
  40         path = self.netloc + '/' + self.path
  41         (bucket, prefix) = path.lstrip("/").split("/", 1)
  42         self.conn = boto.connect_s3(is_secure=False)
  43         self.bucket = self.conn.create_bucket(bucket)
  44         self.prefix = prefix.strip("/")
  45         self.scan_cache = {}
  46
  47     def _get_key(self, type, name):
  48         k = Key(self.bucket)
  49         k.key = "%s/%s/%s" % (self.prefix, type, name)
  50         return k
  51
  52     def scan(self):
  53         prefix = "%s/" % (self.prefix,)
  54         for i in self.bucket.list(prefix):
  55             assert i.key.startswith(prefix)
  56             self.scan_cache[i.key] = i
  57
  58     def list(self, type):
  59         prefix = "%s/%s/" % (self.prefix, type)
  60         for i in self.bucket.list(prefix):
  61             assert i.key.startswith(prefix)
  62             yield i.key[len(prefix):]
  63
  64     def get(self, type, name):
  65         fp = tempfile.TemporaryFile()
  66         k = self._get_key(type, name)
  67         k.get_file(fp)
  68         fp.seek(0)
  69         return fp
  70
  71     def put(self, type, name, fp):
  72         k = self._get_key(type, name)
  73         k.set_contents_from_file(fp)
  74
  75     def delete(self, type, name):
  76         self.bucket.delete_key("%s/%s/%s" % (self.prefix, type, name))
  77
  78     def stat(self, type, name):
  79         path = "%s/%s/%s" % (self.prefix, type, name)
  80         if path in self.scan_cache:
  81             k = self.scan_cache[path]
  82         else:
  83             k = self.bucket.get_key(path)
  84         if k is None:
  85             raise cumulus.store.NotFoundError
  86
  87         return {'size': int(k.size)}
  88
  89 Store = S3Store