X-Git-Url: http://git.vrable.net/?a=blobdiff_plain;f=python%2Fcumulus%2Fstore%2Fs3.py;h=52f03dd9fa20111db7ec1dc1993d6aca3ba15b5f;hb=179ae0cb9dc0cdb56217f517bd3dab62b0b3376c;hp=63efa17d5d7b748a55f4aede245147e234350f6c;hpb=92488d46a0deca6d031d07852c3b79214280ab6d;p=cumulus.git

diff --git a/python/cumulus/store/s3.py b/python/cumulus/store/s3.py
index 63efa17..52f03dd 100644
--- a/python/cumulus/store/s3.py
+++ b/python/cumulus/store/s3.py
@@ -1,3 +1,4 @@
+# Amazon S3 storage backend.  Uses a URL of the form s3://BUCKET/PATH/.
 import os, sys, tempfile
 import boto
 from boto.s3.bucket import Bucket
@@ -6,11 +7,22 @@ from boto.s3.key import Key
 import cumulus.store
 
 class S3Store(cumulus.store.Store):
-    def __init__(self, bucket, prefix):
+    def __init__(self, url, **kw):
+        # Old versions of the Python urlparse library parse a URL such as
+        # s3://bucket/path/ so that the bucket is included in the path, while
+        # newer versions (2.6 and later) return the bucket as the netloc
+        # (which seems more correct).
+        #
+        # To work with either behavior, join the netloc and path back into a
+        # single string before any further processing; that string is then
+        # split apart into a bucket and a prefix again.  If Python 2.5 did
+        # not need to be supported, this would be simpler: the netloc could
+        # be used as the bucket directly.
+        path = self.netloc + '/' + self.path
+        (bucket, prefix) = path.lstrip("/").split("/", 1)
         self.conn = boto.connect_s3(is_secure=False)
         self.bucket = self.conn.create_bucket(bucket)
-        while prefix.endswith("/"): prefix = prefix[:-1]
-        self.prefix = prefix
+        self.prefix = prefix.strip("/")
         self.scan_cache = {}
 
     def _get_key(self, type, name):
@@ -54,3 +66,5 @@ class S3Store(cumulus.store.Store):
             raise cumulus.store.NotFoundError
 
         return {'size': int(k.size)}
+
+Store = S3Store