From: Michael Vrable Date: Wed, 30 Jun 2010 19:29:49 +0000 (-0700) Subject: Apply fixes to s3:// URL parsing under Python 2.6. X-Git-Url: https://git.vrable.net/?a=commitdiff_plain;h=179ae0cb9dc0cdb56217f517bd3dab62b0b3376c;p=cumulus.git Apply fixes to s3:// URL parsing under Python 2.6. The urlparse module starting in Python 2.6 appears to be compliant with RFC 3986, while previous versions were not. This causes a change in the behavior of parsing s3:// URLs, however, resulting in breakage with Python 2.6. Try to fix this by adjusting our code so that it works with either the old or the new behavior. --- diff --git a/python/cumulus/store/s3.py b/python/cumulus/store/s3.py index 65884ea..52f03dd 100644 --- a/python/cumulus/store/s3.py +++ b/python/cumulus/store/s3.py @@ -1,3 +1,4 @@ +# Amazon S3 storage backend. Uses a URL of the form s3://BUCKET/PATH/. import os, sys, tempfile import boto from boto.s3.bucket import Bucket @@ -7,10 +8,21 @@ import cumulus.store class S3Store(cumulus.store.Store): def __init__(self, url, **kw): - (bucket, prefix) = self.path.lstrip("/").split("/", 1) + # Old versions of the Python urlparse library will take a URL like + # s3://bucket/path/ and include the bucket with the path, while new + # versions (2.6 and later) treat it as the netloc (which seems more + # correct). + # + # But, so that we can work with either behavior, for now just combine + # the netloc and path together before we do any further processing + # (which will then split the combined path apart into a bucket and path + # again). If we didn't want to support Python 2.5, this would be + # easier as we could just use the netloc as the bucket directly. + path = self.netloc + '/' + self.path + (bucket, prefix) = path.lstrip("/").split("/", 1) self.conn = boto.connect_s3(is_secure=False) self.bucket = self.conn.create_bucket(bucket) - self.prefix = prefix.rstrip ("/") + self.prefix = prefix.strip("/") self.scan_cache = {} def _get_key(self, type, name):