X-Git-Url: http://git.vrable.net/?p=cumulus.git;a=blobdiff_plain;f=python%2Fcumulus%2Futil.py;fp=python%2Fcumulus%2Futil.py;h=d2ee68ba415ddbd187e598e38e90c98ca3aa66e3;hp=0000000000000000000000000000000000000000;hb=8b4b764f036d9ac945d36a1966f2eb284c4f44a3;hpb=3cf2be27c9e54eaae6737adfb41f1fc86528aa84 diff --git a/python/cumulus/util.py b/python/cumulus/util.py new file mode 100644 index 0000000..d2ee68b --- /dev/null +++ b/python/cumulus/util.py @@ -0,0 +1,82 @@ +# Cumulus: Efficient Filesystem Backup to the Cloud +# Copyright (C) 2014 The Cumulus Developers +# See the AUTHORS file for a list of contributors. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +"""Collected utility functions for use by Cumulus.""" + +from __future__ import division, print_function, unicode_literals + +import re +import six + +# The encoding assumed when interpreting path names. +ENCODING="utf-8" + +# In both Python 2 and Python 3 pathnames are represented using the str type. +# For Python 2, this means that the converting from a bytestring to a pathname +# is a no-op. For Python 3, the conversion assumes a utf-8 encoding, but the +# surrogateescape encoding error handler is used to allow other byte sequences +# to be passed through. +if six.PY2: + def bytes_to_pathname(b): return b + def pathname_to_bytes(p): + if isinstance(p, unicode): + return p.encode(encoding=ENCODING, errors="replace") + else: + return p +elif six.PY3: + def bytes_to_pathname(b): + """Decodes a byte string to a pathname. + + The input is assumed to be encoded using ENCODING (defaults to + utf-8).""" + return b.decode(encoding=ENCODING, errors="surrogateescape") + + def pathname_to_bytes(p): + """Converts a pathname to encoded bytes. + + The input is encoded to ENCODING (defaults to utf-8).""" + return p.encode(encoding=ENCODING, errors="surrogateescape") +else: + raise AssertionError("Unsupported Python version") + +def uri_decode_raw(s): + """Decode a URI-encoded (%xx escapes) string. + + The input should be a string, preferably only using ASCII characters. The + output will be of type bytes.""" + def hex_decode(m): return six.int2byte(int(m.group(1), 16)) + return re.sub(br"%([0-9a-fA-F]{2})", hex_decode, pathname_to_bytes(s)) + +def uri_encode_raw(s): + """Encode a bytes array to URI-encoded (%xx escapes) form.""" + def hex_encode(c): + # Allow certain literal characters: c > "+" and c < "\x7f" and c != "@" + if c > 0x2b and c < 0x7f and c != 0x40: + return chr(c) + else: + return "%%%02x" % c + + return "".join(hex_encode(c) for c in six.iterbytes(s)) + +def uri_decode_pathname(s): + """Decodes a URI-encoded string to a pathname.""" + return bytes_to_pathname(uri_decode_raw(s)) + +def uri_encode_pathname(p): + """Encodes a pathname to a URI-encoded string.""" + return uri_encode_raw(pathname_to_bytes(p))