1 # Cumulus: Efficient Filesystem Backup to the Cloud
2 # Copyright (C) 2014 The Cumulus Developers
3 # See the AUTHORS file for a list of contributors.
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 2 of the License, or
8 # (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
15 # You should have received a copy of the GNU General Public License along
16 # with this program; if not, write to the Free Software Foundation, Inc.,
17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 """Collected utility functions for use by Cumulus."""
21 from __future__ import division, print_function, unicode_literals
26 # The encoding assumed when interpreting path names.
29 # In both Python 2 and Python 3 pathnames are represented using the str type.
# For Python 2, this means that converting from a bytestring to a pathname
31 # is a no-op. For Python 3, the conversion assumes a utf-8 encoding, but the
32 # surrogateescape encoding error handler is used to allow other byte sequences
33 # to be passed through.
# NOTE(review): the version guards and the Python 2 fall-through return below
# were restored from the description in the comment above and from the
# "Unsupported Python version" error -- confirm against the canonical file.
if six.PY2:
    def bytes_to_pathname(b):
        """Converts a byte string to a pathname.

        On Python 2 this is a no-op: the input is returned unchanged."""
        return b

    def pathname_to_bytes(p):
        """Converts a pathname to encoded bytes.

        A unicode input is encoded to ENCODING, with characters that cannot
        be encoded replaced; any other input is returned unchanged."""
        if isinstance(p, unicode):
            return p.encode(encoding=ENCODING, errors="replace")
        # Not unicode: hand the value back untouched instead of falling off
        # the end of the function and returning None.
        return p
elif six.PY3:
    def bytes_to_pathname(b):
        """Decodes a byte string to a pathname.

        The input is assumed to be encoded using ENCODING (defaults to
        utf-8).  The surrogateescape error handler is used so that other
        byte sequences are passed through rather than rejected."""
        return b.decode(encoding=ENCODING, errors="surrogateescape")

    def pathname_to_bytes(p):
        """Converts a pathname to encoded bytes.

        The input is encoded to ENCODING (defaults to utf-8), using the
        surrogateescape error handler to mirror bytes_to_pathname."""
        return p.encode(encoding=ENCODING, errors="surrogateescape")
else:
    # Only reached when six reports neither Python 2 nor Python 3.
    raise AssertionError("Unsupported Python version")
def uri_decode_raw(s):
    """Expand the %xx escapes in a URI-encoded string.

    The input should be a string, preferably containing only ASCII
    characters.  It is first converted to bytes, and each "%" followed by two
    hex digits (either case) is replaced by the single byte it names.  The
    result is of type bytes."""
    def _unescape(match):
        # The capture group holds the two hex digits following the "%".
        byte_value = int(match.group(1), 16)
        return six.int2byte(byte_value)

    encoded = pathname_to_bytes(s)
    return re.sub(br"%([0-9a-fA-F]{2})", _unescape, encoded)
def uri_encode_raw(s):
    """Encode a bytes array to URI-encoded (%xx escapes) form.

    Bytes strictly between "+" (0x2b) and DEL (0x7f), other than "@" (0x40),
    are emitted literally; every other byte is written as "%" followed by
    two hex digits.  The result is a text string."""
    # NOTE(review): the helper definition and its two return branches were
    # not visible in the reviewed copy of this function.  Lowercase hex
    # digits are assumed for the escapes (uri_decode_raw accepts either
    # case) -- confirm against existing encoded data.
    def hex_encode(c):
        # Allow certain literal characters: c > "+" and c < "\x7f" and c != "@"
        if 0x2b < c < 0x7f and c != 0x40:
            return chr(c)
        return "%%%02x" % c

    # six.iterbytes yields integer byte values on both Python 2 and 3.
    return "".join(hex_encode(c) for c in six.iterbytes(s))
def uri_decode_pathname(s):
    """Turns a URI-encoded string back into a pathname.

    The %xx escapes are expanded to raw bytes first, and those bytes are
    then converted to a pathname."""
    raw = uri_decode_raw(s)
    return bytes_to_pathname(raw)
def uri_encode_pathname(p):
    """Produces the URI-encoded string form of a pathname.

    The pathname is converted to bytes first, and those bytes are then
    URI-encoded."""
    raw = pathname_to_bytes(p)
    return uri_encode_raw(raw)