1 | n/a | #! /usr/bin/env python3 |
---|
2 | n/a | |
---|
3 | n/a | """Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings""" |
---|
4 | n/a | |
---|
5 | n/a | # Modified 04-Oct-1995 by Jack Jansen to use binascii module |
---|
6 | n/a | # Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support |
---|
7 | n/a | # Modified 22-May-2007 by Guido van Rossum to use bytes everywhere |
---|
8 | n/a | |
---|
9 | n/a | import re |
---|
10 | n/a | import struct |
---|
11 | n/a | import binascii |
---|
12 | n/a | |
---|
13 | n/a | |
---|
# Public API of this module: the names exposed by a star-import.
__all__ = [
    # Legacy interface exports traditional RFC 2045 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]
---|
30 | n/a | |
---|
31 | n/a | |
---|
bytes_types = (bytes, bytearray)  # Types acceptable as binary data


def _bytes_from_decode_data(s):
    """Normalize the input of a decoding function to a bytes-like value.

    A str is encoded as ASCII; ValueError is raised if it contains any
    non-ASCII character.  bytes and bytearray objects are returned as-is.
    Any other object is copied out through memoryview(); if it does not
    support that, TypeError is raised.
    """
    if isinstance(s, bytes_types):
        return s
    if not isinstance(s, str):
        try:
            return memoryview(s).tobytes()
        except TypeError:
            raise TypeError("argument should be a bytes-like object or ASCII "
                            "string, not %r" % s.__class__.__name__) from None
    try:
        return s.encode('ascii')
    except UnicodeEncodeError:
        raise ValueError('string argument should contain only ASCII characters')
---|
47 | n/a | |
---|
48 | n/a | |
---|
49 | n/a | # Base64 encoding/decoding uses binascii |
---|
50 | n/a | |
---|
def b64encode(s, altchars=None):
    """Return the Base64 encoding of the bytes-like object s as bytes.

    altchars, when given, must be a byte string of length 2: its first
    byte is emitted in place of '+' and its second in place of '/'.  This
    lets an application produce e.g. URL- or filesystem-safe output.
    """
    result = binascii.b2a_base64(s, newline=False)
    if altchars is None:
        return result
    assert len(altchars) == 2, repr(altchars)
    table = bytes.maketrans(b'+/', altchars)
    return result.translate(table)
---|
63 | n/a | |
---|
64 | n/a | |
---|
def b64decode(s, altchars=None, validate=False):
    """Decode the Base64 encoded bytes-like object or ASCII string s.

    Optional altchars must be a bytes-like object or ASCII string of length 2
    which specifies the alternative alphabet used instead of the '+' and '/'
    characters.

    The result is returned as a bytes object.  A binascii.Error is raised if
    s is incorrectly padded.

    If validate is False (the default), characters that are neither in the
    normal base-64 alphabet nor the alternative alphabet are discarded prior
    to the padding check.  If validate is True, any such character in the
    input -- including a trailing newline -- results in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        # Map the caller's two substitute characters back to '+' and '/'.
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # Use fullmatch() rather than match() with a '$' anchor: '$' also matches
    # just before a trailing newline, which would let b'...\n' pass the
    # strict check even though '\n' is not part of the alphabet.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)
---|
88 | n/a | |
---|
89 | n/a | |
---|
def standard_b64encode(s):
    """Encode the bytes-like object s with the standard Base64 alphabet.

    Returns the encoded data as a bytes object.
    """
    encoded = b64encode(s)
    return encoded
---|
96 | n/a | |
---|
def standard_b64decode(s):
    """Decode data that was encoded with the standard Base64 alphabet.

    s is a bytes-like object or ASCII string.  The decoded data is returned
    as a bytes object.  binascii.Error is raised when the input is
    incorrectly padded; characters outside the standard alphabet are dropped
    before the padding check.
    """
    decoded = b64decode(s)
    return decoded
---|
106 | n/a | |
---|
107 | n/a | |
---|
# Byte translation tables between the standard Base64 characters '+' and '/'
# and their URL-safe substitutes '-' and '_' (one table per direction).
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')
---|
110 | n/a | |
---|
def urlsafe_b64encode(s):
    """Encode the bytes-like object s with the URL/filesystem-safe alphabet.

    The output is a bytes object in which '-' stands in for '+' and '_'
    stands in for '/'.
    """
    standard = b64encode(s)
    return standard.translate(_urlsafe_encode_translation)
---|
119 | n/a | |
---|
def urlsafe_b64decode(s):
    """Decode data encoded with the URL/filesystem-safe Base64 alphabet.

    s is a bytes-like object or ASCII string; the decoded data is returned
    as a bytes object.  binascii.Error is raised when the input is
    incorrectly padded.  Characters that belong neither to the URL-safe
    alphabet nor to the pair '+' and '/' are dropped before the padding
    check.

    In this alphabet '-' replaces '+' and '_' replaces '/'.
    """
    raw = _bytes_from_decode_data(s)
    # Convert to the standard alphabet, then decode as usual.
    standard = raw.translate(_urlsafe_decode_translation)
    return b64decode(standard)
---|
134 | n/a | |
---|
135 | n/a | |
---|
136 | n/a | |
---|
137 | n/a | # Base32 encoding/decoding must be done in Python |
---|
# The Base32 alphabet: the byte at index i encodes the 5-bit value i.
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
# Lookup tables derived from the alphabet.  Both start out as None and are
# filled in on first use: _b32tab2 by b32encode(), _b32rev by b32decode().
_b32tab2 = None
_b32rev = None
---|
141 | n/a | |
---|
def b32encode(s):
    """Encode the bytes-like object s using Base32 and return a bytes object.
    """
    global _b32tab2
    # The two-character lookup table is only built once the function is
    # actually used, so that unused encoders cost no memory.
    if _b32tab2 is None:
        singles = [bytes((code,)) for code in _b32alphabet]
        _b32tab2 = [first + second for first in singles for second in singles]

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    if leftover:
        # Zero-fill the last quantum.  A new object is created on purpose:
        # an in-place += would modify a bytearray passed in by the caller.
        s = s + b'\0' * (5 - leftover)
    table = _b32tab2
    encoded = bytearray()
    for start in range(0, len(s), 5):
        value = int.from_bytes(s[start:start + 5], 'big')
        # 40 bits -> four 10-bit groups, each looked up as two characters.
        encoded += (table[value >> 30] +
                    table[(value >> 20) & 0x3ff] +
                    table[(value >> 10) & 0x3ff] +
                    table[value & 0x3ff])
    # Overwrite the characters that stem only from the zero fill with '='.
    pad_count = {1: 6, 2: 4, 3: 3, 4: 1}.get(leftover, 0)
    if pad_count:
        encoded[-pad_count:] = b'=' * pad_count
    return bytes(encoded)
---|
179 | n/a | |
---|
def b32decode(s, casefold=False, map01=None):
    """Decode the Base32 encoded bytes-like object or ASCII string s.

    Optional casefold is a flag specifying whether a lowercase alphabet is
    acceptable as input.  For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The result is returned as a bytes object.  A binascii.Error is raised if
    the input is incorrectly padded (this includes input that consists only
    of '=' characters or that carries an impossible number of them) or if
    there are non-alphabet characters present in the input.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32rev is None:
        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  map01 is either None or the
    # character to map the digit 1 (one) to.  It should be either L (el) or
    # I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    total = len(s)
    s = s.rstrip(b'=')
    padchars = total - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta
    if padchars:
        # Number of real bytes carried by the final quantum for each legal
        # pad count.  Validate *before* touching acc: with an illegal count
        # acc may be unbound (input made only of '=') or the shift below may
        # exceed 40 bits (a full quantum followed by 8 or more '=').
        keep = {1: 4, 3: 3, 4: 2, 6: 1}.get(padchars)
        if keep is None:
            raise binascii.Error('Incorrect padding')
        acc <<= 5 * padchars
        last = acc.to_bytes(5, 'big')
        decoded[-5:] = last[:keep]
    return bytes(decoded)
---|
248 | n/a | |
---|
249 | n/a | |
---|
250 | n/a | |
---|
251 | n/a | # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns |
---|
252 | n/a | # lowercase. The RFC also recommends against accepting input case |
---|
253 | n/a | # insensitively. |
---|
def b16encode(s):
    """Return the Base16 (uppercase hexadecimal) encoding of s as bytes.

    s is a bytes-like object.
    """
    lowercase_hex = binascii.hexlify(s)
    return lowercase_hex.upper()
---|
258 | n/a | |
---|
259 | n/a | |
---|
def b16decode(s, casefold=False):
    """Decode the Base16 encoded bytes-like object or ASCII string s.

    casefold tells whether lowercase hexadecimal digits are accepted; it
    defaults to False for security purposes.

    Returns the decoded data as a bytes object.  binascii.Error is raised
    when s is incorrectly padded or contains characters that are not part
    of the alphabet.
    """
    data = _bytes_from_decode_data(s)
    if casefold:
        data = data.upper()
    offending = re.search(b'[^0-9A-F]', data)
    if offending:
        raise binascii.Error('Non-base16 digit found')
    return binascii.unhexlify(data)
---|
276 | n/a | |
---|
277 | n/a | # |
---|
278 | n/a | # Ascii85 encoding/decoding |
---|
279 | n/a | # |
---|
280 | n/a | |
---|
# Ascii85 lookup tables (single characters and character pairs).  They start
# out as None and are built by a85encode() the first time it is called.
_a85chars = None
_a85chars2 = None
# Markers that frame the data in the Adobe variant of Ascii85.
_A85START = b"<~"
_A85END = b"~>"
---|
285 | n/a | |
---|
def _85encode(b, chars, chars2, pad=False, foldnuls=False, foldspaces=False):
    """Shared implementation behind a85encode() and b85encode().

    b is the bytes-like object to encode.  chars is indexed with a single
    base-85 digit and chars2 with a pair of digits (digit1 * 85 + digit2);
    both must yield bytes.  With pad true the output keeps the characters
    that stem from zero-filling b to a multiple of 4 bytes.  foldnuls
    emits b'z' for an all-zero word, foldspaces emits b'y' for a word of
    four spaces.
    """
    if not isinstance(b, bytes_types):
        b = memoryview(b).tobytes()

    padding = (-len(b)) % 4
    if padding:
        b = b + b'\0' * padding
    words = struct.Struct('!%dI' % (len(b) // 4)).unpack(b)

    chunks = []
    for word in words:
        if foldnuls and not word:
            chunks.append(b'z')
        elif foldspaces and word == 0x20202020:
            chunks.append(b'y')
        else:
            # Five base-85 digits: two pairs followed by a single digit.
            upper, last_digit = divmod(word, 85)
            chunks.append(chars2[word // 614125] +
                          chars2[upper % 7225] +
                          chars[last_digit])

    if padding and not pad:
        tail = chunks[-1]
        if tail == b'z':
            # A folded word must be spelled out before it can be truncated.
            tail = chars[0] * 5
        chunks[-1] = tail[:-padding]

    return b''.join(chunks)
---|
309 | n/a | |
---|
def a85encode(b, *, foldspaces=False, wrapcol=0, pad=False, adobe=False):
    """Encode bytes-like object b using Ascii85 and return a bytes object.

    foldspaces: emit the short sequence 'y' for 4 consecutive spaces
    (ASCII 0x20), as supported by 'btoa'.  The "standard" Adobe encoding
    does not support this.

    wrapcol: when non-zero, newline characters are inserted so that each
    output line is at most this many characters long.

    pad: pad the input to a multiple of 4 before encoding.  Note that the
    btoa implementation always pads.

    adobe: frame the encoded byte sequence with <~ and ~>, as the Adobe
    implementation does.
    """
    global _a85chars, _a85chars2
    # The tables are created on first use only, so that no memory is spent
    # on them when the function is never called.
    if _a85chars is None:
        _a85chars = [bytes((code,)) for code in range(33, 118)]
        _a85chars2 = [(first + second)
                      for first in _a85chars for second in _a85chars]

    encoded = _85encode(b, _a85chars, _a85chars2, pad, True, foldspaces)

    if adobe:
        encoded = _A85START + encoded
    if wrapcol:
        # A line must be able to hold at least the two-byte end marker.
        width = max(2 if adobe else 1, wrapcol)
        lines = []
        for start in range(0, len(encoded), width):
            lines.append(encoded[start: start + width])
        if adobe and len(lines[-1]) + 2 > width:
            # The end marker does not fit on the last line; start a new one.
            lines.append(b'')
        encoded = b'\n'.join(lines)
    if adobe:
        return encoded + _A85END
    return encoded
---|
350 | n/a | |
---|
def a85decode(b, *, foldspaces=False, adobe=False, ignorechars=b' \t\n\r\v'):
    """Decode the Ascii85 encoded bytes-like object or ASCII string b.

    foldspaces: accept the short sequence 'y' as shorthand for 4 consecutive
    spaces (ASCII 0x20).  The "standard" Adobe encoding does not support
    this.

    adobe: the input is in Adobe Ascii85 format, i.e. it ends with ~> and
    may start with <~.

    ignorechars: byte string of characters to skip in the input.  It should
    contain whitespace characters only; the default covers the whitespace
    characters of ASCII.

    Returns the decoded data as a bytes object.  ValueError is raised for a
    missing Adobe end marker, a 5-tuple whose value does not fit in 32 bits,
    a 'z' or 'y' inside a 5-tuple, or any other unexpected character.
    """
    data = _bytes_from_decode_data(b)
    if adobe:
        if not data.endswith(_A85END):
            raise ValueError(
                "Ascii85 encoded byte sequences must end "
                "with {!r}".format(_A85END)
                )
        # Drop the end marker and, when present, the start marker.
        if data.startswith(_A85START):
            data = data[2:-2]
        else:
            data = data[:-2]
    #
    # The input is consumed one character at a time so that ignorable
    # characters and the short sequences can be dealt with.
    #
    pack_word = struct.Struct('!I').pack
    first_digit = b'!'[0]
    last_digit = b'u'[0]
    nul_marker = b'z'[0]
    space_marker = b'y'[0]
    pieces = []
    group = []
    # Four trailing 'u' digits complete a final, partial 5-tuple.
    for code in data + b'u' * 4:
        if first_digit <= code <= last_digit:
            group.append(code)
            if len(group) < 5:
                continue
            value = 0
            for digit in group:
                value = 85 * value + (digit - 33)
            try:
                pieces.append(pack_word(value))
            except struct.error:
                raise ValueError('Ascii85 overflow') from None
            group.clear()
        elif code == nul_marker:
            if group:
                raise ValueError('z inside Ascii85 5-tuple')
            pieces.append(b'\0\0\0\0')
        elif foldspaces and code == space_marker:
            if group:
                raise ValueError('y inside Ascii85 5-tuple')
            pieces.append(b'\x20\x20\x20\x20')
        elif code in ignorechars:
            # Skip whitespace
            continue
        else:
            raise ValueError('Non-Ascii85 digit found: %c' % code)

    result = b''.join(pieces)
    # Whatever is left in group are filler digits; cut the bytes they made.
    surplus = 4 - len(group)
    if surplus:
        result = result[:-surplus]
    return result
---|
420 | n/a | |
---|
421 | n/a | # The following code is originally taken (with permission) from Mercurial |
---|
422 | n/a | |
---|
# The base85 alphabet: the byte at index i encodes the digit value i.
_b85alphabet = (b"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                b"abcdefghijklmnopqrstuvwxyz!#$%&()*+-;<=>?@^_`{|}~")
# Lookup tables derived from the alphabet.  They start out as None and are
# built on first use: the encoding tables by b85encode(), the decoding
# table by b85decode().
_b85chars = None
_b85chars2 = None
_b85dec = None
---|
428 | n/a | |
---|
def b85encode(b, pad=False):
    """Encode bytes-like object b in base85 format and return a bytes object.

    When pad is true, the input is extended with NUL bytes to a multiple of
    4 bytes before it is encoded.
    """
    global _b85chars, _b85chars2
    # The tables are created on first use only, so that no memory is spent
    # on them when the function is never called.
    if _b85chars is None:
        _b85chars = [bytes((code,)) for code in _b85alphabet]
        _b85chars2 = [(first + second)
                      for first in _b85chars for second in _b85chars]
    encoded = _85encode(b, _b85chars, _b85chars2, pad)
    return encoded
---|
442 | n/a | |
---|
def b85decode(b):
    """Decode the base85-encoded bytes-like object or ASCII string b

    Returns the decoded data as a bytes object.  ValueError is raised when
    the input contains a character outside the base85 alphabet or when a
    group of five characters encodes a value that does not fit in 32 bits.
    """
    global _b85dec
    # The reverse table is created on first use only, so that no memory is
    # spent on it when the function is never called.
    if _b85dec is None:
        _b85dec = [None] * 256
        for value, code in enumerate(_b85alphabet):
            _b85dec[code] = value

    data = _bytes_from_decode_data(b)
    padding = (-len(data)) % 5
    # Complete the last group with the highest digit of the alphabet.
    data = data + b'~' * padding
    pack_word = struct.Struct('!I').pack
    pieces = []
    for offset in range(0, len(data), 5):
        group = data[offset:offset + 5]
        value = 0
        try:
            for code in group:
                value = value * 85 + _b85dec[code]
        except TypeError:
            # A None table entry was hit; report where the bad byte is.
            for index, code in enumerate(group):
                if _b85dec[code] is None:
                    raise ValueError('bad base85 character at position %d'
                                     % (offset + index)) from None
            raise
        try:
            pieces.append(pack_word(value))
        except struct.error:
            raise ValueError('base85 overflow in hunk starting at byte %d'
                             % offset) from None

    result = b''.join(pieces)
    if padding:
        result = result[:-padding]
    return result
---|
483 | n/a | |
---|
484 | n/a | # Legacy interface. This code could be cleaned up since I don't believe |
---|
485 | n/a | # binascii has any line length limitations. It just doesn't seem worth it |
---|
486 | n/a | # though. The files should be opened in binary mode. |
---|
487 | n/a | |
---|
MAXLINESIZE = 76 # Excluding the CRLF
MAXBINSIZE = (MAXLINESIZE//4)*3

def encode(input, output):
    """Encode a file; input and output are binary files.

    The input is consumed in pieces of MAXBINSIZE bytes; each piece is
    written to output as one Base64 line terminated by a newline.
    """
    while True:
        piece = input.read(MAXBINSIZE)
        if not piece:
            return
        # A read may come back short; keep reading until the piece is
        # complete or the input is exhausted.
        missing = MAXBINSIZE - len(piece)
        while missing > 0:
            more = input.read(missing)
            if not more:
                break
            piece += more
            missing = MAXBINSIZE - len(piece)
        output.write(binascii.b2a_base64(piece))
---|
504 | n/a | |
---|
505 | n/a | |
---|
def decode(input, output):
    """Decode a file; input and output are binary files.

    The input is read line by line and the decoded bytes of each line are
    written to output.
    """
    line = input.readline()
    while line:
        output.write(binascii.a2b_base64(line))
        line = input.readline()
---|
514 | n/a | |
---|
def _input_type_check(s):
    """Raise TypeError unless s is one-dimensional, byte-oriented data.

    s must be usable with memoryview(), have one of the single-byte
    formats 'c', 'b' or 'B', and have exactly one dimension.  Nothing is
    returned when the check passes.
    """
    try:
        view = memoryview(s)
    except TypeError as err:
        raise TypeError("expected bytes-like object, not %s"
                        % s.__class__.__name__) from err
    if view.format not in ('c', 'b', 'B'):
        raise TypeError("expected single byte elements, not %r from %s"
                        % (view.format, s.__class__.__name__))
    if view.ndim != 1:
        raise TypeError("expected 1-D data, not %d-D data from %s"
                        % (view.ndim, s.__class__.__name__))
---|
529 | n/a | |
---|
530 | n/a | |
---|
def encodebytes(s):
    """Encode a bytestring into a bytes object containing multiple lines
    of base-64 data.

    The input is cut into pieces of MAXBINSIZE bytes; every piece becomes
    one newline-terminated line of output.
    """
    _input_type_check(s)
    return b"".join(
        binascii.b2a_base64(s[start : start + MAXBINSIZE])
        for start in range(0, len(s), MAXBINSIZE)
    )
---|
540 | n/a | |
---|
def encodestring(s):
    """Legacy alias of encodebytes().

    Emits a DeprecationWarning attributed to the caller, then delegates.
    """
    from warnings import warn
    warn("encodestring() is a deprecated alias, use encodebytes()",
         category=DeprecationWarning, stacklevel=2)
    return encodebytes(s)
---|
547 | n/a | |
---|
548 | n/a | |
---|
def decodebytes(s):
    """Decode a bytestring of base-64 data into a bytes object.

    TypeError is raised by the input check when s is not one-dimensional
    single-byte data.
    """
    _input_type_check(s)
    decoded = binascii.a2b_base64(s)
    return decoded
---|
553 | n/a | |
---|
def decodestring(s):
    """Legacy alias of decodebytes().

    Emits a DeprecationWarning attributed to the caller, then delegates.
    """
    from warnings import warn
    warn("decodestring() is a deprecated alias, use decodebytes()",
         category=DeprecationWarning, stacklevel=2)
    return decodebytes(s)
---|
560 | n/a | |
---|
561 | n/a | |
---|
562 | n/a | # Usable as a script... |
---|
def main():
    """Small main program

    Command-line front end: -e encodes (the default), -d or -u decodes and
    -t runs the built-in round-trip self test.  Data is read from the file
    named by the first argument, or from standard input when there is no
    argument or it is '-', and the result is written to standard output.
    An invalid option prints the error and a usage text to standard error
    and exits with status 2.
    """
    import sys
    import getopt
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'deut')
    except getopt.error as msg:
        # Write to stderr explicitly instead of rebinding sys.stdout: a
        # rebinding would outlive this call if the caller catches the
        # SystemExit raised below.
        # NOTE: the continuation lines of the literal are part of the
        # message; their leading whitespace is printed as-is.
        usage = """usage: %s [-d|-e|-u|-t] [file|-]
        -d, -u: decode
        -e: encode (default)
        -t: encode and decode string 'Aladdin:open sesame'""" % sys.argv[0]
        print(msg, file=sys.stderr)
        print(usage, file=sys.stderr)
        sys.exit(2)
    func = encode
    # Options are processed in order; the last of -e/-d/-u wins, while -t
    # runs the self test immediately and ends the program.
    for o, a in opts:
        if o == '-e': func = encode
        if o == '-d': func = decode
        if o == '-u': func = decode
        if o == '-t': test(); return
    if args and args[0] != '-':
        with open(args[0], 'rb') as f:
            func(f, sys.stdout.buffer)
    else:
        func(sys.stdin.buffer, sys.stdout.buffer)
---|
587 | n/a | |
---|
588 | n/a | |
---|
def test():
    """Round-trip a sample string through encodebytes() and decodebytes().

    Prints the original, the encoded and the decoded value, then asserts
    that the decoded value equals the original.
    """
    original = b"Aladdin:open sesame"
    print(repr(original))
    encoded = encodebytes(original)
    print(repr(encoded))
    restored = decodebytes(encoded)
    print(repr(restored))
    assert original == restored
---|
597 | n/a | |
---|
598 | n/a | |
---|
# Run the command-line interface when the file is executed as a script.
if __name__ == '__main__':
    main()
---|