1 | n/a | # Copyright (C) 2002-2007 Python Software Foundation |
---|
2 | n/a | # Author: Ben Gertzfield |
---|
3 | n/a | # Contact: email-sig@python.org |
---|
4 | n/a | |
---|
5 | n/a | """Base64 content transfer encoding per RFCs 2045-2047. |
---|
6 | n/a | |
---|
7 | n/a | This module handles the content transfer encoding method defined in RFC 2045 |
---|
8 | n/a | to encode arbitrary 8-bit data using the three 8-bit bytes in four 7-bit |
---|
9 | n/a | characters encoding known as Base64. |
---|
10 | n/a | |
---|
11 | n/a | It is used in the MIME standards for email to attach images, audio, and text |
---|
12 | n/a | using some 8-bit character sets to messages. |
---|
13 | n/a | |
---|
14 | n/a | This module provides an interface to encode and decode both headers and bodies |
---|
15 | n/a | with Base64 encoding. |
---|
16 | n/a | |
---|
17 | n/a | RFC 2047 defines a method for including character set information in an |
---|
18 | n/a | `encoded-word' in a header. This method is commonly used for 8-bit real names |
---|
19 | n/a | in To:, From:, Cc:, etc. fields, as well as Subject: lines. |
---|
20 | n/a | |
---|
21 | n/a | This module does not do the line wrapping or end-of-line character conversion |
---|
22 | n/a | necessary for proper internationalized headers; it only does dumb encoding and |
---|
23 | n/a | decoding. To deal with the various line wrapping issues, use the email.header |
---|
24 | n/a | module. |
---|
25 | n/a | """ |
---|
26 | n/a | |
---|
# Public names exported by this module (what a star-import picks up).
__all__ = [
    'body_decode',
    'body_encode',
    'decode',
    'decodestring',
    'header_encode',
    'header_length',
    ]
---|
35 | n/a | |
---|
36 | n/a | |
---|
37 | n/a | from base64 import b64encode |
---|
38 | n/a | from binascii import b2a_base64, a2b_base64 |
---|
39 | n/a | |
---|
# Line-ending and joining constants.  NL is the default eol of body_encode()
# and EMPTYSTRING is used there to join the encoded lines; CRLF is not
# referenced by the code visible in this module.
CRLF = '\r\n'
NL = '\n'
EMPTYSTRING = ''

# See also Charset.py
# NOTE(review): 7 equals the number of fixed characters in the
# '=?charset?b?...?=' wrapper produced by header_encode() -- confirm against
# Charset.py.  MISC_LEN is not referenced by the code visible in this module.
MISC_LEN = 7
---|
46 | n/a | |
---|
47 | n/a | |
---|
48 | n/a | |
---|
49 | n/a | # Helpers |
---|
def header_length(bytearray):
    """Return the number of characters base64 produces for the given input.

    The argument only needs to support len(); its name (which shadows the
    builtin) is kept because it is part of the public signature.
    """
    # Every started group of 3 input bytes becomes exactly 4 output
    # characters, so round the group count up before scaling.
    started_groups = (len(bytearray) + 2) // 3
    return started_groups * 4
---|
58 | n/a | |
---|
59 | n/a | |
---|
60 | n/a | |
---|
def header_encode(header_bytes, charset='iso-8859-1'):
    """Wrap a single header value in a Base64 RFC 2047 style encoded-word.

    header_bytes may be bytes or str; a str is first encoded to bytes using
    charset.  charset (default iso-8859-1) is also the name written into the
    resulting '=?charset?b?...?=' token.  An empty or otherwise false value
    yields the empty string.
    """
    if not header_bytes:
        return ""
    # Text input has to become bytes before it can be base64-encoded.
    raw = (header_bytes.encode(charset)
           if isinstance(header_bytes, str) else header_bytes)
    payload = b64encode(raw).decode("ascii")
    return '=?%s?b?%s?=' % (charset, payload)
---|
73 | n/a | |
---|
74 | n/a | |
---|
75 | n/a | |
---|
def body_encode(s, maxlinelen=76, eol=NL):
    r"""Encode binary data with base64, wrapped into lines.

    s is the data to encode.  It is handed to binascii.b2a_base64(), so it
    must be a bytes-like object rather than a str.

    Each line will be wrapped at, at most, maxlinelen characters (defaults to
    76 characters).  Input is consumed in whole 3-byte groups per line, so
    '=' padding can only appear at the very end of the output.  A maxlinelen
    below 4 cannot hold even one 4-character group; in that case one group is
    emitted per line.

    Each line of encoded text will end with eol, which defaults to "\n". Set
    this to "\r\n" if you will be using the result of this function directly
    in an email.

    Returns a str; empty input yields the empty string.
    """
    if not s:
        # Always return a str so the result type does not depend on whether
        # the (empty) input was bytes, str or None.
        return ""

    # Round the per-line input size down to a multiple of 3 bytes: a partial
    # group in the middle of the data would be padded with '=' and push the
    # line past maxlinelen.  Never go below one group, so the range() step
    # can be neither zero nor negative.
    max_unencoded = max(maxlinelen // 4 * 3, 3)

    encvec = []
    for i in range(0, len(s), max_unencoded):
        # b2a_base64() appends a "\n" to what it returns; swap it for eol
        # when a different line ending was requested.
        enc = b2a_base64(s[i:i + max_unencoded]).decode("ascii")
        if eol != NL and enc.endswith(NL):
            enc = enc[:-1] + eol
        encvec.append(enc)
    return EMPTYSTRING.join(encvec)
---|
99 | n/a | |
---|
100 | n/a | |
---|
101 | n/a | |
---|
def decode(string):
    """Decode a raw base64 string, returning a bytes object.

    The input may be a str or a bytes-like object; a str is converted to
    bytes with the raw-unicode-escape codec before decoding.  Any false
    value (empty string, empty bytes, None) decodes to empty bytes.

    This function does not parse a full MIME header value encoded with
    base64 (like =?iso-8859-1?b?bmloISBuaWgh?=) -- please use the high
    level email.header class for that functionality.
    """
    if not string:
        return b''
    if isinstance(string, str):
        # Convert text to bytes before handing it to a2b_base64().
        string = string.encode('raw-unicode-escape')
    return a2b_base64(string)
---|
115 | n/a | |
---|
116 | n/a | |
---|
# For convenience and backwards compatibility with the standard base64
# module: both names below are plain aliases of decode().
body_decode = decode
decodestring = decode
---|