1 | n/a | # Copyright (C) 2001-2007 Python Software Foundation |
---|
2 | n/a | # Author: Barry Warsaw |
---|
3 | n/a | # Contact: email-sig@python.org |
---|
4 | n/a | |
---|
5 | n/a | """Basic message object for the email package object model.""" |
---|
6 | n/a | |
---|
7 | n/a | __all__ = ['Message', 'EmailMessage'] |
---|
8 | n/a | |
---|
9 | n/a | import re |
---|
10 | n/a | import uu |
---|
11 | n/a | import quopri |
---|
12 | n/a | from io import BytesIO, StringIO |
---|
13 | n/a | |
---|
14 | n/a | # Intrapackage imports |
---|
15 | n/a | from email import utils |
---|
16 | n/a | from email import errors |
---|
17 | n/a | from email._policybase import Policy, compat32 |
---|
18 | n/a | from email import charset as _charset |
---|
19 | n/a | from email._encoded_words import decode_b |
---|
# Module-level alias for the Charset class of the email.charset module.
Charset = _charset.Charset

# Separator used when joining a header value with its parameters.
SEMISPACE = '; '

# Pattern matching the `special' characters that may occur in a parameter
# value; when any of them is present the value has to be quoted.
tspecials = re.compile(r'[ \(\)<>@,;:\\"/\[\]\?=]')
---|
27 | n/a | |
---|
28 | n/a | |
---|
def _splitparam(param):
    """Split a header value at its first semicolon.

    Returns a 2-tuple (main_value, rest), both stripped of surrounding
    whitespace.  rest is None when the value contains no semicolon at all.
    This is not a strict RFC 2045 (section 5.1) parser; it is a simple split
    that copes with most headers found in the wild.
    """
    # The argument might be a Header instance, so stringify it first.
    head, semicolon, tail = str(param).partition(';')
    rest = tail.strip() if semicolon else None
    return head.strip(), rest
---|
38 | n/a | |
---|
def _formatparam(param, value=None, quote=True):
    """Format a parameter name and optional value as a `key=value' string.

    param is the parameter name.  When value is None or empty, the bare name
    is returned.  A tuple value is taken to be (charset, language, value) and
    is encoded according to RFC 2231 rules; a string value containing
    non-ASCII characters is likewise RFC 2231 encoded, using the utf-8
    charset and an empty language.  RFC 2231 encoded values are never
    quoted.  Any other value is wrapped in double quotes when quote is true
    or when it contains one of the tspecials characters.
    """
    if value is None or len(value) == 0:
        return param
    if isinstance(value, tuple):
        # (charset, language, value); charset is a string here, not a
        # Charset instance.
        encoded = utils.encode_rfc2231(value[2], value[0], value[1])
        return '%s=%s' % (param + '*', encoded)
    try:
        value.encode('ascii')
    except UnicodeEncodeError:
        encoded = utils.encode_rfc2231(value, 'utf-8', '')
        return '%s=%s' % (param + '*', encoded)
    # BAW: Please check this.  I think that if quote is set it should
    # force quoting even if not necessary.
    if quote or tspecials.search(value):
        return '%s="%s"' % (param, utils.quote(value))
    return '%s=%s' % (param, value)
---|
72 | n/a | |
---|
def _parseparam(s):
    """Split a header value into its semicolon-separated pieces.

    Returns a list of stripped strings.  A semicolon that sits inside a
    double-quoted section does not split.  For pieces that contain an `='
    the text before the first `=' is lower-cased and both sides are
    stripped of whitespace.
    """
    # RDM This might be a Header, so for now stringify it.
    remaining = ';' + str(s)
    pieces = []
    while remaining.startswith(';'):
        remaining = remaining[1:]
        cut = remaining.find(';')
        while cut > 0:
            # An odd number of unescaped quotes before the candidate means
            # the semicolon is inside a quoted string; look for the next one.
            quotes = (remaining.count('"', 0, cut)
                      - remaining.count('\\"', 0, cut))
            if quotes % 2 == 0:
                break
            cut = remaining.find(';', cut + 1)
        if cut < 0:
            cut = len(remaining)
        field = remaining[:cut]
        name, equals, val = field.partition('=')
        if equals:
            field = name.strip().lower() + '=' + val.strip()
        pieces.append(field.strip())
        remaining = remaining[cut:]
    return pieces
---|
91 | n/a | |
---|
92 | n/a | |
---|
def _unquotevalue(value):
    """Remove quoting from a parameter value.

    A plain value is passed through utils.unquote().  A tuple value is
    treated as an RFC 2231 (charset, language, value) triple: only its third
    item is unquoted and a 3-tuple is returned.  Unlike
    utils.collapse_rfc2231_value() no conversion to unicode is attempted.
    """
    if not isinstance(value, tuple):
        return utils.unquote(value)
    charset, language = value[0], value[1]
    return charset, language, utils.unquote(value[2])
---|
102 | n/a | |
---|
103 | n/a | |
---|
104 | n/a | |
---|
105 | n/a | class Message: |
---|
106 | n/a | """Basic message object. |
---|
107 | n/a | |
---|
108 | n/a | A message object is defined as something that has a bunch of RFC 2822 |
---|
109 | n/a | headers and a payload. It may optionally have an envelope header |
---|
110 | n/a | (a.k.a. Unix-From or From_ header). If the message is a container (i.e. a |
---|
111 | n/a | multipart or a message/rfc822), then the payload is a list of Message |
---|
112 | n/a | objects, otherwise it is a string. |
---|
113 | n/a | |
---|
114 | n/a | Message objects implement part of the `mapping' interface, which assumes |
---|
115 | n/a | there is exactly one occurrence of the header per message. Some headers |
---|
116 | n/a | do in fact appear multiple times (e.g. Received) and for those headers, |
---|
117 | n/a | you must use the explicit API to set or get all the headers. Not all of |
---|
118 | n/a | the mapping methods are implemented. |
---|
119 | n/a | """ |
---|
def __init__(self, policy=compat32):
    """Create an empty message that uses the given policy."""
    self.policy = policy
    # List of (name, value) header pairs, in insertion order.
    self._headers = []
    # Nothing is known about envelope, body or charset yet.
    self._unixfrom = None
    self._payload = None
    self._charset = None
    # Defaults for multipart messages
    self.preamble = None
    self.epilogue = None
    self.defects = []
    # Default content type
    self._default_type = 'text/plain'
---|
131 | n/a | |
---|
def __str__(self):
    """Return the whole message, formatted as a string.

    This simply delegates to as_string() with its default arguments.
    """
    text = self.as_string()
    return text
---|
136 | n/a | |
---|
def as_string(self, unixfrom=False, maxheaderlen=0, policy=None):
    """Return the entire formatted message as a string.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  For backward compatibility reasons maxheaderlen defaults to 0,
    so it must be passed explicitly if a different value is wanted.
    'policy' is handed to the Generator instance used to serialize the
    message; when it is not specified, the policy associated with the
    message instance is used.

    If the message object contains binary data that is not encoded
    according to RFC standards, the non-compliant data will be replaced by
    unicode "unknown character" code points.
    """
    # Imported here rather than at module level.
    from email.generator import Generator
    if policy is None:
        policy = self.policy
    buffer = StringIO()
    generator = Generator(buffer,
                          mangle_from_=False,
                          maxheaderlen=maxheaderlen,
                          policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
---|
160 | n/a | |
---|
def __bytes__(self):
    """Return the whole message, formatted as a bytes object.

    This simply delegates to as_bytes() with its default arguments.
    """
    data = self.as_bytes()
    return data
---|
165 | n/a | |
---|
def as_bytes(self, unixfrom=False, policy=None):
    """Return the entire formatted message as a bytes object.

    Optional 'unixfrom', when true, means include the Unix From_ envelope
    header.  'policy' is handed to the BytesGenerator instance used to
    serialize the message; when it is not specified, the policy associated
    with the message instance is used.
    """
    # Imported here rather than at module level.
    from email.generator import BytesGenerator
    if policy is None:
        policy = self.policy
    buffer = BytesIO()
    generator = BytesGenerator(buffer, mangle_from_=False, policy=policy)
    generator.flatten(self, unixfrom=unixfrom)
    return buffer.getvalue()
---|
180 | n/a | |
---|
def is_multipart(self):
    """Return True if the message consists of multiple parts.

    A message counts as multipart exactly when its payload is a list.
    """
    payload = self._payload
    return isinstance(payload, list)
---|
184 | n/a | |
---|
185 | n/a | # |
---|
186 | n/a | # Unix From_ line |
---|
187 | n/a | # |
---|
def set_unixfrom(self, unixfrom):
    """Store unixfrom as the message's Unix From_ envelope line."""
    envelope = unixfrom
    self._unixfrom = envelope
---|
190 | n/a | |
---|
def get_unixfrom(self):
    """Return the stored Unix From_ envelope line (None if never set)."""
    envelope = self._unixfrom
    return envelope
---|
193 | n/a | |
---|
194 | n/a | # |
---|
195 | n/a | # Payload manipulation. |
---|
196 | n/a | # |
---|
def attach(self, payload):
    """Add the given payload to the current payload.

    After this call the payload is always a list: a message without a
    payload gets a new one-element list, otherwise the object is appended
    to the existing list.  A TypeError is raised when the existing payload
    cannot be appended to (i.e. it is not a list).  To set the payload to a
    scalar object, use set_payload() instead.
    """
    if self._payload is None:
        self._payload = [payload]
        return
    try:
        self._payload.append(payload)
    except AttributeError:
        raise TypeError("Attach is not valid on a message with a"
                        " non-multipart payload")
---|
212 | n/a | |
---|
def get_payload(self, i=None, decode=False):
    """Return a reference to the payload.

    The payload will either be a list object or a string.  If you mutate
    the list object, you modify the message's payload in place.  Optional
    i returns that index into the payload.

    Optional decode is a flag indicating whether the payload should be
    decoded or not, according to the Content-Transfer-Encoding header
    (default is False).

    When True and the message is not a multipart, the payload will be
    decoded if this header's value is `quoted-printable', `base64' or one
    of the uuencode names; the header value is compared case-insensitively
    and ignoring surrounding whitespace.  If some other encoding is used,
    or the header is missing, or if the payload has bogus uuencoded data,
    the payload is returned as bytes without further decoding.  Defects
    found while decoding base64 are reported to the message's policy.  A
    payload that is neither a string nor bytes (for example None) is
    returned as-is.

    If the message is a multipart and the decode flag is True, then None
    is returned.

    Raises TypeError if i is given and the payload is not a list.
    """
    # Here is the logic table for this code, based on the email5.0.0 code:
    #   i     decode  is_multipart  result
    # ------  ------  ------------  ------------------------------
    #  None   True    True          None
    #   i     True    True          None
    #  None   False   True          _payload (a list)
    #   i     False   True          _payload element i (a Message)
    #   i     False   False         error (not a list)
    #   i     True    False         error (not a list)
    #  None   False   False         _payload
    #  None   True    False         _payload decoded (bytes)
    # Note that Barry planned to factor out the 'decode' case, but that
    # isn't so easy now that we handle the 8 bit data, which needs to be
    # converted in both the decode and non-decode path.
    if self.is_multipart():
        if decode:
            return None
        if i is None:
            return self._payload
        return self._payload[i]
    # For backward compatibility, use isinstance and this error message
    # instead of the more logical is_multipart test.
    if i is not None and not isinstance(self._payload, list):
        raise TypeError('Expected list, got %s' % type(self._payload))
    payload = self._payload
    # cte might be a Header, so for now stringify it.  Surrounding
    # whitespace is stripped so that a value such as 'base64 ' is still
    # recognised.
    cte = str(self.get('content-transfer-encoding', '')).strip().lower()
    # Bytes form of the payload; stays None when there is nothing that can
    # be transfer-decoded.
    bpayload = None
    if isinstance(payload, str):
        if utils._has_surrogates(payload):
            bpayload = payload.encode('ascii', 'surrogateescape')
            if not decode:
                try:
                    payload = bpayload.decode(
                        self.get_param('charset', 'ascii'), 'replace')
                except LookupError:
                    # Unknown charset name: fall back to ascii.
                    payload = bpayload.decode('ascii', 'replace')
        elif decode:
            try:
                bpayload = payload.encode('ascii')
            except UnicodeError:
                # This won't happen for RFC compliant messages (messages
                # containing only ASCII code points in the unicode input).
                # If it does happen, turn the string into bytes in a way
                # guaranteed not to fail.
                bpayload = payload.encode('raw-unicode-escape')
    elif isinstance(payload, bytes):
        # payload may already be bytes; decode it directly.
        bpayload = payload
    if not decode:
        return payload
    if bpayload is None:
        # Neither text nor bytes (e.g. no payload at all): there is nothing
        # to decode, so hand the payload back unchanged instead of failing.
        return payload
    if cte == 'quoted-printable':
        return quopri.decodestring(bpayload)
    elif cte == 'base64':
        # XXX: this is a bit of a hack; decode_b should probably be factored
        # out somewhere, but I haven't figured out where yet.
        value, defects = decode_b(b''.join(bpayload.splitlines()))
        for defect in defects:
            self.policy.handle_defect(self, defect)
        return value
    elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
        in_file = BytesIO(bpayload)
        out_file = BytesIO()
        try:
            uu.decode(in_file, out_file, quiet=True)
            return out_file.getvalue()
        except uu.Error:
            # Some decoding problem
            return bpayload
    if isinstance(payload, str):
        return bpayload
    return payload
---|
302 | n/a | |
---|
def set_payload(self, payload, charset=None):
    """Set the payload to the given value.

    Optional charset sets the message's default character set.  See
    set_charset() for details.

    A payload with an encode() method is stored unchanged when no charset
    is given; with a charset it is first encoded to that charset's output
    charset.  A payload with a decode() method is stored as a string
    decoded with the ascii codec and the surrogateescape error handler.
    Any other object is stored as-is.
    """
    is_text = hasattr(payload, 'encode')
    if is_text and charset is None:
        self._payload = payload
        return
    if is_text:
        if not isinstance(charset, Charset):
            charset = Charset(charset)
        payload = payload.encode(charset.output_charset)
    if hasattr(payload, 'decode'):
        payload = payload.decode('ascii', 'surrogateescape')
    self._payload = payload
    if charset is not None:
        self.set_charset(charset)
---|
322 | n/a | |
---|
def set_charset(self, charset):
    """Set the charset of the payload to a given character set.

    charset can be a Charset instance, a string naming a character set, or
    None.  If it is a string it will be converted to a Charset instance.
    If charset is None, the charset parameter will be removed from the
    Content-Type field.  Anything else will generate a TypeError.

    The message will be assumed to be of type text/* encoded with
    charset.input_charset.  It will be converted to charset.output_charset
    and encoded properly, if needed, when generating the plain text
    representation of the message.  MIME headers (MIME-Version,
    Content-Type, Content-Transfer-Encoding) will be added as needed.
    """
    if charset is None:
        self.del_param('charset')
        self._charset = None
        return
    if not isinstance(charset, Charset):
        charset = Charset(charset)
    self._charset = charset
    if 'MIME-Version' not in self:
        self.add_header('MIME-Version', '1.0')
    output_name = charset.get_output_charset()
    if 'Content-Type' in self:
        self.set_param('charset', output_name)
    else:
        self.add_header('Content-Type', 'text/plain', charset=output_name)
    if charset != output_name:
        self._payload = charset.body_encode(self._payload)
    if 'Content-Transfer-Encoding' in self:
        # An explicit transfer encoding is already present; leave it alone.
        return
    cte = charset.get_body_encoding()
    try:
        # The body encoding may be a callable that updates the message
        # itself ...
        cte(self)
    except TypeError:
        # ... otherwise it is used as the header value and the payload is
        # body-encoded here.
        # This 'if' is for backward compatibility, it allows unicode
        # through even though that won't work correctly if the
        # message is serialized.
        raw = self._payload
        if raw:
            try:
                raw = raw.encode('ascii', 'surrogateescape')
            except UnicodeError:
                raw = raw.encode(charset.output_charset)
        self._payload = charset.body_encode(raw)
        self.add_header('Content-Transfer-Encoding', cte)
---|
369 | n/a | |
---|
def get_charset(self):
    """Return the Charset instance associated with the message's payload.

    The value is whatever was last stored in the message's _charset
    attribute; it may be None.
    """
    current = self._charset
    return current
---|
374 | n/a | |
---|
375 | n/a | # |
---|
376 | n/a | # MAPPING INTERFACE (partial) |
---|
377 | n/a | # |
---|
def __len__(self):
    """Return the total number of headers, including duplicates."""
    header_count = len(self._headers)
    return header_count
---|
381 | n/a | |
---|
def __getitem__(self, name):
    """Get a header value.

    A missing header yields None rather than an exception.

    Note that if the header appeared multiple times, exactly which
    occurrence gets returned is undefined.  Use get_all() to get all
    the values matching a header field name.
    """
    value = self.get(name)
    return value
---|
392 | n/a | |
---|
def __setitem__(self, name, val):
    """Set the value of a header.

    Note: this does not overwrite an existing header with the same field
    name.  Use __delitem__() first to delete any existing headers.

    When the policy reports a maximum count for this header name and that
    many headers with the name (compared case-insensitively) are already
    present, a ValueError is raised and nothing is added.
    """
    limit = self.policy.header_max_count(name)
    if limit:
        wanted = name.lower()
        existing = sum(1 for key, _ in self._headers
                       if key.lower() == wanted)
        if existing and existing >= limit:
            raise ValueError("There may be at most {} {} headers "
                             "in a message".format(limit, name))
    self._headers.append(self.policy.header_store_parse(name, val))
---|
410 | n/a | |
---|
def __delitem__(self, name):
    """Delete all occurrences of a header, if present.

    Header names are compared case-insensitively.  Does not raise an
    exception if the header is missing.
    """
    unwanted = name.lower()
    # Build a fresh list instead of mutating the existing one in place.
    self._headers = [(key, value) for key, value in self._headers
                     if key.lower() != unwanted]
---|
422 | n/a | |
---|
def __contains__(self, name):
    """Return True if a header with this name (any case) is present."""
    wanted = name.lower()
    return any(key.lower() == wanted for key, _ in self._headers)
---|
425 | n/a | |
---|
def __iter__(self):
    """Yield each header field name in stored order, duplicates included."""
    yield from (field for field, _ in self._headers)
---|
429 | n/a | |
---|
def keys(self):
    """Return a list of all the message's header field names.

    The names come back in the order the headers appeared in the original
    message or were added to it, and may contain duplicates.  Any fields
    deleted and re-inserted are always appended to the header list.
    """
    names = []
    for name, _ in self._headers:
        names.append(name)
    return names
---|
439 | n/a | |
---|
def values(self):
    """Return a list of all the message's header values.

    Each stored value is passed through the policy's header_fetch_parse().
    The values come back in the order the headers appeared in the original
    message or were added to it, and may contain duplicates.  Any fields
    deleted and re-inserted are always appended to the header list.
    """
    fetched = []
    for name, raw in self._headers:
        fetched.append(self.policy.header_fetch_parse(name, raw))
    return fetched
---|
450 | n/a | |
---|
def items(self):
    """Get all the message's header fields and values.

    Returns a list of (name, value) pairs where each value has been passed
    through the policy's header_fetch_parse().  The pairs come back in the
    order the headers appeared in the original message or were added to it,
    and may contain duplicates.  Any fields deleted and re-inserted are
    always appended to the header list.
    """
    pairs = []
    for name, raw in self._headers:
        pairs.append((name, self.policy.header_fetch_parse(name, raw)))
    return pairs
---|
461 | n/a | |
---|
def get(self, name, failobj=None):
    """Get a header value.

    Returns the first header whose name matches (case-insensitively),
    passed through the policy's header_fetch_parse().  Works like
    __getitem__() except that failobj is returned instead of None when the
    field is missing.
    """
    wanted = name.lower()
    first = next(((key, raw) for key, raw in self._headers
                  if key.lower() == wanted), None)
    if first is None:
        return failobj
    return self.policy.header_fetch_parse(*first)
---|
473 | n/a | |
---|
474 | n/a | # |
---|
475 | n/a | # "Internal" methods (public API, but only intended for use by a parser |
---|
476 | n/a | # or generator, not normal application code). |
---|
477 | n/a | # |
---|
478 | n/a | |
---|
def set_raw(self, name, value):
    """Store name and value in the model without modification.

    The pair is appended to the header list as-is; the policy is not
    consulted.  This is an "internal" API, intended only for use by a
    parser.
    """
    pair = (name, value)
    self._headers.append(pair)
---|
485 | n/a | |
---|
def raw_items(self):
    """Return the (name, value) header pairs without modification.

    The result is an iterator over a copy of the header list, so changes
    made to the message's headers afterwards do not affect it.  This is an
    "internal" API, intended only for use by a generator.
    """
    snapshot = self._headers.copy()
    return iter(snapshot)
---|
492 | n/a | |
---|
493 | n/a | # |
---|
494 | n/a | # Additional useful stuff |
---|
495 | n/a | # |
---|
496 | n/a | |
---|
def get_all(self, name, failobj=None):
    """Return a list of all the values for the named field.

    Names are compared case-insensitively and every value is passed through
    the policy's header_fetch_parse().  The values come back in the order
    the headers appeared in the original message, and may contain
    duplicates.  Any fields deleted and re-inserted are always appended to
    the header list.

    If no such fields exist, failobj is returned (defaults to None).
    """
    wanted = name.lower()
    matches = [self.policy.header_fetch_parse(key, raw)
               for key, raw in self._headers
               if key.lower() == wanted]
    return matches if matches else failobj
---|
514 | n/a | |
---|
def add_header(self, _name, _value, **_params):
    """Extended header setting.

    _name is the header field to add and _value its main value (left out
    of the header when it is None).  Keyword arguments can be used to set
    additional parameters for the header field, with underscores converted
    to dashes.  Normally the parameter will be added as key="value" unless
    value is None, in which case only the key will be added.  If a
    parameter value contains non-ASCII characters it can be specified as a
    three-tuple of (charset, language, value), in which case it will be
    encoded according to RFC2231 rules.  Otherwise it will be encoded using
    the utf-8 charset and a language of ''.

    Examples:

    msg.add_header('content-disposition', 'attachment', filename='bud.gif')
    msg.add_header('content-disposition', 'attachment',
                   filename=('utf-8', '', 'Fußballer.ppt'))
    msg.add_header('content-disposition', 'attachment',
                   filename='Fußballer.ppt')
    """
    # The main value, when present, always comes first.
    pieces = [] if _value is None else [_value]
    for key, val in _params.items():
        dashed = key.replace('_', '-')
        pieces.append(dashed if val is None else _formatparam(dashed, val))
    self[_name] = SEMISPACE.join(pieces)
---|
544 | n/a | |
---|
def replace_header(self, _name, _value):
    """Replace a header.

    Replace the first matching header found in the message, retaining
    header order and case.  Header names are compared case-insensitively;
    the stored spelling of the name is kept and the new value is passed
    through the policy's header_store_parse().

    Raises KeyError (with the lower-cased name) if no matching header was
    found.
    """
    _name = _name.lower()
    for index, (key, _old) in enumerate(self._headers):
        if key.lower() == _name:
            # Only the first match is replaced.
            self._headers[index] = self.policy.header_store_parse(key, _value)
            break
    else:
        raise KeyError(_name)
---|
559 | n/a | |
---|
560 | n/a | # |
---|
561 | n/a | # Use these three methods instead of the three above. |
---|
562 | n/a | # |
---|
563 | n/a | |
---|
def get_content_type(self):
    """Return the message's content type.

    The returned string is coerced to lower case of the form
    `maintype/subtype'.  If there was no Content-Type header in the
    message, the default type as given by get_default_type() will be
    returned.  Since according to RFC 2045, messages always have a default
    type this will always return a value.

    RFC 2045 defines a message's default type to be text/plain unless it
    appears inside a multipart/digest container, in which case it would be
    message/rfc822.
    """
    sentinel = object()
    header = self.get('content-type', sentinel)
    if header is sentinel:
        # This should have no parameters
        return self.get_default_type()
    main_value, _rest = _splitparam(header)
    ctype = main_value.lower()
    # RFC 2045, section 5.2 says if its invalid, use text/plain
    return ctype if ctype.count('/') == 1 else 'text/plain'
---|
587 | n/a | |
---|
def get_content_maintype(self):
    """Return the message's main content type.

    This is the `maintype' part of the string returned by
    get_content_type(), i.e. everything before the first slash.
    """
    maintype, _slash, _subtype = self.get_content_type().partition('/')
    return maintype
---|
596 | n/a | |
---|
def get_content_subtype(self):
    """Returns the message's sub-content type.

    This is the `subtype' part of the string returned by
    get_content_type(), i.e. the piece following the first slash.
    """
    pieces = self.get_content_type().split('/')
    return pieces[1]
---|
605 | n/a | |
---|
def get_default_type(self):
    """Return the `default' content type.

    Most messages have a default content type of text/plain, except for
    messages that are subparts of multipart/digest containers.  Such
    subparts have a default content type of message/rfc822.
    """
    default = self._default_type
    return default
---|
614 | n/a | |
---|
def set_default_type(self, ctype):
    """Set the `default' content type.

    ctype should be either "text/plain" or "message/rfc822", although this
    is not enforced.  The default content type is not stored in the
    Content-Type header.
    """
    new_default = ctype
    self._default_type = new_default
---|
623 | n/a | |
---|
def _get_params_preserve(self, failobj, header):
    """Return the parameters of header as a list of (name, value) pairs.

    Works like get_params() but preserves the quoting of values.  failobj
    is returned when the header is not present.  A bare attribute (no `='
    sign) is paired with the empty string before the list is handed to
    utils.decode_params().
    """
    # BAW: should this be part of the public interface?
    sentinel = object()
    raw_header = self.get(header, sentinel)
    if raw_header is sentinel:
        return failobj
    pairs = []
    for piece in _parseparam(raw_header):
        key, equals, val = piece.partition('=')
        if equals:
            pairs.append((key.strip(), val.strip()))
        else:
            # Must have been a bare attribute
            pairs.append((piece.strip(), ''))
    return utils.decode_params(pairs)
---|
644 | n/a | |
---|
def get_params(self, failobj=None, header='content-type', unquote=True):
    """Return the message's Content-Type parameters, as a list.

    The elements of the returned list are 2-tuples of key/value pairs, as
    split on the `=' sign.  The left hand side of the `=' is the key,
    while the right hand side is the value.  If there is no `=' sign in
    the parameter the value is the empty string.  The value is as
    described in the get_param() method.

    Optional failobj is the object to return if there is no Content-Type
    header.  Optional header is the header to search instead of
    Content-Type.  If unquote is True, the value is unquoted.
    """
    sentinel = object()
    found = self._get_params_preserve(sentinel, header)
    if found is sentinel:
        return failobj
    if not unquote:
        return found
    return [(key, _unquotevalue(val)) for key, val in found]
---|
666 | n/a | |
---|
def get_param(self, param, failobj=None, header='content-type',
              unquote=True):
    """Look up a single parameter of a header (Content-Type by default).

    failobj is returned when the header is missing or when it carries no
    parameter named param.  header selects which header is searched.

    Parameter names are matched without regard to case.  The result is
    either a plain string or, for a parameter that was RFC 2231 encoded, a
    3-tuple of the form (CHARSET, LANGUAGE, VALUE).  CHARSET and LANGUAGE
    may both be None, in which case VALUE should be treated as us-ascii;
    LANGUAGE can usually be ignored.  The string (or the VALUE member of
    the tuple) is unquoted unless unquote is false.

    An application that does not care whether RFC 2231 encoding was used
    can flatten the result to a string like this:

        rawparam = msg.get_param('foo')
        param = email.utils.collapse_rfc2231_value(rawparam)

    """
    if header not in self:
        return failobj
    wanted = param.lower()
    for key, raw in self._get_params_preserve(failobj, header):
        if key.lower() != wanted:
            continue
        return _unquotevalue(raw) if unquote else raw
    return failobj
---|
700 | n/a | |
---|
def set_param(self, param, value, header='Content-Type', requote=True,
              charset=None, language='', replace=False):
    """Set a parameter in the Content-Type header.

    If the parameter already exists in the header, its value will be
    replaced with the new value.  This includes a parameter that is
    present with an empty value (for example `foo=""' or a bare `foo'),
    which is rewritten in place instead of being appended a second time.

    If header is Content-Type and has not yet been defined for this
    message, it will be set to "text/plain" and the new parameter and
    value will be appended as per RFC 2045.

    An alternate header can be specified in the header argument, and all
    parameters will be quoted as necessary unless requote is False.

    If charset is specified, the parameter will be encoded according to RFC
    2231.  Optional language specifies the RFC 2231 language, defaulting
    to the empty string.  Both charset and language should be strings.
    They are ignored when value is already a tuple.

    If replace is False (the default) a changed header is deleted and
    added again; if replace is True it is rewritten through
    replace_header() instead.  Nothing is written when the resulting
    header value equals the current one.
    """
    if not isinstance(value, tuple) and charset:
        value = (charset, language, value)

    if header not in self and header.lower() == 'content-type':
        ctype = 'text/plain'
    else:
        ctype = self.get(header)

    # A sentinel distinguishes "parameter absent" from "parameter present
    # with an empty (falsy) value"; a plain truth test would treat the
    # latter as absent and append a duplicate.
    missing = object()
    if self.get_param(param, missing, header) is missing:
        new_item = _formatparam(param, value, requote)
        ctype = SEMISPACE.join([ctype, new_item]) if ctype else new_item
    else:
        wanted = param.lower()
        ctype = ''
        for old_param, old_value in self.get_params(header=header,
                                                    unquote=requote):
            if old_param.lower() == wanted:
                item = _formatparam(param, value, requote)
            else:
                item = _formatparam(old_param, old_value, requote)
            ctype = SEMISPACE.join([ctype, item]) if ctype else item
    if ctype != self.get(header):
        if replace:
            self.replace_header(header, ctype)
        else:
            del self[header]
            self[header] = ctype
---|
751 | n/a | |
---|
def del_param(self, param, header='content-type', requote=True):
    """Strip one parameter, name and value, out of a header.

    The header (Content-Type unless header says otherwise) is rebuilt from
    its remaining parameters, each quoted as necessary unless requote is
    False.  When the rebuilt text differs from the current value the
    header is deleted and added again.  Nothing happens if the header is
    not present.
    """
    if header not in self:
        return
    unwanted = param.lower()
    rebuilt = ''
    for name, val in self.get_params(header=header, unquote=requote):
        if name.lower() == unwanted:
            continue
        piece = _formatparam(name, val, requote)
        rebuilt = SEMISPACE.join([rebuilt, piece]) if rebuilt else piece
    if rebuilt != self.get(header):
        del self[header]
        self[header] = rebuilt
---|
773 | n/a | |
---|
def set_type(self, type, header='Content-Type', requote=True):
    """Replace the maintype/subtype of a header, keeping its parameters.

    type has to be a string of the form "maintype/subtype"; anything that
    does not contain exactly one slash raises ValueError.

    The existing parameters of the header are carried over to the new
    value.  With requote false their current quoting is left untouched;
    otherwise (the default) they are quoted again.

    header selects a header other than Content-Type.  Whenever the header
    being set is Content-Type, a MIME-Version header of 1.0 is (re)added
    as well.
    """
    # BAW: should we be strict?
    if type.count('/') != 1:
        raise ValueError
    # Touching Content-Type always brings a MIME-Version along.
    if header.lower() == 'content-type':
        del self['mime-version']
        self['MIME-Version'] = '1.0'
    if header in self:
        old_params = self.get_params(header=header, unquote=requote)
        del self[header]
        self[header] = type
        # The first entry is the previous type itself, so skip it.
        for name, val in old_params[1:]:
            self.set_param(name, val, header, requote)
    else:
        self[header] = type
---|
805 | n/a | |
---|
def get_filename(self, failobj=None):
    """Find the file name attached to this part, if there is one.

    The `filename' parameter of the Content-Disposition header is tried
    first; when it is unavailable the `name' parameter of the Content-Type
    header is used instead.  The value found is collapsed to a string and
    returned with surrounding whitespace removed.  failobj is returned
    when neither parameter exists.
    """
    sentinel = object()
    lookups = (('filename', 'content-disposition'),
               ('name', 'content-type'))
    for key, hdr in lookups:
        found = self.get_param(key, sentinel, hdr)
        if found is not sentinel:
            return utils.collapse_rfc2231_value(found).strip()
    return failobj
---|
821 | n/a | |
---|
def get_boundary(self, failobj=None):
    """Fetch the multipart boundary string, if one is declared.

    The value comes from the `boundary' parameter of the Content-Type
    header and is returned unquoted, with trailing whitespace removed.
    failobj is returned when there is no such parameter.
    """
    sentinel = object()
    raw = self.get_param('boundary', sentinel)
    if raw is not sentinel:
        # RFC 2046 says that boundaries may begin but not end in w/s
        collapsed = utils.collapse_rfc2231_value(raw)
        return collapsed.rstrip()
    return failobj
---|
834 | n/a | |
---|
835 | n/a | def set_boundary(self, boundary): |
---|
836 | n/a | """Set the boundary parameter in Content-Type to 'boundary'. |
---|
837 | n/a | |
---|
838 | n/a | This is subtly different than deleting the Content-Type header and |
---|
839 | n/a | adding a new one with a new boundary parameter via add_header(). The |
---|
840 | n/a | main difference is that using the set_boundary() method preserves the |
---|
841 | n/a | order of the Content-Type header in the original message. |
---|
842 | n/a | |
---|
843 | n/a | HeaderParseError is raised if the message has no Content-Type header. |
---|
844 | n/a | """ |
---|
845 | n/a | missing = object() |
---|
846 | n/a | params = self._get_params_preserve(missing, 'content-type') |
---|
847 | n/a | if params is missing: |
---|
848 | n/a | # There was no Content-Type header, and we don't know what type |
---|
849 | n/a | # to set it to, so raise an exception. |
---|
850 | n/a | raise errors.HeaderParseError('No Content-Type header found') |
---|
851 | n/a | newparams = [] |
---|
852 | n/a | foundp = False |
---|
853 | n/a | for pk, pv in params: |
---|
854 | n/a | if pk.lower() == 'boundary': |
---|
855 | n/a | newparams.append(('boundary', '"%s"' % boundary)) |
---|
856 | n/a | foundp = True |
---|
857 | n/a | else: |
---|
858 | n/a | newparams.append((pk, pv)) |
---|
859 | n/a | if not foundp: |
---|
860 | n/a | # The original Content-Type header had no boundary attribute. |
---|
861 | n/a | # Tack one on the end. BAW: should we raise an exception |
---|
862 | n/a | # instead??? |
---|
863 | n/a | newparams.append(('boundary', '"%s"' % boundary)) |
---|
864 | n/a | # Replace the existing Content-Type header with the new value |
---|
865 | n/a | newheaders = [] |
---|
866 | n/a | for h, v in self._headers: |
---|
867 | n/a | if h.lower() == 'content-type': |
---|
868 | n/a | parts = [] |
---|
869 | n/a | for k, v in newparams: |
---|
870 | n/a | if v == '': |
---|
871 | n/a | parts.append(k) |
---|
872 | n/a | else: |
---|
873 | n/a | parts.append('%s=%s' % (k, v)) |
---|
874 | n/a | val = SEMISPACE.join(parts) |
---|
875 | n/a | newheaders.append(self.policy.header_store_parse(h, val)) |
---|
876 | n/a | |
---|
877 | n/a | else: |
---|
878 | n/a | newheaders.append((h, v)) |
---|
879 | n/a | self._headers = newheaders |
---|
880 | n/a | |
---|
def get_content_charset(self, failobj=None):
    """Fetch the `charset' parameter of Content-Type, lower-cased.

    failobj is returned when the Content-Type header is missing, when it
    has no charset parameter, or when the charset name contains
    characters outside the us-ascii range.
    """
    sentinel = object()
    found = self.get_param('charset', sentinel)
    if found is sentinel:
        return failobj
    if isinstance(found, tuple):
        # RFC 2231 encoded, so decode it, and it better end up as ascii.
        codec = found[0] or 'us-ascii'
        text = found[2]
        try:
            # LookupError signals a codec Python does not know about;
            # UnicodeError signals text that does not fit that codec.
            found = str(text.encode('raw-unicode-escape'), codec)
        except (LookupError, UnicodeError):
            found = text
    # A charset name must consist solely of us-ascii characters.
    try:
        found.encode('us-ascii')
    except UnicodeError:
        return failobj
    # RFC 2046, $4.1.2 says charsets are not case sensitive
    return found.lower()
---|
910 | n/a | |
---|
def get_charsets(self, failobj=None):
    """Collect the charset of this message and of every part below it.

    The message is traversed with walk(), and for each part visited the
    result of its get_content_charset(failobj) is appended to the list.
    Every entry is therefore either the charset named in that part's
    Content-Type header or failobj (None by default) when the part
    declares none.

    The container message itself (i.e. self) contributes one entry ahead
    of its subparts, so even a non-multipart message produces a list of
    length 1.
    """
    collected = []
    for part in self.walk():
        collected.append(part.get_content_charset(failobj))
    return collected
---|
928 | n/a | |
---|
def get_content_disposition(self):
    """Report the disposition named by Content-Disposition, or None.

    The text before the first `;' of the header is returned in lower
    case.  Per RFC 2183 that is normally 'inline' or 'attachment'.  None
    is returned when the message has no Content-Disposition header.
    """
    raw = self.get('content-disposition')
    if raw is None:
        return None
    disposition, _rest = _splitparam(raw)
    return disposition.lower()
---|
940 | n/a | |
---|
941 | n/a | # I.e. def walk(self): ... |
---|
942 | n/a | from email.iterators import walk |
---|
943 | n/a | |
---|
944 | n/a | |
---|
class MIMEPart(Message):
    """Message subclass offering a content-oriented API for MIME parts.

    On top of the base Message interface it provides helpers to locate
    the body (get_body), enumerate attachments and subparts, read and
    write content through a content manager, and convert a part into a
    multipart container.
    """

    def __init__(self, policy=None):
        """Create the part; policy defaults to email.policy.default."""
        if policy is None:
            # Imported here rather than at module level.
            from email.policy import default
            policy = default
        Message.__init__(self, policy)

    def as_string(self, unixfrom=False, maxheaderlen=None, policy=None):
        """Return the entire formatted message as a string.

        Optional 'unixfrom', when true, means include the Unix From_ envelope
        header.  maxheaderlen is retained for backward compatibility with the
        base Message class, but defaults to None, meaning that the policy value
        for max_line_length controls the header maximum length.  'policy' is
        passed to the Generator instance used to serialize the message; if it
        is not specified the policy associated with the message instance is
        used.
        """
        policy = self.policy if policy is None else policy
        if maxheaderlen is None:
            maxheaderlen = policy.max_line_length
        # unixfrom must be forwarded explicitly; otherwise the base class
        # falls back to its own default and the envelope line is dropped.
        return super().as_string(unixfrom=unixfrom,
                                 maxheaderlen=maxheaderlen,
                                 policy=policy)

    def __str__(self):
        """Serialize with a copy of the message policy that has utf8=True."""
        return self.as_string(policy=self.policy.clone(utf8=True))

    def is_attachment(self):
        """Return True if Content-Disposition says 'attachment'.

        Returns False when the header is absent.  The header value is
        expected to expose a content_disposition attribute.
        """
        c_d = self.get('content-disposition')
        return False if c_d is None else c_d.content_disposition == 'attachment'

    def _find_body(self, part, preferencelist):
        # Generator of (priority, part) candidates for get_body(), where
        # priority is the index of the matched entry in preferencelist.
        if part.is_attachment():
            return
        maintype, subtype = part.get_content_type().split('/')
        if maintype == 'text':
            if subtype in preferencelist:
                yield (preferencelist.index(subtype), part)
            return
        # A part labelled multipart/* whose payload is not a list of
        # subparts (malformed input) has nothing to descend into.
        if maintype != 'multipart' or not part.is_multipart():
            return
        if subtype != 'related':
            for subpart in part.iter_parts():
                yield from self._find_body(subpart, preferencelist)
            return
        if 'related' in preferencelist:
            yield (preferencelist.index('related'), part)
        # Look inside the related container: its root is the subpart whose
        # Content-ID matches the 'start' parameter, else the first subpart.
        candidate = None
        start = part.get_param('start')
        if start:
            for subpart in part.iter_parts():
                if subpart['content-id'] == start:
                    candidate = subpart
                    break
        if candidate is None:
            subparts = part.get_payload()
            candidate = subparts[0] if subparts else None
        if candidate is not None:
            yield from self._find_body(candidate, preferencelist)

    def get_body(self, preferencelist=('related', 'html', 'plain')):
        """Return best candidate mime part for display as 'body' of message.

        Do a depth first search, starting with self, looking for the first part
        matching each of the items in preferencelist, and return the part
        corresponding to the first item that has a match, or None if no items
        have a match.  If 'related' is not included in preferencelist, consider
        the root part of any multipart/related encountered as a candidate
        match.  Ignore parts with 'Content-Disposition: attachment'.
        """
        best_prio = len(preferencelist)
        body = None
        for prio, part in self._find_body(self, preferencelist):
            if prio < best_prio:
                best_prio = prio
                body = part
                if prio == 0:
                    # Nothing can beat the most preferred type.
                    break
        return body

    # Content types that iter_attachments() may treat as body rather than
    # attachment.
    _body_types = {('text', 'plain'),
                   ('text', 'html'),
                   ('multipart', 'related'),
                   ('multipart', 'alternative')}

    def iter_attachments(self):
        """Return an iterator over the non-main parts of a multipart.

        Skip the first of each occurrence of text/plain, text/html,
        multipart/related, or multipart/alternative in the multipart (unless
        they have a 'Content-Disposition: attachment' header) and include all
        remaining subparts in the returned iterator.  When applied to a
        multipart/related, return all parts except the root part.  Return an
        empty iterator when applied to a multipart/alternative or a
        non-multipart, and also when the payload of a multipart is not a
        list of subparts.
        """
        maintype, subtype = self.get_content_type().split('/')
        if maintype != 'multipart' or subtype == 'alternative':
            return
        payload = self.get_payload()
        if not isinstance(payload, list):
            # Malformed message: declared multipart/* but the body is a
            # single non-list payload, so there are no subparts to offer.
            return
        # Work on a copy so the message's own payload is never altered.
        parts = payload.copy()
        if subtype == 'related':
            # For related, we treat everything but the root as an attachment.
            # The root may be indicated by 'start'; if there's no start or we
            # can't find the named start, treat the first subpart as the root.
            start = self.get_param('start')
            if start:
                found = False
                attachments = []
                for part in parts:
                    if part.get('content-id') == start:
                        found = True
                    else:
                        attachments.append(part)
                if found:
                    yield from attachments
                    return
            # Slicing, unlike pop(0), also copes with an empty container.
            yield from parts[1:]
            return
        # Otherwise we more or less invert the remaining logic in get_body.
        # This only really works in edge cases (ex: non-text related or
        # alternatives) if the sending agent sets content-disposition.
        seen = []   # Only skip the first example of each candidate type.
        for part in parts:
            maintype, subtype = part.get_content_type().split('/')
            if ((maintype, subtype) in self._body_types and
                    not part.is_attachment() and subtype not in seen):
                seen.append(subtype)
                continue
            yield part

    def iter_parts(self):
        """Return an iterator over all immediate subparts of a multipart.

        Return an empty iterator for a non-multipart, and for a part that
        is labelled multipart/* but whose payload is not a list of subparts.
        """
        if self.get_content_maintype() == 'multipart' and self.is_multipart():
            yield from self.get_payload()

    def get_content(self, *args, content_manager=None, **kw):
        """Return the result of content_manager.get_content() for this part.

        content_manager defaults to the content manager of the message's
        policy; all other arguments are passed through to it unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        return content_manager.get_content(self, *args, **kw)

    def set_content(self, *args, content_manager=None, **kw):
        """Call content_manager.set_content() on this part.

        content_manager defaults to the content manager of the message's
        policy; all other arguments are passed through to it unchanged.
        """
        if content_manager is None:
            content_manager = self.policy.content_manager
        content_manager.set_content(self, *args, **kw)

    def _make_multipart(self, subtype, disallowed_subtypes, boundary):
        # Turn this part into a multipart/<subtype> container.  Existing
        # Content-* headers and the payload move into a new first subpart.
        if self.get_content_maintype() == 'multipart':
            existing_subtype = self.get_content_subtype()
            # Converting to the subtype the part already has is refused too.
            disallowed_subtypes = disallowed_subtypes + (subtype,)
            if existing_subtype in disallowed_subtypes:
                raise ValueError("Cannot convert {} to {}".format(
                    existing_subtype, subtype))
        keep_headers = []
        part_headers = []
        for name, value in self._headers:
            if name.lower().startswith('content-'):
                part_headers.append((name, value))
            else:
                keep_headers.append((name, value))
        if part_headers:
            # There is existing content, move it to the first subpart.
            part = type(self)(policy=self.policy)
            part._headers = part_headers
            part._payload = self._payload
            self._payload = [part]
        else:
            self._payload = []
        self._headers = keep_headers
        self['Content-Type'] = 'multipart/' + subtype
        if boundary is not None:
            self.set_param('boundary', boundary)

    def make_related(self, boundary=None):
        """Convert this part into a multipart/related container.

        Raises ValueError if the part is already multipart/alternative,
        multipart/mixed or multipart/related.
        """
        self._make_multipart('related', ('alternative', 'mixed'), boundary)

    def make_alternative(self, boundary=None):
        """Convert this part into a multipart/alternative container.

        Raises ValueError if the part is already multipart/mixed or
        multipart/alternative.
        """
        self._make_multipart('alternative', ('mixed',), boundary)

    def make_mixed(self, boundary=None):
        """Convert this part into a multipart/mixed container.

        Raises ValueError if the part is already multipart/mixed.
        """
        self._make_multipart('mixed', (), boundary)

    def _add_multipart(self, _subtype, *args, _disp=None, **kw):
        # Ensure self is multipart/<_subtype>, then build a new part from
        # the remaining arguments via set_content() and attach it.
        if (self.get_content_maintype() != 'multipart' or
                self.get_content_subtype() != _subtype):
            getattr(self, 'make_' + _subtype)()
        part = type(self)(policy=self.policy)
        part.set_content(*args, **kw)
        # Apply the default disposition only if set_content() did not
        # already produce a Content-Disposition header.
        if _disp and 'content-disposition' not in part:
            part['Content-Disposition'] = _disp
        self.attach(part)

    def add_related(self, *args, **kw):
        """Attach a new part (default disposition 'inline') to a
        multipart/related, converting this part to one first if needed."""
        self._add_multipart('related', *args, _disp='inline', **kw)

    def add_alternative(self, *args, **kw):
        """Attach a new part to a multipart/alternative, converting this
        part to one first if needed."""
        self._add_multipart('alternative', *args, **kw)

    def add_attachment(self, *args, **kw):
        """Attach a new part (default disposition 'attachment') to a
        multipart/mixed, converting this part to one first if needed."""
        self._add_multipart('mixed', *args, _disp='attachment', **kw)

    def clear(self):
        """Remove all headers and the payload."""
        self._headers = []
        self._payload = None

    def clear_content(self):
        """Remove the payload and every header whose name starts with
        'content-' (compared case-insensitively); keep other headers."""
        self._headers = [(n, v) for n, v in self._headers
                         if not n.lower().startswith('content-')]
        self._payload = None
---|
1157 | n/a | |
---|
1158 | n/a | |
---|
class EmailMessage(MIMEPart):
    """MIMEPart variant whose set_content() guarantees a MIME-Version."""

    def set_content(self, *args, **kw):
        """Set the content as MIMEPart does, then ensure MIME-Version.

        A 'MIME-Version: 1.0' header is added only when the message does
        not already carry a MIME-Version header.
        """
        super().set_content(*args, **kw)
        if 'MIME-Version' in self:
            return
        self['MIME-Version'] = '1.0'
---|