Python code coverage for Lib/email/generator.py

#	count	content
1	n/a	# Copyright (C) 2001-2010 Python Software Foundation
2	n/a	# Author: Barry Warsaw
3	n/a	# Contact: email-sig@python.org
4	n/a
5	n/a	"""Classes to generate plain text from a message object tree."""
6	n/a
7	n/a	__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
8	n/a
9	n/a	import re
10	n/a	import sys
11	n/a	import time
12	n/a	import random
13	n/a
14	n/a	from copy import deepcopy
15	n/a	from io import StringIO, BytesIO
16	n/a	from email.utils import _has_surrogates
17	n/a
# Joins the MIME maintype and subtype when Generator._dispatch builds the
# name of a _handle_<maintype>_<subtype> method.
UNDERSCORE = '_'
NL = '\n'  # XXX: no longer used by the code below.

# Matches one line ending of any flavour: CRLF, a lone CR, or a lone LF.
# The bars are regex alternation and must NOT be backslash-escaped; an
# escaped bar would match a literal '|' and the pattern would never split
# ordinary text into lines.
NLCRE = re.compile(r'\r\n|\r|\n')
# Matches "From " at the start of any line (MULTILINE), i.e. the lines that
# From_ mangling has to escape.
fcre = re.compile(r'^From ', re.MULTILINE)
23	n/a
24	n/a
25	n/a
class Generator:
    """Write a Message object tree out as plain text.

    The flattened message is sent to the file-like object handed to the
    constructor.
    """

    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
                 policy=None):
        """Prepare a generator that flattens messages into outfp.

        outfp is the file-like object that receives the output.  It must
        have a write() method.

        mangle_from_ is optional.  When true, body lines that start with
        "From " are escaped by putting a `>' in front of them.  When it is
        None, the value is taken from policy.mangle_from_, or is True if no
        policy was given either.

        maxheaderlen is optional.  When it is not None, flatten() uses it
        as the policy's max_line_length, i.e. the longest length of a
        non-continued header line; zero disables header wrapping.  When it
        is None the policy's own setting is used.

        policy (keyword only) is a policy object that controls a number of
        aspects of the generator's operation.  If it is None, the policy of
        the Message object passed to flatten() is used.
        """
        if mangle_from_ is None:
            if policy is None:
                mangle_from_ = True
            else:
                mangle_from_ = policy.mangle_from_
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.maxheaderlen = maxheaderlen
        self.policy = policy

    def write(self, s):
        """Hand s to the underlying file object unchanged."""
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=None):
        r"""Write the message tree rooted at msg to the output file given
        when this Generator was created.

        unixfrom, when true, makes a Unix From_ delimiter line precede the
        root message.  If msg carries no From_ line of its own, a
        `standard' one is made up.  It defaults to False, which suppresses
        the delimiter.  Subobjects never get a From_ line.

        linesep is the string written for every line break.  When it is
        None the value comes from the policy given to the constructor or,
        failing that, from the policy attached to msg.
        """
        # The _XXX attributes are for data taken straight from the msg; the
        # _encoded_XXX ones are for data that has already been converted
        # (to bytes, in the BytesGenerator) and parked in a scratch buffer.
        effective = self.policy if self.policy is not None else msg.policy
        if linesep is not None:
            effective = effective.clone(linesep=linesep)
        if self.maxheaderlen is not None:
            effective = effective.clone(max_line_length=self.maxheaderlen)
        self._NL = effective.linesep
        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        self._encoded_EMPTY = self._encode(self._EMPTY)
        # Subparts are processed by generators made with clone(), and
        # clone() hands over the computed policy (never None), so every
        # submessage ends up being flattened with that same policy.
        saved_gen_policy = self.policy
        saved_msg_policy = msg.policy
        try:
            self.policy = effective
            msg.policy = effective
            if unixfrom:
                from_line = (msg.get_unixfrom()
                             or 'From nobody ' + time.ctime(time.time()))
                self.write(from_line + self._NL)
            self._write(msg)
        finally:
            # Both the generator and the message get their policy back,
            # whether or not writing succeeded.
            self.policy = saved_gen_policy
            msg.policy = saved_msg_policy

    def clone(self, fp):
        """Return a generator of the same class and options that writes to fp."""
        # maxheaderlen is deliberately None: the policy passed along has
        # already been adjusted for it.
        return self.__class__(fp, self._mangle_from_, None,
                              policy=self.policy)

    #
    # Protected interface - undocumented ;/
    #

    # 'self.write' is used for data that comes from the source, while
    # 'self._fp.write' is used for data that comes out of a buffer (the
    # Bytes subclass has then already transformed it in its write method).
    # The split is purely pragmatic and was arrived at by experiment; always
    # calling write and letting the Bytes subclass notice already
    # transformed input would be more general, but since this whole thing is
    # a hack anyway this seems good enough.

    def _new_buffer(self):
        """Return a fresh scratch buffer (BytesGenerator returns BytesIO)."""
        return StringIO()

    def _encode(self, s):
        """Return s as is (BytesGenerator encodes the string to bytes)."""
        return s

    def _write_lines(self, lines):
        """Write the text in lines, replacing each line ending with self._NL."""
        if not lines:
            return
        *complete, tail = NLCRE.split(lines)
        for line in complete:
            self.write(line)
            self.write(self._NL)
        if tail:
            self.write(tail)
        # XXX logic says an "else: self.write(self._NL)" belongs here, but
        # the tests fail with it and pass without it.  (NLCRE.split ends
        # with a blank element if and only if there was a trailing newline.)

    def _write(self, msg):
        """Write msg: its body goes to a scratch buffer, then headers, then body."""
        # The headers cannot be written first.  Suppose a multipart message
        # has the boundary string somewhere in its body: the new boundary
        # must be worked out /before/ the headers go out, so that the
        # Content-Type: parameter is right.
        #
        # To keep the _handle_*() methods simple, whatever they write is
        # collected in a buffer.  Then we write the headers followed by the
        # buffer contents, which leaves the handlers free to adjust the
        # Content-Type: header if they need to.
        saved_fp = self._fp
        try:
            self._munge_cte = None
            body_buffer = self._new_buffer()
            self._fp = body_buffer
            self._dispatch(msg)
        finally:
            self._fp = saved_fp
            munge_cte = self._munge_cte
            del self._munge_cte
        # If the cte was munged, work on a copy of the message whose CTE and
        # Content-Type headers are replaced by the recorded values.
        if munge_cte:
            msg = deepcopy(msg)
            msg.replace_header('content-transfer-encoding', munge_cte[0])
            msg.replace_header('content-type', munge_cte[1])
        # A message object may supply its own header writer; otherwise the
        # generic one is used.
        header_writer = getattr(msg, '_write_headers', None)
        if header_writer is not None:
            header_writer(self)
        else:
            self._write_headers(msg)
        self._fp.write(body_buffer.getvalue())

    def _dispatch(self, msg):
        """Call the most specific handler available for msg's content type."""
        # Lookup order: self._handle_<maintype>_<subtype>(), then
        # self._handle_<maintype>(), and finally self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        handler = getattr(self, '_handle_' + specific, None)
        if handler is None:
            generic = main.replace('-', '_')
            handler = getattr(self, '_handle_' + generic, None)
        if handler is None:
            handler = self._writeBody
        handler(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, folded by the policy, plus a blank line."""
        for name, value in msg.raw_items():
            self.write(self.policy.fold(name, value))
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, mangling From_ lines when enabled.

        Raises TypeError if the payload is neither None nor a str.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
            if charset is not None:
                # XXX: working on a copy is an ugly hack that keeps the
                # caller's message unmodified.
                msg = deepcopy(msg)
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
                # Remember the headers of the re-encoded copy so _write()
                # can put them on the message it writes headers for.
                self._munge_cte = (msg['content-transfer-encoding'],
                                   msg['content-type'])
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._write_lines(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write a multipart body: preamble, delimited parts, epilogue."""
        # Each part is flattened on its own first; that way a boundary can
        # be picked that is known not to occur in any of the part texts.
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        msgtexts = []
        for part in subparts:
            part_buffer = self._new_buffer()
            self.clone(part_buffer).flatten(part, unixfrom=False,
                                            linesep=self._NL)
            msgtexts.append(part_buffer.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Invent a boundary that occurs in none of the part texts and
            # record it on the message.
            alltext = self._encoded_NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        preamble = msg.preamble
        if preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', preamble)
            self._write_lines(preamble)
            self.write(self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # The first body-part follows the opening boundary directly; every
        # later one is preceded by: delimiter transport-padding CRLF
        delimiter = self._NL + '--' + boundary + self._NL
        for index, body_part in enumerate(msgtexts):
            if index:
                self.write(delimiter)
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--' + self._NL)
        epilogue = msg.epilogue
        if epilogue is not None:
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', epilogue)
            self._write_lines(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed body with header wrapping switched off."""
        # Signed parts have to stay unmodified for the signature to remain
        # intact (RFC1847 2.1), hence no header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        saved_policy = self.policy
        self.policy = saved_policy.clone(max_line_length=0)
        try:
            self._handle_multipart(msg)
        finally:
            self.policy = saved_policy

    def _handle_message_delivery_status(self, msg):
        """Write the header blocks of a message/delivery-status payload."""
        # Writing the headers straight to self's file object would leave an
        # extra newline between the last header block and the boundary.
        # Sigh.
        blocks = []
        for part in msg.get_payload():
            part_buffer = self._new_buffer()
            self.clone(part_buffer).flatten(part, unixfrom=False,
                                            linesep=self._NL)
            text = part_buffer.getvalue()
            pieces = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if pieces and pieces[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(pieces[:-1]))
            else:
                blocks.append(text)
        # Joining with the line separator puts an empty line between
        # consecutive blocks without adding one after the last block.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        """Write the payload of a message/* part."""
        inner_buffer = self._new_buffer()
        inner_gen = self.clone(inner_buffer)
        # A message/rfc822 payload should be a list of length one holding
        # the Message object of the subpart; that object is flattened and
        # written out.
        # The payload is a plain string instead when, and only when,
        # HeaderParser was used on a message of mime type message/rfc822.
        # Groupwise, for example, produces such messages when forwarding
        # unadorned messages (Issue 7970).  The string body is then emitted
        # as it is.
        payload = msg._payload
        if isinstance(payload, list):
            inner_gen.flatten(msg.get_payload(0), unixfrom=False,
                              linesep=self._NL)
            payload = inner_buffer.getvalue()
        else:
            payload = self._encode(payload)
        self._fp.write(payload)

    # This used to be a module level function; it and _compile_re are
    # classmethods so that the module level function can still be provided,
    # for backward compatibility, by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It is internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random boundary string.

        If text is given, the boundary returned does not occur in text as a
        boundary line or closing boundary line.
        """
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        candidate = boundary
        counter = 0
        # Keep appending ".<counter>" for as long as the candidate shows up
        # in text on a line of its own after "--".
        while cls._compile_re('^--' + re.escape(candidate) + '(--)?$',
                              re.MULTILINE).search(text):
            candidate = boundary + '.' + str(counter)
            counter += 1
        return candidate

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile pattern s with flags (BytesGenerator compiles bytes)."""
        return re.compile(s, flags)
390	n/a
391	n/a
392	n/a	class BytesGenerator(Generator):
393	n/a	"""Generates a bytes version of a Message object tree.
394	n/a
395	n/a	Functionally identical to the base Generator except that the output is
396	n/a	bytes and not string. When surrogates were used in the input to encode
397	n/a	bytes, these are decoded back to bytes for output. If the policy has
398	n/a	cte_type set to 7bit, then the message is transformed such that the
399	n/a	non-ASCII bytes are properly content transfer encoded, using the charset
400	n/a	unknown-8bit.
401	n/a
402	n/a	The outfp object must accept bytes in its write method.
403	n/a	"""
404	n/a
405	n/a	def write(self, s):
406	n/a	self._fp.write(s.encode('ascii', 'surrogateescape'))
407	n/a
408	n/a	def _new_buffer(self):
409	n/a	return BytesIO()
410	n/a
411	n/a	def _encode(self, s):
412	n/a	return s.encode('ascii')
413	n/a
414	n/a	def _write_headers(self, msg):
415	n/a	# This is almost the same as the string version, except for handling
416	n/a	# strings with 8bit bytes.
417	n/a	for h, v in msg.raw_items():
418	n/a	self._fp.write(self.policy.fold_binary(h, v))
419	n/a	# A blank line always separates headers from body
420	n/a	self.write(self._NL)
421	n/a
422	n/a	def _handle_text(self, msg):
423	n/a	# If the string has surrogates the original source was bytes, so
424	n/a	# just write it back out.
425	n/a	if msg._payload is None:
426	n/a	return
427	n/a	if _has_surrogates(msg._payload) and not self.policy.cte_type=='7bit':
428	n/a	if self._mangle_from_:
429	n/a	msg._payload = fcre.sub(">From ", msg._payload)
430	n/a	self._write_lines(msg._payload)
431	n/a	else:
432	n/a	super(BytesGenerator,self)._handle_text(msg)
433	n/a
434	n/a	# Default body handler
435	n/a	_writeBody = _handle_text
436	n/a
437	n/a	@classmethod
438	n/a	def _compile_re(cls, s, flags):
439	n/a	return re.compile(s.encode('ascii'), flags)
440	n/a
441	n/a
442	n/a
# Text written in place of a non-text part when DecodedGenerator is not given
# a format string of its own.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Behaves like the Generator base class, but every non-text part is
    replaced by a line produced from a format string describing the part.
    """
    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
                 policy=None):
        """Like Generator.__init__(), with one more optional argument, fmt.

        All subparts of a message are visited.  For a subpart whose main
        type is `text', its payload is printed.

        For any other subpart (multipart containers are skipped), fmt is
        printed instead of the payload.  fmt is a format string expanded
        with these keywords (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        fmt defaults to None, which selects

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
                           policy=policy)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        """Print each text part's payload and a placeholder for other parts."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Containers produce no output of their own; walk() yields
                # their subparts separately.
                continue
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
                continue
            # Missing filename, description or encoding are shown as
            # bracketed "[no ...]" markers.
            print(self._fmt % {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }, file=self)
500	n/a
501	n/a
502	n/a
# Helpers used by Generator._make_boundary: _fmt is a %-format that
# zero-pads an integer to _width digits, _width being the number of digits
# in sys.maxsize - 1.
_width = len(repr(sys.maxsize - 1))
_fmt = '%%0%dd' % (_width,)

# Backward compatibility: keep the old module-level function name available.
_make_boundary = Generator._make_boundary