» Core Development > Code coverage > Lib/email/generator.py

Python code coverage for Lib/email/generator.py

# | count | content
1n/a# Copyright (C) 2001-2010 Python Software Foundation
2n/a# Author: Barry Warsaw
3n/a# Contact: email-sig@python.org
4n/a
5n/a"""Classes to generate plain text from a message object tree."""
6n/a
7n/a__all__ = ['Generator', 'DecodedGenerator', 'BytesGenerator']
8n/a
9n/aimport re
10n/aimport sys
11n/aimport time
12n/aimport random
13n/a
14n/afrom copy import deepcopy
15n/afrom io import StringIO, BytesIO
16n/afrom email.utils import _has_surrogates
17n/a
UNDERSCORE = '_'
NL = '\n'  # XXX: no longer used by the code below.

# Matches a single line ending in any of the three conventions (CRLF, bare
# CR, bare LF); used to split text so line endings can be rewritten.
NLCRE = re.compile(r'\r\n|\r|\n')
# Matches "From " at the start of any line; used for From_ mangling.
fcre = re.compile(r'^From ', re.MULTILINE)



class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, *,
                 policy=None):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default if policy
        is not set), escapes From_ lines in the body of the message by putting
        a `>' in front of them.  When mangle_from_ is None and a policy is
        given, the policy's mangle_from_ setting is used.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When it is not None it overrides the policy's
        max_line_length for the duration of flatten(); when it is None (the
        default) the policy's own setting is used.

        The policy keyword specifies a policy object that controls a number of
        aspects of the generator's operation.  If no policy is specified,
        the policy associated with the Message object passed to the
        flatten method is used.

        """

        if mangle_from_ is None:
            mangle_from_ = True if policy is None else policy.mangle_from_
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self.maxheaderlen = maxheaderlen
        self.policy = policy

    def write(self, s):
        """Write the string s to the underlying output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False, linesep=None):
        r"""Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.

        linesep specifies the characters used to indicate a new line in
        the output.  The default value is determined by the policy specified
        when the Generator instance was created or, if none was specified,
        from the policy associated with the msg.

        The policies of both the generator and msg are temporarily replaced
        by the computed policy and restored before this method returns, even
        if an exception is raised.

        """
        # We use the _XXX constants for operating on data that comes directly
        # from the msg, and _encoded_XXX constants for operating on data that
        # has already been converted (to bytes in the BytesGenerator) and
        # inserted into a temporary buffer.
        policy = msg.policy if self.policy is None else self.policy
        if linesep is not None:
            policy = policy.clone(linesep=linesep)
        if self.maxheaderlen is not None:
            policy = policy.clone(max_line_length=self.maxheaderlen)
        self._NL = policy.linesep
        self._encoded_NL = self._encode(self._NL)
        self._EMPTY = ''
        self._encoded_EMPTY = self._encode(self._EMPTY)
        # Because we use clone (below) when we recursively process message
        # subparts, and because clone uses the computed policy (not None),
        # submessages will automatically get set to the computed policy when
        # they are processed by this code.
        old_gen_policy = self.policy
        old_msg_policy = msg.policy
        try:
            self.policy = policy
            msg.policy = policy
            if unixfrom:
                ufrom = msg.get_unixfrom()
                if not ufrom:
                    ufrom = 'From nobody ' + time.ctime(time.time())
                self.write(ufrom + self._NL)
            self._write(msg)
        finally:
            self.policy = old_gen_policy
            msg.policy = old_msg_policy

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp,
                              self._mangle_from_,
                              None,  # Use policy setting, which we've adjusted
                              policy=self.policy)

    #
    # Protected interface - undocumented ;/
    #

    # Note that we use 'self.write' when what we are writing is coming from
    # the source, and self._fp.write when what we are writing is coming from a
    # buffer (because the Bytes subclass has already had a chance to transform
    # the data in its write method in that case).  This is an entirely
    # pragmatic split determined by experiment; we could be more general by
    # always using write and having the Bytes subclass write method detect when
    # it has already transformed the input; but, since this whole thing is a
    # hack anyway this seems good enough.

    def _new_buffer(self):
        """Return a fresh in-memory buffer for collecting subpart output."""
        # BytesGenerator overrides this to return BytesIO.
        return StringIO()

    def _encode(self, s):
        """Convert s to the type held by buffers from _new_buffer()."""
        # BytesGenerator overrides this to encode strings to bytes.
        return s

    def _write_lines(self, lines):
        """Write the text in lines, replacing each line ending with self._NL.

        Nothing is written when lines is empty or None.
        """
        # We have to transform the line endings.
        if not lines:
            return
        lines = NLCRE.split(lines)
        for line in lines[:-1]:
            self.write(line)
            self.write(self._NL)
        if lines[-1]:
            self.write(lines[-1])
        # XXX logic says a final self._NL should be written in the other
        # case too, but the tests fail with it and pass without it.
        # (NLCRE.split ends with a blank element if and only if there was a
        # trailing newline.)

    def _write(self, msg):
        """Write the headers and body of msg to the current output file.

        The body is generated first, into a temporary buffer, so that body
        handlers may still influence the headers that are written.
        """
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a buffer.  Then we write the
        # headers and the buffer contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._munge_cte = None
            self._fp = sfp = self._new_buffer()
            self._dispatch(msg)
        finally:
            self._fp = oldfp
            munge_cte = self._munge_cte
            del self._munge_cte
        # If we munged the cte, copy the message again and re-fix the CTE.
        if munge_cte:
            msg = deepcopy(msg)
            # replace_header() raises KeyError when the header is absent, and
            # the source message need not carry a Content-Transfer-Encoding
            # header at all.  Add it in that case; otherwise replace it in
            # place so that the existing header order is preserved.
            if msg.get('content-transfer-encoding') is None:
                msg['Content-Transfer-Encoding'] = munge_cte[0]
            else:
                msg.replace_header('content-transfer-encoding', munge_cte[0])
            msg.replace_header('content-type', munge_cte[1])
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the body handler that matches the content type of msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, folded by the policy, then a blank line."""
        for h, v in msg.raw_items():
            self.write(self.policy.fold(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write a string payload, mangling From_ lines when requested.

        Raises TypeError if the payload is neither None nor a str.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, str):
            raise TypeError('string payload expected: %s' % type(payload))
        if _has_surrogates(msg._payload):
            charset = msg.get_param('charset')
            if charset is not None:
                # XXX: This copy stuff is an ugly hack to avoid modifying the
                # existing message.
                msg = deepcopy(msg)
                del msg['content-transfer-encoding']
                msg.set_payload(payload, charset)
                payload = msg.get_payload()
                # Remember the headers of the re-encoded copy so that
                # _write() can emit them instead of the original ones.
                self._munge_cte = (msg['content-transfer-encoding'],
                                   msg['content-type'])
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._write_lines(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write the subparts of msg separated by its boundary.

        If msg has no boundary, one that does not occur in the flattened
        subparts is created and set on msg.
        """
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, str):
            # e.g. a non-strict parse of a message with no starting boundary.
            self.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = self._encoded_NL.join(msgtexts)
            boundary = self._make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            self._write_lines(preamble)
            self.write(self._NL)
        # dash-boundary transport-padding CRLF
        self.write('--' + boundary + self._NL)
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            self.write(self._NL + '--' + boundary + self._NL)
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self.write(self._NL + '--' + boundary + '--' + self._NL)
        if msg.epilogue is not None:
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._write_lines(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        p = self.policy
        self.policy = p.clone(max_line_length=0)
        try:
            self._handle_multipart(msg)
        finally:
            self.policy = p

    def _handle_message_delivery_status(self, msg):
        """Write each header block of the payload, separated by a blank line."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = self._new_buffer()
            g = self.clone(s)
            g.flatten(part, unixfrom=False, linesep=self._NL)
            text = s.getvalue()
            lines = text.split(self._encoded_NL)
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == self._encoded_EMPTY:
                blocks.append(self._encoded_NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(self._encoded_NL.join(blocks))

    def _handle_message(self, msg):
        """Write the message contained in msg, or its raw string payload."""
        s = self._new_buffer()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg._payload
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False, linesep=self._NL)
            payload = s.getvalue()
        else:
            payload = self._encode(payload)
        self._fp.write(payload)

    # This used to be a module level function; we use a classmethod for this
    # and _compile_re so we can continue to provide the module level function
    # for backward compatibility by doing
    #   _make_boundary = Generator._make_boundary
    # at the end of the module.  It *is* internal, so we could drop that...
    @classmethod
    def _make_boundary(cls, text=None):
        """Return a random boundary string.

        If text is given, the returned boundary does not occur in it as a
        delimiter line ('--boundary' or '--boundary--' on a line by itself).
        """
        # Craft a random boundary.  If text is given, ensure that the chosen
        # boundary doesn't appear in the text.
        token = random.randrange(sys.maxsize)
        boundary = ('=' * 15) + (_fmt % token) + '=='
        if text is None:
            return boundary
        b = boundary
        counter = 0
        while True:
            cre = cls._compile_re('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
            if not cre.search(text):
                break
            # Collision: try again with a numeric suffix on the same token.
            b = boundary + '.' + str(counter)
            counter += 1
        return b

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile s into a pattern of the type matching buffer contents."""
        return re.compile(s, flags)
390n/a
391n/a
class BytesGenerator(Generator):
    """Generates a bytes version of a Message object tree.

    Functionally identical to the base Generator except that the output is
    bytes and not string.  When surrogates were used in the input to encode
    bytes, these are decoded back to bytes for output.  If the policy has
    cte_type set to 7bit, then the message is transformed such that the
    non-ASCII bytes are properly content transfer encoded, using the charset
    unknown-8bit.

    The outfp object must accept bytes in its write method.
    """

    def write(self, s):
        """Encode the string s to bytes and write it to the output file.

        Surrogate-escaped characters are turned back into the bytes they
        stand for.
        """
        self._fp.write(s.encode('ascii', 'surrogateescape'))

    def _new_buffer(self):
        """Return a fresh bytes buffer for collecting subpart output."""
        return BytesIO()

    def _encode(self, s):
        """Return the ASCII string s as bytes."""
        return s.encode('ascii')

    def _write_headers(self, msg):
        """Write every header of msg as bytes, then a blank line."""
        # This is almost the same as the string version, except for handling
        # strings with 8bit bytes.
        for h, v in msg.raw_items():
            # fold_binary already returns bytes, so bypass self.write().
            self._fp.write(self.policy.fold_binary(h, v))
        # A blank line always separates headers from body
        self.write(self._NL)

    def _handle_text(self, msg):
        """Write a text payload, passing surrogate-escaped bytes through.

        The message itself is never modified: From_ mangling is applied to a
        local copy of the payload only.
        """
        # If the string has surrogates the original source was bytes, so
        # just write it back out.
        payload = msg._payload
        if payload is None:
            return
        if _has_surrogates(payload) and self.policy.cte_type != '7bit':
            if self._mangle_from_:
                # Do not assign back to msg._payload; flattening must not
                # alter the caller's message.
                payload = fcre.sub(">From ", payload)
            self._write_lines(payload)
        else:
            super()._handle_text(msg)

    # Default body handler
    _writeBody = _handle_text

    @classmethod
    def _compile_re(cls, s, flags):
        """Compile the ASCII string s into a bytes pattern."""
        return re.compile(s.encode('ascii'), flags)
440n/a
441n/a
442n/a
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Behaves like the Generator base class, but every non-text part is
    replaced by a line produced from a format string.
    """
    def __init__(self, outfp, mangle_from_=None, maxheaderlen=None, fmt=None, *,
                 policy=None):
        """Like Generator.__init__() with one extra optional argument, fmt.

        All subparts of a message are walked.  For a subpart whose main type
        is `text', the payload as returned by get_payload(decode=False) is
        printed.  Subparts whose main type is `multipart' produce no output
        of their own.

        For every other subpart, fmt is printed in place of the payload
        after being expanded with these keywords (in %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this format is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen,
                           policy=policy)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        """Print text payloads and a placeholder line for other leaf parts."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Containers contribute nothing themselves; walk() also
                # yields their children.
                continue
            if maintype == 'text':
                print(part.get_payload(decode=False), file=self)
                continue
            substitutions = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
            }
            print(self._fmt % substitutions, file=self)
500n/a
501n/a
502n/a
503n/a# Helper used by Generator._make_boundary
504n/a_width = len(repr(sys.maxsize-1))
505n/a_fmt = '%%0%dd' % _width
506n/a
507n/a# Backward compatibility
508n/a_make_boundary = Generator._make_boundary