1 | n/a | """This will be the home for the policy that hooks in the new |
---|
2 | n/a | code that adds all the email6 features. |
---|
3 | n/a | """ |
---|
4 | n/a | |
---|
5 | n/a | import re |
---|
6 | n/a | from email._policybase import Policy, Compat32, compat32, _extend_docstrings |
---|
7 | n/a | from email.utils import _has_surrogates |
---|
8 | n/a | from email.headerregistry import HeaderRegistry as HeaderRegistry |
---|
9 | n/a | from email.contentmanager import raw_data_manager |
---|
10 | n/a | from email.message import EmailMessage |
---|
11 | n/a | |
---|
# Names exported by a star-import of this module.  SMTPUTF8 is listed
# alongside the other ready-made policy instances defined at the bottom of
# the file so that it is exported consistently with them.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
---|
22 | n/a | |
---|
# Matches a single LF or CR character.  header_fetch_parse() uses it to
# remove line separators from a header value without also splitting on the
# additional characters that str.splitlines() treats as line boundaries.
linesep_splitter = re.compile(r'\n|\r')
---|
24 | n/a | |
---|
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Instead of
    simple strings, headers are custom objects with custom attributes
    depending on the type of the field.  The folding algorithm fully
    implements RFCs 2047 and 5322.

    In addition to the settable attributes listed above that apply to
    all Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this attribute
                           indicates whether or not a generator should refold
                           that value when transforming the message back into
                           stream form.  The possible values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all   -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first argument,
                           and any arguments or keywords that were passed to
                           it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    # Class-level defaults for the attributes described above.
    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # Ensure that each new instance gets a unique header factory
        # (as opposed to clones, which share the factory).
        if 'header_factory' not in kw:
            # object.__setattr__ is used rather than plain assignment;
            # presumably the base class restricts normal attribute setting
            # (verify against Policy).
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        The implementation for this class returns the max_count attribute from
        the specialized header class that would be used to construct a header
        of type 'name'.
        """
        return self.header_factory[name].max_count

    # The logic of the next three methods is chosen such that it is possible to
    # switch a Message object between a Compat32 policy and a policy derived
    # from this class and have the results stay consistent.  This allows a
    # Message object constructed with this policy to be passed to a library
    # that only handles Compat32 objects, or to receive such an object and
    # convert it to use the newer style by just changing its policy.  It is
    # also chosen because it postpones the relatively expensive full rfc5322
    # parse until as late as possible when parsing from source, since in many
    # applications only a few headers will actually be inspected.

    def header_source_parse(self, sourcelines):
        """+
        The name is parsed as everything up to the ':' and returned unmodified.
        The value is determined by stripping leading whitespace off the
        remainder of the first line, joining all subsequent lines together, and
        stripping any trailing carriage return or linefeed characters.  (This
        is the same as Compat32).

        """
        name, value = sourcelines[0].split(':', 1)
        value = value.lstrip(' \t') + ''.join(sourcelines[1:])
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute and it matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to header_factory
        method, and the resulting custom header object is returned as the
        value.  In this case a ValueError is raised if the input value contains
        CR or LF characters.

        """
        if hasattr(value, 'name') and value.name.lower() == name.lower():
            return (name, value)
        if isinstance(value, str) and len(value.splitlines())>1:
            # XXX this error message isn't quite right when we use splitlines
            # (see issue 22233), but I'm not sure what should happen here.
            raise ValueError("Header values may not contain linefeed "
                             "or carriage return characters")
        return (name, self.header_factory(name, value))

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if hasattr(value, 'name'):
            return value
        # We can't use splitlines here because it splits on more than \r and \n.
        value = ''.join(linesep_splitter.split(value))
        return self.header_factory(name, value)

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value is considered to be a 'source value' if and only if it does not
        have a 'name' attribute (having a 'name' attribute means it is a header
        object of some sort).  If a source value needs to be refolded according
        to the policy, it is converted into a custom header object by passing
        the name and the value with any linesep characters removed to the
        header_factory method.  Folding of a custom header object is done by
        calling its fold method with the current policy.

        Source values are split into lines at CRLF, CR and LF only; other
        characters that str.splitlines treats as line boundaries are kept as
        part of the value.  If the value is not to be refolded, the lines are
        rejoined using the linesep from the policy and returned.  The
        exception is lines containing non-ascii binary data.  In that case the
        value is refolded regardless of the refold_source setting, which
        causes the binary data to be CTE encoded using the unknown-8bit
        charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value is
        bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the binary
        data consists of single byte characters or multibyte characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
        charset = 'utf8' if self.utf8 else 'ascii'
        return folded.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        # Shared implementation of fold() and fold_binary(); returns the
        # complete folded header, including the trailing linesep, as a str.
        # refold_binary says whether a source value containing surrogates
        # must be refolded through header_factory.
        if hasattr(value, 'name'):
            # Header objects fold themselves.
            return value.fold(policy=self)
        # A falsy max_line_length (None or 0) means there is no length limit.
        maxlen = self.max_line_length if self.max_line_length else float('inf')
        # Split on CRLF, CR and LF only.  str.splitlines() also breaks on
        # characters such as form feed, NEL and U+2028, which would drop them
        # from the value and put a bare linesep in their place.
        # linesep_splitter is not suitable here either: it matches CR and LF
        # individually, so a CRLF pair would yield a spurious empty line.
        lines = re.split(r'\r\n|\r|\n', value)
        # Mirror splitlines(): a trailing line terminator does not start a
        # new (empty) line, and an empty value has no lines at all.
        if not lines[-1]:
            lines.pop()
        # The first line shares its physical line with "name: ", hence the
        # extra len(name)+2 when it is measured against maxlen.
        refold = (self.refold_source == 'all' or
                  self.refold_source == 'long' and
                    (lines and len(lines[0])+len(name)+2 > maxlen or
                     any(len(x) > maxlen for x in lines[1:])))
        if refold or refold_binary and _has_surrogates(value):
            return self.header_factory(name, ''.join(lines)).fold(policy=self)
        return name + ': ' + self.linesep.join(lines) + self.linesep
---|
215 | n/a | |
---|
216 | n/a | |
---|
# Ready-made policy instances.

# An EmailPolicy with every setting left at its default value.
default = EmailPolicy()
# Make the default policy use the class default header_factory
# (EmailPolicy.__init__ otherwise gives each new instance its own
# HeaderRegistry).  This has to run before the clone() calls below;
# presumably the clones then also fall back to the class-level factory --
# clone() is defined on the base class, verify there.
del default.header_factory
# Same as default, but with raise_on_defect enabled.
strict = default.clone(raise_on_defect=True)
# Same as default, but with CRLF as the line separator.
SMTP = default.clone(linesep='\r\n')
# CRLF line separator and no maximum line length (EmailPolicy._fold treats a
# falsy max_line_length as "no limit").
HTTP = default.clone(linesep='\r\n', max_line_length=None)
# Same as SMTP, but with utf8 enabled, so headers are serialized as utf8
# instead of ASCII with encoded words.
SMTPUTF8 = SMTP.clone(utf8=True)
---|