| 1 | n/a | """This will be the home for the policy that hooks in the new |
|---|
| 2 | n/a | code that adds all the email6 features. |
|---|
| 3 | n/a | """ |
|---|
| 4 | n/a | |
|---|
| 5 | n/a | import re |
|---|
| 6 | n/a | from email._policybase import Policy, Compat32, compat32, _extend_docstrings |
|---|
| 7 | n/a | from email.utils import _has_surrogates |
|---|
| 8 | n/a | from email.headerregistry import HeaderRegistry as HeaderRegistry |
|---|
| 9 | n/a | from email.contentmanager import raw_data_manager |
|---|
| 10 | n/a | from email.message import EmailMessage |
|---|
| 11 | n/a | |
|---|
# Names exported by ``from email.policy import *``.  Every ready-made policy
# instance defined at the bottom of this module is listed here, including
# SMTPUTF8, which was previously defined but not exported.
__all__ = [
    'Compat32',
    'compat32',
    'Policy',
    'EmailPolicy',
    'default',
    'strict',
    'SMTP',
    'HTTP',
    'SMTPUTF8',
    ]
|---|
| 22 | n/a | |
|---|
# Matches a single linefeed or carriage return character.  It is used to strip
# line separators from header values in places where str.splitlines() cannot
# be used because splitlines() splits on more than just \r and \n.
linesep_splitter = re.compile(r'\n|\r')
|---|
| 24 | n/a | |
|---|
@_extend_docstrings
class EmailPolicy(Policy):

    """+
    PROVISIONAL

    The API extensions enabled by this policy are currently provisional.
    Refer to the documentation for details.

    This policy adds new header parsing and folding algorithms.  Headers are
    no longer simple strings; they are custom objects whose attributes depend
    on the type of the field.  The folding algorithm fully implements RFCs
    2047 and 5322.

    In addition to the settable attributes listed above that apply to all
    Policies, this policy adds the following additional attributes:

    utf8                -- if False (the default) message headers will be
                           serialized as ASCII, using encoded words to encode
                           any non-ASCII characters in the source strings.  If
                           True, the message headers will be serialized using
                           utf8 and will not contain encoded words (see RFC
                           6532 for more on this serialization format).

    refold_source       -- if the value for a header in the Message object
                           came from the parsing of some source, this
                           attribute indicates whether or not a generator
                           should refold that value when transforming the
                           message back into stream form.  The possible
                           values are:

                           none  -- all source values use original folding
                           long  -- source values that have any line that is
                                    longer than max_line_length will be
                                    refolded
                           all   -- all values are refolded.

                           The default is 'long'.

    header_factory      -- a callable that takes two arguments, 'name' and
                           'value', where 'name' is a header field name and
                           'value' is an unfolded header field value, and
                           returns a string-like object that represents that
                           header.  A default header_factory is provided that
                           understands some of the RFC5322 header field types.
                           (Currently address fields and date fields have
                           special treatment, while all other fields are
                           treated as unstructured.  This list will be
                           completed before the extension is marked stable.)

    content_manager     -- an object with at least two methods: get_content
                           and set_content.  When the get_content or
                           set_content method of a Message object is called,
                           it calls the corresponding method of this object,
                           passing it the message object as its first
                           argument, and any arguments or keywords that were
                           passed to it as additional arguments.  The default
                           content_manager is
                           :data:`~email.contentmanager.raw_data_manager`.

    """

    message_factory = EmailMessage
    utf8 = False
    refold_source = 'long'
    header_factory = HeaderRegistry()
    content_manager = raw_data_manager

    def __init__(self, **kw):
        # A directly constructed policy gets a registry of its own unless the
        # caller passed one in (clones, by contrast, share the factory).
        # object.__setattr__ is used so the assignment bypasses any
        # __setattr__ defined by the policy classes themselves.
        if 'header_factory' not in kw:
            object.__setattr__(self, 'header_factory', HeaderRegistry())
        super().__init__(**kw)

    def header_max_count(self, name):
        """+
        This implementation looks up the specialized header class that the
        header_factory would use for a header called 'name' and returns that
        class's max_count attribute.
        """
        header_class = self.header_factory[name]
        return header_class.max_count

    # The three parse methods below are written so that a Message object can
    # be switched between a Compat32 policy and a policy derived from this
    # class with consistent results.  That makes it possible to hand a Message
    # built with this policy to a library that only understands Compat32
    # objects, or to take such an object and upgrade it just by changing its
    # policy.  The same design also defers the relatively expensive full
    # rfc5322 parse for as long as possible when parsing from source, because
    # many applications only ever look at a handful of headers.

    def header_source_parse(self, sourcelines):
        """+
        The name is everything before the first ':' and is returned
        unmodified.  The value is the rest of the first line with leading
        spaces and tabs stripped, followed by all subsequent lines joined
        together, with any trailing carriage return or linefeed characters
        removed.  (This is the same as Compat32).

        """
        first_line = sourcelines[0]
        continuation = sourcelines[1:]
        name, remainder = first_line.split(':', 1)
        value = remainder.lstrip(' \t') + ''.join(continuation)
        return (name, value.rstrip('\r\n'))

    def header_store_parse(self, name, value):
        """+
        The name is returned unchanged.  If the input value has a 'name'
        attribute that matches the name ignoring case, the value is returned
        unchanged.  Otherwise the name and value are passed to the
        header_factory method, and the resulting custom header object is
        returned as the value.  In that case a ValueError is raised if the
        input value is a string that splitlines breaks into more than one
        line.

        """
        is_matching_header = (hasattr(value, 'name') and
                              value.name.lower() == name.lower())
        if not is_matching_header:
            if isinstance(value, str) and len(value.splitlines()) > 1:
                # XXX this error message isn't quite right when we use
                # splitlines (see issue 22233), but I'm not sure what should
                # happen here.
                raise ValueError("Header values may not contain linefeed "
                                 "or carriage return characters")
            value = self.header_factory(name, value)
        return (name, value)

    def header_fetch_parse(self, name, value):
        """+
        If the value has a 'name' attribute, it is returned unmodified.
        Otherwise the name and the value with any linesep characters removed
        are passed to the header_factory method, and the resulting custom
        header object is returned.  Any surrogateescaped bytes get turned
        into the unicode unknown-character glyph.

        """
        if not hasattr(value, 'name'):
            # splitlines can't be used here because it splits on more than
            # \r and \n, so delete exactly those two characters instead.
            unfolded = linesep_splitter.sub('', value)
            value = self.header_factory(name, unfolded)
        return value

    def fold(self, name, value):
        """+
        Header folding is controlled by the refold_source policy setting.  A
        value counts as a 'source value' if and only if it has no 'name'
        attribute (a 'name' attribute marks it as a header object of some
        sort).  When a source value has to be refolded according to the
        policy, it is first turned into a custom header object by passing the
        name and the value with any linesep characters removed to the
        header_factory method.  A custom header object is folded by calling
        its fold method with the current policy.

        Source values are split into lines using splitlines.  If the value is
        not to be refolded, the lines are rejoined using the linesep from the
        policy and returned.  The exception is lines containing non-ascii
        binary data.  In that case the value is refolded regardless of the
        refold_source setting, which causes the binary data to be CTE encoded
        using the unknown-8bit charset.

        """
        return self._fold(name, value, refold_binary=True)

    def fold_binary(self, name, value):
        """+
        The same as fold if cte_type is 7bit, except that the returned value
        is bytes.

        If cte_type is 8bit, non-ASCII binary data is converted back into
        bytes.  Headers with binary data are not refolded, regardless of the
        refold_source setting, since there is no way to know whether the
        binary data consists of single byte characters or multibyte
        characters.

        If utf8 is true, headers are encoded to utf8, otherwise to ascii with
        non-ASCII unicode rendered as encoded words.

        """
        force_binary_refold = self.cte_type == '7bit'
        text = self._fold(name, value, refold_binary=force_binary_refold)
        if self.utf8:
            charset = 'utf8'
        else:
            charset = 'ascii'
        return text.encode(charset, 'surrogateescape')

    def _fold(self, name, value, refold_binary=False):
        # Header objects (anything with a 'name' attribute) fold themselves.
        if hasattr(value, 'name'):
            return value.fold(policy=self)

        # A falsy max_line_length (None or 0) means "no length limit".
        maxlen = self.max_line_length or float('inf')
        lines = value.splitlines()

        if self.refold_source == 'all':
            refold = True
        elif self.refold_source == 'long':
            # The first line is emitted after "name: ", so that prefix counts
            # towards its length; continuation lines are measured as-is.
            first_too_long = (bool(lines) and
                              len(lines[0]) + len(name) + 2 > maxlen)
            refold = first_too_long or any(len(x) > maxlen for x in lines[1:])
        else:
            refold = False

        # Surrogate-escaped binary data forces a refold when requested.
        if not refold and refold_binary:
            refold = _has_surrogates(value)

        if refold:
            header = self.header_factory(name, ''.join(lines))
            return header.fold(policy=self)

        body = self.linesep.join(lines)
        return name + ': ' + body + self.linesep
|---|
| 215 | n/a | |
|---|
| 216 | n/a | |
|---|
# Ready-made policy instances.
default = EmailPolicy()
# Remove the instance-level header_factory so that the default policy uses
# the class default header_factory.
delattr(default, 'header_factory')
strict = default.clone(raise_on_defect=True)
SMTP = default.clone(linesep='\r\n')
# HTTP is SMTP without a line length limit.  clone() carries over SMTP's
# linesep, so this is the same as cloning default with both settings.
HTTP = SMTP.clone(max_line_length=None)
SMTPUTF8 = SMTP.clone(utf8=True)
|---|