Python code coverage: Lib/email/policy.py

#	count	content
1	n/a	"""This will be the home for the policy that hooks in the new
2	n/a	code that adds all the email6 features.
3	n/a	"""
4	n/a
5	n/a	import re
6	n/a	from email._policybase import Policy, Compat32, compat32, _extend_docstrings
7	n/a	from email.utils import _has_surrogates
8	n/a	from email.headerregistry import HeaderRegistry as HeaderRegistry
9	n/a	from email.contentmanager import raw_data_manager
10	n/a	from email.message import EmailMessage
11	n/a
12	n/a	__all__ = [
13	n/a	'Compat32',
14	n/a	'compat32',
15	n/a	'Policy',
16	n/a	'EmailPolicy',
17	n/a	'default',
18	n/a	'strict',
19	n/a	'SMTP',
20	n/a	'HTTP',
21	n/a	]
22	n/a
23	n/a	linesep_splitter = re.compile(r'\n|\r')
24	n/a
25	n/a	@_extend_docstrings
26	n/a	class EmailPolicy(Policy):
27	n/a
28	n/a	"""+
29	n/a	PROVISIONAL
30	n/a
31	n/a	The API extensions enabled by this policy are currently provisional.
32	n/a	Refer to the documentation for details.
33	n/a
34	n/a	This policy adds new header parsing and folding algorithms. Instead of
35	n/a	simple strings, headers are custom objects with custom attributes
36	n/a	depending on the type of the field. The folding algorithm fully
37	n/a	implements RFCs 2047 and 5322.
38	n/a
39	n/a	In addition to the settable attributes listed above that apply to
40	n/a	all Policies, this policy adds the following additional attributes:
41	n/a
42	n/a	utf8 -- if False (the default) message headers will be
43	n/a	serialized as ASCII, using encoded words to encode
44	n/a	any non-ASCII characters in the source strings. If
45	n/a	True, the message headers will be serialized using
46	n/a	utf8 and will not contain encoded words (see RFC
47	n/a	6532 for more on this serialization format).
48	n/a
49	n/a	refold_source -- if the value for a header in the Message object
50	n/a	came from the parsing of some source, this attribute
51	n/a	indicates whether or not a generator should refold
52	n/a	that value when transforming the message back into
53	n/a	stream form. The possible values are:
54	n/a
55	n/a	none -- all source values use original folding
56	n/a	long -- source values that have any line that is
57	n/a	longer than max_line_length will be
58	n/a	refolded
59	n/a	all -- all values are refolded.
60	n/a
61	n/a	The default is 'long'.
62	n/a
63	n/a	header_factory -- a callable that takes two arguments, 'name' and
64	n/a	'value', where 'name' is a header field name and
65	n/a	'value' is an unfolded header field value, and
66	n/a	returns a string-like object that represents that
67	n/a	header. A default header_factory is provided that
68	n/a	understands some of the RFC5322 header field types.
69	n/a	(Currently address fields and date fields have
70	n/a	special treatment, while all other fields are
71	n/a	treated as unstructured. This list will be
72	n/a	completed before the extension is marked stable.)
73	n/a
74	n/a	content_manager -- an object with at least two methods: get_content
75	n/a	and set_content. When the get_content or
76	n/a	set_content method of a Message object is called,
77	n/a	it calls the corresponding method of this object,
78	n/a	passing it the message object as its first argument,
79	n/a	and any arguments or keywords that were passed to
80	n/a	it as additional arguments. The default
81	n/a	content_manager is
82	n/a	:data:`~email.contentmanager.raw_data_manager`.
83	n/a
84	n/a	"""
85	n/a
86	n/a	message_factory = EmailMessage
87	n/a	utf8 = False
88	n/a	refold_source = 'long'
89	n/a	header_factory = HeaderRegistry()
90	n/a	content_manager = raw_data_manager
91	n/a
92	n/a	def __init__(self, **kw):
93	n/a	# Ensure that each new instance gets a unique header factory
94	n/a	# (as opposed to clones, which share the factory).
95	n/a	if 'header_factory' not in kw:
96	n/a	object.__setattr__(self, 'header_factory', HeaderRegistry())
97	n/a	super().__init__(**kw)
98	n/a
99	n/a	def header_max_count(self, name):
100	n/a	"""+
101	n/a	The implementation for this class returns the max_count attribute from
102	n/a	the specialized header class that would be used to construct a header
103	n/a	of type 'name'.
104	n/a	"""
105	n/a	return self.header_factory[name].max_count
106	n/a
107	n/a	# The logic of the next three methods is chosen such that it is possible to
108	n/a	# switch a Message object between a Compat32 policy and a policy derived
109	n/a	# from this class and have the results stay consistent. This allows a
110	n/a	# Message object constructed with this policy to be passed to a library
111	n/a	# that only handles Compat32 objects, or to receive such an object and
112	n/a	# convert it to use the newer style by just changing its policy. It is
113	n/a	# also chosen because it postpones the relatively expensive full rfc5322
114	n/a	# parse until as late as possible when parsing from source, since in many
115	n/a	# applications only a few headers will actually be inspected.
116	n/a
117	n/a	def header_source_parse(self, sourcelines):
118	n/a	"""+
119	n/a	The name is parsed as everything up to the ':' and returned unmodified.
120	n/a	The value is determined by stripping leading whitespace off the
121	n/a	remainder of the first line, joining all subsequent lines together, and
122	n/a	stripping any trailing carriage return or linefeed characters. (This
123	n/a	is the same as Compat32).
124	n/a
125	n/a	"""
126	n/a	name, value = sourcelines[0].split(':', 1)
127	n/a	value = value.lstrip(' \t') + ''.join(sourcelines[1:])
128	n/a	return (name, value.rstrip('\r\n'))
129	n/a
130	n/a	def header_store_parse(self, name, value):
131	n/a	"""+
132	n/a	The name is returned unchanged. If the input value has a 'name'
133	n/a	attribute and it matches the name ignoring case, the value is returned
134	n/a	unchanged. Otherwise the name and value are passed to header_factory
135	n/a	method, and the resulting custom header object is returned as the
136	n/a	value. In this case a ValueError is raised if the input value contains
137	n/a	CR or LF characters.
138	n/a
139	n/a	"""
140	n/a	if hasattr(value, 'name') and value.name.lower() == name.lower():
141	n/a	return (name, value)
142	n/a	if isinstance(value, str) and len(value.splitlines())>1:
143	n/a	# XXX this error message isn't quite right when we use splitlines
144	n/a	# (see issue 22233), but I'm not sure what should happen here.
145	n/a	raise ValueError("Header values may not contain linefeed "
146	n/a	"or carriage return characters")
147	n/a	return (name, self.header_factory(name, value))
148	n/a
149	n/a	def header_fetch_parse(self, name, value):
150	n/a	"""+
151	n/a	If the value has a 'name' attribute, it is returned unmodified.
152	n/a	Otherwise the name and the value with any linesep characters removed
153	n/a	are passed to the header_factory method, and the resulting custom
154	n/a	header object is returned. Any surrogateescaped bytes get turned
155	n/a	into the unicode unknown-character glyph.
156	n/a
157	n/a	"""
158	n/a	if hasattr(value, 'name'):
159	n/a	return value
160	n/a	# We can't use splitlines here because it splits on more than \r and \n.
161	n/a	value = ''.join(linesep_splitter.split(value))
162	n/a	return self.header_factory(name, value)
163	n/a
164	n/a	def fold(self, name, value):
165	n/a	"""+
166	n/a	Header folding is controlled by the refold_source policy setting. A
167	n/a	value is considered to be a 'source value' if and only if it does not
168	n/a	have a 'name' attribute (having a 'name' attribute means it is a header
169	n/a	object of some sort). If a source value needs to be refolded according
170	n/a	to the policy, it is converted into a custom header object by passing
171	n/a	the name and the value with any linesep characters removed to the
172	n/a	header_factory method. Folding of a custom header object is done by
173	n/a	calling its fold method with the current policy.
174	n/a
175	n/a	Source values are split into lines using splitlines. If the value is
176	n/a	not to be refolded, the lines are rejoined using the linesep from the
177	n/a	policy and returned. The exception is lines containing non-ascii
178	n/a	binary data. In that case the value is refolded regardless of the
179	n/a	refold_source setting, which causes the binary data to be CTE encoded
180	n/a	using the unknown-8bit charset.
181	n/a
182	n/a	"""
183	n/a	return self._fold(name, value, refold_binary=True)
184	n/a
185	n/a	def fold_binary(self, name, value):
186	n/a	"""+
187	n/a	The same as fold if cte_type is 7bit, except that the returned value is
188	n/a	bytes.
189	n/a
190	n/a	If cte_type is 8bit, non-ASCII binary data is converted back into
191	n/a	bytes. Headers with binary data are not refolded, regardless of the
192	n/a	refold_source setting, since there is no way to know whether the binary
193	n/a	data consists of single byte characters or multibyte characters.
194	n/a
195	n/a	If utf8 is true, headers are encoded to utf8, otherwise to ascii with
196	n/a	non-ASCII unicode rendered as encoded words.
197	n/a
198	n/a	"""
199	n/a	folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')
200	n/a	charset = 'utf8' if self.utf8 else 'ascii'
201	n/a	return folded.encode(charset, 'surrogateescape')
202	n/a
203	n/a	def _fold(self, name, value, refold_binary=False):
204	n/a	if hasattr(value, 'name'):
205	n/a	return value.fold(policy=self)
206	n/a	maxlen = self.max_line_length if self.max_line_length else float('inf')
207	n/a	lines = value.splitlines()
208	n/a	refold = (self.refold_source == 'all' or
209	n/a	self.refold_source == 'long' and
210	n/a	(lines and len(lines[0])+len(name)+2 > maxlen or
211	n/a	any(len(x) > maxlen for x in lines[1:])))
212	n/a	if refold or refold_binary and _has_surrogates(value):
213	n/a	return self.header_factory(name, ''.join(lines)).fold(policy=self)
214	n/a	return name + ': ' + self.linesep.join(lines) + self.linesep
215	n/a
216	n/a
217	n/a	default = EmailPolicy()
218	n/a	# Make the default policy use the class default header_factory
219	n/a	del default.header_factory
220	n/a	strict = default.clone(raise_on_defect=True)
221	n/a	SMTP = default.clone(linesep='\r\n')
222	n/a	HTTP = default.clone(linesep='\r\n', max_line_length=None)
223	n/a	SMTPUTF8 = SMTP.clone(utf8=True)

1

n/a

"""This will be the home for the policy that hooks in the new

2

n/a

code that adds all the email6 features.

3

n/a

"""

4

n/a

5

n/a

import re

6

n/a

from email._policybase import Policy, Compat32, compat32, _extend_docstrings

7

n/a

from email.utils import _has_surrogates

8

n/a

from email.headerregistry import HeaderRegistry as HeaderRegistry

9

n/a

from email.contentmanager import raw_data_manager

10

n/a

from email.message import EmailMessage

11

n/a

12

n/a

__all__ = [

13

n/a

'Compat32',

14

n/a

'compat32',

15

n/a

'Policy',

16

n/a

'EmailPolicy',

17

n/a

'default',

18

n/a

'strict',

19

n/a

'SMTP',

20

n/a

'HTTP',

21

n/a

]

22

n/a

23

n/a

linesep_splitter = re.compile(r'\n|\r')

24

n/a

25

n/a

@_extend_docstrings

26

n/a

class EmailPolicy(Policy):

27

n/a

28

n/a

"""+

29

n/a

PROVISIONAL

30

n/a

31

n/a

The API extensions enabled by this policy are currently provisional.

32

n/a

Refer to the documentation for details.

33

n/a

34

n/a

This policy adds new header parsing and folding algorithms. Instead of

35

n/a

simple strings, headers are custom objects with custom attributes

36

n/a

depending on the type of the field. The folding algorithm fully

37

n/a

implements RFCs 2047 and 5322.

38

n/a

39

n/a

In addition to the settable attributes listed above that apply to

40

n/a

all Policies, this policy adds the following additional attributes:

41

n/a

42

n/a

utf8 -- if False (the default) message headers will be

43

n/a

serialized as ASCII, using encoded words to encode

44

n/a

any non-ASCII characters in the source strings. If

45

n/a

True, the message headers will be serialized using

46

n/a

utf8 and will not contain encoded words (see RFC

47

n/a

6532 for more on this serialization format).

48

n/a

49

n/a

refold_source -- if the value for a header in the Message object

50

n/a

came from the parsing of some source, this attribute

51

n/a

indicates whether or not a generator should refold

52

n/a

that value when transforming the message back into

53

n/a

stream form. The possible values are:

54

n/a

55

n/a

none -- all source values use original folding

56

n/a

long -- source values that have any line that is

57

n/a

longer than max_line_length will be

58

n/a

refolded

59

n/a

all -- all values are refolded.

60

n/a

61

n/a

The default is 'long'.

62

n/a

63

n/a

header_factory -- a callable that takes two arguments, 'name' and

64

n/a

'value', where 'name' is a header field name and

65

n/a

'value' is an unfolded header field value, and

66

n/a

returns a string-like object that represents that

67

n/a

header. A default header_factory is provided that

68

n/a

understands some of the RFC5322 header field types.

69

n/a

(Currently address fields and date fields have

70

n/a

special treatment, while all other fields are

71

n/a

treated as unstructured. This list will be

72

n/a

completed before the extension is marked stable.)

73

n/a

74

n/a

content_manager -- an object with at least two methods: get_content

75

n/a

and set_content. When the get_content or

76

n/a

set_content method of a Message object is called,

77

n/a

it calls the corresponding method of this object,

78

n/a

passing it the message object as its first argument,

79

n/a

and any arguments or keywords that were passed to

80

n/a

it as additional arguments. The default

81

n/a

content_manager is

82

n/a

:data:`~email.contentmanager.raw_data_manager`.

83

n/a

84

n/a

"""

85

n/a

86

n/a

message_factory = EmailMessage

87

n/a

utf8 = False

88

n/a

refold_source = 'long'

89

n/a

header_factory = HeaderRegistry()

90

n/a

content_manager = raw_data_manager

91

n/a

92

n/a

def __init__(self, **kw):

93

n/a

# Ensure that each new instance gets a unique header factory

94

n/a

# (as opposed to clones, which share the factory).

95

n/a

if 'header_factory' not in kw:

96

n/a

object.__setattr__(self, 'header_factory', HeaderRegistry())

97

n/a

super().__init__(**kw)

98

n/a

99

n/a

def header_max_count(self, name):

100

n/a

"""+

101

n/a

The implementation for this class returns the max_count attribute from

102

n/a

the specialized header class that would be used to construct a header

103

n/a

of type 'name'.

104

n/a

"""

105

n/a

return self.header_factory[name].max_count

106

n/a

107

n/a

# The logic of the next three methods is chosen such that it is possible to

108

n/a

# switch a Message object between a Compat32 policy and a policy derived

109

n/a

# from this class and have the results stay consistent. This allows a

110

n/a

# Message object constructed with this policy to be passed to a library

111

n/a

# that only handles Compat32 objects, or to receive such an object and

112

n/a

# convert it to use the newer style by just changing its policy. It is

113

n/a

# also chosen because it postpones the relatively expensive full rfc5322

114

n/a

# parse until as late as possible when parsing from source, since in many

115

n/a

# applications only a few headers will actually be inspected.

116

n/a

117

n/a

def header_source_parse(self, sourcelines):

118

n/a

"""+

119

n/a

The name is parsed as everything up to the ':' and returned unmodified.

120

n/a

The value is determined by stripping leading whitespace off the

121

n/a

remainder of the first line, joining all subsequent lines together, and

122

n/a

stripping any trailing carriage return or linefeed characters. (This

123

n/a

is the same as Compat32).

124

n/a

125

n/a

"""

126

n/a

name, value = sourcelines[0].split(':', 1)

127

n/a

value = value.lstrip(' \t') + ''.join(sourcelines[1:])

128

n/a

return (name, value.rstrip('\r\n'))

129

n/a

130

n/a

def header_store_parse(self, name, value):

131

n/a

"""+

132

n/a

The name is returned unchanged. If the input value has a 'name'

133

n/a

attribute and it matches the name ignoring case, the value is returned

134

n/a

unchanged. Otherwise the name and value are passed to header_factory

135

n/a

method, and the resulting custom header object is returned as the

136

n/a

value. In this case a ValueError is raised if the input value contains

137

n/a

CR or LF characters.

138

n/a

139

n/a

"""

140

n/a

if hasattr(value, 'name') and value.name.lower() == name.lower():

141

n/a

return (name, value)

142

n/a

if isinstance(value, str) and len(value.splitlines())>1:

143

n/a

# XXX this error message isn't quite right when we use splitlines

144

n/a

# (see issue 22233), but I'm not sure what should happen here.

145

n/a

raise ValueError("Header values may not contain linefeed "

146

n/a

"or carriage return characters")

147

n/a

return (name, self.header_factory(name, value))

148

n/a

149

n/a

def header_fetch_parse(self, name, value):

150

n/a

"""+

151

n/a

If the value has a 'name' attribute, it is returned unmodified.

152

n/a

Otherwise the name and the value with any linesep characters removed

153

n/a

are passed to the header_factory method, and the resulting custom

154

n/a

header object is returned. Any surrogateescaped bytes get turned

155

n/a

into the unicode unknown-character glyph.

156

n/a

157

n/a

"""

158

n/a

if hasattr(value, 'name'):

159

n/a

return value

160

n/a

# We can't use splitlines here because it splits on more than \r and \n.

161

n/a

value = ''.join(linesep_splitter.split(value))

162

n/a

return self.header_factory(name, value)

163

n/a

164

n/a

def fold(self, name, value):

165

n/a

"""+

166

n/a

Header folding is controlled by the refold_source policy setting. A

167

n/a

value is considered to be a 'source value' if and only if it does not

168

n/a

have a 'name' attribute (having a 'name' attribute means it is a header

169

n/a

object of some sort). If a source value needs to be refolded according

170

n/a

to the policy, it is converted into a custom header object by passing

171

n/a

the name and the value with any linesep characters removed to the

172

n/a

header_factory method. Folding of a custom header object is done by

173

n/a

calling its fold method with the current policy.

174

n/a

175

n/a

Source values are split into lines using splitlines. If the value is

176

n/a

not to be refolded, the lines are rejoined using the linesep from the

177

n/a

policy and returned. The exception is lines containing non-ascii

178

n/a

binary data. In that case the value is refolded regardless of the

179

n/a

refold_source setting, which causes the binary data to be CTE encoded

180

n/a

using the unknown-8bit charset.

181

n/a

182

n/a

"""

183

n/a

return self._fold(name, value, refold_binary=True)

184

n/a

185

n/a

def fold_binary(self, name, value):

186

n/a

"""+

187

n/a

The same as fold if cte_type is 7bit, except that the returned value is

188

n/a

bytes.

189

n/a

190

n/a

If cte_type is 8bit, non-ASCII binary data is converted back into

191

n/a

bytes. Headers with binary data are not refolded, regardless of the

192

n/a

refold_source setting, since there is no way to know whether the binary

193

n/a

data consists of single byte characters or multibyte characters.

194

n/a

195

n/a

If utf8 is true, headers are encoded to utf8, otherwise to ascii with

196

n/a

non-ASCII unicode rendered as encoded words.

197

n/a

198

n/a

"""

199

n/a

folded = self._fold(name, value, refold_binary=self.cte_type=='7bit')

200

n/a

charset = 'utf8' if self.utf8 else 'ascii'

201

n/a

return folded.encode(charset, 'surrogateescape')

202

n/a

203

n/a

def _fold(self, name, value, refold_binary=False):

204

n/a

if hasattr(value, 'name'):

205

n/a

return value.fold(policy=self)

206

n/a

maxlen = self.max_line_length if self.max_line_length else float('inf')

207

n/a

lines = value.splitlines()

208

n/a

refold = (self.refold_source == 'all' or

209

n/a

self.refold_source == 'long' and

210

n/a

(lines and len(lines[0])+len(name)+2 > maxlen or

211

n/a

any(len(x) > maxlen for x in lines[1:])))

212

n/a

if refold or refold_binary and _has_surrogates(value):

213

n/a

return self.header_factory(name, ''.join(lines)).fold(policy=self)

214

n/a

return name + ': ' + self.linesep.join(lines) + self.linesep

215

n/a

216

n/a

217

n/a

default = EmailPolicy()

218

n/a

# Make the default policy use the class default header_factory

219

n/a

del default.header_factory

220

n/a

strict = default.clone(raise_on_defect=True)

221

n/a

SMTP = default.clone(linesep='\r\n')

222

n/a

HTTP = default.clone(linesep='\r\n', max_line_length=None)

223

n/a

SMTPUTF8 = SMTP.clone(utf8=True)

Python code coverage for Lib/email/policy.py