Python code coverage for Lib/rfc822.py

#	count	content
1	n/a	"""RFC 2822 message manipulation.
2	n/a
3	n/a	Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4	n/a	the tokenizing of addresses does not adhere to all the quoting rules.
5	n/a
6	n/a	Note: RFC 2822 is a long awaited update to RFC 822. This module should
7	n/a	conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
8	n/a	effort at RFC 2822 updates have been made, but a thorough audit has not been
9	n/a	performed. Consider any RFC 2822 non-conformance to be a bug.
10	n/a
11	n/a	RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
12	n/a	RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
13	n/a
14	n/a	Directions for use:
15	n/a
16	n/a	To create a Message object: first open a file, e.g.:
17	n/a
18	n/a	fp = open(file, 'r')
19	n/a
20	n/a	You can use any other legal way of getting an open file object, e.g. use
21	n/a	sys.stdin or call os.popen(). Then pass the open file object to the Message()
22	n/a	constructor:
23	n/a
24	n/a	m = Message(fp)
25	n/a
26	n/a	This class can work with any input object that supports a readline method. If
27	n/a	the input object has seek and tell capability, the rewindbody method will
28	n/a	work; also illegal lines will be pushed back onto the input stream. If the
29	n/a	input object lacks seek but has an `unread' method that can push back a line
30	n/a	of input, Message will use that to push back illegal lines. Thus this class
31	n/a	can be used to parse messages coming from a buffered stream.
32	n/a
33	n/a	The optional `seekable' argument is provided as a workaround for certain stdio
34	n/a	libraries in which tell() discards buffered data before discovering that the
35	n/a	lseek() system call doesn't work. For maximum portability, you should set the
36	n/a	seekable argument to zero to prevent that initial \code{tell} when passing in
37	n/a	an unseekable object such as a a file object created from a socket object. If
38	n/a	it is 1 on entry -- which it is by default -- the tell() method of the open
39	n/a	file object is called once; if this raises an exception, seekable is reset to
40	n/a	0. For other nonzero values of seekable, this test is not made.
41	n/a
42	n/a	To get the text of a particular header there are several methods:
43	n/a
44	n/a	str = m.getheader(name)
45	n/a	str = m.getrawheader(name)
46	n/a
47	n/a	where name is the name of the header, e.g. 'Subject'. The difference is that
48	n/a	getheader() strips the leading and trailing whitespace, while getrawheader()
49	n/a	doesn't. Both functions retain embedded whitespace (including newlines)
50	n/a	exactly as they are specified in the header, and leave the case of the text
51	n/a	unchanged.
52	n/a
53	n/a	For addresses and address lists there are functions
54	n/a
55	n/a	realname, mailaddress = m.getaddr(name)
56	n/a	list = m.getaddrlist(name)
57	n/a
58	n/a	where the latter returns a list of (realname, mailaddr) tuples.
59	n/a
60	n/a	There is also a method
61	n/a
62	n/a	time = m.getdate(name)
63	n/a
64	n/a	which parses a Date-like field and returns a time-compatible tuple,
65	n/a	i.e. a tuple such as returned by time.localtime() or accepted by
66	n/a	time.mktime().
67	n/a
68	n/a	See the class definition for lower level access methods.
69	n/a
70	n/a	There are also some utility functions here.
71	1	"""
72	n/a	# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
73	n/a
74	1	import time
75	n/a
76	1	from warnings import warnpy3k
77	1	warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",
78	1	stacklevel=2)
79	n/a
80	1	__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
81	n/a
82	1	_blanklines = ('\r\n', '\n') # Optimization for islast()
83	n/a
84	n/a
85	2	class Message:
86	1	"""Represents a single RFC 2822-compliant message."""
87	n/a
88	1	def __init__(self, fp, seekable = 1):
89	n/a	"""Initialize the class instance and read the headers."""
90	483	if seekable == 1:
91	n/a	# Exercise tell() to make sure it works
92	n/a	# (and then assume seek() works, too)
93	262	try:
94	262	fp.tell()
95	0	except (AttributeError, IOError):
96	0	seekable = 0
97	483	self.fp = fp
98	483	self.seekable = seekable
99	483	self.startofheaders = None
100	483	self.startofbody = None
101	n/a	#
102	483	if self.seekable:
103	262	try:
104	262	self.startofheaders = self.fp.tell()
105	0	except IOError:
106	0	self.seekable = 0
107	n/a	#
108	483	self.readheaders()
109	n/a	#
110	483	if self.seekable:
111	262	try:
112	262	self.startofbody = self.fp.tell()
113	0	except IOError:
114	0	self.seekable = 0
115	n/a
116	1	def rewindbody(self):
117	n/a	"""Rewind the file to the start of the body (if seekable)."""
118	0	if not self.seekable:
119	0	raise IOError, "unseekable file"
120	0	self.fp.seek(self.startofbody)
121	n/a
122	1	def readheaders(self):
123	n/a	"""Read header lines.
124	n/a
125	n/a	Read header lines up to the entirely blank line that terminates them.
126	n/a	The (normally blank) line that ends the headers is skipped, but not
127	n/a	included in the returned list. If a non-header line ends the headers,
128	n/a	(which is an error), an attempt is made to backspace over it; it is
129	n/a	never included in the returned list.
130	n/a
131	n/a	The variable self.status is set to the empty string if all went well,
132	n/a	otherwise it is an error message. The variable self.headers is a
133	n/a	completely uninterpreted list of lines contained in the header (so
134	n/a	printing them will reproduce the header exactly as it appears in the
135	n/a	file).
136	n/a	"""
137	349	self.dict = {}
138	349	self.unixfrom = ''
139	349	self.headers = lst = []
140	349	self.status = ''
141	349	headerseen = ""
142	349	firstline = 1
143	349	startofline = unread = tell = None
144	349	if hasattr(self.fp, 'unread'):
145	0	unread = self.fp.unread
146	349	elif self.seekable:
147	261	tell = self.fp.tell
148	349	while 1:
149	1013	if tell:
150	604	try:
151	604	startofline = tell()
152	0	except IOError:
153	0	startofline = tell = None
154	0	self.seekable = 0
155	1013	line = self.fp.readline()
156	1013	if not line:
157	194	self.status = 'EOF in headers'
158	194	break
159	n/a	# Skip unix From name time lines
160	819	if firstline and line.startswith('From '):
161	4	self.unixfrom = self.unixfrom + line
162	4	continue
163	815	firstline = 0
164	815	if headerseen and line[0] in ' \t':
165	n/a	# It's a continuation line.
166	6	lst.append(line)
167	6	x = (self.dict[headerseen] + "\n " + line.strip())
168	6	self.dict[headerseen] = x.strip()
169	6	continue
170	809	elif self.iscomment(line):
171	n/a	# It's a comment. Ignore it.
172	0	continue
173	809	elif self.islast(line):
174	n/a	# Note! No pushback here! The delimiter line gets eaten.
175	155	break
176	654	headerseen = self.isheader(line)
177	654	if headerseen:
178	n/a	# It's a legal header line, save it.
179	654	lst.append(line)
180	654	self.dict[headerseen] = line[len(headerseen)+1:].strip()
181	654	continue
182	n/a	else:
183	n/a	# It's not a header line; throw it back and stop here.
184	0	if not self.dict:
185	0	self.status = 'No headers'
186	n/a	else:
187	0	self.status = 'Non-header line where header expected'
188	n/a	# Try to undo the read.
189	0	if unread:
190	0	unread(line)
191	0	elif tell:
192	0	self.fp.seek(startofline)
193	n/a	else:
194	0	self.status = self.status + '; bad seek'
195	0	break
196	n/a
197	1	def isheader(self, line):
198	n/a	"""Determine whether a given line is a legal header.
199	n/a
200	n/a	This method should return the header name, suitably canonicalized.
201	n/a	You may override this method in order to use Message parsing on tagged
202	n/a	data in RFC 2822-like formats with special header formats.
203	n/a	"""
204	1278	i = line.find(':')
205	1278	if i > 0:
206	1278	return line[:i].lower()
207	0	return None
208	n/a
209	1	def islast(self, line):
210	n/a	"""Determine whether a line is a legal end of RFC 2822 headers.
211	n/a
212	n/a	You may override this method if your application wants to bend the
213	n/a	rules, e.g. to strip trailing whitespace, or to recognize MH template
214	n/a	separators ('--------'). For convenience (e.g. for code reading from
215	n/a	sockets) a line consisting of \r\n also matches.
216	n/a	"""
217	1565	return line in _blanklines
218	n/a
219	1	def iscomment(self, line):
220	n/a	"""Determine whether a line should be skipped entirely.
221	n/a
222	n/a	You may override this method in order to use Message parsing on tagged
223	n/a	data in RFC 2822-like formats that support embedded comments or
224	n/a	free-text data.
225	n/a	"""
226	1565	return False
227	n/a
228	1	def getallmatchingheaders(self, name):
229	n/a	"""Find all header lines matching a given header name.
230	n/a
231	n/a	Look through the list of headers and find all lines matching a given
232	n/a	header name (and their continuation lines). A list of the lines is
233	n/a	returned, without interpretation. If the header does not occur, an
234	n/a	empty list is returned. If the header occurs multiple times, all
235	n/a	occurrences are returned. Case is not important in the header name.
236	n/a	"""
237	366	name = name.lower() + ':'
238	366	n = len(name)
239	366	lst = []
240	366	hit = 0
241	754	for line in self.headers:
242	388	if line[:n].lower() == name:
243	168	hit = 1
244	220	elif not line[:1].isspace():
245	214	hit = 0
246	388	if hit:
247	171	lst.append(line)
248	366	return lst
249	n/a
250	1	def getfirstmatchingheader(self, name):
251	n/a	"""Get the first header line matching name.
252	n/a
253	n/a	This is similar to getallmatchingheaders, but it returns only the
254	n/a	first matching header (and its continuation lines).
255	n/a	"""
256	0	name = name.lower() + ':'
257	0	n = len(name)
258	0	lst = []
259	0	hit = 0
260	0	for line in self.headers:
261	0	if hit:
262	0	if not line[:1].isspace():
263	0	break
264	0	elif line[:n].lower() == name:
265	0	hit = 1
266	0	if hit:
267	0	lst.append(line)
268	0	return lst
269	n/a
270	1	def getrawheader(self, name):
271	n/a	"""A higher-level interface to getfirstmatchingheader().
272	n/a
273	n/a	Return a string containing the literal text of the header but with the
274	n/a	keyword stripped. All leading, trailing and embedded whitespace is
275	n/a	kept in the string, however. Return None if the header does not
276	n/a	occur.
277	n/a	"""
278	n/a
279	0	lst = self.getfirstmatchingheader(name)
280	0	if not lst:
281	0	return None
282	0	lst[0] = lst[0][len(name) + 1:]
283	0	return ''.join(lst)
284	n/a
285	1	def getheader(self, name, default=None):
286	n/a	"""Get the header value for a name.
287	n/a
288	n/a	This is the normal interface: it returns a stripped version of the
289	n/a	header value for a given header name, or None if it doesn't exist.
290	n/a	This uses the dictionary version which finds the last such header.
291	n/a	"""
292	1685	return self.dict.get(name.lower(), default)
293	1	get = getheader
294	n/a
295	1	def getheaders(self, name):
296	n/a	"""Get all values for a header.
297	n/a
298	n/a	This returns a list of values for headers given more than once; each
299	n/a	value in the result list is stripped in the same way as the result of
300	n/a	getheader(). If the header is not given, return an empty list.
301	n/a	"""
302	332	result = []
303	332	current = ''
304	332	have_header = 0
305	475	for s in self.getallmatchingheaders(name):
306	143	if s[0].isspace():
307	0	if current:
308	0	current = "%s\n %s" % (current, s.strip())
309	n/a	else:
310	0	current = s.strip()
311	n/a	else:
312	143	if have_header:
313	9	result.append(current)
314	143	current = s[s.find(":") + 1:].strip()
315	143	have_header = 1
316	332	if have_header:
317	134	result.append(current)
318	332	return result
319	n/a
320	1	def getaddr(self, name):
321	n/a	"""Get a single address from a header, as a tuple.
322	n/a
323	n/a	An example return value:
324	n/a	('Guido van Rossum', 'guido@cwi.nl')
325	n/a	"""
326	n/a	# New, by Ben Escoto
327	0	alist = self.getaddrlist(name)
328	0	if alist:
329	0	return alist[0]
330	n/a	else:
331	0	return (None, None)
332	n/a
333	1	def getaddrlist(self, name):
334	n/a	"""Get a list of addresses from a header.
335	n/a
336	n/a	Retrieves a list of addresses from a header, where each address is a
337	n/a	tuple as returned by getaddr(). Scans all named headers, so it works
338	n/a	properly with multiple To: or Cc: headers for example.
339	n/a	"""
340	30	raw = []
341	58	for h in self.getallmatchingheaders(name):
342	28	if h[0] in ' \t':
343	3	raw.append(h)
344	n/a	else:
345	25	if raw:
346	6	raw.append(', ')
347	25	i = h.find(':')
348	25	if i > 0:
349	25	addr = h[i+1:]
350	25	raw.append(addr)
351	30	alladdrs = ''.join(raw)
352	30	a = AddressList(alladdrs)
353	30	return a.addresslist
354	n/a
355	1	def getdate(self, name):
356	n/a	"""Retrieve a date field from a header.
357	n/a
358	n/a	Retrieves a date field from the named header, returning a tuple
359	n/a	compatible with time.mktime().
360	n/a	"""
361	14	try:
362	14	data = self[name]
363	6	except KeyError:
364	6	return None
365	8	return parsedate(data)
366	n/a
367	1	def getdate_tz(self, name):
368	n/a	"""Retrieve a date field from a header as a 10-tuple.
369	n/a
370	n/a	The first 9 elements make up a tuple compatible with time.mktime(),
371	n/a	and the 10th is the offset of the poster's time zone from GMT/UTC.
372	n/a	"""
373	0	try:
374	0	data = self[name]
375	0	except KeyError:
376	0	return None
377	0	return parsedate_tz(data)
378	n/a
379	n/a
380	n/a	# Access as a dictionary (only finds last header of each type):
381	n/a
382	1	def __len__(self):
383	n/a	"""Get the number of headers in a message."""
384	1	return len(self.dict)
385	n/a
386	1	def __getitem__(self, name):
387	n/a	"""Get a specific header, as from a dictionary."""
388	98	return self.dict[name.lower()]
389	n/a
390	1	def __setitem__(self, name, value):
391	n/a	"""Set the value of a header.
392	n/a
393	n/a	Note: This is not a perfect inversion of __getitem__, because any
394	n/a	changed headers get stuck at the end of the raw-headers list rather
395	n/a	than where the altered header was.
396	n/a	"""
397	0	del self[name] # Won't fail if it doesn't exist
398	0	self.dict[name.lower()] = value
399	0	text = name + ": " + value
400	0	for line in text.split("\n"):
401	0	self.headers.append(line + "\n")
402	n/a
403	1	def __delitem__(self, name):
404	n/a	"""Delete all occurrences of a specific header, if it is present."""
405	0	name = name.lower()
406	0	if not name in self.dict:
407	0	return
408	0	del self.dict[name]
409	0	name = name + ':'
410	0	n = len(name)
411	0	lst = []
412	0	hit = 0
413	0	for i in range(len(self.headers)):
414	0	line = self.headers[i]
415	0	if line[:n].lower() == name:
416	0	hit = 1
417	0	elif not line[:1].isspace():
418	0	hit = 0
419	0	if hit:
420	0	lst.append(i)
421	0	for i in reversed(lst):
422	0	del self.headers[i]
423	n/a
424	1	def setdefault(self, name, default=""):
425	3	lowername = name.lower()
426	3	if lowername in self.dict:
427	1	return self.dict[lowername]
428	n/a	else:
429	2	text = name + ": " + default
430	4	for line in text.split("\n"):
431	2	self.headers.append(line + "\n")
432	2	self.dict[lowername] = default
433	2	return default
434	n/a
435	1	def has_key(self, name):
436	n/a	"""Determine whether a message contains the named header."""
437	1	return name.lower() in self.dict
438	n/a
439	1	def __contains__(self, name):
440	n/a	"""Determine whether a message contains the named header."""
441	52	return name.lower() in self.dict
442	n/a
443	1	def __iter__(self):
444	1	return iter(self.dict)
445	n/a
446	1	def keys(self):
447	n/a	"""Get all of a message's header field names."""
448	0	return self.dict.keys()
449	n/a
450	1	def values(self):
451	n/a	"""Get all of a message's header field values."""
452	0	return self.dict.values()
453	n/a
454	1	def items(self):
455	n/a	"""Get all of a message's headers.
456	n/a
457	n/a	Returns a list of name, value tuples.
458	n/a	"""
459	0	return self.dict.items()
460	n/a
461	1	def __str__(self):
462	1	return ''.join(self.headers)
463	n/a
464	n/a
465	n/a	# Utility functions
466	n/a	# -----------------
467	n/a
468	n/a	# XXX Should fix unquote() and quote() to be really conformant.
469	n/a	# XXX The inverses of the parse functions may also be useful.
470	n/a
471	n/a
472	1	def unquote(s):
473	n/a	"""Remove quotes from a string."""
474	5	if len(s) > 1:
475	5	if s.startswith('"') and s.endswith('"'):
476	3	return s[1:-1].replace('\\\\', '\\').replace('\\"', '"')
477	2	if s.startswith('<') and s.endswith('>'):
478	0	return s[1:-1]
479	2	return s
480	n/a
481	n/a
482	1	def quote(s):
483	n/a	"""Add quotes around a string."""
484	1	return s.replace('\\', '\\\\').replace('"', '\\"')
485	n/a
486	n/a
487	1	def parseaddr(address):
488	n/a	"""Parse an address into a (realname, mailaddr) tuple."""
489	4	a = AddressList(address)
490	4	lst = a.addresslist
491	4	if not lst:
492	0	return (None, None)
493	4	return lst[0]
494	n/a
495	n/a
496	2	class AddrlistClass:
497	n/a	"""Address parser class by Ben Escoto.
498	n/a
499	n/a	To understand what this class does, it helps to have a copy of
500	n/a	RFC 2822 in front of you.
501	n/a
502	n/a	http://www.faqs.org/rfcs/rfc2822.html
503	n/a
504	n/a	Note: this class interface is deprecated and may be removed in the future.
505	n/a	Use rfc822.AddressList instead.
506	1	"""
507	n/a
508	1	def __init__(self, field):
509	n/a	"""Initialize a new instance.
510	n/a
511	n/a	`field' is an unparsed address header field, containing one or more
512	n/a	addresses.
513	n/a	"""
514	34	self.specials = '()<>@,:;.\"[]'
515	34	self.pos = 0
516	34	self.LWS = ' \t'
517	34	self.CR = '\r\n'
518	34	self.atomends = self.specials + self.LWS + self.CR
519	n/a	# Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
520	n/a	# is obsolete syntax. RFC 2822 requires that we recognize obsolete
521	n/a	# syntax, so allow dots in phrases.
522	34	self.phraseends = self.atomends.replace('.', '')
523	34	self.field = field
524	34	self.commentlist = []
525	n/a
526	1	def gotonext(self):
527	n/a	"""Parse up to the start of the next address."""
528	366	while self.pos < len(self.field):
529	272	if self.field[self.pos] in self.LWS + '\n\r':
530	73	self.pos = self.pos + 1
531	199	elif self.field[self.pos] == '(':
532	1	self.commentlist.append(self.getcomment())
533	198	else: break
534	n/a
535	1	def getaddrlist(self):
536	n/a	"""Parse all addresses.
537	n/a
538	n/a	Returns a list containing all of the addresses.
539	n/a	"""
540	23	result = []
541	23	ad = self.getaddress()
542	55	while ad:
543	32	result += ad
544	32	ad = self.getaddress()
545	23	return result
546	n/a
547	1	def getaddress(self):
548	n/a	"""Parse the next address."""
549	60	self.commentlist = []
550	60	self.gotonext()
551	n/a
552	60	oldpos = self.pos
553	60	oldcl = self.commentlist
554	60	plist = self.getphraselist()
555	n/a
556	60	self.gotonext()
557	60	returnlist = []
558	n/a
559	60	if self.pos >= len(self.field):
560	n/a	# Bad email address technically, no domain.
561	23	if plist:
562	0	returnlist = [(' '.join(self.commentlist), plist[0])]
563	n/a
564	37	elif self.field[self.pos] in '.@':
565	n/a	# email address is just an addrspec
566	n/a	# this isn't very efficient since we start over
567	20	self.pos = oldpos
568	20	self.commentlist = oldcl
569	20	addrspec = self.getaddrspec()
570	20	returnlist = [(' '.join(self.commentlist), addrspec)]
571	n/a
572	17	elif self.field[self.pos] == ':':
573	n/a	# address is a group
574	2	returnlist = []
575	n/a
576	2	fieldlen = len(self.field)
577	2	self.pos += 1
578	7	while self.pos < len(self.field):
579	5	self.gotonext()
580	5	if self.pos < fieldlen and self.field[self.pos] == ';':
581	0	self.pos += 1
582	0	break
583	5	returnlist = returnlist + self.getaddress()
584	n/a
585	15	elif self.field[self.pos] == '<':
586	n/a	# Address is a phrase then a route addr
587	13	routeaddr = self.getrouteaddr()
588	n/a
589	13	if self.commentlist:
590	n/a	returnlist = [(' '.join(plist) + ' (' + \
591	0	' '.join(self.commentlist) + ')', routeaddr)]
592	13	else: returnlist = [(' '.join(plist), routeaddr)]
593	n/a
594	n/a	else:
595	2	if plist:
596	0	returnlist = [(' '.join(self.commentlist), plist[0])]
597	2	elif self.field[self.pos] in self.specials:
598	2	self.pos += 1
599	n/a
600	60	self.gotonext()
601	60	if self.pos < len(self.field) and self.field[self.pos] == ',':
602	9	self.pos += 1
603	60	return returnlist
604	n/a
605	1	def getrouteaddr(self):
606	n/a	"""Parse a route address (Return-path value).
607	n/a
608	n/a	This method just skips all the route stuff and returns the addrspec.
609	n/a	"""
610	13	if self.field[self.pos] != '<':
611	0	return
612	n/a
613	13	expectroute = 0
614	13	self.pos += 1
615	13	self.gotonext()
616	13	adlist = ""
617	13	while self.pos < len(self.field):
618	13	if expectroute:
619	0	self.getdomain()
620	0	expectroute = 0
621	13	elif self.field[self.pos] == '>':
622	1	self.pos += 1
623	1	break
624	12	elif self.field[self.pos] == '@':
625	0	self.pos += 1
626	0	expectroute = 1
627	12	elif self.field[self.pos] == ':':
628	0	self.pos += 1
629	n/a	else:
630	12	adlist = self.getaddrspec()
631	12	self.pos += 1
632	12	break
633	0	self.gotonext()
634	n/a
635	13	return adlist
636	n/a
637	1	def getaddrspec(self):
638	n/a	"""Parse an RFC 2822 addr-spec."""
639	32	aslist = []
640	n/a
641	32	self.gotonext()
642	63	while self.pos < len(self.field):
643	63	if self.field[self.pos] == '.':
644	0	aslist.append('.')
645	0	self.pos += 1
646	63	elif self.field[self.pos] == '"':
647	1	aslist.append('"%s"' % self.getquote())
648	62	elif self.field[self.pos] in self.atomends:
649	32	break
650	30	else: aslist.append(self.getatom())
651	31	self.gotonext()
652	n/a
653	32	if self.pos >= len(self.field) or self.field[self.pos] != '@':
654	1	return ''.join(aslist)
655	n/a
656	31	aslist.append('@')
657	31	self.pos += 1
658	31	self.gotonext()
659	31	return ''.join(aslist) + self.getdomain()
660	n/a
661	1	def getdomain(self):
662	n/a	"""Get the complete domain name from an address."""
663	31	sdlist = []
664	138	while self.pos < len(self.field):
665	136	if self.field[self.pos] in self.LWS:
666	5	self.pos += 1
667	131	elif self.field[self.pos] == '(':
668	5	self.commentlist.append(self.getcomment())
669	126	elif self.field[self.pos] == '[':
670	1	sdlist.append(self.getdomainliteral())
671	125	elif self.field[self.pos] == '.':
672	33	self.pos += 1
673	33	sdlist.append('.')
674	92	elif self.field[self.pos] in self.atomends:
675	29	break
676	63	else: sdlist.append(self.getatom())
677	31	return ''.join(sdlist)
678	n/a
679	1	def getdelimited(self, beginchar, endchars, allowcomments = 1):
680	n/a	"""Parse a header fragment delimited by special characters.
681	n/a
682	n/a	`beginchar' is the start character for the fragment. If self is not
683	n/a	looking at an instance of `beginchar' then getdelimited returns the
684	n/a	empty string.
685	n/a
686	n/a	`endchars' is a sequence of allowable end-delimiting characters.
687	n/a	Parsing stops when one of these is encountered.
688	n/a
689	n/a	If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
690	n/a	within the parsed fragment.
691	n/a	"""
692	16	if self.field[self.pos] != beginchar:
693	0	return ''
694	n/a
695	16	slist = ['']
696	16	quote = 0
697	16	self.pos += 1
698	276	while self.pos < len(self.field):
699	276	if quote == 1:
700	0	slist.append(self.field[self.pos])
701	0	quote = 0
702	276	elif self.field[self.pos] in endchars:
703	16	self.pos += 1
704	16	break
705	260	elif allowcomments and self.field[self.pos] == '(':
706	1	slist.append(self.getcomment())
707	1	continue # have already advanced pos from getcomment
708	259	elif self.field[self.pos] == '\\':
709	0	quote = 1
710	n/a	else:
711	259	slist.append(self.field[self.pos])
712	259	self.pos += 1
713	n/a
714	16	return ''.join(slist)
715	n/a
716	1	def getquote(self):
717	n/a	"""Get a quote-delimited fragment from self's field."""
718	8	return self.getdelimited('"', '"\r', 0)
719	n/a
720	1	def getcomment(self):
721	n/a	"""Get a parenthesis-delimited fragment from self's field."""
722	7	return self.getdelimited('(', ')\r', 1)
723	n/a
724	1	def getdomainliteral(self):
725	n/a	"""Parse an RFC 2822 domain-literal."""
726	1	return '[%s]' % self.getdelimited('[', ']\r', 0)
727	n/a
728	1	def getatom(self, atomends=None):
729	n/a	"""Parse an RFC 2822 atom.
730	n/a
731	n/a	Optional atomends specifies a different set of end token delimiters
732	n/a	(the default is to use self.atomends). This is used e.g. in
733	n/a	getphraselist() since phrase endings must not include the `.' (which
734	n/a	is legal in phrases)."""
735	124	atomlist = ['']
736	124	if atomends is None:
737	93	atomends = self.atomends
738	n/a
739	713	while self.pos < len(self.field):
740	712	if self.field[self.pos] in atomends:
741	123	break
742	589	else: atomlist.append(self.field[self.pos])
743	589	self.pos += 1
744	n/a
745	124	return ''.join(atomlist)
746	n/a
747	1	def getphraselist(self):
748	n/a	"""Parse a sequence of RFC 2822 phrases.
749	n/a
750	n/a	A phrase is a sequence of words, which are in turn either RFC 2822
751	n/a	atoms or quoted-strings. Phrases are canonicalized by squeezing all
752	n/a	runs of continuous whitespace into one space.
753	n/a	"""
754	60	plist = []
755	n/a
756	115	while self.pos < len(self.field):
757	92	if self.field[self.pos] in self.LWS:
758	17	self.pos += 1
759	75	elif self.field[self.pos] == '"':
760	7	plist.append(self.getquote())
761	68	elif self.field[self.pos] == '(':
762	0	self.commentlist.append(self.getcomment())
763	68	elif self.field[self.pos] in self.phraseends:
764	37	break
765	n/a	else:
766	31	plist.append(self.getatom(self.phraseends))
767	n/a
768	60	return plist
769	n/a
770	2	class AddressList(AddrlistClass):
771	1	"""An AddressList encapsulates a list of parsed RFC 2822 addresses."""
772	1	def __init__(self, field):
773	34	AddrlistClass.__init__(self, field)
774	34	if field:
775	23	self.addresslist = self.getaddrlist()
776	n/a	else:
777	11	self.addresslist = []
778	n/a
779	1	def __len__(self):
780	0	return len(self.addresslist)
781	n/a
782	1	def __str__(self):
783	0	return ", ".join(map(dump_address_pair, self.addresslist))
784	n/a
785	1	def __add__(self, other):
786	n/a	# Set union
787	0	newaddr = AddressList(None)
788	0	newaddr.addresslist = self.addresslist[:]
789	0	for x in other.addresslist:
790	0	if not x in self.addresslist:
791	0	newaddr.addresslist.append(x)
792	0	return newaddr
793	n/a
794	1	def __iadd__(self, other):
795	n/a	# Set union, in-place
796	0	for x in other.addresslist:
797	0	if not x in self.addresslist:
798	0	self.addresslist.append(x)
799	0	return self
800	n/a
801	1	def __sub__(self, other):
802	n/a	# Set difference
803	0	newaddr = AddressList(None)
804	0	for x in self.addresslist:
805	0	if not x in other.addresslist:
806	0	newaddr.addresslist.append(x)
807	0	return newaddr
808	n/a
809	1	def __isub__(self, other):
810	n/a	# Set difference, in-place
811	0	for x in other.addresslist:
812	0	if x in self.addresslist:
813	0	self.addresslist.remove(x)
814	0	return self
815	n/a
816	1	def __getitem__(self, index):
817	n/a	# Make indexing, slices, and 'in' work
818	0	return self.addresslist[index]
819	n/a
820	1	def dump_address_pair(pair):
821	n/a	"""Dump a (name, address) pair in a canonicalized form."""
822	0	if pair[0]:
823	0	return '"' + pair[0] + '" <' + pair[1] + '>'
824	n/a	else:
825	0	return pair[1]
826	n/a
827	n/a	# Parse a date field
828	n/a
829	1	_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
830	1	'aug', 'sep', 'oct', 'nov', 'dec',
831	1	'january', 'february', 'march', 'april', 'may', 'june', 'july',
832	1	'august', 'september', 'october', 'november', 'december']
833	1	_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
834	n/a
835	n/a	# The timezone table does not include the military time zones defined
836	n/a	# in RFC822, other than Z. According to RFC1123, the description in
837	n/a	# RFC822 gets the signs wrong, so we can't rely on any such time
838	n/a	# zones. RFC1123 recommends that numeric timezone indicators be used
839	n/a	# instead of timezone names.
840	n/a
841	1	_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
842	1	'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
843	1	'EST': -500, 'EDT': -400, # Eastern
844	1	'CST': -600, 'CDT': -500, # Central
845	1	'MST': -700, 'MDT': -600, # Mountain
846	1	'PST': -800, 'PDT': -700 # Pacific
847	n/a	}
848	n/a
849	n/a
850	1	def parsedate_tz(data):
851	n/a	"""Convert a date string to a time tuple.
852	n/a
853	n/a	Accounts for military timezones.
854	n/a	"""
855	8	if not data:
856	1	return None
857	7	data = data.split()
858	7	if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
859	n/a	# There's a dayname here. Skip it
860	6	del data[0]
861	n/a	else:
862	n/a	# no space after the "weekday,"?
863	1	i = data[0].rfind(',')
864	1	if i >= 0:
865	0	data[0] = data[0][i+1:]
866	7	if len(data) == 3: # RFC 850 date, deprecated
867	1	stuff = data[0].split('-')
868	1	if len(stuff) == 3:
869	1	data = stuff + data[1:]
870	7	if len(data) == 4:
871	0	s = data[3]
872	0	i = s.find('+')
873	0	if i > 0:
874	0	data[3:] = [s[:i], s[i+1:]]
875	n/a	else:
876	0	data.append('') # Dummy tz
877	7	if len(data) < 5:
878	0	return None
879	7	data = data[:5]
880	7	[dd, mm, yy, tm, tz] = data
881	7	mm = mm.lower()
882	7	if not mm in _monthnames:
883	1	dd, mm = mm, dd.lower()
884	1	if not mm in _monthnames:
885	0	return None
886	7	mm = _monthnames.index(mm)+1
887	7	if mm > 12: mm = mm - 12
888	7	if dd[-1] == ',':
889	0	dd = dd[:-1]
890	7	i = yy.find(':')
891	7	if i > 0:
892	0	yy, tm = tm, yy
893	7	if yy[-1] == ',':
894	0	yy = yy[:-1]
895	7	if not yy[0].isdigit():
896	0	yy, tz = tz, yy
897	7	if tm[-1] == ',':
898	0	tm = tm[:-1]
899	7	tm = tm.split(':')
900	7	if len(tm) == 2:
901	0	[thh, tmm] = tm
902	0	tss = '0'
903	7	elif len(tm) == 3:
904	7	[thh, tmm, tss] = tm
905	n/a	else:
906	0	return None
907	7	try:
908	7	yy = int(yy)
909	7	dd = int(dd)
910	7	thh = int(thh)
911	7	tmm = int(tmm)
912	7	tss = int(tss)
913	0	except ValueError:
914	0	return None
915	7	tzoffset = None
916	7	tz = tz.upper()
917	7	if tz in _timezones:
918	1	tzoffset = _timezones[tz]
919	n/a	else:
920	6	try:
921	6	tzoffset = int(tz)
922	0	except ValueError:
923	0	pass
924	n/a	# Convert a timezone offset into seconds ; -0500 -> -18000
925	7	if tzoffset:
926	7	if tzoffset < 0:
927	7	tzsign = -1
928	7	tzoffset = -tzoffset
929	n/a	else:
930	0	tzsign = 1
931	7	tzoffset = tzsign * ( (tzoffset//100)3600 + (tzoffset % 100)60)
932	7	return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
933	n/a
934	n/a
935	1	def parsedate(data):
936	n/a	"""Convert a time string to a time tuple."""
937	8	t = parsedate_tz(data)
938	8	if t is None:
939	1	return t
940	7	return t[:9]
941	n/a
942	n/a
943	1	def mktime_tz(data):
944	n/a	"""Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
945	0	if data[9] is None:
946	n/a	# No zone info, so localtime is better assumption than GMT
947	0	return time.mktime(data[:8] + (-1,))
948	n/a	else:
949	0	t = time.mktime(data[:8] + (0,))
950	0	return t - data[9] - time.timezone
951	n/a
952	1	def formatdate(timeval=None):
953	n/a	"""Returns time format preferred for Internet standards.
954	n/a
955	n/a	Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
956	n/a
957	n/a	According to RFC 1123, day and month names must always be in
958	n/a	English. If not for that, this code could use strftime(). It
959	n/a	can't because strftime() honors the locale and could generated
960	n/a	non-English names.
961	n/a	"""
962	4	if timeval is None:
963	0	timeval = time.time()
964	4	timeval = time.gmtime(timeval)
965	4	return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
966	4	("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[timeval[6]],
967	4	timeval[2],
968	0	("Jan", "Feb", "Mar", "Apr", "May", "Jun",
969	4	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[timeval[1]-1],
970	4	timeval[0], timeval[3], timeval[4], timeval[5])
971	n/a
972	n/a
973	n/a	# When used as script, run a small test program.
974	n/a	# The first command line argument must be a filename containing one
975	n/a	# message in RFC-822 format.
976	n/a
977	1	if __name__ == '__main__':
978	0	import sys, os
979	0	file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
980	0	if sys.argv[1:]: file = sys.argv[1]
981	0	f = open(file, 'r')
982	0	m = Message(f)
983	0	print 'From:', m.getaddr('from')
984	0	print 'To:', m.getaddrlist('to')
985	0	print 'Subject:', m.getheader('subject')
986	0	print 'Date:', m.getheader('date')
987	0	date = m.getdate_tz('date')
988	0	tz = date[-1]
989	0	date = time.localtime(mktime_tz(date))
990	0	if date:
991	0	print 'ParsedDate:', time.asctime(date),
992	0	hhmmss = tz
993	0	hhmm, ss = divmod(hhmmss, 60)
994	0	hh, mm = divmod(hhmm, 60)
995	0	print "%+03d%02d" % (hh, mm),
996	0	if ss: print ".%02d" % ss,
997	0	print
998	n/a	else:
999	0	print 'ParsedDate:', None
1000	0	m.rewindbody()
1001	0	n = 0
1002	0	while f.readline():
1003	0	n += 1
1004	0	print 'Lines:', n
1005	0	print '-'*70
1006	0	print 'len =', len(m)
1007	0	if 'Date' in m: print 'Date =', m['Date']
1008	0	if 'X-Nonsense' in m: pass
1009	0	print 'keys =', m.keys()
1010	0	print 'values =', m.values()
1011	0	print 'items =', m.items()