ยปCore Development>Code coverage>Lib/rfc822.py

Python code coverage for Lib/rfc822.py

#countcontent
1n/a"""RFC 2822 message manipulation.
2n/a
3n/aNote: This is only a very rough sketch of a full RFC-822 parser; in particular
4n/athe tokenizing of addresses does not adhere to all the quoting rules.
5n/a
6n/aNote: RFC 2822 is a long awaited update to RFC 822. This module should
7n/aconform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
8n/aeffort at RFC 2822 updates have been made, but a thorough audit has not been
9n/aperformed. Consider any RFC 2822 non-conformance to be a bug.
10n/a
11n/a RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
12n/a RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
13n/a
14n/aDirections for use:
15n/a
16n/aTo create a Message object: first open a file, e.g.:
17n/a
18n/a fp = open(file, 'r')
19n/a
20n/aYou can use any other legal way of getting an open file object, e.g. use
21n/asys.stdin or call os.popen(). Then pass the open file object to the Message()
22n/aconstructor:
23n/a
24n/a m = Message(fp)
25n/a
26n/aThis class can work with any input object that supports a readline method. If
27n/athe input object has seek and tell capability, the rewindbody method will
28n/awork; also illegal lines will be pushed back onto the input stream. If the
29n/ainput object lacks seek but has an `unread' method that can push back a line
30n/aof input, Message will use that to push back illegal lines. Thus this class
31n/acan be used to parse messages coming from a buffered stream.
32n/a
33n/aThe optional `seekable' argument is provided as a workaround for certain stdio
34n/alibraries in which tell() discards buffered data before discovering that the
35n/alseek() system call doesn't work. For maximum portability, you should set the
36n/aseekable argument to zero to prevent that initial \code{tell} when passing in
37n/aan unseekable object such as a a file object created from a socket object. If
38n/ait is 1 on entry -- which it is by default -- the tell() method of the open
39n/afile object is called once; if this raises an exception, seekable is reset to
40n/a0. For other nonzero values of seekable, this test is not made.
41n/a
42n/aTo get the text of a particular header there are several methods:
43n/a
44n/a str = m.getheader(name)
45n/a str = m.getrawheader(name)
46n/a
47n/awhere name is the name of the header, e.g. 'Subject'. The difference is that
48n/agetheader() strips the leading and trailing whitespace, while getrawheader()
49n/adoesn't. Both functions retain embedded whitespace (including newlines)
50n/aexactly as they are specified in the header, and leave the case of the text
51n/aunchanged.
52n/a
53n/aFor addresses and address lists there are functions
54n/a
55n/a realname, mailaddress = m.getaddr(name)
56n/a list = m.getaddrlist(name)
57n/a
58n/awhere the latter returns a list of (realname, mailaddr) tuples.
59n/a
60n/aThere is also a method
61n/a
62n/a time = m.getdate(name)
63n/a
64n/awhich parses a Date-like field and returns a time-compatible tuple,
65n/ai.e. a tuple such as returned by time.localtime() or accepted by
66n/atime.mktime().
67n/a
68n/aSee the class definition for lower level access methods.
69n/a
70n/aThere are also some utility functions here.
711"""
72n/a# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
73n/a
741import time
75n/a
761from warnings import warnpy3k
771warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",
781 stacklevel=2)
79n/a
801__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]
81n/a
821_blanklines = ('\r\n', '\n') # Optimization for islast()
83n/a
84n/a
852class Message:
861 """Represents a single RFC 2822-compliant message."""
87n/a
881 def __init__(self, fp, seekable = 1):
89n/a """Initialize the class instance and read the headers."""
90483 if seekable == 1:
91n/a # Exercise tell() to make sure it works
92n/a # (and then assume seek() works, too)
93262 try:
94262 fp.tell()
950 except (AttributeError, IOError):
960 seekable = 0
97483 self.fp = fp
98483 self.seekable = seekable
99483 self.startofheaders = None
100483 self.startofbody = None
101n/a #
102483 if self.seekable:
103262 try:
104262 self.startofheaders = self.fp.tell()
1050 except IOError:
1060 self.seekable = 0
107n/a #
108483 self.readheaders()
109n/a #
110483 if self.seekable:
111262 try:
112262 self.startofbody = self.fp.tell()
1130 except IOError:
1140 self.seekable = 0
115n/a
1161 def rewindbody(self):
117n/a """Rewind the file to the start of the body (if seekable)."""
1180 if not self.seekable:
1190 raise IOError, "unseekable file"
1200 self.fp.seek(self.startofbody)
121n/a
1221 def readheaders(self):
123n/a """Read header lines.
124n/a
125n/a Read header lines up to the entirely blank line that terminates them.
126n/a The (normally blank) line that ends the headers is skipped, but not
127n/a included in the returned list. If a non-header line ends the headers,
128n/a (which is an error), an attempt is made to backspace over it; it is
129n/a never included in the returned list.
130n/a
131n/a The variable self.status is set to the empty string if all went well,
132n/a otherwise it is an error message. The variable self.headers is a
133n/a completely uninterpreted list of lines contained in the header (so
134n/a printing them will reproduce the header exactly as it appears in the
135n/a file).
136n/a """
137349 self.dict = {}
138349 self.unixfrom = ''
139349 self.headers = lst = []
140349 self.status = ''
141349 headerseen = ""
142349 firstline = 1
143349 startofline = unread = tell = None
144349 if hasattr(self.fp, 'unread'):
1450 unread = self.fp.unread
146349 elif self.seekable:
147261 tell = self.fp.tell
148349 while 1:
1491013 if tell:
150604 try:
151604 startofline = tell()
1520 except IOError:
1530 startofline = tell = None
1540 self.seekable = 0
1551013 line = self.fp.readline()
1561013 if not line:
157194 self.status = 'EOF in headers'
158194 break
159n/a # Skip unix From name time lines
160819 if firstline and line.startswith('From '):
1614 self.unixfrom = self.unixfrom + line
1624 continue
163815 firstline = 0
164815 if headerseen and line[0] in ' \t':
165n/a # It's a continuation line.
1666 lst.append(line)
1676 x = (self.dict[headerseen] + "\n " + line.strip())
1686 self.dict[headerseen] = x.strip()
1696 continue
170809 elif self.iscomment(line):
171n/a # It's a comment. Ignore it.
1720 continue
173809 elif self.islast(line):
174n/a # Note! No pushback here! The delimiter line gets eaten.
175155 break
176654 headerseen = self.isheader(line)
177654 if headerseen:
178n/a # It's a legal header line, save it.
179654 lst.append(line)
180654 self.dict[headerseen] = line[len(headerseen)+1:].strip()
181654 continue
182n/a else:
183n/a # It's not a header line; throw it back and stop here.
1840 if not self.dict:
1850 self.status = 'No headers'
186n/a else:
1870 self.status = 'Non-header line where header expected'
188n/a # Try to undo the read.
1890 if unread:
1900 unread(line)
1910 elif tell:
1920 self.fp.seek(startofline)
193n/a else:
1940 self.status = self.status + '; bad seek'
1950 break
196n/a
1971 def isheader(self, line):
198n/a """Determine whether a given line is a legal header.
199n/a
200n/a This method should return the header name, suitably canonicalized.
201n/a You may override this method in order to use Message parsing on tagged
202n/a data in RFC 2822-like formats with special header formats.
203n/a """
2041278 i = line.find(':')
2051278 if i > 0:
2061278 return line[:i].lower()
2070 return None
208n/a
2091 def islast(self, line):
210n/a """Determine whether a line is a legal end of RFC 2822 headers.
211n/a
212n/a You may override this method if your application wants to bend the
213n/a rules, e.g. to strip trailing whitespace, or to recognize MH template
214n/a separators ('--------'). For convenience (e.g. for code reading from
215n/a sockets) a line consisting of \r\n also matches.
216n/a """
2171565 return line in _blanklines
218n/a
2191 def iscomment(self, line):
220n/a """Determine whether a line should be skipped entirely.
221n/a
222n/a You may override this method in order to use Message parsing on tagged
223n/a data in RFC 2822-like formats that support embedded comments or
224n/a free-text data.
225n/a """
2261565 return False
227n/a
2281 def getallmatchingheaders(self, name):
229n/a """Find all header lines matching a given header name.
230n/a
231n/a Look through the list of headers and find all lines matching a given
232n/a header name (and their continuation lines). A list of the lines is
233n/a returned, without interpretation. If the header does not occur, an
234n/a empty list is returned. If the header occurs multiple times, all
235n/a occurrences are returned. Case is not important in the header name.
236n/a """
237366 name = name.lower() + ':'
238366 n = len(name)
239366 lst = []
240366 hit = 0
241754 for line in self.headers:
242388 if line[:n].lower() == name:
243168 hit = 1
244220 elif not line[:1].isspace():
245214 hit = 0
246388 if hit:
247171 lst.append(line)
248366 return lst
249n/a
2501 def getfirstmatchingheader(self, name):
251n/a """Get the first header line matching name.
252n/a
253n/a This is similar to getallmatchingheaders, but it returns only the
254n/a first matching header (and its continuation lines).
255n/a """
2560 name = name.lower() + ':'
2570 n = len(name)
2580 lst = []
2590 hit = 0
2600 for line in self.headers:
2610 if hit:
2620 if not line[:1].isspace():
2630 break
2640 elif line[:n].lower() == name:
2650 hit = 1
2660 if hit:
2670 lst.append(line)
2680 return lst
269n/a
2701 def getrawheader(self, name):
271n/a """A higher-level interface to getfirstmatchingheader().
272n/a
273n/a Return a string containing the literal text of the header but with the
274n/a keyword stripped. All leading, trailing and embedded whitespace is
275n/a kept in the string, however. Return None if the header does not
276n/a occur.
277n/a """
278n/a
2790 lst = self.getfirstmatchingheader(name)
2800 if not lst:
2810 return None
2820 lst[0] = lst[0][len(name) + 1:]
2830 return ''.join(lst)
284n/a
2851 def getheader(self, name, default=None):
286n/a """Get the header value for a name.
287n/a
288n/a This is the normal interface: it returns a stripped version of the
289n/a header value for a given header name, or None if it doesn't exist.
290n/a This uses the dictionary version which finds the *last* such header.
291n/a """
2921685 return self.dict.get(name.lower(), default)
2931 get = getheader
294n/a
2951 def getheaders(self, name):
296n/a """Get all values for a header.
297n/a
298n/a This returns a list of values for headers given more than once; each
299n/a value in the result list is stripped in the same way as the result of
300n/a getheader(). If the header is not given, return an empty list.
301n/a """
302332 result = []
303332 current = ''
304332 have_header = 0
305475 for s in self.getallmatchingheaders(name):
306143 if s[0].isspace():
3070 if current:
3080 current = "%s\n %s" % (current, s.strip())
309n/a else:
3100 current = s.strip()
311n/a else:
312143 if have_header:
3139 result.append(current)
314143 current = s[s.find(":") + 1:].strip()
315143 have_header = 1
316332 if have_header:
317134 result.append(current)
318332 return result
319n/a
3201 def getaddr(self, name):
321n/a """Get a single address from a header, as a tuple.
322n/a
323n/a An example return value:
324n/a ('Guido van Rossum', 'guido@cwi.nl')
325n/a """
326n/a # New, by Ben Escoto
3270 alist = self.getaddrlist(name)
3280 if alist:
3290 return alist[0]
330n/a else:
3310 return (None, None)
332n/a
3331 def getaddrlist(self, name):
334n/a """Get a list of addresses from a header.
335n/a
336n/a Retrieves a list of addresses from a header, where each address is a
337n/a tuple as returned by getaddr(). Scans all named headers, so it works
338n/a properly with multiple To: or Cc: headers for example.
339n/a """
34030 raw = []
34158 for h in self.getallmatchingheaders(name):
34228 if h[0] in ' \t':
3433 raw.append(h)
344n/a else:
34525 if raw:
3466 raw.append(', ')
34725 i = h.find(':')
34825 if i > 0:
34925 addr = h[i+1:]
35025 raw.append(addr)
35130 alladdrs = ''.join(raw)
35230 a = AddressList(alladdrs)
35330 return a.addresslist
354n/a
3551 def getdate(self, name):
356n/a """Retrieve a date field from a header.
357n/a
358n/a Retrieves a date field from the named header, returning a tuple
359n/a compatible with time.mktime().
360n/a """
36114 try:
36214 data = self[name]
3636 except KeyError:
3646 return None
3658 return parsedate(data)
366n/a
3671 def getdate_tz(self, name):
368n/a """Retrieve a date field from a header as a 10-tuple.
369n/a
370n/a The first 9 elements make up a tuple compatible with time.mktime(),
371n/a and the 10th is the offset of the poster's time zone from GMT/UTC.
372n/a """
3730 try:
3740 data = self[name]
3750 except KeyError:
3760 return None
3770 return parsedate_tz(data)
378n/a
379n/a
380n/a # Access as a dictionary (only finds *last* header of each type):
381n/a
3821 def __len__(self):
383n/a """Get the number of headers in a message."""
3841 return len(self.dict)
385n/a
3861 def __getitem__(self, name):
387n/a """Get a specific header, as from a dictionary."""
38898 return self.dict[name.lower()]
389n/a
3901 def __setitem__(self, name, value):
391n/a """Set the value of a header.
392n/a
393n/a Note: This is not a perfect inversion of __getitem__, because any
394n/a changed headers get stuck at the end of the raw-headers list rather
395n/a than where the altered header was.
396n/a """
3970 del self[name] # Won't fail if it doesn't exist
3980 self.dict[name.lower()] = value
3990 text = name + ": " + value
4000 for line in text.split("\n"):
4010 self.headers.append(line + "\n")
402n/a
4031 def __delitem__(self, name):
404n/a """Delete all occurrences of a specific header, if it is present."""
4050 name = name.lower()
4060 if not name in self.dict:
4070 return
4080 del self.dict[name]
4090 name = name + ':'
4100 n = len(name)
4110 lst = []
4120 hit = 0
4130 for i in range(len(self.headers)):
4140 line = self.headers[i]
4150 if line[:n].lower() == name:
4160 hit = 1
4170 elif not line[:1].isspace():
4180 hit = 0
4190 if hit:
4200 lst.append(i)
4210 for i in reversed(lst):
4220 del self.headers[i]
423n/a
4241 def setdefault(self, name, default=""):
4253 lowername = name.lower()
4263 if lowername in self.dict:
4271 return self.dict[lowername]
428n/a else:
4292 text = name + ": " + default
4304 for line in text.split("\n"):
4312 self.headers.append(line + "\n")
4322 self.dict[lowername] = default
4332 return default
434n/a
4351 def has_key(self, name):
436n/a """Determine whether a message contains the named header."""
4371 return name.lower() in self.dict
438n/a
4391 def __contains__(self, name):
440n/a """Determine whether a message contains the named header."""
44152 return name.lower() in self.dict
442n/a
4431 def __iter__(self):
4441 return iter(self.dict)
445n/a
4461 def keys(self):
447n/a """Get all of a message's header field names."""
4480 return self.dict.keys()
449n/a
4501 def values(self):
451n/a """Get all of a message's header field values."""
4520 return self.dict.values()
453n/a
4541 def items(self):
455n/a """Get all of a message's headers.
456n/a
457n/a Returns a list of name, value tuples.
458n/a """
4590 return self.dict.items()
460n/a
4611 def __str__(self):
4621 return ''.join(self.headers)
463n/a
464n/a
465n/a# Utility functions
466n/a# -----------------
467n/a
468n/a# XXX Should fix unquote() and quote() to be really conformant.
469n/a# XXX The inverses of the parse functions may also be useful.
470n/a
471n/a
4721def unquote(s):
473n/a """Remove quotes from a string."""
4745 if len(s) > 1:
4755 if s.startswith('"') and s.endswith('"'):
4763 return s[1:-1].replace('\\\\', '\\').replace('\\"', '"')
4772 if s.startswith('<') and s.endswith('>'):
4780 return s[1:-1]
4792 return s
480n/a
481n/a
4821def quote(s):
483n/a """Add quotes around a string."""
4841 return s.replace('\\', '\\\\').replace('"', '\\"')
485n/a
486n/a
4871def parseaddr(address):
488n/a """Parse an address into a (realname, mailaddr) tuple."""
4894 a = AddressList(address)
4904 lst = a.addresslist
4914 if not lst:
4920 return (None, None)
4934 return lst[0]
494n/a
495n/a
4962class AddrlistClass:
497n/a """Address parser class by Ben Escoto.
498n/a
499n/a To understand what this class does, it helps to have a copy of
500n/a RFC 2822 in front of you.
501n/a
502n/a http://www.faqs.org/rfcs/rfc2822.html
503n/a
504n/a Note: this class interface is deprecated and may be removed in the future.
505n/a Use rfc822.AddressList instead.
5061 """
507n/a
5081 def __init__(self, field):
509n/a """Initialize a new instance.
510n/a
511n/a `field' is an unparsed address header field, containing one or more
512n/a addresses.
513n/a """
51434 self.specials = '()<>@,:;.\"[]'
51534 self.pos = 0
51634 self.LWS = ' \t'
51734 self.CR = '\r\n'
51834 self.atomends = self.specials + self.LWS + self.CR
519n/a # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
520n/a # is obsolete syntax. RFC 2822 requires that we recognize obsolete
521n/a # syntax, so allow dots in phrases.
52234 self.phraseends = self.atomends.replace('.', '')
52334 self.field = field
52434 self.commentlist = []
525n/a
5261 def gotonext(self):
527n/a """Parse up to the start of the next address."""
528366 while self.pos < len(self.field):
529272 if self.field[self.pos] in self.LWS + '\n\r':
53073 self.pos = self.pos + 1
531199 elif self.field[self.pos] == '(':
5321 self.commentlist.append(self.getcomment())
533198 else: break
534n/a
5351 def getaddrlist(self):
536n/a """Parse all addresses.
537n/a
538n/a Returns a list containing all of the addresses.
539n/a """
54023 result = []
54123 ad = self.getaddress()
54255 while ad:
54332 result += ad
54432 ad = self.getaddress()
54523 return result
546n/a
5471 def getaddress(self):
548n/a """Parse the next address."""
54960 self.commentlist = []
55060 self.gotonext()
551n/a
55260 oldpos = self.pos
55360 oldcl = self.commentlist
55460 plist = self.getphraselist()
555n/a
55660 self.gotonext()
55760 returnlist = []
558n/a
55960 if self.pos >= len(self.field):
560n/a # Bad email address technically, no domain.
56123 if plist:
5620 returnlist = [(' '.join(self.commentlist), plist[0])]
563n/a
56437 elif self.field[self.pos] in '.@':
565n/a # email address is just an addrspec
566n/a # this isn't very efficient since we start over
56720 self.pos = oldpos
56820 self.commentlist = oldcl
56920 addrspec = self.getaddrspec()
57020 returnlist = [(' '.join(self.commentlist), addrspec)]
571n/a
57217 elif self.field[self.pos] == ':':
573n/a # address is a group
5742 returnlist = []
575n/a
5762 fieldlen = len(self.field)
5772 self.pos += 1
5787 while self.pos < len(self.field):
5795 self.gotonext()
5805 if self.pos < fieldlen and self.field[self.pos] == ';':
5810 self.pos += 1
5820 break
5835 returnlist = returnlist + self.getaddress()
584n/a
58515 elif self.field[self.pos] == '<':
586n/a # Address is a phrase then a route addr
58713 routeaddr = self.getrouteaddr()
588n/a
58913 if self.commentlist:
590n/a returnlist = [(' '.join(plist) + ' (' + \
5910 ' '.join(self.commentlist) + ')', routeaddr)]
59213 else: returnlist = [(' '.join(plist), routeaddr)]
593n/a
594n/a else:
5952 if plist:
5960 returnlist = [(' '.join(self.commentlist), plist[0])]
5972 elif self.field[self.pos] in self.specials:
5982 self.pos += 1
599n/a
60060 self.gotonext()
60160 if self.pos < len(self.field) and self.field[self.pos] == ',':
6029 self.pos += 1
60360 return returnlist
604n/a
6051 def getrouteaddr(self):
606n/a """Parse a route address (Return-path value).
607n/a
608n/a This method just skips all the route stuff and returns the addrspec.
609n/a """
61013 if self.field[self.pos] != '<':
6110 return
612n/a
61313 expectroute = 0
61413 self.pos += 1
61513 self.gotonext()
61613 adlist = ""
61713 while self.pos < len(self.field):
61813 if expectroute:
6190 self.getdomain()
6200 expectroute = 0
62113 elif self.field[self.pos] == '>':
6221 self.pos += 1
6231 break
62412 elif self.field[self.pos] == '@':
6250 self.pos += 1
6260 expectroute = 1
62712 elif self.field[self.pos] == ':':
6280 self.pos += 1
629n/a else:
63012 adlist = self.getaddrspec()
63112 self.pos += 1
63212 break
6330 self.gotonext()
634n/a
63513 return adlist
636n/a
6371 def getaddrspec(self):
638n/a """Parse an RFC 2822 addr-spec."""
63932 aslist = []
640n/a
64132 self.gotonext()
64263 while self.pos < len(self.field):
64363 if self.field[self.pos] == '.':
6440 aslist.append('.')
6450 self.pos += 1
64663 elif self.field[self.pos] == '"':
6471 aslist.append('"%s"' % self.getquote())
64862 elif self.field[self.pos] in self.atomends:
64932 break
65030 else: aslist.append(self.getatom())
65131 self.gotonext()
652n/a
65332 if self.pos >= len(self.field) or self.field[self.pos] != '@':
6541 return ''.join(aslist)
655n/a
65631 aslist.append('@')
65731 self.pos += 1
65831 self.gotonext()
65931 return ''.join(aslist) + self.getdomain()
660n/a
6611 def getdomain(self):
662n/a """Get the complete domain name from an address."""
66331 sdlist = []
664138 while self.pos < len(self.field):
665136 if self.field[self.pos] in self.LWS:
6665 self.pos += 1
667131 elif self.field[self.pos] == '(':
6685 self.commentlist.append(self.getcomment())
669126 elif self.field[self.pos] == '[':
6701 sdlist.append(self.getdomainliteral())
671125 elif self.field[self.pos] == '.':
67233 self.pos += 1
67333 sdlist.append('.')
67492 elif self.field[self.pos] in self.atomends:
67529 break
67663 else: sdlist.append(self.getatom())
67731 return ''.join(sdlist)
678n/a
6791 def getdelimited(self, beginchar, endchars, allowcomments = 1):
680n/a """Parse a header fragment delimited by special characters.
681n/a
682n/a `beginchar' is the start character for the fragment. If self is not
683n/a looking at an instance of `beginchar' then getdelimited returns the
684n/a empty string.
685n/a
686n/a `endchars' is a sequence of allowable end-delimiting characters.
687n/a Parsing stops when one of these is encountered.
688n/a
689n/a If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
690n/a within the parsed fragment.
691n/a """
69216 if self.field[self.pos] != beginchar:
6930 return ''
694n/a
69516 slist = ['']
69616 quote = 0
69716 self.pos += 1
698276 while self.pos < len(self.field):
699276 if quote == 1:
7000 slist.append(self.field[self.pos])
7010 quote = 0
702276 elif self.field[self.pos] in endchars:
70316 self.pos += 1
70416 break
705260 elif allowcomments and self.field[self.pos] == '(':
7061 slist.append(self.getcomment())
7071 continue # have already advanced pos from getcomment
708259 elif self.field[self.pos] == '\\':
7090 quote = 1
710n/a else:
711259 slist.append(self.field[self.pos])
712259 self.pos += 1
713n/a
71416 return ''.join(slist)
715n/a
7161 def getquote(self):
717n/a """Get a quote-delimited fragment from self's field."""
7188 return self.getdelimited('"', '"\r', 0)
719n/a
7201 def getcomment(self):
721n/a """Get a parenthesis-delimited fragment from self's field."""
7227 return self.getdelimited('(', ')\r', 1)
723n/a
7241 def getdomainliteral(self):
725n/a """Parse an RFC 2822 domain-literal."""
7261 return '[%s]' % self.getdelimited('[', ']\r', 0)
727n/a
7281 def getatom(self, atomends=None):
729n/a """Parse an RFC 2822 atom.
730n/a
731n/a Optional atomends specifies a different set of end token delimiters
732n/a (the default is to use self.atomends). This is used e.g. in
733n/a getphraselist() since phrase endings must not include the `.' (which
734n/a is legal in phrases)."""
735124 atomlist = ['']
736124 if atomends is None:
73793 atomends = self.atomends
738n/a
739713 while self.pos < len(self.field):
740712 if self.field[self.pos] in atomends:
741123 break
742589 else: atomlist.append(self.field[self.pos])
743589 self.pos += 1
744n/a
745124 return ''.join(atomlist)
746n/a
7471 def getphraselist(self):
748n/a """Parse a sequence of RFC 2822 phrases.
749n/a
750n/a A phrase is a sequence of words, which are in turn either RFC 2822
751n/a atoms or quoted-strings. Phrases are canonicalized by squeezing all
752n/a runs of continuous whitespace into one space.
753n/a """
75460 plist = []
755n/a
756115 while self.pos < len(self.field):
75792 if self.field[self.pos] in self.LWS:
75817 self.pos += 1
75975 elif self.field[self.pos] == '"':
7607 plist.append(self.getquote())
76168 elif self.field[self.pos] == '(':
7620 self.commentlist.append(self.getcomment())
76368 elif self.field[self.pos] in self.phraseends:
76437 break
765n/a else:
76631 plist.append(self.getatom(self.phraseends))
767n/a
76860 return plist
769n/a
7702class AddressList(AddrlistClass):
7711 """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
7721 def __init__(self, field):
77334 AddrlistClass.__init__(self, field)
77434 if field:
77523 self.addresslist = self.getaddrlist()
776n/a else:
77711 self.addresslist = []
778n/a
7791 def __len__(self):
7800 return len(self.addresslist)
781n/a
7821 def __str__(self):
7830 return ", ".join(map(dump_address_pair, self.addresslist))
784n/a
7851 def __add__(self, other):
786n/a # Set union
7870 newaddr = AddressList(None)
7880 newaddr.addresslist = self.addresslist[:]
7890 for x in other.addresslist:
7900 if not x in self.addresslist:
7910 newaddr.addresslist.append(x)
7920 return newaddr
793n/a
7941 def __iadd__(self, other):
795n/a # Set union, in-place
7960 for x in other.addresslist:
7970 if not x in self.addresslist:
7980 self.addresslist.append(x)
7990 return self
800n/a
8011 def __sub__(self, other):
802n/a # Set difference
8030 newaddr = AddressList(None)
8040 for x in self.addresslist:
8050 if not x in other.addresslist:
8060 newaddr.addresslist.append(x)
8070 return newaddr
808n/a
8091 def __isub__(self, other):
810n/a # Set difference, in-place
8110 for x in other.addresslist:
8120 if x in self.addresslist:
8130 self.addresslist.remove(x)
8140 return self
815n/a
8161 def __getitem__(self, index):
817n/a # Make indexing, slices, and 'in' work
8180 return self.addresslist[index]
819n/a
8201def dump_address_pair(pair):
821n/a """Dump a (name, address) pair in a canonicalized form."""
8220 if pair[0]:
8230 return '"' + pair[0] + '" <' + pair[1] + '>'
824n/a else:
8250 return pair[1]
826n/a
827n/a# Parse a date field
828n/a
8291_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
8301 'aug', 'sep', 'oct', 'nov', 'dec',
8311 'january', 'february', 'march', 'april', 'may', 'june', 'july',
8321 'august', 'september', 'october', 'november', 'december']
8331_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
834n/a
835n/a# The timezone table does not include the military time zones defined
836n/a# in RFC822, other than Z. According to RFC1123, the description in
837n/a# RFC822 gets the signs wrong, so we can't rely on any such time
838n/a# zones. RFC1123 recommends that numeric timezone indicators be used
839n/a# instead of timezone names.
840n/a
8411_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
8421 'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
8431 'EST': -500, 'EDT': -400, # Eastern
8441 'CST': -600, 'CDT': -500, # Central
8451 'MST': -700, 'MDT': -600, # Mountain
8461 'PST': -800, 'PDT': -700 # Pacific
847n/a }
848n/a
849n/a
8501def parsedate_tz(data):
851n/a """Convert a date string to a time tuple.
852n/a
853n/a Accounts for military timezones.
854n/a """
8558 if not data:
8561 return None
8577 data = data.split()
8587 if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
859n/a # There's a dayname here. Skip it
8606 del data[0]
861n/a else:
862n/a # no space after the "weekday,"?
8631 i = data[0].rfind(',')
8641 if i >= 0:
8650 data[0] = data[0][i+1:]
8667 if len(data) == 3: # RFC 850 date, deprecated
8671 stuff = data[0].split('-')
8681 if len(stuff) == 3:
8691 data = stuff + data[1:]
8707 if len(data) == 4:
8710 s = data[3]
8720 i = s.find('+')
8730 if i > 0:
8740 data[3:] = [s[:i], s[i+1:]]
875n/a else:
8760 data.append('') # Dummy tz
8777 if len(data) < 5:
8780 return None
8797 data = data[:5]
8807 [dd, mm, yy, tm, tz] = data
8817 mm = mm.lower()
8827 if not mm in _monthnames:
8831 dd, mm = mm, dd.lower()
8841 if not mm in _monthnames:
8850 return None
8867 mm = _monthnames.index(mm)+1
8877 if mm > 12: mm = mm - 12
8887 if dd[-1] == ',':
8890 dd = dd[:-1]
8907 i = yy.find(':')
8917 if i > 0:
8920 yy, tm = tm, yy
8937 if yy[-1] == ',':
8940 yy = yy[:-1]
8957 if not yy[0].isdigit():
8960 yy, tz = tz, yy
8977 if tm[-1] == ',':
8980 tm = tm[:-1]
8997 tm = tm.split(':')
9007 if len(tm) == 2:
9010 [thh, tmm] = tm
9020 tss = '0'
9037 elif len(tm) == 3:
9047 [thh, tmm, tss] = tm
905n/a else:
9060 return None
9077 try:
9087 yy = int(yy)
9097 dd = int(dd)
9107 thh = int(thh)
9117 tmm = int(tmm)
9127 tss = int(tss)
9130 except ValueError:
9140 return None
9157 tzoffset = None
9167 tz = tz.upper()
9177 if tz in _timezones:
9181 tzoffset = _timezones[tz]
919n/a else:
9206 try:
9216 tzoffset = int(tz)
9220 except ValueError:
9230 pass
924n/a # Convert a timezone offset into seconds ; -0500 -> -18000
9257 if tzoffset:
9267 if tzoffset < 0:
9277 tzsign = -1
9287 tzoffset = -tzoffset
929n/a else:
9300 tzsign = 1
9317 tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
9327 return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
933n/a
934n/a
9351def parsedate(data):
936n/a """Convert a time string to a time tuple."""
9378 t = parsedate_tz(data)
9388 if t is None:
9391 return t
9407 return t[:9]
941n/a
942n/a
9431def mktime_tz(data):
944n/a """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
9450 if data[9] is None:
946n/a # No zone info, so localtime is better assumption than GMT
9470 return time.mktime(data[:8] + (-1,))
948n/a else:
9490 t = time.mktime(data[:8] + (0,))
9500 return t - data[9] - time.timezone
951n/a
9521def formatdate(timeval=None):
953n/a """Returns time format preferred for Internet standards.
954n/a
955n/a Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
956n/a
957n/a According to RFC 1123, day and month names must always be in
958n/a English. If not for that, this code could use strftime(). It
959n/a can't because strftime() honors the locale and could generated
960n/a non-English names.
961n/a """
9624 if timeval is None:
9630 timeval = time.time()
9644 timeval = time.gmtime(timeval)
9654 return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
9664 ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[timeval[6]],
9674 timeval[2],
9680 ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
9694 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[timeval[1]-1],
9704 timeval[0], timeval[3], timeval[4], timeval[5])
971n/a
972n/a
973n/a# When used as script, run a small test program.
974n/a# The first command line argument must be a filename containing one
975n/a# message in RFC-822 format.
976n/a
9771if __name__ == '__main__':
9780 import sys, os
9790 file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
9800 if sys.argv[1:]: file = sys.argv[1]
9810 f = open(file, 'r')
9820 m = Message(f)
9830 print 'From:', m.getaddr('from')
9840 print 'To:', m.getaddrlist('to')
9850 print 'Subject:', m.getheader('subject')
9860 print 'Date:', m.getheader('date')
9870 date = m.getdate_tz('date')
9880 tz = date[-1]
9890 date = time.localtime(mktime_tz(date))
9900 if date:
9910 print 'ParsedDate:', time.asctime(date),
9920 hhmmss = tz
9930 hhmm, ss = divmod(hhmmss, 60)
9940 hh, mm = divmod(hhmm, 60)
9950 print "%+03d%02d" % (hh, mm),
9960 if ss: print ".%02d" % ss,
9970 print
998n/a else:
9990 print 'ParsedDate:', None
10000 m.rewindbody()
10010 n = 0
10020 while f.readline():
10030 n += 1
10040 print 'Lines:', n
10050 print '-'*70
10060 print 'len =', len(m)
10070 if 'Date' in m: print 'Date =', m['Date']
10080 if 'X-Nonsense' in m: pass
10090 print 'keys =', m.keys()
10100 print 'values =', m.values()
10110 print 'items =', m.items()