» Core Development > Code coverage > Lib/email/_parseaddr.py

Python code coverage for Lib/email/_parseaddr.py

# | count | content
1n/a# Copyright (C) 2002-2007 Python Software Foundation
2n/a# Contact: email-sig@python.org
3n/a
4n/a"""Email address parsing code.
5n/a
6n/aLifted directly from rfc822.py. This should eventually be rewritten.
7n/a"""
8n/a
9n/a__all__ = [
10n/a 'mktime_tz',
11n/a 'parsedate',
12n/a 'parsedate_tz',
13n/a 'quote',
14n/a ]
15n/a
16n/aimport time, calendar
17n/a
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
# The list holds the abbreviated names first and the full names second, so
# index % 12 yields the zero-based month number for either spelling.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-item time tuple.

    The first nine items can be passed to time.mktime(); the tenth is the
    timezone offset from UTC in seconds.  Unlike _parsedate_tz(), an
    unknown or explicitly disclaimed (-0000) timezone is reported as 0.

    Only numeric offsets and the names listed in _timezones are
    recognized; military zones other than Z are not.

    Returns None if the string cannot be parsed.
    """
    res = _parsedate_tz(data)
    if not res:
        return None
    if res[9] is None:
        res[9] = 0
    return tuple(res)


def _parsedate_tz(data):
    """Convert date to extended time tuple.

    Returns a 10-item list [year, month, day, hour, minute, second, 0, 1,
    -1, tzoffset], or None if the input cannot be parsed.

    The last (additional) element is the time zone offset in seconds, except
    if the timezone was specified as -0000, was missing, or was not
    recognized.  In those cases the last element is None.  For -0000 this
    indicates a UTC timestamp that explicitly disclaims knowledge of the
    source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input splits into no tokens at all.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The timezone may be glued to the time, e.g. "10:00:00+0100".
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Accept "Month Day" order as well as "Day Month".
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm -= 12
    # endswith() rather than indexing: tokens produced by the RFC 850 split
    # above may be empty strings.
    if dd.endswith(','):
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are swapped.
        yy, tm = tm, yy
    if yy.endswith(','):
        yy = yy[:-1]
    if not yy:
        # An empty year (e.g. a lone comma) cannot be a date.
        return None
    if not yy[0].isdigit():
        # Year and timezone are swapped.
        yy, tz = tz, yy
    if tm.endswith(','):
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            # Wrong number of fields; without this the hour/minute/second
            # names would be unbound below.
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
        if tzoffset == 0 and tz.startswith('-'):
            # -0000 means "UTC, but the real source zone is unknown".
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]
172n/a
173n/a
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    This is parsedate_tz() with the trailing timezone offset dropped.  A
    result that is not a tuple (parsedate_tz() signals failure that way) is
    passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        return parsed
    return parsed[:9]
181n/a
182n/a
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.

    `data' may be any 10-item sequence (tuple or list).  Item 9 is the
    timezone offset in seconds; when it is None the first eight fields are
    interpreted as local time via time.mktime(), otherwise they are
    interpreted as UTC via calendar.timegm() and the offset is subtracted.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT.
        # tuple() lets list input work; list + tuple would raise TypeError.
        return time.mktime(tuple(data[:8]) + (-1,))
    return calendar.timegm(data) - data[9]
191n/a
192n/a
def quote(str):
    """Escape a string for use inside a quoted string.

    Backslashes and double quotes are each prefixed with a backslash; no
    other character is changed, and the surrounding double quotes are not
    added.
    """
    # Backslashes must be doubled first so that the backslashes inserted
    # for the double quotes are not themselves escaped again.
    backslashes_escaped = str.replace('\\', '\\\\')
    return backslashes_escaped.replace('"', '\\"')
201n/a
202n/a
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (`pos') into `field' and every get*() method
    consumes text starting at that cursor.  Comments encountered along the
    way are collected in `commentlist'.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Returns the skipped spaces and tabs as a string (CR and LF are
        skipped but not returned).
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return ''.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list of (name, address) pairs.  An address that yields
        nothing is recorded as ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs: one pair for a single
        address, several for a group, or an empty list if nothing usable
        was found.  A trailing comma separator is consumed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # NOTE: this is the same list object as self.commentlist, not a
        # copy, so comments collected by getphraselist() below survive the
        # "start over" reset in the addr-spec branch.
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Skip a stray special so the caller's loop makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not at a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character that ended the addr-spec
                # (the closing `>' in well-formed input).
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns `local-part@domain', or just the local part when no `@'
        follows it.  Returns the empty string when an `@' is present but
        the domain is missing or invalid, so that a partial address is
        never mistaken for a successfully parsed one.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain: return an empty address instead of a bare
            # local part to denote failed parsing.
            return ''
        return ''.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second `@' is found inside the
        domain.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Don't parse domains with two `@' such as
                # `a@malicious.org@important.com'; accepting the first
                # part would let a crafted address pass domain checks.
                return ''
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True right after a backslash: the next character is taken
        # literally, even if it is an end delimiter.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Returns the list of words; whitespace
        between them is dropped and comments go to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
494n/a
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (name, address) pairs live in `addresslist'.  The arithmetic
    operators treat two AddressLists as sets of such pairs.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # A false field (None or '') yields an empty list without parsing.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union: everything in self, then the entries of other that
        # self does not already hold.
        union = AddressList(None)
        union.addresslist = self.addresslist[:]
        union.addresslist.extend(
            [entry for entry in other.addresslist
             if entry not in self.addresslist])
        return union

    def __iadd__(self, other):
        # Set union, in-place.  Membership is tested against the growing
        # list, so this must stay an explicit loop.
        for entry in other.addresslist:
            if entry not in self.addresslist:
                self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        # Set difference
        difference = AddressList(None)
        difference.addresslist.extend(
            [entry for entry in self.addresslist
             if entry not in other.addresslist])
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for entry in other.addresslist:
            if entry in self.addresslist:
                self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]