» Core Development > Code coverage > Lib/cookielib.py

Python code coverage for Lib/cookielib.py

#  count  content
"""HTTP cookie handling for web clients.

This module has (now fairly distant) origins in Gisle Aas' Perl module
HTTP::Cookies, from the libwww-perl library.

Docstrings, comments and debug strings in this code refer to the
attributes of the HTTP cookie system as cookie-attributes, to distinguish
them clearly from Python attributes.

Class diagram (note that BSDDBCookieJar and the MSIE* classes are not
distributed with the Python standard library, but are available from
http://wwwsearch.sf.net/):

                        CookieJar____
                        /     \      \
            FileCookieJar      \      \
             /    |   \         \      \
 MozillaCookieJar | LWPCookieJar \      \
                  |               |      \
                  |   ---MSIEBase |       \
                  |  /      |     |        \
                  | /   MSIEDBCookieJar BSDDBCookieJar
                  |/
               MSIECookieJar

"""
27n/a
# Names exported by ``from cookielib import *``.
__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
           'FileCookieJar', 'LWPCookieJar', 'lwp_cookie_str', 'LoadError',
           'MozillaCookieJar']
31n/a
321import re, urlparse, copy, time, urllib
331try:
341 import threading as _threading
350except ImportError:
360 import dummy_threading as _threading
371import httplib # only for the default HTTP port
381from calendar import timegm
39n/a
debug = False   # set to True to enable debugging via the logging module
logger = None   # created lazily by _debug() the first time it is needed

def _debug(*args):
    """Forward *args* to the "cookielib" logger when debugging is enabled.

    Does nothing (and returns None) while the module-level ``debug`` flag is
    false.  The logger is looked up on first use and cached in the
    module-level ``logger`` variable.
    """
    global logger
    if debug:
        if not logger:
            import logging
            logger = logging.getLogger("cookielib")
        return logger.debug(*args)
    return None
51n/a
52n/a
# The default HTTP port, kept as a string.
DEFAULT_HTTP_PORT = "%d" % httplib.HTTP_PORT
# Error text used when no filename is available.
MISSING_FILENAME_TEXT = (
    "a filename was not supplied (nor was the CookieJar "
    "instance initialised with one)"
)
56n/a
def _warn_unhandled_exception():
    """Emit a warning containing the traceback of the current exception.

    There are a few catch-all except: statements in this module, for
    catching input that's bad in unexpected ways.  This is called from
    those handlers so that swallowed exceptions are still reported.
    """
    import traceback
    import warnings
    # format_exc() yields the same text print_exc() would write to a file.
    details = traceback.format_exc()
    # stacklevel=2 attributes the warning to our caller.
    warnings.warn("cookielib bug!\n%s" % details, stacklevel=2)
66n/a
67n/a
68n/a# Date/time conversion
69n/a# -----------------------------------------------------------------------------
70n/a
EPOCH_YEAR = 1970
def _timegm(tt):
    """Convert a UTC time tuple to seconds since the epoch, or None.

    Only the first six items of *tt* (year, month, day, hour, minute,
    second) are range-checked; any value outside the accepted ranges, or a
    year before EPOCH_YEAR, yields None instead of a timestamp.
    """
    year, month, mday, hour, minute, second = tt[:6]
    if year < EPOCH_YEAR:
        return None
    if not (1 <= month <= 12 and 1 <= mday <= 31):
        return None
    # hour 24 and seconds up to 61 are deliberately tolerated
    if not (0 <= hour <= 24 and 0 <= minute <= 59 and 0 <= second <= 61):
        return None
    return timegm(tt)
79n/a
# Abbreviated English weekday and month names.
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased month names, in the same order as MONTHS.
MONTHS_LOWER = [month.lower() for month in MONTHS]
85n/a
def time2isoz(t=None):
    """Format *t* (seconds since the epoch) as "YYYY-MM-DD hh:mm:ssZ".

    The result is expressed in Universal Time (UTC, aka GMT), for example:

        1994-11-24 08:49:37Z

    When *t* is omitted or None, the current time is used.
    """
    if t is None:
        t = time.time()
    fields = tuple(time.gmtime(t)[:6])   # year, month, day, hour, min, sec
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % fields
102n/a
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

        Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    if t is None:
        t = time.time()
    year, mon, mday, hour, minute, sec, wday = time.gmtime(t)[:7]
    # The weekday is followed by a comma, as the documented format (and the
    # Netscape cookie date format) requires.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[wday], mday, MONTHS[mon-1], year, hour, minute, sec)
118n/a
119n/a
# Timezone names treated as zero offset from UTC (used as a set).
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

# Numeric timezone: optional sign, 1-2 hour digits, optional 2 minute digits.
TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$")
def offset_from_tz_string(tz):
    """Return the UTC offset in seconds described by *tz*, or None.

    Names listed in UTC_ZONES give 0.  Numeric zones such as "-0800" or
    "+01:30" are converted to signed seconds.  Anything else is
    unrecognised and gives None.
    """
    if tz in UTC_ZONES:
        return 0
    found = TIMEZONE_RE.search(tz)
    if not found:
        return None
    sign, hours, minutes = found.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds += 60 * int(minutes)
    if sign == '-':
        seconds = -seconds
    return seconds
136n/a
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Turn parsed date fields into seconds since the epoch, or None.

    *mon* may be an English month abbreviation (any case) or a month number.
    Missing clock fields (None) count as zero.  Years below 1000 are
    treated as abbreviated and moved into the century that puts them
    closest to the current year.  *tz* is a timezone string, or None for
    UTC.  None is returned for an invalid month, an out-of-range date/time
    or an unrecognised timezone.
    """
    # translate month name to number; month numbers start with 1 (January)
    month_key = mon.lower()
    if month_key in MONTHS_LOWER:
        mon = MONTHS_LOWER.index(month_key) + 1
    else:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if not 1 <= imon <= 12:
            return None
        mon = imon

    # make sure clock elements are defined
    if hr is None:
        hr = 0
    if min is None:
        min = 0
    if sec is None:
        sec = 0

    yr, day = int(yr), int(day)
    hr, min, sec = int(hr), int(min), int(sec)

    if yr < 1000:
        # find "obvious" year: start in the current century, then shift by
        # a century if that lands more than 50 years from now
        current_year = time.localtime(time.time())[0]
        year_in_century = current_year % 100
        distance = year_in_century - yr
        yr = yr + current_year - year_in_century
        if distance > 50:
            yr = yr + 100
        elif distance < -50:
            yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))
    if t is None:
        return None

    # adjust time using timezone string, to get absolute time since epoch
    if tz is None:
        tz = "UTC"
    offset = offset_from_tz_string(tz.upper())
    if offset is None:
        return None
    return t - offset
189n/a
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$")
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is an integer.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        month_name = g[1].lower()
        # The strict regexp only checks the *shape* of the month, so a
        # made-up name such as "Abc" can get here.  Only take the fast path
        # for a real month; otherwise fall through to the loose parser,
        # which rejects it with None instead of raising ValueError.
        if month_name in MONTHS_LOWER:
            mon = MONTHS_LOWER.index(month_name) + 1
            # int() seconds keep the result an integer, as documented
            tt = (int(g[2]), mon, int(g[0]),
                  int(g[3]), int(g[4]), int(g[5]))
            return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
267n/a
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone  (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X)
def iso2time(text):
    """
    As for http2time, but parses the ISO 8601 formats:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Fractional seconds are accepted and truncated to whole seconds.

    """
    # clean up
    text = text.lstrip()

    # loose regexp parse
    m = ISO_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # XXX there's an extra bit of the timezone I'm ignoring here: is
    #   this the right thing to do?
    # tz is time zone specifier string
    yr, mon, day, hr, min, sec, tz, _ = m.groups()

    # The regexp allows "ss.fff", but _str2time() calls int() on the
    # seconds, which would raise ValueError; keep the whole seconds only.
    if sec is not None:
        sec = sec.split(".", 1)[0]

    return _str2time(day, mon, yr, hr, min, sec, tz)
312n/a
313n/a
314n/a# Header parsing
315n/a# -----------------------------------------------------------------------------
316n/a
def unmatched(match):
    """Return the searched string with the matched span cut out."""
    text = match.string
    begin, stop = match.start(0), match.end(0)
    return "".join((text[:begin], text[stop:]))
321n/a
HEADER_TOKEN_RE = re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE = re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    *header_values* is a sequence of header strings (not a single string).
    Each string is split on ";" (or plain whitespace) into ``name=value``
    parameters and bare tokens; a "," starts a new header, so several
    strings behave like one comma-separated string.  Values may be quoted
    after "=", in which case backslash escapes are removed.

    Each header is represented by a list of (name, value) pairs.  The value
    of a bare token is None.  Syntactically incorrect headers will not
    necessarily be parsed as you would want.

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, basestring)

    def remainder(match):
        # the searched text with the matched span removed
        begin, stop = match.span(0)
        return match.string[:begin] + match.string[stop:]

    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            token = HEADER_TOKEN_RE.search(text)
            if token:
                text = remainder(token)
                name = token.group(1)
                quoted = HEADER_QUOTED_VALUE_RE.search(text)
                if quoted:
                    # quoted value: drop the backslash escapes
                    text = remainder(quoted)
                    value = HEADER_ESCAPE_RE.sub(r"\1", quoted.group(1))
                else:
                    plain = HEADER_VALUE_RE.search(text)
                    if plain:
                        # unquoted value
                        text = remainder(plain)
                        value = plain.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
                continue

            stripped = text.lstrip()
            if stripped.startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = stripped[1:]
                if pairs:
                    result.append(pairs)
                pairs = []
                continue

            # skip junk
            non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
            assert nr_junk_chars > 0, (
                "split_header_words bug: '%s', '%s', %s" %
                (orig_text, text, pairs))
            text = non_junk
        if pairs:
            result.append(pairs)
    return result
410n/a
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    *lists* is a list of lists of (key, value) pairs; the result is one
    header string.  Pairs within a list are joined with "; " and the lists
    themselves with ", ".  A value of None gives a bare key.  Values that
    are not a plain word are double-quoted, with embedded quotes and
    backslashes escaped.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859/1")]])
    'text/plain; charset="iso-8859/1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859/1")]])
    'text/plain, charset="iso-8859/1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for key, value in pairs:
            if value is None:
                attr.append(key)
                continue
            if not re.search(r"^\w+$", value):
                # escape " and \, then wrap in quotes
                value = '"%s"' % HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)
            attr.append("%s=%s" % (key, value))
        if attr:
            headers.append("; ".join(attr))
    return ", ".join(headers)
436n/a
def _strip_quotes(text):
    """Remove one leading and one trailing double quote from *text*, if present."""
    if text[:1] == '"':
        text = text[1:]
    # checked after the leading quote is gone, so a lone '"' becomes ''
    if text[-1:] == '"':
        text = text[:-1]
    return text
443n/a
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    The old Netscape cookie format for Set-Cookie can for instance contain
    an unquoted "," in the expires field, so we have to use this ad-hoc
    parser instead of split_header_words.

    *ns_headers* is a sequence of header strings.  The result has one list
    of (key, value) pairs per non-empty header.  Known cookie-attribute
    names (every parameter except the first) are lower-cased, an "expires"
    value is converted to seconds since the epoch (None if it cannot be
    parsed), and a ("version", "0") pair is appended when the header has no
    version attribute.  A parameter without "=" gets the value None.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False
        for ii, param in enumerate(re.split(r";\s*", ns_header)):
            param = param.rstrip()
            if param == "":
                continue
            if "=" not in param:
                k, v = param, None
            else:
                k, v = re.split(r"\s*=\s*", param, 1)
                k = k.lstrip()
            if ii != 0:
                # the first parameter is the cookie's own name=value pair
                lc = k.lower()
                if lc in known_attrs:
                    k = lc
                if k == "version":
                    # This is an RFC 2109 cookie.
                    # A bare "Version" (no "=") has v None; leave it alone
                    # rather than crash in _strip_quotes().
                    if v is not None:
                        v = _strip_quotes(v)
                    version_set = True
                if k == "expires" and v is not None:
                    # convert expires date to seconds since epoch
                    v = http2time(_strip_quotes(v))  # None if invalid
            pairs.append((k, v))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
494n/a
495n/a
# Matches text ending in a dot followed by digits.
IPV4_RE = re.compile(r"\.\d+$")
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text) or text == "":
        return False
    # a name may neither begin nor end with a dot
    return not (text.startswith(".") or text.endswith("."))
511n/a
def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB" means B must be a *suffix* of A.  Merely finding B
    # somewhere inside A is not enough: "a.b.com.evil" must not match
    # ".b.com".  N is non-empty here because A != B.
    if not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
550n/a
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    For accepting/blocking domains.  Anything that IPV4_RE does not match
    counts as a domain name here.
    """
    return not IPV4_RE.search(text)
560n/a
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses; the comparison is
    case-insensitive.  When B starts with a dot, A matches if it ends with
    B.  Otherwise (including when either looks like an IP address) A must
    equal B.
    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # only equal IP addresses match
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
580n/a
# A trailing ":<digits>" port suffix.
cut_port_re = re.compile(r":\d+$")
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    The host is taken from the request's full URL, or from its "Host"
    header when the URL has none.  Any port is removed.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    netloc = urlparse.urlparse(request.get_full_url())[1]
    if netloc == "":
        netloc = request.get_header("Host", "")
    # remove port, if present
    return cut_port_re.sub("", netloc, 1).lower()
597n/a
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the request-host
    contains no dot; otherwise the two are the same.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
608n/a
def request_path(request):
    """request-URI, as defined by RFC 2965.

    Built from the path, parameters, query and fragment of the request's
    full URL.  The path (with any ";parameters" re-attached) is passed
    through escape_path(), and the result always starts with "/".
    """
    parsed = urlparse.urlparse(request.get_full_url())
    path, parameters, query, frag = parsed[2:]
    if parameters:
        path = "%s;%s" % (path, parameters)
    escaped = escape_path(path)
    req_path = urlparse.urlunparse(("", "", escaped, "", query, frag))
    if req_path.startswith("/"):
        return req_path
    # fix bad RFC 2396 absoluteURI
    return "/" + req_path
623n/a
def request_port(request):
    """Return the port of the request's host as a string.

    Returns DEFAULT_HTTP_PORT when the host has no ":port" part, and None
    (after logging a debug message) when the port is not numeric.
    """
    host = request.get_host()
    colon = host.find(':')
    if colon < 0:
        return DEFAULT_HTTP_PORT
    port = host[colon+1:]
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
637n/a
638n/a# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
639n/a# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the matched %-escape with its two hex digits upper-cased."""
    return "%" + match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    if isinstance(path, unicode):
        path = path.encode("utf-8")
    quoted = urllib.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
659n/a
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # split at the first dot: everything after it is B
    _a, dot, b = h.partition(".")
    if dot and is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
694n/a
def is_third_party(request):
    """Return True if *request* is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    origin_reach = reach(request.get_origin_req_host())
    return not domain_match(request_host(request), origin_reach)
710n/a
711n/a
class Cookie:
    """HTTP Cookie.

    This class represents both Netscape and RFC 2965 cookies.

    This is deliberately a very simple class.  It just holds attributes.  It's
    possible to construct Cookie instances that don't comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted from
    and returned to the server.

    Note that the port may be present in the headers, but unspecified ("Port"
    rather than"Port=80", for example); if this is the case, port is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie-attributes on the instance.

        version and expires are converted with int() unless None, domain is
        lower-cased, and rest (the non-standard attributes) is shallow-copied.
        Raises ValueError if port is None while port_specified is True.
        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(expires)
        if port is None and port_specified is True:
            raise ValueError("if port is None, port_specified must be false")

        self.version, self.name, self.value = version, name, value
        self.port, self.port_specified = port, port_specified
        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot
        self.path, self.path_specified = path, path_specified
        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return whether a non-standard cookie-attribute *name* is present."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard cookie-attribute *name*, or *default*."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Set the non-standard cookie-attribute *name* to *value*."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time at or before *now*.

        *now* defaults to the current time; a cookie whose expires is None
        never counts as expired.
        """
        if now is None:
            now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        port_part = "" if self.port is None else ":" + self.port
        limit = self.domain + port_part + self.path
        if self.value is None:
            namevalue = self.name
        else:
            namevalue = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        plain_fields = ("version", "name", "value",
                        "port", "port_specified",
                        "domain", "domain_specified", "domain_initial_dot",
                        "path", "path_specified",
                        "secure", "expires", "discard", "comment",
                        "comment_url",
                        )
        args = ["%s=%s" % (field, repr(getattr(self, field)))
                for field in plain_fields]
        # rest is stored under a private name, so it is added by hand
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "Cookie(%s)" % ", ".join(args)
808n/a
809n/a
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    May also modify cookies, though this is probably a bad idea.

    The subclass DefaultCookiePolicy defines the standard rules for Netscape
    and RFC 2965 cookies -- override that if you want a customised policy.

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        This base implementation always raises NotImplementedError.

        """
        raise NotImplementedError()

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        This base implementation always raises NotImplementedError.
        """
        raise NotImplementedError()

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation accepts every domain.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation accepts every path.
        """
        return True
841n/a
842n/a
8432class DefaultCookiePolicy(CookiePolicy):
8441 """Implements the standard rules for accepting and returning cookies."""
845n/a
8461 DomainStrictNoDots = 1
8471 DomainStrictNonDomain = 2
8481 DomainRFC2965Match = 4
849n/a
8501 DomainLiberal = 0
8511 DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
852n/a
853n/a def __init__(self,
8541 blocked_domains=None, allowed_domains=None,
8551 netscape=True, rfc2965=False,
8561 rfc2109_as_netscape=None,
8571 hide_cookie2=False,
8581 strict_domain=False,
8591 strict_rfc2965_unverifiable=True,
8601 strict_ns_unverifiable=False,
8611 strict_ns_domain=DomainLiberal,
8621 strict_ns_set_initial_dollar=False,
8631 strict_ns_set_path=False,
864n/a ):
865n/a """Constructor arguments should be passed as keyword arguments only."""
86668 self.netscape = netscape
86768 self.rfc2965 = rfc2965
86868 self.rfc2109_as_netscape = rfc2109_as_netscape
86968 self.hide_cookie2 = hide_cookie2
87068 self.strict_domain = strict_domain
87168 self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
87268 self.strict_ns_unverifiable = strict_ns_unverifiable
87368 self.strict_ns_domain = strict_ns_domain
87468 self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
87568 self.strict_ns_set_path = strict_ns_set_path
876n/a
87768 if blocked_domains is not None:
8782 self._blocked_domains = tuple(blocked_domains)
879n/a else:
88066 self._blocked_domains = ()
881n/a
88268 if allowed_domains is not None:
8831 allowed_domains = tuple(allowed_domains)
88468 self._allowed_domains = allowed_domains
885n/a
8861 def blocked_domains(self):
887n/a """Return the sequence of blocked domains (as a tuple)."""
8880 return self._blocked_domains
8891 def set_blocked_domains(self, blocked_domains):
890n/a """Set the sequence of blocked domains."""
8912 self._blocked_domains = tuple(blocked_domains)
892n/a
8931 def is_blocked(self, domain):
894245 for blocked_domain in self._blocked_domains:
89510 if user_domain_match(domain, blocked_domain):
8963 return True
897235 return False
898n/a
8991 def allowed_domains(self):
900n/a """Return None, or the sequence of allowed domains (as a tuple)."""
9010 return self._allowed_domains
9021 def set_allowed_domains(self, allowed_domains):
903n/a """Set the sequence of allowed domains, or None."""
9040 if allowed_domains is not None:
9050 allowed_domains = tuple(allowed_domains)
9060 self._allowed_domains = allowed_domains
907n/a
9081 def is_not_allowed(self, domain):
909235 if self._allowed_domains is None:
910232 return False
9115 for allowed_domain in self._allowed_domains:
9123 if user_domain_match(domain, allowed_domain):
9131 return False
9142 return True
915n/a
9161 def set_ok(self, cookie, request):
917n/a """
918n/a If you override .set_ok(), be sure to call this method. If it returns
919n/a false, so should your subclass (assuming your subclass wants to be more
920n/a strict about which cookies to accept).
921n/a
922n/a """
923128 _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
924n/a
925128 assert cookie.name is not None
926n/a
927851 for n in "version", "verifiability", "name", "path", "domain", "port":
928741 fn_name = "set_ok_"+n
929741 fn = getattr(self, fn_name)
930741 if not fn(cookie, request):
93118 return False
932n/a
933110 return True
934n/a
9351 def set_ok_version(self, cookie, request):
936128 if cookie.version is None:
937n/a # Version is always set to 0 by parse_ns_headers if it's a Netscape
938n/a # cookie, so this must be an invalid RFC 2965 cookie.
9390 _debug(" Set-Cookie2 without version attribute (%s=%s)",
9400 cookie.name, cookie.value)
9410 return False
942128 if cookie.version > 0 and not self.rfc2965:
9432 _debug(" RFC 2965 cookies are switched off")
9442 return False
945126 elif cookie.version == 0 and not self.netscape:
9460 _debug(" Netscape cookies are switched off")
9470 return False
948126 return True
949n/a
9501 def set_ok_verifiability(self, cookie, request):
951126 if request.is_unverifiable() and is_third_party(request):
9520 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
9530 _debug(" third-party RFC 2965 cookie during "
954n/a "unverifiable transaction")
9550 return False
9560 elif cookie.version == 0 and self.strict_ns_unverifiable:
9570 _debug(" third-party Netscape cookie during "
958n/a "unverifiable transaction")
9590 return False
960126 return True
961n/a
9621 def set_ok_name(self, cookie, request):
963n/a # Try and stop servers setting V0 cookies designed to hack other
964n/a # servers that know both V0 and V1 protocols.
965126 if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
9660 cookie.name.startswith("$")):
9670 _debug(" illegal name (starts with '$'): '%s'", cookie.name)
9680 return False
969126 return True
970n/a
9711 def set_ok_path(self, cookie, request):
972126 if cookie.path_specified:
97333 req_path = request_path(request)
97433 if ((cookie.version > 0 or
97521 (cookie.version == 0 and self.strict_ns_set_path)) and
97612 not req_path.startswith(cookie.path)):
9772 _debug(" path attribute %s is not a prefix of request "
9782 "path %s", cookie.path, req_path)
9792 return False
980124 return True
981n/a
9821 def set_ok_domain(self, cookie, request):
983124 if self.is_blocked(cookie.domain):
9842 _debug(" domain %s is in user block-list", cookie.domain)
9852 return False
986122 if self.is_not_allowed(cookie.domain):
9871 _debug(" domain %s is not in user allow-list", cookie.domain)
9881 return False
989121 if cookie.domain_specified:
99035 req_host, erhn = eff_request_host(request)
99135 domain = cookie.domain
99235 if self.strict_domain and (domain.count(".") >= 2):
993n/a # XXX This should probably be compared with the Konqueror
994n/a # (kcookiejar.cpp) and Mozilla implementations, but it's a
995n/a # losing battle.
9965 i = domain.rfind(".")
9975 j = domain.rfind(".", 0, i)
9985 if j == 0: # domain like .foo.bar
9994 tld = domain[i+1:]
10004 sld = domain[j+1:i]
10014 if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
10020 "gov", "mil", "int", "aero", "biz", "cat", "coop",
10030 "info", "jobs", "mobi", "museum", "name", "pro",
10044 "travel", "eu") and len(tld) == 2:
1005n/a # domain like .co.uk
10064 _debug(" country-code second level domain %s", domain)
10074 return False
100831 if domain.startswith("."):
100931 undotted_domain = domain[1:]
1010n/a else:
10110 undotted_domain = domain
101231 embedded_dots = (undotted_domain.find(".") >= 0)
101331 if not embedded_dots and domain != ".local":
10143 _debug(" non-local domain %s contains no embedded dot",
10153 domain)
10163 return False
101728 if cookie.version == 0:
101813 if (not erhn.endswith(domain) and
10195 (not erhn.startswith(".") and
10205 not ("."+erhn).endswith(domain))):
10210 _debug(" effective request-host %s (even with added "
1022n/a "initial dot) does not end end with %s",
10230 erhn, domain)
10240 return False
102528 if (cookie.version > 0 or
102613 (self.strict_ns_domain & self.DomainRFC2965Match)):
102715 if not domain_match(erhn, domain):
10282 _debug(" effective request-host %s does not domain-match "
10292 "%s", erhn, domain)
10302 return False
103126 if (cookie.version > 0 or
103213 (self.strict_ns_domain & self.DomainStrictNoDots)):
103313 host_prefix = req_host[:-len(domain)]
103413 if (host_prefix.find(".") >= 0 and
10351 not IPV4_RE.search(req_host)):
10361 _debug(" host prefix %s for domain %s contains a dot",
10371 host_prefix, domain)
10381 return False
1039111 return True
1040n/a
10411 def set_ok_port(self, cookie, request):
1042111 if cookie.port_specified:
10437 req_port = request_port(request)
10447 if req_port is None:
10450 req_port = "80"
1046n/a else:
10477 req_port = str(req_port)
104813 for p in cookie.port.split(","):
104912 try:
105012 int(p)
10510 except ValueError:
10520 _debug(" bad port %s (not numeric)", p)
10530 return False
105412 if p == req_port:
10556 break
1056n/a else:
10571 _debug(" request port (%s) not found in %s",
10581 req_port, cookie.port)
10591 return False
1060110 return True
1061n/a
10621 def return_ok(self, cookie, request):
1063n/a """
1064n/a If you override .return_ok(), be sure to call this method. If it
1065n/a returns false, so should your subclass (assuming your subclass wants to
1066n/a be more strict about which cookies to return).
1067n/a
1068n/a """
1069n/a # Path has already been checked by .path_return_ok(), and domain
1070n/a # blocking done by .domain_return_ok().
1071117 _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
1072n/a
1073802 for n in "version", "verifiability", "secure", "expires", "port", "domain":
1074693 fn_name = "return_ok_"+n
1075693 fn = getattr(self, fn_name)
1076693 if not fn(cookie, request):
10778 return False
1078109 return True
1079n/a
10801 def return_ok_version(self, cookie, request):
1081117 if cookie.version > 0 and not self.rfc2965:
10820 _debug(" RFC 2965 cookies are switched off")
10830 return False
1084117 elif cookie.version == 0 and not self.netscape:
10850 _debug(" Netscape cookies are switched off")
10860 return False
1087117 return True
1088n/a
10891 def return_ok_verifiability(self, cookie, request):
1090117 if request.is_unverifiable() and is_third_party(request):
10910 if cookie.version > 0 and self.strict_rfc2965_unverifiable:
10920 _debug(" third-party RFC 2965 cookie during unverifiable "
1093n/a "transaction")
10940 return False
10950 elif cookie.version == 0 and self.strict_ns_unverifiable:
10960 _debug(" third-party Netscape cookie during unverifiable "
1097n/a "transaction")
10980 return False
1099117 return True
1100n/a
11011 def return_ok_secure(self, cookie, request):
1102117 if cookie.secure and request.get_type() != "https":
11033 _debug(" secure cookie with non-secure request")
11043 return False
1105114 return True
1106n/a
11071 def return_ok_expires(self, cookie, request):
1108114 if cookie.is_expired(self._now):
11090 _debug(" cookie expired")
11100 return False
1111114 return True
1112n/a
11131 def return_ok_port(self, cookie, request):
1114114 if cookie.port:
111517 req_port = request_port(request)
111617 if req_port is None:
11170 req_port = "80"
111821 for p in cookie.port.split(","):
111921 if p == req_port:
112017 break
1121n/a else:
11220 _debug(" request port %s does not match cookie port %s",
11230 req_port, cookie.port)
11240 return False
1125114 return True
1126n/a
11271 def return_ok_domain(self, cookie, request):
1128114 req_host, erhn = eff_request_host(request)
1129114 domain = cookie.domain
1130n/a
1131n/a # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
1132114 if (cookie.version == 0 and
113364 (self.strict_ns_domain & self.DomainStrictNonDomain) and
11348 not cookie.domain_specified and domain != erhn):
11351 _debug(" cookie with unspecified domain does not string-compare "
1136n/a "equal to request domain")
11371 return False
1138n/a
1139113 if cookie.version > 0 and not domain_match(erhn, domain):
11404 _debug(" effective request-host name %s does not domain-match "
11414 "RFC 2965 cookie domain %s", erhn, domain)
11424 return False
1143109 if cookie.version == 0 and not ("."+erhn).endswith(domain):
11440 _debug(" request-host %s does not match Netscape cookie domain "
11450 "%s", req_host, domain)
11460 return False
1147109 return True
1148n/a
11491 def domain_return_ok(self, domain, request):
1150n/a # Liberal check of the domain. This is here as an optimization to avoid
1151n/a # having to load lots of MSIE cookie files unless necessary.
1152157 req_host, erhn = eff_request_host(request)
1153157 if not req_host.startswith("."):
1154157 req_host = "."+req_host
1155157 if not erhn.startswith("."):
1156157 erhn = "."+erhn
1157157 if not (req_host.endswith(domain) or erhn.endswith(domain)):
1158n/a #_debug(" request domain %s does not match cookie domain %s",
1159n/a # req_host, domain)
116043 return False
1161n/a
1162114 if self.is_blocked(domain):
11631 _debug(" domain %s is in user block-list", domain)
11641 return False
1165113 if self.is_not_allowed(domain):
11661 _debug(" domain %s is not in user allow-list", domain)
11671 return False
1168n/a
1169112 return True
1170n/a
11711 def path_return_ok(self, path, request):
1172112 _debug("- checking cookie path=%s", path)
1173112 req_path = request_path(request)
1174112 if not req_path.startswith(path):
117511 _debug(" %s does not path-match %s", req_path, path)
117611 return False
1177101 return True
1178n/a
1179n/a
11801def vals_sorted_by_key(adict):
1181815 keys = adict.keys()
1182815 keys.sort()
1183815 return map(adict.get, keys)
1184n/a
11851def deepvalues(mapping):
1186n/a """Iterates over nested mapping, depth-first, in sorted order by key."""
1187815 values = vals_sorted_by_key(mapping)
11881758 for obj in values:
1189943 mapping = False
1190943 try:
1191943 obj.items
1192351 except AttributeError:
1193351 pass
1194n/a else:
1195592 mapping = True
11961294 for subobj in deepvalues(obj):
1197702 yield subobj
1198943 if not mapping:
1199351 yield obj
1200n/a
1201n/a
1202n/a# Used as second parameter to dict.get() method, to distinguish absent
1203n/a# dict key from one with a None value.
12042class Absent: pass
1205n/a
12062class CookieJar:
1207n/a """Collection of HTTP cookies.
1208n/a
1209n/a You may not need to know about this class: try
1210n/a urllib2.build_opener(HTTPCookieProcessor).open(url).
1211n/a
12121 """
1213n/a
12141 non_word_re = re.compile(r"\W")
12151 quote_re = re.compile(r"([\"\\])")
12161 strict_domain_re = re.compile(r"\.?[^.]*")
12171 domain_re = re.compile(r"[^.]*")
12181 dots_re = re.compile(r"^\.+")
1219n/a
12201 magic_re = r"^\#LWP-Cookies-(\d+\.\d+)"
1221n/a
12221 def __init__(self, policy=None):
122370 if policy is None:
122433 policy = DefaultCookiePolicy()
122570 self._policy = policy
1226n/a
122770 self._cookies_lock = _threading.RLock()
122870 self._cookies = {}
1229n/a
12301 def set_policy(self, policy):
12315 self._policy = policy
1232n/a
12331 def _cookies_for_domain(self, domain, request):
1234141 cookies = []
1235141 if not self._policy.domain_return_ok(domain, request):
123640 return []
1237101 _debug("Checking %s for cookies to return", domain)
1238101 cookies_by_path = self._cookies[domain]
1239213 for path in cookies_by_path.keys():
1240112 if not self._policy.path_return_ok(path, request):
124111 continue
1242101 cookies_by_name = cookies_by_path[path]
1243218 for cookie in cookies_by_name.values():
1244117 if not self._policy.return_ok(cookie, request):
12458 _debug(" not returning cookie")
12468 continue
1247109 _debug(" it's a match")
1248109 cookies.append(cookie)
1249101 return cookies
1250n/a
12511 def _cookies_for_request(self, request):
1252n/a """Return a list of cookies to be returned to server."""
1253152 cookies = []
1254293 for domain in self._cookies.keys():
1255141 cookies.extend(self._cookies_for_domain(domain, request))
1256152 return cookies
1257n/a
12581 def _cookie_attrs(self, cookies):
1259n/a """Return a list of cookie-attributes to be returned to server.
1260n/a
1261n/a like ['foo="bar"; $Path="/"', ...]
1262n/a
1263n/a The $Version attribute is also added when appropriate (currently only
1264n/a once per request).
1265n/a
1266n/a """
1267n/a # add cookies in order of most specific (ie. longest) path first
1268261 cookies.sort(key=lambda arg: len(arg.path), reverse=True)
1269n/a
1270152 version_set = False
1271n/a
1272152 attrs = []
1273261 for cookie in cookies:
1274n/a # set version of Cookie header
1275n/a # XXX
1276n/a # What should it be if multiple matching Set-Cookie headers have
1277n/a # different versions themselves?
1278n/a # Answer: there is no answer; was supposed to be settled by
1279n/a # RFC 2965 errata, but that may never appear...
1280109 version = cookie.version
1281109 if not version_set:
128278 version_set = True
128378 if version > 0:
128437 attrs.append("$Version=%s" % version)
1285n/a
1286n/a # quote cookie value if necessary
1287n/a # (not for Netscape protocol, which already has any quotes
1288n/a # intact, due to the poorly-specified Netscape Cookie: syntax)
1289109 if ((cookie.value is not None) and
1290106 self.non_word_re.search(cookie.value) and version > 0):
12911 value = self.quote_re.sub(r"\\\1", cookie.value)
1292n/a else:
1293108 value = cookie.value
1294n/a
1295n/a # add cookie-attributes to be returned in Cookie header
1296109 if cookie.value is None:
12973 attrs.append(cookie.name)
1298n/a else:
1299106 attrs.append("%s=%s" % (cookie.name, value))
1300109 if version > 0:
130146 if cookie.path_specified:
130210 attrs.append('$Path="%s"' % cookie.path)
130346 if cookie.domain.startswith("."):
130410 domain = cookie.domain
130510 if (not cookie.domain_initial_dot and
13064 domain.startswith(".")):
13074 domain = domain[1:]
130810 attrs.append('$Domain="%s"' % domain)
130946 if cookie.port is not None:
131010 p = "$Port"
131110 if cookie.port_specified:
13125 p = p + ('="%s"' % cookie.port)
131310 attrs.append(p)
1314n/a
1315152 return attrs
1316n/a
13171 def add_cookie_header(self, request):
1318n/a """Add correct Cookie: header to request (urllib2.Request object).
1319n/a
1320n/a The Cookie2 header is also added unless policy.hide_cookie2 is true.
1321n/a
1322n/a """
1323152 _debug("add_cookie_header")
1324152 self._cookies_lock.acquire()
1325152 try:
1326n/a
1327152 self._policy._now = self._now = int(time.time())
1328n/a
1329152 cookies = self._cookies_for_request(request)
1330n/a
1331152 attrs = self._cookie_attrs(cookies)
1332152 if attrs:
133378 if not request.has_header("Cookie"):
133478 request.add_unredirected_header(
133578 "Cookie", "; ".join(attrs))
1336n/a
1337n/a # if necessary, advertise that we know RFC 2965
1338152 if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
133993 not request.has_header("Cookie2")):
1340139 for cookie in cookies:
134156 if cookie.version != 1:
134210 request.add_unredirected_header("Cookie2", '$Version="1"')
134310 break
1344n/a
1345n/a finally:
1346152 self._cookies_lock.release()
1347n/a
1348152 self.clear_expired_cookies()
1349n/a
13501 def _normalized_cookie_tuples(self, attrs_set):
1351n/a """Return list of tuples containing normalised cookie information.
1352n/a
1353n/a attrs_set is the list of lists of key,value pairs extracted from
1354n/a the Set-Cookie or Set-Cookie2 headers.
1355n/a
1356n/a Tuples are name, value, standard, rest, where name and value are the
1357n/a cookie name and value, standard is a dictionary containing the standard
1358n/a cookie-attributes (discard, secure, version, expires or max-age,
1359n/a domain, path and port) and rest is a dictionary containing the rest of
1360n/a the cookie-attributes.
1361n/a
1362n/a """
1363197 cookie_tuples = []
1364n/a
1365197 boolean_attrs = "discard", "secure"
13660 value_attrs = ("version",
13670 "expires", "max-age",
13680 "domain", "path", "port",
1369197 "comment", "commenturl")
1370n/a
1371332 for cookie_attrs in attrs_set:
1372135 name, value = cookie_attrs[0]
1373n/a
1374n/a # Build dictionary of standard cookie-attributes (standard) and
1375n/a # dictionary of other cookie-attributes (rest).
1376n/a
1377n/a # Note: expiry time is normalised to seconds since epoch. V0
1378n/a # cookies should have the Expires cookie-attribute, and V1 cookies
1379n/a # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1380n/a # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1381n/a # accept either (but prefer Max-Age).
1382135 max_age_set = False
1383n/a
1384135 bad_cookie = False
1385n/a
1386135 standard = {}
1387135 rest = {}
1388395 for k, v in cookie_attrs[1:]:
1389261 lc = k.lower()
1390n/a # don't lose case distinction for unknown fields
1391261 if lc in value_attrs or lc in boolean_attrs:
1392259 k = lc
1393261 if k in boolean_attrs and v is None:
1394n/a # boolean cookie-attribute is present, but has no value
1395n/a # (like "discard", rather than "port=80")
139610 v = True
1397261 if k in standard:
1398n/a # only first value is significant
13991 continue
1400260 if k == "domain":
140136 if v is None:
14020 _debug(" missing value for domain attribute")
14030 bad_cookie = True
14040 break
1405n/a # RFC 2965 section 3.3.3
140636 v = v.lower()
1407260 if k == "expires":
140816 if max_age_set:
1409n/a # Prefer max-age to expires (like Mozilla)
14100 continue
141116 if v is None:
14123 _debug(" missing or invalid value for expires "
1413n/a "attribute: treating as session cookie")
14143 continue
1415257 if k == "max-age":
14168 max_age_set = True
14178 try:
14188 v = int(v)
14191 except ValueError:
14201 _debug(" missing or invalid (non-numeric) value for "
1421n/a "max-age attribute")
14221 bad_cookie = True
14231 break
1424n/a # convert RFC 2965 Max-Age to seconds since epoch
1425n/a # XXX Strictly you're supposed to follow RFC 2616
1426n/a # age-calculation rules. Remember that zero Max-Age is a
1427n/a # request to discard (old and new) cookie, though.
14287 k = "expires"
14297 v = self._now + v
1430256 if (k in value_attrs) or (k in boolean_attrs):
1431254 if (v is None and
14324 k not in ("port", "comment", "commenturl")):
14330 _debug(" missing value for %s attribute" % k)
14340 bad_cookie = True
14350 break
1436254 standard[k] = v
1437n/a else:
14382 rest[k] = v
1439n/a
1440135 if bad_cookie:
14411 continue
1442n/a
1443134 cookie_tuples.append((name, value, standard, rest))
1444n/a
1445197 return cookie_tuples
1446n/a
14471 def _cookie_from_cookie_tuple(self, tup, request):
1448n/a # standard is dict of standard cookie-attributes, rest is dict of the
1449n/a # rest of them
1450134 name, value, standard, rest = tup
1451n/a
1452134 domain = standard.get("domain", Absent)
1453134 path = standard.get("path", Absent)
1454134 port = standard.get("port", Absent)
1455134 expires = standard.get("expires", Absent)
1456n/a
1457n/a # set the easy defaults
1458134 version = standard.get("version", None)
1459134 if version is not None:
1460134 try:
1461134 version = int(version)
14621 except ValueError:
14631 return None # invalid version, ignore cookie
1464133 secure = standard.get("secure", False)
1465n/a # (discard is also set if expires is Absent)
1466133 discard = standard.get("discard", False)
1467133 comment = standard.get("comment", None)
1468133 comment_url = standard.get("commenturl", None)
1469n/a
1470n/a # set default path
1471133 if path is not Absent and path != "":
147236 path_specified = True
147336 path = escape_path(path)
1474n/a else:
147597 path_specified = False
147697 path = request_path(request)
147797 i = path.rfind("/")
147897 if i != -1:
147997 if version == 0:
1480n/a # Netscape spec parts company from reality here
148147 path = path[:i]
1482n/a else:
148350 path = path[:i+1]
148497 if len(path) == 0: path = "/"
1485n/a
1486n/a # set default domain
1487133 domain_specified = domain is not Absent
1488n/a # but first we have to remember whether it starts with a dot
1489133 domain_initial_dot = False
1490133 if domain_specified:
149136 domain_initial_dot = bool(domain.startswith("."))
1492133 if domain is Absent:
149397 req_host, erhn = eff_request_host(request)
149497 domain = erhn
149536 elif not domain.startswith("."):
14969 domain = "."+domain
1497n/a
1498n/a # set default port
1499133 port_specified = False
1500133 if port is not Absent:
150111 if port is None:
1502n/a # Port attr present, but has no value: default to request port.
1503n/a # Cookie should then only be sent back on that port.
15044 port = request_port(request)
1505n/a else:
15067 port_specified = True
15077 port = re.sub(r"\s+", "", port)
1508n/a else:
1509n/a # No port attr present. Cookie can be sent back on any port.
1510122 port = None
1511n/a
1512n/a # set default expires and discard
1513133 if expires is Absent:
1514114 expires = None
1515114 discard = True
151619 elif expires <= self._now:
1517n/a # Expiry date in past is request to delete cookie. This can't be
1518n/a # in DefaultCookiePolicy, because can't delete cookies there.
15193 try:
15203 self.clear(domain, path, name)
15211 except KeyError:
15221 pass
15233 _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
15243 domain, path, name)
15253 return None
1526n/a
1527130 return Cookie(version,
1528130 name, value,
1529130 port, port_specified,
1530130 domain, domain_specified, domain_initial_dot,
1531130 path, path_specified,
1532130 secure,
1533130 expires,
1534130 discard,
1535130 comment,
1536130 comment_url,
1537130 rest)
1538n/a
15391 def _cookies_from_attrs_set(self, attrs_set, request):
1540197 cookie_tuples = self._normalized_cookie_tuples(attrs_set)
1541n/a
1542197 cookies = []
1543331 for tup in cookie_tuples:
1544134 cookie = self._cookie_from_cookie_tuple(tup, request)
1545134 if cookie: cookies.append(cookie)
1546197 return cookies
1547n/a
15481 def _process_rfc2109_cookies(self, cookies):
154971 rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
155071 if rfc2109_as_ns is None:
155167 rfc2109_as_ns = not self._policy.rfc2965
1552144 for cookie in cookies:
155373 if cookie.version == 1:
15546 cookie.rfc2109 = True
15556 if rfc2109_as_ns:
1556n/a # treat 2109 cookies as Netscape cookies rather than
1557n/a # as RFC2965 cookies
15583 cookie.version = 0
1559n/a
15601 def make_cookies(self, response, request):
1561n/a """Return sequence of Cookie objects extracted from response object."""
1562n/a # get cookie-attributes for RFC 2965 and Netscape protocols
1563163 headers = response.info()
1564163 rfc2965_hdrs = headers.getheaders("Set-Cookie2")
1565163 ns_hdrs = headers.getheaders("Set-Cookie")
1566n/a
1567163 rfc2965 = self._policy.rfc2965
1568163 netscape = self._policy.netscape
1569n/a
1570163 if ((not rfc2965_hdrs and not ns_hdrs) or
1571131 (not ns_hdrs and not rfc2965) or
1572126 (not rfc2965_hdrs and not netscape) or
1573126 (not netscape and not rfc2965)):
157437 return [] # no relevant cookie headers: quick exit
1575n/a
1576126 try:
1577126 cookies = self._cookies_from_attrs_set(
1578126 split_header_words(rfc2965_hdrs), request)
15790 except Exception:
15800 _warn_unhandled_exception()
15810 cookies = []
1582n/a
1583126 if ns_hdrs and netscape:
158471 try:
1585n/a # RFC 2109 and Netscape cookies
158671 ns_cookies = self._cookies_from_attrs_set(
158771 parse_ns_headers(ns_hdrs), request)
15880 except Exception:
15890 _warn_unhandled_exception()
15900 ns_cookies = []
159171 self._process_rfc2109_cookies(ns_cookies)
1592n/a
1593n/a # Look for Netscape cookies (from Set-Cookie headers) that match
1594n/a # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
1595n/a # For each match, keep the RFC 2965 cookie and ignore the Netscape
1596n/a # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
1597n/a # bundled in with the Netscape cookies for this purpose, which is
1598n/a # reasonable behaviour.
159971 if rfc2965:
160016 lookup = {}
160116 for cookie in cookies:
16020 lookup[(cookie.domain, cookie.path, cookie.name)] = None
1603n/a
160416 def no_matching_rfc2965(ns_cookie, lookup=lookup):
160519 key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
160619 return key not in lookup
160716 ns_cookies = filter(no_matching_rfc2965, ns_cookies)
1608n/a
160971 if ns_cookies:
161065 cookies.extend(ns_cookies)
1611n/a
1612126 return cookies
1613n/a
16141 def set_cookie_if_ok(self, cookie, request):
1615n/a """Set a cookie if policy says it's OK to do so."""
16160 self._cookies_lock.acquire()
16170 try:
16180 self._policy._now = self._now = int(time.time())
1619n/a
16200 if self._policy.set_ok(cookie, request):
16210 self.set_cookie(cookie)
1622n/a
1623n/a
1624n/a finally:
16250 self._cookies_lock.release()
1626n/a
16271 def set_cookie(self, cookie):
1628n/a """Set a cookie, without checking whether or not it should be set."""
1629131 c = self._cookies
1630131 self._cookies_lock.acquire()
1631131 try:
1632131 if cookie.domain not in c: c[cookie.domain] = {}
1633131 c2 = c[cookie.domain]
1634131 if cookie.path not in c2: c2[cookie.path] = {}
1635131 c3 = c2[cookie.path]
1636131 c3[cookie.name] = cookie
1637n/a finally:
1638131 self._cookies_lock.release()
1639n/a
16401 def extract_cookies(self, response, request):
1641n/a """Extract cookies from response, where allowable given the request."""
1642161 _debug("extract_cookies: %s", response.info())
1643161 self._cookies_lock.acquire()
1644161 try:
1645161 self._policy._now = self._now = int(time.time())
1646n/a
1647289 for cookie in self.make_cookies(response, request):
1648128 if self._policy.set_ok(cookie, request):
1649110 _debug(" setting cookie: %s", cookie)
1650110 self.set_cookie(cookie)
1651n/a finally:
1652161 self._cookies_lock.release()
1653n/a
16541 def clear(self, domain=None, path=None, name=None):
1655n/a """Clear some cookies.
1656n/a
1657n/a Invoking this method without arguments will clear all cookies. If
1658n/a given a single argument, only cookies belonging to that domain will be
1659n/a removed. If given two arguments, cookies belonging to the specified
1660n/a path within that domain are removed. If given three arguments, then
1661n/a the cookie with the specified name, path and domain is removed.
1662n/a
1663n/a Raises KeyError if no matching cookie exists.
1664n/a
1665n/a """
16668 if name is not None:
16676 if (domain is None) or (path is None):
16680 raise ValueError(
16690 "domain and path must be given to remove a cookie by name")
16706 del self._cookies[domain][path][name]
16712 elif path is not None:
16720 if domain is None:
16730 raise ValueError(
16740 "domain must be given to remove cookies by path")
16750 del self._cookies[domain][path]
16762 elif domain is not None:
16770 del self._cookies[domain]
1678n/a else:
16792 self._cookies = {}
1680n/a
16811 def clear_session_cookies(self):
1682n/a """Discard all session cookies.
1683n/a
1684n/a Note that the .save() method won't save session cookies anyway, unless
1685n/a you ask otherwise by passing a true ignore_discard argument.
1686n/a
1687n/a """
16882 self._cookies_lock.acquire()
16892 try:
16908 for cookie in self:
16916 if cookie.discard:
16923 self.clear(cookie.domain, cookie.path, cookie.name)
1693n/a finally:
16942 self._cookies_lock.release()
1695n/a
16961 def clear_expired_cookies(self):
1697n/a """Discard all expired cookies.
1698n/a
1699n/a You probably don't need to call this method: expired cookies are never
1700n/a sent back to the server (provided you're using DefaultCookiePolicy),
1701n/a this method is called by CookieJar itself every so often, and the
1702n/a .save() method won't save expired cookies anyway (unless you ask
1703n/a otherwise by passing a true ignore_expires argument).
1704n/a
1705n/a """
1706152 self._cookies_lock.acquire()
1707152 try:
1708152 now = time.time()
1709329 for cookie in self:
1710177 if cookie.is_expired(now):
17110 self.clear(cookie.domain, cookie.path, cookie.name)
1712n/a finally:
1713152 self._cookies_lock.release()
1714n/a
17151 def __iter__(self):
1716223 return deepvalues(self._cookies)
1717n/a
17181 def __len__(self):
1719n/a """Return number of contained cookies."""
172052 i = 0
1721147 for cookie in self: i = i + 1
172252 return i
1723n/a
17241 def __repr__(self):
17256 r = []
172632 for cookie in self: r.append(repr(cookie))
17276 return "<%s[%s]>" % (self.__class__, ", ".join(r))
1728n/a
17291 def __str__(self):
17300 r = []
17310 for cookie in self: r.append(str(cookie))
17320 return "<%s[%s]>" % (self.__class__, ", ".join(r))
1733n/a
1734n/a
1735n/a# derives from IOError for backwards-compatibility with Python 2.4.0
17362class LoadError(IOError): pass
1737n/a
17382class FileCookieJar(CookieJar):
17391 """CookieJar that can be loaded from and saved to a file."""
1740n/a
17411 def __init__(self, filename=None, delayload=False, policy=None):
1742n/a """
1743n/a Cookies are NOT loaded from the named file until either the .load() or
1744n/a .revert() method is called.
1745n/a
1746n/a """
174713 CookieJar.__init__(self, policy)
174813 if filename is not None:
17495 try:
17505 filename+""
17510 except:
17520 raise ValueError("filename must be string-like")
175313 self.filename = filename
175413 self.delayload = bool(delayload)
1755n/a
17561 def save(self, filename=None, ignore_discard=False, ignore_expires=False):
1757n/a """Save cookies to a file."""
17580 raise NotImplementedError()
1759n/a
17601 def load(self, filename=None, ignore_discard=False, ignore_expires=False):
1761n/a """Load cookies from a file."""
17629 if filename is None:
17632 if self.filename is not None: filename = self.filename
17640 else: raise ValueError(MISSING_FILENAME_TEXT)
1765n/a
17669 f = open(filename)
17677 try:
17687 self._really_load(f, filename, ignore_discard, ignore_expires)
1769n/a finally:
17707 f.close()
1771n/a
17721 def revert(self, filename=None,
17731 ignore_discard=False, ignore_expires=False):
1774n/a """Clear all cookies and reload cookies from a saved file.
1775n/a
1776n/a Raises LoadError (or IOError) if reversion is not successful; the
1777n/a object's state will not be altered if this happens.
1778n/a
1779n/a """
17801 if filename is None:
17811 if self.filename is not None: filename = self.filename
17820 else: raise ValueError(MISSING_FILENAME_TEXT)
1783n/a
17841 self._cookies_lock.acquire()
17851 try:
1786n/a
17871 old_state = copy.deepcopy(self._cookies)
17881 self._cookies = {}
17891 try:
17901 self.load(filename, ignore_discard, ignore_expires)
17910 except (LoadError, IOError):
17920 self._cookies = old_state
17930 raise
1794n/a
1795n/a finally:
17961 self._cookies_lock.release()
1797n/a
17981from _LWPCookieJar import LWPCookieJar, lwp_cookie_str
17991from _MozillaCookieJar import MozillaCookieJar