» Core Development > Code coverage > Lib/http/cookiejar.py

Python code coverage for Lib/http/cookiejar.py

# | count | content
1n/ar"""HTTP cookie handling for web clients.
2n/a
3n/aThis module has (now fairly distant) origins in Gisle Aas' Perl module
4n/aHTTP::Cookies, from the libwww-perl library.
5n/a
6n/aDocstrings, comments and debug strings in this code refer to the
7n/aattributes of the HTTP cookie system as cookie-attributes, to distinguish
8n/athem clearly from Python attributes.
9n/a
10n/aClass diagram (note that BSDDBCookieJar and the MSIE* classes are not
11n/adistributed with the Python standard library, but are available from
12n/ahttp://wwwsearch.sf.net/):
13n/a
14n/a CookieJar____
15n/a / \ \
16n/a FileCookieJar \ \
17n/a / | \ \ \
18n/a MozillaCookieJar | LWPCookieJar \ \
19n/a | | \
20n/a | ---MSIEBase | \
21n/a | / | | \
22n/a | / MSIEDBCookieJar BSDDBCookieJar
23n/a |/
24n/a MSIECookieJar
25n/a
26n/a"""
27n/a
28n/a__all__ = ['Cookie', 'CookieJar', 'CookiePolicy', 'DefaultCookiePolicy',
29n/a 'FileCookieJar', 'LWPCookieJar', 'LoadError', 'MozillaCookieJar']
30n/a
31n/aimport copy
32n/aimport datetime
33n/aimport re
34n/aimport time
35n/aimport urllib.parse, urllib.request
36n/atry:
37n/a import threading as _threading
38n/aexcept ImportError:
39n/a import dummy_threading as _threading
40n/aimport http.client # only for the default HTTP port
41n/afrom calendar import timegm
42n/a
debug = False   # set to True to enable debugging via the logging module
logger = None


def _debug(*args):
    """Forward *args* to the module logger's ``debug`` method.

    Does nothing (and returns None) while the module-level ``debug`` flag
    is false.  The "http.cookiejar" logger is looked up on first use and
    cached in the module-level ``logger`` name.
    """
    global logger
    if debug:
        if not logger:
            # Deferred import: logging is only needed once debugging is on.
            import logging
            logger = logging.getLogger("http.cookiejar")
        return logger.debug(*args)
    return None
54n/a
55n/a
# The default HTTP port, as a string, taken from http.client.HTTP_PORT.
DEFAULT_HTTP_PORT = str(http.client.HTTP_PORT)
# Message text for the case where no filename was passed in and the
# CookieJar instance was not initialised with one either.
MISSING_FILENAME_TEXT = ("a filename was not supplied (nor was the CookieJar "
                         "instance initialised with one)")
59n/a
def _warn_unhandled_exception():
    """Emit a warning containing the traceback of the current exception.

    Intended for the catch-all ``except`` clauses in this module, which
    guard against input that is bad in unexpected ways: anything caught
    there is reported as an http.cookiejar bug instead of passing silently.
    """
    import traceback
    import warnings
    # stacklevel=2 attributes the warning to the caller of this helper.
    warnings.warn("http.cookiejar bug!\n%s" % traceback.format_exc(),
                  stacklevel=2)
69n/a
70n/a
71n/a# Date/time conversion
72n/a# -----------------------------------------------------------------------------
73n/a
EPOCH_YEAR = 1970


def _timegm(tt):
    """Range-checked wrapper around calendar.timegm.

    tt is a time tuple whose first six items are
    (year, month, mday, hour, min, sec).  Returns timegm(tt) when every
    one of those fields lies inside its accepted range, otherwise None.
    """
    year, month, mday, hour, minute, second = tt[:6]
    if not (year >= EPOCH_YEAR and 1 <= month <= 12 and 1 <= mday <= 31):
        return None
    if not (0 <= hour <= 24 and 0 <= minute <= 59 and 0 <= second <= 61):
        return None
    return timegm(tt)
82n/a
# Abbreviated English day and month names used in cookie/HTTP date strings.
# DAYS is ordered to match datetime.weekday() (Monday == 0).
DAYS = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
MONTHS = ["Jan", "Feb", "Mar", "Apr", "May", "Jun",
          "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
# Lower-cased month names for case-insensitive lookup.  Built with a
# comprehension so that no loop variable is left behind at module level.
MONTHS_LOWER = [name.lower() for name in MONTHS]
88n/a
def time2isoz(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like "YYYY-MM-DD hh:mm:ssZ",
    representing Universal Time (UTC, aka GMT).  An example of this format is:

    1994-11-24 08:49:37Z

    """
    # Use timezone-aware datetimes: utcnow()/utcfromtimestamp() return naive
    # objects and are deprecated.  The formatted fields are the same.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    return "%04d-%02d-%02d %02d:%02d:%02dZ" % (
        dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
107n/a
def time2netscape(t=None):
    """Return a string representing time in seconds since epoch, t.

    If the function is called without an argument, it will use the current
    time.

    The format of the returned string is like this:

    Wed, DD-Mon-YYYY HH:MM:SS GMT

    """
    # Use timezone-aware datetimes: utcnow()/utcfromtimestamp() return naive
    # objects and are deprecated.  The formatted fields are the same.
    utc = datetime.timezone.utc
    if t is None:
        dt = datetime.datetime.now(tz=utc)
    else:
        dt = datetime.datetime.fromtimestamp(t, tz=utc)
    # DAYS is indexed by weekday() (Monday == 0); MONTHS is zero-based.
    return "%s, %02d-%s-%04d %02d:%02d:%02d GMT" % (
        DAYS[dt.weekday()], dt.day, MONTHS[dt.month-1],
        dt.year, dt.hour, dt.minute, dt.second)
126n/a
127n/a
UTC_ZONES = {"GMT": None, "UTC": None, "UT": None, "Z": None}

TIMEZONE_RE = re.compile(r"^([-+])?(\d\d?):?(\d\d)?$", re.ASCII)


def offset_from_tz_string(tz):
    """Return the UTC offset, in seconds, described by the string tz.

    Names listed in UTC_ZONES give 0.  Numeric zones such as "-0800",
    "+01:30" or "5" are converted to signed seconds.  Anything else gives
    None.
    """
    if tz in UTC_ZONES:
        return 0
    match = TIMEZONE_RE.search(tz)
    if match is None:
        return None
    sign, hours, minutes = match.groups()
    seconds = 3600 * int(hours)
    if minutes:
        seconds += 60 * int(minutes)
    return -seconds if sign == '-' else seconds
144n/a
def _str2time(day, mon, yr, hr, min, sec, tz):
    """Convert parsed date fields to seconds since the epoch.

    All fields are strings as captured by the date regexes; hr, min, sec
    and tz may be None.  mon may be a month name (matched against
    MONTHS_LOWER, case-insensitively) or a month number.  A missing tz is
    treated as UTC.

    Returns None when the year exceeds datetime.MAXYEAR, the month is not
    recognised, the fields fail the range checks in _timegm, or the
    timezone string is not understood by offset_from_tz_string.
    """
    yr = int(yr)
    if yr > datetime.MAXYEAR:
        return None

    # translate month name to number
    # month numbers start with 1 (January)
    try:
        mon = MONTHS_LOWER.index(mon.lower())+1
    except ValueError:
        # maybe it's already a number
        try:
            imon = int(mon)
        except ValueError:
            return None
        if 1 <= imon <= 12:
            mon = imon
        else:
            return None

    # make sure clock elements are defined
    if hr is None: hr = 0
    if min is None: min = 0
    if sec is None: sec = 0

    day = int(day)
    hr = int(hr)
    min = int(min)
    # ISO_DATE_RE admits fractional seconds such as "29.5" or "29.", which
    # int() alone rejects with ValueError; drop the fraction instead.
    sec = int(float(sec))

    if yr < 1000:
        # find "obvious" year: choose the century that puts the year
        # closest to the current one
        cur_yr = time.localtime(time.time())[0]
        m = cur_yr % 100
        tmp = yr
        yr = yr + cur_yr - m
        m = m - tmp
        if abs(m) > 50:
            if m > 0: yr = yr + 100
            else: yr = yr - 100

    # convert UTC time tuple to seconds since epoch (not timezone-adjusted)
    t = _timegm((yr, mon, day, hr, min, sec, tz))

    if t is not None:
        # adjust time using timezone string, to get absolute time since epoch
        if tz is None:
            tz = "UTC"
        tz = tz.upper()
        offset = offset_from_tz_string(tz)
        if offset is None:
            return None
        t = t - offset

    return t
200n/a
STRICT_DATE_RE = re.compile(
    r"^[SMTWF][a-z][a-z], (\d\d) ([JFMASOND][a-z][a-z]) "
    r"(\d\d\d\d) (\d\d):(\d\d):(\d\d) GMT$", re.ASCII)
WEEKDAY_RE = re.compile(
    r"^(?:Sun|Mon|Tue|Wed|Thu|Fri|Sat)[a-z]*,?\s*", re.I | re.ASCII)
LOOSE_HTTP_DATE_RE = re.compile(
    r"""^
    (\d\d?)            # day
       (?:\s+|[-\/])
    (\w+)              # month
        (?:\s+|[-\/])
    (\d+)              # year
    (?:
          (?:\s+|:)    # separator before clock
       (\d\d?):(\d\d)  # hour:min
       (?::(\d\d))?    # optional seconds
    )?                 # optional clock
       \s*
    ([-+]?\d{2,4}|(?![APap][Mm]\b)[A-Za-z]+)? # timezone
       \s*
    (?:\(\w+\))?       # ASCII representation of timezone in parens.
       \s*$""", re.X | re.ASCII)
def http2time(text):
    """Returns time in seconds since epoch of time represented by a string.

    Return value is a number of seconds.

    None is returned if the format of str is unrecognized, the time is outside
    the representable range, or the timezone string is not recognized.  If the
    string contains no timezone, UTC is assumed.

    The timezone in the string may be numerical (like "-0800" or "+0100") or a
    string timezone (like "UTC", "GMT", "BST" or "EST").  Currently, only the
    timezone strings equivalent to UTC (zero offset) are known to the function.

    The function loosely parses the following formats:

    Wed, 09 Feb 1994 22:23:32 GMT       -- HTTP format
    Tuesday, 08-Feb-94 14:15:29 GMT     -- old rfc850 HTTP format
    Tuesday, 08-Feb-1994 14:15:29 GMT   -- broken rfc850 HTTP format
    09 Feb 1994 22:23:32 GMT            -- HTTP format (no weekday)
    08-Feb-94 14:15:29 GMT              -- rfc850 format (no weekday)
    08-Feb-1994 14:15:29 GMT            -- broken rfc850 format (no weekday)

    The parser ignores leading and trailing whitespace.  The time may be
    absent.

    If the year is given with only 2 digits, the function will select the
    century that makes the year closest to the current date.

    """
    # fast exit for strictly conforming string
    m = STRICT_DATE_RE.search(text)
    if m:
        g = m.groups()
        try:
            mon = MONTHS_LOWER.index(g[1].lower()) + 1
        except ValueError:
            # STRICT_DATE_RE only checks the *shape* of the month field, so
            # a string like "Foo" gets this far.  That is an unrecognized
            # date, which the documented contract reports as None rather
            # than by raising.
            return None
        tt = (int(g[2]), mon, int(g[0]),
              int(g[3]), int(g[4]), float(g[5]))
        return _timegm(tt)

    # No, we need some messy parsing...

    # clean up
    text = text.lstrip()
    text = WEEKDAY_RE.sub("", text, 1)  # Useless weekday

    # loose regexp parse
    m = LOOSE_HTTP_DATE_RE.search(text)
    if m is None:
        return None  # bad format

    # tz is time zone specifier string
    day, mon, yr, hr, min, sec, tz = m.groups()
    return _str2time(day, mon, yr, hr, min, sec, tz)
278n/a
ISO_DATE_RE = re.compile(
    r"""^
    (\d{4})              # year
       [-\/]?
    (\d\d?)              # numerical month
       [-\/]?
    (\d\d?)              # day
   (?:
         (?:\s+|[-:Tt])  # separator before clock
      (\d\d?):?(\d\d)    # hour:min
      (?::?(\d\d(?:\.\d*)?))?  # optional seconds (and fractional)
   )?                    # optional clock
      \s*
   ([-+]?\d\d?:?(:?\d\d)?
    |Z|z)?               # timezone (Z is "zero meridian", i.e. GMT)
      \s*$""", re.X | re.ASCII)
def iso2time(text):
    """
    Like http2time, but for ISO 8601 style strings:

    1994-02-03 14:15:29 -0100    -- ISO 8601 format
    1994-02-03 14:15:29          -- zone is optional
    1994-02-03                   -- only date
    1994-02-03T14:15:29          -- Use T as separator
    19940203T141529Z             -- ISO 8601 compact format
    19940203                     -- only date

    Leading whitespace is ignored.  None is returned when the string does
    not match ISO_DATE_RE.

    """
    match = ISO_DATE_RE.search(text.lstrip())
    if match is None:
        return None  # bad format

    # XXX the last group is an extra bit of the timezone that is ignored
    # here: is this the right thing to do?
    yr, mon, day, hr, min_, sec, tz, _ = match.groups()
    return _str2time(day, mon, yr, hr, min_, sec, tz)
323n/a
324n/a
325n/a# Header parsing
326n/a# -----------------------------------------------------------------------------
327n/a
def unmatched(match):
    """Return the subject string of a re.Match with the matched span removed."""
    subject = match.string
    return subject[:match.start()] + subject[match.end():]
332n/a
HEADER_TOKEN_RE =        re.compile(r"^\s*([^=\s;,]+)")
HEADER_QUOTED_VALUE_RE = re.compile(r"^\s*=\s*\"([^\"\\]*(?:\\.[^\"\\]*)*)\"")
HEADER_VALUE_RE =        re.compile(r"^\s*=\s*([^\s;,]*)")
HEADER_ESCAPE_RE = re.compile(r"\\(.)")
def split_header_words(header_values):
    r"""Parse header values into a list of lists containing key,value pairs.

    "," , ";" and "=" are understood, as are quoted values following "=".
    Tokens separated only by whitespace are handled as though ";" stood
    between them.

    When header_values holds several strings, the result is the same as
    for one string in which they are joined by a comma ",".

    The syntax handled is therefore the following (BNF as from the HTTP/1.1
    specification, but with the requirement for tokens relaxed):

      headers           = #header
      header            = (token | parameter) *( [";"] (token | parameter))

      token             = 1*<any CHAR except CTLs or separators>
      separators        = "(" | ")" | "<" | ">" | "@"
                        | "," | ";" | ":" | "\" | <">
                        | "/" | "[" | "]" | "?" | "="
                        | "{" | "}" | SP | HT

      quoted-string     = ( <"> *(qdtext | quoted-pair ) <"> )
      qdtext            = <any TEXT except <">>
      quoted-pair       = "\" CHAR

      parameter         = attribute "=" value
      attribute         = token
      value             = token | quoted-string

    Every header becomes one list of (key, value) tuples.  A bare token
    (one that is not part of a parameter) gets the value None.  Headers
    that are syntactically wrong are not necessarily parsed the way you
    would hope.

    Examples:

    >>> split_header_words(['foo="bar"; port="80,81"; discard, bar=baz'])
    [[('foo', 'bar'), ('port', '80,81'), ('discard', None)], [('bar', 'baz')]]
    >>> split_header_words(['text/html; charset="iso-8859-1"'])
    [[('text/html', None), ('charset', 'iso-8859-1')]]
    >>> split_header_words([r'Basic realm="\"foo\bar\""'])
    [[('Basic', None), ('realm', '"foobar"')]]

    """
    assert not isinstance(header_values, str)
    result = []
    for text in header_values:
        orig_text = text
        pairs = []
        while text:
            token = HEADER_TOKEN_RE.search(text)
            if token:
                # Consume the name, then look for "=value" right after it.
                text = unmatched(token)
                name = token.group(1)
                quoted = HEADER_QUOTED_VALUE_RE.search(text)
                if quoted:
                    text = unmatched(quoted)
                    # undo backslash escapes inside the quoted string
                    value = HEADER_ESCAPE_RE.sub(r"\1", quoted.group(1))
                else:
                    bare = HEADER_VALUE_RE.search(text)
                    if bare:
                        text = unmatched(bare)
                        value = bare.group(1).rstrip()
                    else:
                        # no value, a lone token
                        value = None
                pairs.append((name, value))
                continue

            stripped = text.lstrip()
            if stripped.startswith(","):
                # concatenated headers, as per RFC 2616 section 4.2
                text = stripped[1:]
                if pairs:
                    result.append(pairs)
                pairs = []
                continue

            # skip junk
            non_junk, nr_junk_chars = re.subn(r"^[=\s;]*", "", text)
            assert nr_junk_chars > 0, (
                "split_header_words bug: '%s', '%s', %s" %
                (orig_text, text, pairs))
            text = non_junk
        if pairs:
            result.append(pairs)
    return result
421n/a
HEADER_JOIN_ESCAPE_RE = re.compile(r"([\"\\])")
def join_header_words(lists):
    """Do the inverse (almost) of the conversion done by split_header_words.

    lists is a list of lists of (key, value) pairs; the result is a single
    header value.  Pairs within one inner list are joined with "; " and the
    inner lists with ", ".  A value of None emits the bare key.  Values that
    are not made up purely of word characters are quoted.

    >>> join_header_words([[("text/plain", None), ("charset", "iso-8859-1")]])
    'text/plain; charset="iso-8859-1"'
    >>> join_header_words([[("text/plain", None)], [("charset", "iso-8859-1")]])
    'text/plain, charset="iso-8859-1"'

    """
    headers = []
    for pairs in lists:
        attr = []
        for key, value in pairs:
            if value is None:
                attr.append(key)
                continue
            if re.search(r"^\w+$", value) is None:
                # escape " and \, then wrap in double quotes
                value = '"%s"' % HEADER_JOIN_ESCAPE_RE.sub(r"\\\1", value)
            attr.append("%s=%s" % (key, value))
        if attr:
            headers.append("; ".join(attr))
    return ", ".join(headers)
447n/a
def strip_quotes(text):
    """Return text without one leading and one trailing double quote.

    Each end is handled independently, so an unbalanced quote is removed
    as well.
    """
    begin = 1 if text.startswith('"') else 0
    text = text[begin:]
    return text[:-1] if text.endswith('"') else text
454n/a
def parse_ns_headers(ns_headers):
    """Ad-hoc parser for Netscape protocol cookie-attributes.

    A Set-Cookie header in the old Netscape format may, for example, carry
    an unquoted "," inside the expires field, which split_header_words
    cannot cope with -- hence this separate parser.

    XXX This may not make the best possible effort to parse all the crap
    that Netscape Cookie headers contain.  Ronald Tschalar's HTTPClient
    parser is probably better, so could do worse than following that if
    this ever gives any trouble.

    Currently, this is also used for parsing RFC 2109 cookies.

    Returns one list of (key, value) pairs per header that yielded any
    pairs.  The first pair is the cookie's own name/value; known attribute
    names are lower-cased, "expires" values are converted with http2time,
    and a ("version", "0") pair is appended when no version was given.

    """
    known_attrs = ("expires", "domain", "path", "secure",
                   # RFC 2109 attrs (may turn up in Netscape cookies, too)
                   "version", "port", "max-age")

    result = []
    for ns_header in ns_headers:
        pairs = []
        version_set = False

        # XXX: The following does not strictly adhere to RFCs in that empty
        # names and values are legal (the former will only appear once and will
        # be overwritten if multiple occurrences are present). This is
        # mostly to deal with backwards compatibility.
        for index, chunk in enumerate(ns_header.split(';')):
            name, equals, raw = chunk.strip().partition('=')
            name = name.strip()

            if not name:
                if index == 0:
                    # no cookie name at all: give up on this header
                    break
                continue

            # allow for a distinction between present and empty and missing
            # altogether
            value = raw.strip() if equals else None

            if index != 0:
                lowered = name.lower()
                if lowered in known_attrs:
                    name = lowered

                if name == "version":
                    # This is an RFC 2109 cookie.
                    version_set = True
                    if value is not None:
                        value = strip_quotes(value)
                elif name == "expires" and value is not None:
                    # convert expires date to seconds since epoch
                    value = http2time(strip_quotes(value))  # None if invalid
            pairs.append((name, value))

        if pairs:
            if not version_set:
                pairs.append(("version", "0"))
            result.append(pairs)

    return result
521n/a
522n/a
IPV4_RE = re.compile(r"\.\d+$", re.ASCII)
def is_HDN(text):
    """Return True if text is a host domain name."""
    # XXX
    # This may well be wrong.  Which RFC is HDN defined in, if any (for
    #  the purposes of RFC 2965)?
    # For the current implementation, what about IPv6?  Remember to look
    #  at other uses of IPV4_RE also, if change this.
    if IPV4_RE.search(text):
        # ends in ".<digits>": treated as an IP address, not a name
        return False
    if text == "":
        return False
    if text[0] == "." or text[-1] == ".":
        return False
    return True

def domain_match(A, B):
    """Return True if domain A domain-matches domain B, according to RFC 2965.

    A and B may be host domain names or IP addresses.

    RFC 2965, section 1:

    Host names can be specified either as an IP address or a HDN string.
    Sometimes we compare one host name with another.  (Such comparisons SHALL
    be case-insensitive.)  Host A's name domain-matches host B's if

         *  their host name strings string-compare equal; or

         * A is a HDN string and has the form NB, where N is a non-empty
            name string, B has the form .B', and B' is a HDN string.  (So,
            x.y.com domain-matches .Y.com but not Y.com.)

    Note that domain-match is not a commutative operation: a.b.c.com
    domain-matches .c.com, but not the reverse.

    """
    # Note that, if A or B are IP addresses, the only relevant part of the
    # definition of the domain-match algorithm is the direct string-compare.
    A = A.lower()
    B = B.lower()
    if A == B:
        return True
    if not is_HDN(A):
        return False
    # "A has the form NB" means B must be a *suffix* of A.  Merely finding B
    # somewhere inside A is not enough: "x.y.com.evil.org" must not match
    # ".y.com".  N is necessarily non-empty here because A != B.
    if not A.endswith(B):
        return False
    if not B.startswith("."):
        return False
    if not is_HDN(B[1:]):
        return False
    return True
577n/a
def liberal_is_HDN(text):
    """Return True if text is a sort-of-like a host domain name.

    Used when accepting/blocking domains: anything that does not end in
    ".<digits>" (the IPV4_RE test) counts.

    """
    return not IPV4_RE.search(text)
587n/a
def user_domain_match(A, B):
    """For blocking/accepting domains.

    A and B may be host domain names or IP addresses.  The comparison is
    case-insensitive.  When B starts with a dot, A matches if it ends with
    B; otherwise (and whenever either side looks like an IP address) only
    an exact match counts.

    """
    A = A.lower()
    B = B.lower()
    if not (liberal_is_HDN(A) and liberal_is_HDN(B)):
        # at least one IP address: only equal addresses match
        return A == B
    if B.startswith("."):
        return A.endswith(B)
    return A == B
607n/a
cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    The host comes from the network-location part of the request's full
    URL; if that is empty the request's "Host" header is used instead.  A
    trailing ":<port>" is dropped.

    """
    netloc = urllib.parse.urlparse(request.get_full_url())[1]
    if netloc == "":
        netloc = request.get_header("Host", "")

    # remove port, if present
    return cut_port_re.sub("", netloc, count=1).lower()
624n/a
def eff_request_host(request):
    """Return a tuple (request-host, effective request-host name).

    As defined by RFC 2965, except both are lowercased.  The effective name
    is the request-host with ".local" appended when the host contains no
    dot and does not match IPV4_RE; otherwise the two are identical.

    """
    req_host = request_host(request)
    if "." not in req_host and not IPV4_RE.search(req_host):
        return req_host, req_host + ".local"
    return req_host, req_host
635n/a
def request_path(request):
    """Path component of request-URI, as defined by RFC 2965.

    The path is run through escape_path and always starts with "/".
    """
    full_url = request.get_full_url()
    path = escape_path(urllib.parse.urlsplit(full_url).path)
    # fix bad RFC 2396 absoluteURI
    return path if path.startswith("/") else "/" + path
645n/a
def request_port(request):
    """Return the port of request.host as a string.

    Without a ":" in request.host the result is DEFAULT_HTTP_PORT.  If the
    text after the first ":" is not an integer, a debug message is logged
    and None is returned.
    """
    _, colon, port = request.host.partition(':')
    if not colon:
        return DEFAULT_HTTP_PORT
    try:
        int(port)
    except ValueError:
        _debug("nonnumeric port: '%s'", port)
        return None
    return port
659n/a
# Characters in addition to A-Z, a-z, 0-9, '_', '.', and '-' that don't
# need to be escaped to form a valid HTTP URL (RFCs 2396 and 1738).
HTTP_PATH_SAFE = "%/;:@&=+$,!~*'()"
ESCAPED_CHAR_RE = re.compile(r"%([0-9a-fA-F][0-9a-fA-F])")
def uppercase_escaped_char(match):
    """Return the %XX escape captured by match with upper-case hex digits."""
    return "%" + match.group(1).upper()
def escape_path(path):
    """Escape any invalid characters in HTTP URL, and uppercase all escapes."""
    # There's no knowing what character encoding was used to create URLs
    # containing %-escapes, but since we have to pick one to escape invalid
    # path characters, we pick UTF-8, as recommended in the HTML 4.0
    # specification:
    # http://www.w3.org/TR/REC-html40/appendix/notes.html#h-B.2.1
    # And here, kind of: draft-fielding-uri-rfc2396bis-03
    # (And in draft IRI specification: draft-duerst-iri-05)
    # (And here, for new URI schemes: RFC 2718)
    quoted = urllib.parse.quote(path, HTTP_PATH_SAFE)
    return ESCAPED_CHAR_RE.sub(uppercase_escaped_char, quoted)
679n/a
def reach(h):
    """Return reach of host h, as defined by RFC 2965, section 1.

    The reach R of a host name H is defined as follows:

       *  If

          -  H is the host domain name of a host; and,

          -  H has the form A.B; and

          -  A has no embedded (that is, interior) dots; and

          -  B has at least one embedded dot, or B is the string "local".
             then the reach of H is .B.

       *  Otherwise, the reach of H is H.

    >>> reach("www.acme.com")
    '.acme.com'
    >>> reach("acme.com")
    'acme.com'
    >>> reach("acme.local")
    '.local'

    """
    # Split at the first dot: everything after it is B.
    _, dot, b = h.partition(".")
    if dot and is_HDN(h) and ("." in b or b == "local"):
        return "." + b
    return h
714n/a
def is_third_party(request):
    """Return True if request is to a third-party host.

    RFC 2965, section 3.3.6:

        An unverifiable transaction is to a third-party host if its request-
        host U does not domain-match the reach R of the request-host O in the
        origin transaction.

    """
    origin_reach = reach(request.origin_req_host)
    return not domain_match(request_host(request), origin_reach)
730n/a
731n/a
class Cookie:
    """HTTP Cookie.

    Represents both Netscape and RFC 2965 cookies.

    The class is intentionally minimal: it only stores attributes, so it is
    possible to build Cookie instances that do not comply with the cookie
    standards.  CookieJar.make_cookies is the factory function for Cookie
    objects -- it deals with cookie parsing, supplying defaults, and
    normalising to the representation used in this class.  CookiePolicy is
    responsible for checking them to see whether they should be accepted
    from and returned to the server.

    Note that the port may be present in the headers, but unspecified
    ("Port" rather than "Port=80", for example); if this is the case, port
    is None.

    """

    def __init__(self, version, name, value,
                 port, port_specified,
                 domain, domain_specified, domain_initial_dot,
                 path, path_specified,
                 secure,
                 expires,
                 discard,
                 comment,
                 comment_url,
                 rest,
                 rfc2109=False,
                 ):
        """Store the cookie attributes.

        version and expires are coerced to int when not None, domain is
        lower-cased, and rest (the non-standard attributes) is shallow
        copied.  Raises ValueError if port is None while port_specified
        is True.
        """
        if version is not None:
            version = int(version)
        if expires is not None:
            expires = int(float(expires))
        if port_specified is True and port is None:
            raise ValueError("if port is None, port_specified must be false")

        self.version = version
        self.name = name
        self.value = value

        self.port = port
        self.port_specified = port_specified

        # normalise case, as per RFC 2965 section 3.3.3
        self.domain = domain.lower()
        self.domain_specified = domain_specified
        # Sigh.  We need to know whether the domain given in the
        # cookie-attribute had an initial dot, in order to follow RFC 2965
        # (as clarified in draft errata).  Needed for the returned $Domain
        # value.
        self.domain_initial_dot = domain_initial_dot

        self.path = path
        self.path_specified = path_specified

        self.secure = secure
        self.expires = expires
        self.discard = discard
        self.comment = comment
        self.comment_url = comment_url
        self.rfc2109 = rfc2109

        self._rest = copy.copy(rest)

    def has_nonstandard_attr(self, name):
        """Return True if a non-standard attribute called name is stored."""
        return name in self._rest

    def get_nonstandard_attr(self, name, default=None):
        """Return the non-standard attribute name, or default if absent."""
        return self._rest.get(name, default)

    def set_nonstandard_attr(self, name, value):
        """Store value as the non-standard attribute name."""
        self._rest[name] = value

    def is_expired(self, now=None):
        """Return True if the cookie has an expiry time at or before now.

        now defaults to the current time; a cookie whose expires is None
        never counts as expired.
        """
        if now is None:
            now = time.time()
        return (self.expires is not None) and (self.expires <= now)

    def __str__(self):
        port_part = "" if self.port is None else ":" + self.port
        limit = self.domain + port_part + self.path
        if self.value is None:
            namevalue = self.name
        else:
            namevalue = "%s=%s" % (self.name, self.value)
        return "<Cookie %s for %s>" % (namevalue, limit)

    def __repr__(self):
        names = ("version", "name", "value",
                 "port", "port_specified",
                 "domain", "domain_specified", "domain_initial_dot",
                 "path", "path_specified",
                 "secure", "expires", "discard", "comment", "comment_url",
                 )
        args = ["%s=%s" % (attr, repr(getattr(self, attr))) for attr in names]
        args.append("rest=%s" % repr(self._rest))
        args.append("rfc2109=%s" % repr(self.rfc2109))
        return "%s(%s)" % (self.__class__.__name__, ", ".join(args))
828n/a
829n/a
class CookiePolicy:
    """Defines which cookies get accepted from and returned to server.

    A policy may also modify cookies, though this is probably a bad idea.

    For a customized policy, override the subclass DefaultCookiePolicy,
    which defines the standard rules for Netscape and RFC 2965 cookies.

    """

    def set_ok(self, cookie, request):
        """Return true if (and only if) cookie should be accepted from server.

        Currently, pre-expired cookies never get this far -- the CookieJar
        class deletes such cookies itself.

        Must be overridden; this base implementation raises
        NotImplementedError.

        """
        raise NotImplementedError

    def return_ok(self, cookie, request):
        """Return true if (and only if) cookie should be returned to server.

        Must be overridden; this base implementation raises
        NotImplementedError.
        """
        raise NotImplementedError

    def domain_return_ok(self, domain, request):
        """Return false if cookies should not be returned, given cookie domain.

        This base implementation always returns True.
        """
        return True

    def path_return_ok(self, path, request):
        """Return false if cookies should not be returned, given cookie path.

        This base implementation always returns True.
        """
        return True
861n/a
862n/a
863n/aclass DefaultCookiePolicy(CookiePolicy):
864n/a """Implements the standard rules for accepting and returning cookies."""
865n/a
866n/a DomainStrictNoDots = 1
867n/a DomainStrictNonDomain = 2
868n/a DomainRFC2965Match = 4
869n/a
870n/a DomainLiberal = 0
871n/a DomainStrict = DomainStrictNoDots|DomainStrictNonDomain
872n/a
873n/a def __init__(self,
874n/a blocked_domains=None, allowed_domains=None,
875n/a netscape=True, rfc2965=False,
876n/a rfc2109_as_netscape=None,
877n/a hide_cookie2=False,
878n/a strict_domain=False,
879n/a strict_rfc2965_unverifiable=True,
880n/a strict_ns_unverifiable=False,
881n/a strict_ns_domain=DomainLiberal,
882n/a strict_ns_set_initial_dollar=False,
883n/a strict_ns_set_path=False,
884n/a ):
885n/a """Constructor arguments should be passed as keyword arguments only."""
886n/a self.netscape = netscape
887n/a self.rfc2965 = rfc2965
888n/a self.rfc2109_as_netscape = rfc2109_as_netscape
889n/a self.hide_cookie2 = hide_cookie2
890n/a self.strict_domain = strict_domain
891n/a self.strict_rfc2965_unverifiable = strict_rfc2965_unverifiable
892n/a self.strict_ns_unverifiable = strict_ns_unverifiable
893n/a self.strict_ns_domain = strict_ns_domain
894n/a self.strict_ns_set_initial_dollar = strict_ns_set_initial_dollar
895n/a self.strict_ns_set_path = strict_ns_set_path
896n/a
897n/a if blocked_domains is not None:
898n/a self._blocked_domains = tuple(blocked_domains)
899n/a else:
900n/a self._blocked_domains = ()
901n/a
902n/a if allowed_domains is not None:
903n/a allowed_domains = tuple(allowed_domains)
904n/a self._allowed_domains = allowed_domains
905n/a
906n/a def blocked_domains(self):
907n/a """Return the sequence of blocked domains (as a tuple)."""
908n/a return self._blocked_domains
909n/a def set_blocked_domains(self, blocked_domains):
910n/a """Set the sequence of blocked domains."""
911n/a self._blocked_domains = tuple(blocked_domains)
912n/a
913n/a def is_blocked(self, domain):
914n/a for blocked_domain in self._blocked_domains:
915n/a if user_domain_match(domain, blocked_domain):
916n/a return True
917n/a return False
918n/a
919n/a def allowed_domains(self):
920n/a """Return None, or the sequence of allowed domains (as a tuple)."""
921n/a return self._allowed_domains
922n/a def set_allowed_domains(self, allowed_domains):
923n/a """Set the sequence of allowed domains, or None."""
924n/a if allowed_domains is not None:
925n/a allowed_domains = tuple(allowed_domains)
926n/a self._allowed_domains = allowed_domains
927n/a
928n/a def is_not_allowed(self, domain):
929n/a if self._allowed_domains is None:
930n/a return False
931n/a for allowed_domain in self._allowed_domains:
932n/a if user_domain_match(domain, allowed_domain):
933n/a return False
934n/a return True
935n/a
936n/a def set_ok(self, cookie, request):
937n/a """
938n/a If you override .set_ok(), be sure to call this method. If it returns
939n/a false, so should your subclass (assuming your subclass wants to be more
940n/a strict about which cookies to accept).
941n/a
942n/a """
943n/a _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
944n/a
945n/a assert cookie.name is not None
946n/a
947n/a for n in "version", "verifiability", "name", "path", "domain", "port":
948n/a fn_name = "set_ok_"+n
949n/a fn = getattr(self, fn_name)
950n/a if not fn(cookie, request):
951n/a return False
952n/a
953n/a return True
954n/a
955n/a def set_ok_version(self, cookie, request):
956n/a if cookie.version is None:
957n/a # Version is always set to 0 by parse_ns_headers if it's a Netscape
958n/a # cookie, so this must be an invalid RFC 2965 cookie.
959n/a _debug(" Set-Cookie2 without version attribute (%s=%s)",
960n/a cookie.name, cookie.value)
961n/a return False
962n/a if cookie.version > 0 and not self.rfc2965:
963n/a _debug(" RFC 2965 cookies are switched off")
964n/a return False
965n/a elif cookie.version == 0 and not self.netscape:
966n/a _debug(" Netscape cookies are switched off")
967n/a return False
968n/a return True
969n/a
970n/a def set_ok_verifiability(self, cookie, request):
971n/a if request.unverifiable and is_third_party(request):
972n/a if cookie.version > 0 and self.strict_rfc2965_unverifiable:
973n/a _debug(" third-party RFC 2965 cookie during "
974n/a "unverifiable transaction")
975n/a return False
976n/a elif cookie.version == 0 and self.strict_ns_unverifiable:
977n/a _debug(" third-party Netscape cookie during "
978n/a "unverifiable transaction")
979n/a return False
980n/a return True
981n/a
982n/a def set_ok_name(self, cookie, request):
983n/a # Try and stop servers setting V0 cookies designed to hack other
984n/a # servers that know both V0 and V1 protocols.
985n/a if (cookie.version == 0 and self.strict_ns_set_initial_dollar and
986n/a cookie.name.startswith("$")):
987n/a _debug(" illegal name (starts with '$'): '%s'", cookie.name)
988n/a return False
989n/a return True
990n/a
991n/a def set_ok_path(self, cookie, request):
992n/a if cookie.path_specified:
993n/a req_path = request_path(request)
994n/a if ((cookie.version > 0 or
995n/a (cookie.version == 0 and self.strict_ns_set_path)) and
996n/a not req_path.startswith(cookie.path)):
997n/a _debug(" path attribute %s is not a prefix of request "
998n/a "path %s", cookie.path, req_path)
999n/a return False
1000n/a return True
1001n/a
1002n/a def set_ok_domain(self, cookie, request):
1003n/a if self.is_blocked(cookie.domain):
1004n/a _debug(" domain %s is in user block-list", cookie.domain)
1005n/a return False
1006n/a if self.is_not_allowed(cookie.domain):
1007n/a _debug(" domain %s is not in user allow-list", cookie.domain)
1008n/a return False
1009n/a if cookie.domain_specified:
1010n/a req_host, erhn = eff_request_host(request)
1011n/a domain = cookie.domain
1012n/a if self.strict_domain and (domain.count(".") >= 2):
1013n/a # XXX This should probably be compared with the Konqueror
1014n/a # (kcookiejar.cpp) and Mozilla implementations, but it's a
1015n/a # losing battle.
1016n/a i = domain.rfind(".")
1017n/a j = domain.rfind(".", 0, i)
1018n/a if j == 0: # domain like .foo.bar
1019n/a tld = domain[i+1:]
1020n/a sld = domain[j+1:i]
1021n/a if sld.lower() in ("co", "ac", "com", "edu", "org", "net",
1022n/a "gov", "mil", "int", "aero", "biz", "cat", "coop",
1023n/a "info", "jobs", "mobi", "museum", "name", "pro",
1024n/a "travel", "eu") and len(tld) == 2:
1025n/a # domain like .co.uk
1026n/a _debug(" country-code second level domain %s", domain)
1027n/a return False
1028n/a if domain.startswith("."):
1029n/a undotted_domain = domain[1:]
1030n/a else:
1031n/a undotted_domain = domain
1032n/a embedded_dots = (undotted_domain.find(".") >= 0)
1033n/a if not embedded_dots and domain != ".local":
1034n/a _debug(" non-local domain %s contains no embedded dot",
1035n/a domain)
1036n/a return False
1037n/a if cookie.version == 0:
1038n/a if (not erhn.endswith(domain) and
1039n/a (not erhn.startswith(".") and
1040n/a not ("."+erhn).endswith(domain))):
1041n/a _debug(" effective request-host %s (even with added "
1042n/a "initial dot) does not end with %s",
1043n/a erhn, domain)
1044n/a return False
1045n/a if (cookie.version > 0 or
1046n/a (self.strict_ns_domain & self.DomainRFC2965Match)):
1047n/a if not domain_match(erhn, domain):
1048n/a _debug(" effective request-host %s does not domain-match "
1049n/a "%s", erhn, domain)
1050n/a return False
1051n/a if (cookie.version > 0 or
1052n/a (self.strict_ns_domain & self.DomainStrictNoDots)):
1053n/a host_prefix = req_host[:-len(domain)]
1054n/a if (host_prefix.find(".") >= 0 and
1055n/a not IPV4_RE.search(req_host)):
1056n/a _debug(" host prefix %s for domain %s contains a dot",
1057n/a host_prefix, domain)
1058n/a return False
1059n/a return True
1060n/a
1061n/a def set_ok_port(self, cookie, request):
1062n/a if cookie.port_specified:
1063n/a req_port = request_port(request)
1064n/a if req_port is None:
1065n/a req_port = "80"
1066n/a else:
1067n/a req_port = str(req_port)
1068n/a for p in cookie.port.split(","):
1069n/a try:
1070n/a int(p)
1071n/a except ValueError:
1072n/a _debug(" bad port %s (not numeric)", p)
1073n/a return False
1074n/a if p == req_port:
1075n/a break
1076n/a else:
1077n/a _debug(" request port (%s) not found in %s",
1078n/a req_port, cookie.port)
1079n/a return False
1080n/a return True
1081n/a
1082n/a def return_ok(self, cookie, request):
1083n/a """
1084n/a If you override .return_ok(), be sure to call this method. If it
1085n/a returns false, so should your subclass (assuming your subclass wants to
1086n/a be more strict about which cookies to return).
1087n/a
1088n/a """
1089n/a # Path has already been checked by .path_return_ok(), and domain
1090n/a # blocking done by .domain_return_ok().
1091n/a _debug(" - checking cookie %s=%s", cookie.name, cookie.value)
1092n/a
1093n/a for n in "version", "verifiability", "secure", "expires", "port", "domain":
1094n/a fn_name = "return_ok_"+n
1095n/a fn = getattr(self, fn_name)
1096n/a if not fn(cookie, request):
1097n/a return False
1098n/a return True
1099n/a
1100n/a def return_ok_version(self, cookie, request):
1101n/a if cookie.version > 0 and not self.rfc2965:
1102n/a _debug(" RFC 2965 cookies are switched off")
1103n/a return False
1104n/a elif cookie.version == 0 and not self.netscape:
1105n/a _debug(" Netscape cookies are switched off")
1106n/a return False
1107n/a return True
1108n/a
1109n/a def return_ok_verifiability(self, cookie, request):
1110n/a if request.unverifiable and is_third_party(request):
1111n/a if cookie.version > 0 and self.strict_rfc2965_unverifiable:
1112n/a _debug(" third-party RFC 2965 cookie during unverifiable "
1113n/a "transaction")
1114n/a return False
1115n/a elif cookie.version == 0 and self.strict_ns_unverifiable:
1116n/a _debug(" third-party Netscape cookie during unverifiable "
1117n/a "transaction")
1118n/a return False
1119n/a return True
1120n/a
1121n/a def return_ok_secure(self, cookie, request):
1122n/a if cookie.secure and request.type != "https":
1123n/a _debug(" secure cookie with non-secure request")
1124n/a return False
1125n/a return True
1126n/a
1127n/a def return_ok_expires(self, cookie, request):
1128n/a if cookie.is_expired(self._now):
1129n/a _debug(" cookie expired")
1130n/a return False
1131n/a return True
1132n/a
1133n/a def return_ok_port(self, cookie, request):
1134n/a if cookie.port:
1135n/a req_port = request_port(request)
1136n/a if req_port is None:
1137n/a req_port = "80"
1138n/a for p in cookie.port.split(","):
1139n/a if p == req_port:
1140n/a break
1141n/a else:
1142n/a _debug(" request port %s does not match cookie port %s",
1143n/a req_port, cookie.port)
1144n/a return False
1145n/a return True
1146n/a
1147n/a def return_ok_domain(self, cookie, request):
1148n/a req_host, erhn = eff_request_host(request)
1149n/a domain = cookie.domain
1150n/a
1151n/a # strict check of non-domain cookies: Mozilla does this, MSIE5 doesn't
1152n/a if (cookie.version == 0 and
1153n/a (self.strict_ns_domain & self.DomainStrictNonDomain) and
1154n/a not cookie.domain_specified and domain != erhn):
1155n/a _debug(" cookie with unspecified domain does not string-compare "
1156n/a "equal to request domain")
1157n/a return False
1158n/a
1159n/a if cookie.version > 0 and not domain_match(erhn, domain):
1160n/a _debug(" effective request-host name %s does not domain-match "
1161n/a "RFC 2965 cookie domain %s", erhn, domain)
1162n/a return False
1163n/a if cookie.version == 0 and not ("."+erhn).endswith(domain):
1164n/a _debug(" request-host %s does not match Netscape cookie domain "
1165n/a "%s", req_host, domain)
1166n/a return False
1167n/a return True
1168n/a
1169n/a def domain_return_ok(self, domain, request):
1170n/a # Liberal check of. This is here as an optimization to avoid
1171n/a # having to load lots of MSIE cookie files unless necessary.
1172n/a req_host, erhn = eff_request_host(request)
1173n/a if not req_host.startswith("."):
1174n/a req_host = "."+req_host
1175n/a if not erhn.startswith("."):
1176n/a erhn = "."+erhn
1177n/a if not (req_host.endswith(domain) or erhn.endswith(domain)):
1178n/a #_debug(" request domain %s does not match cookie domain %s",
1179n/a # req_host, domain)
1180n/a return False
1181n/a
1182n/a if self.is_blocked(domain):
1183n/a _debug(" domain %s is in user block-list", domain)
1184n/a return False
1185n/a if self.is_not_allowed(domain):
1186n/a _debug(" domain %s is not in user allow-list", domain)
1187n/a return False
1188n/a
1189n/a return True
1190n/a
1191n/a def path_return_ok(self, path, request):
1192n/a _debug("- checking cookie path=%s", path)
1193n/a req_path = request_path(request)
1194n/a if not req_path.startswith(path):
1195n/a _debug(" %s does not path-match %s", req_path, path)
1196n/a return False
1197n/a return True
1198n/a
1199n/a
def vals_sorted_by_key(adict):
    """Return a lazy iterator over adict's values, ordered by sorted key.

    The keys are snapshotted and sorted immediately; each value is looked
    up with adict.get only when the iterator reaches it.
    """
    ordered_keys = list(adict.keys())
    ordered_keys.sort()
    return map(adict.get, ordered_keys)
1203n/a
def deepvalues(mapping):
    """Iterates over nested mapping, depth-first, in sorted order by key.

    Any value that has an ``items`` attribute is treated as a nested
    mapping and descended into; every other value is yielded as a leaf.
    """
    for item in vals_sorted_by_key(mapping):
        if hasattr(item, "items"):
            yield from deepvalues(item)
        else:
            yield item
1218n/a
1219n/a
# Sentinel passed as the default to dict.get(), so that a missing key can
# be told apart from a key whose value is None.
class Absent:
    pass
1223n/a
1224n/aclass CookieJar:
1225n/a """Collection of HTTP cookies.
1226n/a
1227n/a You may not need to know about this class: try
1228n/a urllib.request.build_opener(HTTPCookieProcessor).open(url).
1229n/a """
1230n/a
1231n/a non_word_re = re.compile(r"\W")
1232n/a quote_re = re.compile(r"([\"\\])")
1233n/a strict_domain_re = re.compile(r"\.?[^.]*")
1234n/a domain_re = re.compile(r"[^.]*")
1235n/a dots_re = re.compile(r"^\.+")
1236n/a
1237n/a magic_re = re.compile(r"^\#LWP-Cookies-(\d+\.\d+)", re.ASCII)
1238n/a
1239n/a def __init__(self, policy=None):
1240n/a if policy is None:
1241n/a policy = DefaultCookiePolicy()
1242n/a self._policy = policy
1243n/a
1244n/a self._cookies_lock = _threading.RLock()
1245n/a self._cookies = {}
1246n/a
1247n/a def set_policy(self, policy):
1248n/a self._policy = policy
1249n/a
1250n/a def _cookies_for_domain(self, domain, request):
1251n/a cookies = []
1252n/a if not self._policy.domain_return_ok(domain, request):
1253n/a return []
1254n/a _debug("Checking %s for cookies to return", domain)
1255n/a cookies_by_path = self._cookies[domain]
1256n/a for path in cookies_by_path.keys():
1257n/a if not self._policy.path_return_ok(path, request):
1258n/a continue
1259n/a cookies_by_name = cookies_by_path[path]
1260n/a for cookie in cookies_by_name.values():
1261n/a if not self._policy.return_ok(cookie, request):
1262n/a _debug(" not returning cookie")
1263n/a continue
1264n/a _debug(" it's a match")
1265n/a cookies.append(cookie)
1266n/a return cookies
1267n/a
1268n/a def _cookies_for_request(self, request):
1269n/a """Return a list of cookies to be returned to server."""
1270n/a cookies = []
1271n/a for domain in self._cookies.keys():
1272n/a cookies.extend(self._cookies_for_domain(domain, request))
1273n/a return cookies
1274n/a
1275n/a def _cookie_attrs(self, cookies):
1276n/a """Return a list of cookie-attributes to be returned to server.
1277n/a
1278n/a like ['foo="bar"; $Path="/"', ...]
1279n/a
1280n/a The $Version attribute is also added when appropriate (currently only
1281n/a once per request).
1282n/a
1283n/a """
1284n/a # add cookies in order of most specific (ie. longest) path first
1285n/a cookies.sort(key=lambda a: len(a.path), reverse=True)
1286n/a
1287n/a version_set = False
1288n/a
1289n/a attrs = []
1290n/a for cookie in cookies:
1291n/a # set version of Cookie header
1292n/a # XXX
1293n/a # What should it be if multiple matching Set-Cookie headers have
1294n/a # different versions themselves?
1295n/a # Answer: there is no answer; was supposed to be settled by
1296n/a # RFC 2965 errata, but that may never appear...
1297n/a version = cookie.version
1298n/a if not version_set:
1299n/a version_set = True
1300n/a if version > 0:
1301n/a attrs.append("$Version=%s" % version)
1302n/a
1303n/a # quote cookie value if necessary
1304n/a # (not for Netscape protocol, which already has any quotes
1305n/a # intact, due to the poorly-specified Netscape Cookie: syntax)
1306n/a if ((cookie.value is not None) and
1307n/a self.non_word_re.search(cookie.value) and version > 0):
1308n/a value = self.quote_re.sub(r"\\\1", cookie.value)
1309n/a else:
1310n/a value = cookie.value
1311n/a
1312n/a # add cookie-attributes to be returned in Cookie header
1313n/a if cookie.value is None:
1314n/a attrs.append(cookie.name)
1315n/a else:
1316n/a attrs.append("%s=%s" % (cookie.name, value))
1317n/a if version > 0:
1318n/a if cookie.path_specified:
1319n/a attrs.append('$Path="%s"' % cookie.path)
1320n/a if cookie.domain.startswith("."):
1321n/a domain = cookie.domain
1322n/a if (not cookie.domain_initial_dot and
1323n/a domain.startswith(".")):
1324n/a domain = domain[1:]
1325n/a attrs.append('$Domain="%s"' % domain)
1326n/a if cookie.port is not None:
1327n/a p = "$Port"
1328n/a if cookie.port_specified:
1329n/a p = p + ('="%s"' % cookie.port)
1330n/a attrs.append(p)
1331n/a
1332n/a return attrs
1333n/a
1334n/a def add_cookie_header(self, request):
1335n/a """Add correct Cookie: header to request (urllib.request.Request object).
1336n/a
1337n/a The Cookie2 header is also added unless policy.hide_cookie2 is true.
1338n/a
1339n/a """
1340n/a _debug("add_cookie_header")
1341n/a self._cookies_lock.acquire()
1342n/a try:
1343n/a
1344n/a self._policy._now = self._now = int(time.time())
1345n/a
1346n/a cookies = self._cookies_for_request(request)
1347n/a
1348n/a attrs = self._cookie_attrs(cookies)
1349n/a if attrs:
1350n/a if not request.has_header("Cookie"):
1351n/a request.add_unredirected_header(
1352n/a "Cookie", "; ".join(attrs))
1353n/a
1354n/a # if necessary, advertise that we know RFC 2965
1355n/a if (self._policy.rfc2965 and not self._policy.hide_cookie2 and
1356n/a not request.has_header("Cookie2")):
1357n/a for cookie in cookies:
1358n/a if cookie.version != 1:
1359n/a request.add_unredirected_header("Cookie2", '$Version="1"')
1360n/a break
1361n/a
1362n/a finally:
1363n/a self._cookies_lock.release()
1364n/a
1365n/a self.clear_expired_cookies()
1366n/a
1367n/a def _normalized_cookie_tuples(self, attrs_set):
1368n/a """Return list of tuples containing normalised cookie information.
1369n/a
1370n/a attrs_set is the list of lists of key,value pairs extracted from
1371n/a the Set-Cookie or Set-Cookie2 headers.
1372n/a
1373n/a Tuples are name, value, standard, rest, where name and value are the
1374n/a cookie name and value, standard is a dictionary containing the standard
1375n/a cookie-attributes (discard, secure, version, expires or max-age,
1376n/a domain, path and port) and rest is a dictionary containing the rest of
1377n/a the cookie-attributes.
1378n/a
1379n/a """
1380n/a cookie_tuples = []
1381n/a
1382n/a boolean_attrs = "discard", "secure"
1383n/a value_attrs = ("version",
1384n/a "expires", "max-age",
1385n/a "domain", "path", "port",
1386n/a "comment", "commenturl")
1387n/a
1388n/a for cookie_attrs in attrs_set:
1389n/a name, value = cookie_attrs[0]
1390n/a
1391n/a # Build dictionary of standard cookie-attributes (standard) and
1392n/a # dictionary of other cookie-attributes (rest).
1393n/a
1394n/a # Note: expiry time is normalised to seconds since epoch. V0
1395n/a # cookies should have the Expires cookie-attribute, and V1 cookies
1396n/a # should have Max-Age, but since V1 includes RFC 2109 cookies (and
1397n/a # since V0 cookies may be a mish-mash of Netscape and RFC 2109), we
1398n/a # accept either (but prefer Max-Age).
1399n/a max_age_set = False
1400n/a
1401n/a bad_cookie = False
1402n/a
1403n/a standard = {}
1404n/a rest = {}
1405n/a for k, v in cookie_attrs[1:]:
1406n/a lc = k.lower()
1407n/a # don't lose case distinction for unknown fields
1408n/a if lc in value_attrs or lc in boolean_attrs:
1409n/a k = lc
1410n/a if k in boolean_attrs and v is None:
1411n/a # boolean cookie-attribute is present, but has no value
1412n/a # (like "discard", rather than "port=80")
1413n/a v = True
1414n/a if k in standard:
1415n/a # only first value is significant
1416n/a continue
1417n/a if k == "domain":
1418n/a if v is None:
1419n/a _debug(" missing value for domain attribute")
1420n/a bad_cookie = True
1421n/a break
1422n/a # RFC 2965 section 3.3.3
1423n/a v = v.lower()
1424n/a if k == "expires":
1425n/a if max_age_set:
1426n/a # Prefer max-age to expires (like Mozilla)
1427n/a continue
1428n/a if v is None:
1429n/a _debug(" missing or invalid value for expires "
1430n/a "attribute: treating as session cookie")
1431n/a continue
1432n/a if k == "max-age":
1433n/a max_age_set = True
1434n/a try:
1435n/a v = int(v)
1436n/a except ValueError:
1437n/a _debug(" missing or invalid (non-numeric) value for "
1438n/a "max-age attribute")
1439n/a bad_cookie = True
1440n/a break
1441n/a # convert RFC 2965 Max-Age to seconds since epoch
1442n/a # XXX Strictly you're supposed to follow RFC 2616
1443n/a # age-calculation rules. Remember that zero Max-Age
1444n/a # is a request to discard (old and new) cookie, though.
1445n/a k = "expires"
1446n/a v = self._now + v
1447n/a if (k in value_attrs) or (k in boolean_attrs):
1448n/a if (v is None and
1449n/a k not in ("port", "comment", "commenturl")):
1450n/a _debug(" missing value for %s attribute" % k)
1451n/a bad_cookie = True
1452n/a break
1453n/a standard[k] = v
1454n/a else:
1455n/a rest[k] = v
1456n/a
1457n/a if bad_cookie:
1458n/a continue
1459n/a
1460n/a cookie_tuples.append((name, value, standard, rest))
1461n/a
1462n/a return cookie_tuples
1463n/a
1464n/a def _cookie_from_cookie_tuple(self, tup, request):
1465n/a # standard is dict of standard cookie-attributes, rest is dict of the
1466n/a # rest of them
1467n/a name, value, standard, rest = tup
1468n/a
1469n/a domain = standard.get("domain", Absent)
1470n/a path = standard.get("path", Absent)
1471n/a port = standard.get("port", Absent)
1472n/a expires = standard.get("expires", Absent)
1473n/a
1474n/a # set the easy defaults
1475n/a version = standard.get("version", None)
1476n/a if version is not None:
1477n/a try:
1478n/a version = int(version)
1479n/a except ValueError:
1480n/a return None # invalid version, ignore cookie
1481n/a secure = standard.get("secure", False)
1482n/a # (discard is also set if expires is Absent)
1483n/a discard = standard.get("discard", False)
1484n/a comment = standard.get("comment", None)
1485n/a comment_url = standard.get("commenturl", None)
1486n/a
1487n/a # set default path
1488n/a if path is not Absent and path != "":
1489n/a path_specified = True
1490n/a path = escape_path(path)
1491n/a else:
1492n/a path_specified = False
1493n/a path = request_path(request)
1494n/a i = path.rfind("/")
1495n/a if i != -1:
1496n/a if version == 0:
1497n/a # Netscape spec parts company from reality here
1498n/a path = path[:i]
1499n/a else:
1500n/a path = path[:i+1]
1501n/a if len(path) == 0: path = "/"
1502n/a
1503n/a # set default domain
1504n/a domain_specified = domain is not Absent
1505n/a # but first we have to remember whether it starts with a dot
1506n/a domain_initial_dot = False
1507n/a if domain_specified:
1508n/a domain_initial_dot = bool(domain.startswith("."))
1509n/a if domain is Absent:
1510n/a req_host, erhn = eff_request_host(request)
1511n/a domain = erhn
1512n/a elif not domain.startswith("."):
1513n/a domain = "."+domain
1514n/a
1515n/a # set default port
1516n/a port_specified = False
1517n/a if port is not Absent:
1518n/a if port is None:
1519n/a # Port attr present, but has no value: default to request port.
1520n/a # Cookie should then only be sent back on that port.
1521n/a port = request_port(request)
1522n/a else:
1523n/a port_specified = True
1524n/a port = re.sub(r"\s+", "", port)
1525n/a else:
1526n/a # No port attr present. Cookie can be sent back on any port.
1527n/a port = None
1528n/a
1529n/a # set default expires and discard
1530n/a if expires is Absent:
1531n/a expires = None
1532n/a discard = True
1533n/a elif expires <= self._now:
1534n/a # Expiry date in past is request to delete cookie. This can't be
1535n/a # in DefaultCookiePolicy, because can't delete cookies there.
1536n/a try:
1537n/a self.clear(domain, path, name)
1538n/a except KeyError:
1539n/a pass
1540n/a _debug("Expiring cookie, domain='%s', path='%s', name='%s'",
1541n/a domain, path, name)
1542n/a return None
1543n/a
1544n/a return Cookie(version,
1545n/a name, value,
1546n/a port, port_specified,
1547n/a domain, domain_specified, domain_initial_dot,
1548n/a path, path_specified,
1549n/a secure,
1550n/a expires,
1551n/a discard,
1552n/a comment,
1553n/a comment_url,
1554n/a rest)
1555n/a
1556n/a def _cookies_from_attrs_set(self, attrs_set, request):
1557n/a cookie_tuples = self._normalized_cookie_tuples(attrs_set)
1558n/a
1559n/a cookies = []
1560n/a for tup in cookie_tuples:
1561n/a cookie = self._cookie_from_cookie_tuple(tup, request)
1562n/a if cookie: cookies.append(cookie)
1563n/a return cookies
1564n/a
1565n/a def _process_rfc2109_cookies(self, cookies):
1566n/a rfc2109_as_ns = getattr(self._policy, 'rfc2109_as_netscape', None)
1567n/a if rfc2109_as_ns is None:
1568n/a rfc2109_as_ns = not self._policy.rfc2965
1569n/a for cookie in cookies:
1570n/a if cookie.version == 1:
1571n/a cookie.rfc2109 = True
1572n/a if rfc2109_as_ns:
1573n/a # treat 2109 cookies as Netscape cookies rather than
1574n/a # as RFC2965 cookies
1575n/a cookie.version = 0
1576n/a
1577n/a def make_cookies(self, response, request):
1578n/a """Return sequence of Cookie objects extracted from response object."""
1579n/a # get cookie-attributes for RFC 2965 and Netscape protocols
1580n/a headers = response.info()
1581n/a rfc2965_hdrs = headers.get_all("Set-Cookie2", [])
1582n/a ns_hdrs = headers.get_all("Set-Cookie", [])
1583n/a
1584n/a rfc2965 = self._policy.rfc2965
1585n/a netscape = self._policy.netscape
1586n/a
1587n/a if ((not rfc2965_hdrs and not ns_hdrs) or
1588n/a (not ns_hdrs and not rfc2965) or
1589n/a (not rfc2965_hdrs and not netscape) or
1590n/a (not netscape and not rfc2965)):
1591n/a return [] # no relevant cookie headers: quick exit
1592n/a
1593n/a try:
1594n/a cookies = self._cookies_from_attrs_set(
1595n/a split_header_words(rfc2965_hdrs), request)
1596n/a except Exception:
1597n/a _warn_unhandled_exception()
1598n/a cookies = []
1599n/a
1600n/a if ns_hdrs and netscape:
1601n/a try:
1602n/a # RFC 2109 and Netscape cookies
1603n/a ns_cookies = self._cookies_from_attrs_set(
1604n/a parse_ns_headers(ns_hdrs), request)
1605n/a except Exception:
1606n/a _warn_unhandled_exception()
1607n/a ns_cookies = []
1608n/a self._process_rfc2109_cookies(ns_cookies)
1609n/a
1610n/a # Look for Netscape cookies (from Set-Cookie headers) that match
1611n/a # corresponding RFC 2965 cookies (from Set-Cookie2 headers).
1612n/a # For each match, keep the RFC 2965 cookie and ignore the Netscape
1613n/a # cookie (RFC 2965 section 9.1). Actually, RFC 2109 cookies are
1614n/a # bundled in with the Netscape cookies for this purpose, which is
1615n/a # reasonable behaviour.
1616n/a if rfc2965:
1617n/a lookup = {}
1618n/a for cookie in cookies:
1619n/a lookup[(cookie.domain, cookie.path, cookie.name)] = None
1620n/a
1621n/a def no_matching_rfc2965(ns_cookie, lookup=lookup):
1622n/a key = ns_cookie.domain, ns_cookie.path, ns_cookie.name
1623n/a return key not in lookup
1624n/a ns_cookies = filter(no_matching_rfc2965, ns_cookies)
1625n/a
1626n/a if ns_cookies:
1627n/a cookies.extend(ns_cookies)
1628n/a
1629n/a return cookies
1630n/a
1631n/a def set_cookie_if_ok(self, cookie, request):
1632n/a """Set a cookie if policy says it's OK to do so."""
1633n/a self._cookies_lock.acquire()
1634n/a try:
1635n/a self._policy._now = self._now = int(time.time())
1636n/a
1637n/a if self._policy.set_ok(cookie, request):
1638n/a self.set_cookie(cookie)
1639n/a
1640n/a
1641n/a finally:
1642n/a self._cookies_lock.release()
1643n/a
1644n/a def set_cookie(self, cookie):
1645n/a """Set a cookie, without checking whether or not it should be set."""
1646n/a c = self._cookies
1647n/a self._cookies_lock.acquire()
1648n/a try:
1649n/a if cookie.domain not in c: c[cookie.domain] = {}
1650n/a c2 = c[cookie.domain]
1651n/a if cookie.path not in c2: c2[cookie.path] = {}
1652n/a c3 = c2[cookie.path]
1653n/a c3[cookie.name] = cookie
1654n/a finally:
1655n/a self._cookies_lock.release()
1656n/a
1657n/a def extract_cookies(self, response, request):
1658n/a """Extract cookies from response, where allowable given the request."""
1659n/a _debug("extract_cookies: %s", response.info())
1660n/a self._cookies_lock.acquire()
1661n/a try:
1662n/a self._policy._now = self._now = int(time.time())
1663n/a
1664n/a for cookie in self.make_cookies(response, request):
1665n/a if self._policy.set_ok(cookie, request):
1666n/a _debug(" setting cookie: %s", cookie)
1667n/a self.set_cookie(cookie)
1668n/a finally:
1669n/a self._cookies_lock.release()
1670n/a
1671n/a def clear(self, domain=None, path=None, name=None):
1672n/a """Clear some cookies.
1673n/a
1674n/a Invoking this method without arguments will clear all cookies. If
1675n/a given a single argument, only cookies belonging to that domain will be
1676n/a removed. If given two arguments, cookies belonging to the specified
1677n/a path within that domain are removed. If given three arguments, then
1678n/a the cookie with the specified name, path and domain is removed.
1679n/a
1680n/a Raises KeyError if no matching cookie exists.
1681n/a
1682n/a """
1683n/a if name is not None:
1684n/a if (domain is None) or (path is None):
1685n/a raise ValueError(
1686n/a "domain and path must be given to remove a cookie by name")
1687n/a del self._cookies[domain][path][name]
1688n/a elif path is not None:
1689n/a if domain is None:
1690n/a raise ValueError(
1691n/a "domain must be given to remove cookies by path")
1692n/a del self._cookies[domain][path]
1693n/a elif domain is not None:
1694n/a del self._cookies[domain]
1695n/a else:
1696n/a self._cookies = {}
1697n/a
1698n/a def clear_session_cookies(self):
1699n/a """Discard all session cookies.
1700n/a
1701n/a Note that the .save() method won't save session cookies anyway, unless
1702n/a you ask otherwise by passing a true ignore_discard argument.
1703n/a
1704n/a """
1705n/a self._cookies_lock.acquire()
1706n/a try:
1707n/a for cookie in self:
1708n/a if cookie.discard:
1709n/a self.clear(cookie.domain, cookie.path, cookie.name)
1710n/a finally:
1711n/a self._cookies_lock.release()
1712n/a
1713n/a def clear_expired_cookies(self):
1714n/a """Discard all expired cookies.
1715n/a
1716n/a You probably don't need to call this method: expired cookies are never
1717n/a sent back to the server (provided you're using DefaultCookiePolicy),
1718n/a this method is called by CookieJar itself every so often, and the
1719n/a .save() method won't save expired cookies anyway (unless you ask
1720n/a otherwise by passing a true ignore_expires argument).
1721n/a
1722n/a """
1723n/a self._cookies_lock.acquire()
1724n/a try:
1725n/a now = time.time()
1726n/a for cookie in self:
1727n/a if cookie.is_expired(now):
1728n/a self.clear(cookie.domain, cookie.path, cookie.name)
1729n/a finally:
1730n/a self._cookies_lock.release()
1731n/a
1732n/a def __iter__(self):
1733n/a return deepvalues(self._cookies)
1734n/a
1735n/a def __len__(self):
1736n/a """Return number of contained cookies."""
1737n/a i = 0
1738n/a for cookie in self: i = i + 1
1739n/a return i
1740n/a
1741n/a def __repr__(self):
1742n/a r = []
1743n/a for cookie in self: r.append(repr(cookie))
1744n/a return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r))
1745n/a
1746n/a def __str__(self):
1747n/a r = []
1748n/a for cookie in self: r.append(str(cookie))
1749n/a return "<%s[%s]>" % (self.__class__.__name__, ", ".join(r))
1750n/a
1751n/a
# Subclasses OSError so that code written against Python 2.4.0, which
# caught the OS-level error, keeps working.
class LoadError(OSError):
    pass
1754n/a
class FileCookieJar(CookieJar):
    """CookieJar that can be loaded from and saved to a file."""

    def __init__(self, filename=None, delayload=False, policy=None):
        """
        Cookies are NOT loaded from the named file until either the .load() or
        .revert() method is called.

        filename: default file for load/save/revert; must support
            concatenation with a str, otherwise ValueError is raised.
        delayload: stored as a bool on the instance.
        policy: passed through to CookieJar.

        """
        CookieJar.__init__(self, policy)
        if filename is not None:
            try:
                filename + ""
            except Exception:
                # Only ordinary errors mean "not string-like"; interpreter
                # exits and keyboard interrupts must propagate untouched.
                raise ValueError("filename must be string-like")
        self.filename = filename
        self.delayload = bool(delayload)

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Save cookies to a file.

        Not implemented here; subclasses provide the file format.
        """
        raise NotImplementedError()

    def load(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Load cookies from a file.

        Falls back to self.filename when *filename* is None and raises
        ValueError if neither is available.  Parsing is delegated to
        self._really_load().
        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        with open(filename) as f:
            self._really_load(f, filename, ignore_discard, ignore_expires)

    def revert(self, filename=None,
               ignore_discard=False, ignore_expires=False):
        """Clear all cookies and reload cookies from a saved file.

        Raises LoadError (or OSError) if reversion is not successful; the
        object's state will not be altered if this happens.

        """
        if filename is None:
            if self.filename is None:
                raise ValueError(MISSING_FILENAME_TEXT)
            filename = self.filename

        with self._cookies_lock:
            # Keep a copy so the jar can be restored if loading fails.
            old_state = copy.deepcopy(self._cookies)
            self._cookies = {}
            try:
                self.load(filename, ignore_discard, ignore_expires)
            except OSError:
                self._cookies = old_state
                raise
1811n/a
1812n/a
def lwp_cookie_str(cookie):
    """Return string representation of Cookie in the LWP cookie file format.

    The name/value pair comes first, followed by path and domain, the
    optional attributes and flags, any non-standard attributes in sorted
    key order, and finally the version.

    Actually, the format is extended a bit -- see module docstring.

    """
    pairs = [(cookie.name, cookie.value),
             ("path", cookie.path),
             ("domain", cookie.domain)]
    add = pairs.append

    if cookie.port is not None:
        add(("port", cookie.port))
    # Value-less flags are written with None as their value.
    if cookie.path_specified:
        add(("path_spec", None))
    if cookie.port_specified:
        add(("port_spec", None))
    if cookie.domain_initial_dot:
        add(("domain_dot", None))
    if cookie.secure:
        add(("secure", None))
    if cookie.expires:
        add(("expires", time2isoz(float(cookie.expires))))
    if cookie.discard:
        add(("discard", None))
    if cookie.comment:
        add(("comment", cookie.comment))
    if cookie.comment_url:
        add(("commenturl", cookie.comment_url))

    for key in sorted(cookie._rest.keys()):
        add((key, str(cookie._rest[key])))

    add(("version", str(cookie.version)))

    return join_header_words([pairs])
1840n/a
class LWPCookieJar(FileCookieJar):
    """
    The LWPCookieJar saves a sequence of "Set-Cookie3" lines.
    "Set-Cookie3" is the format used by the libwww-perl library, not known
    to be compatible with any browser, but which is easy to read and
    doesn't lose information about RFC 2965 cookies.

    Additional methods

    as_lwp_str(ignore_discard=True, ignore_expires=True)

    """

    def as_lwp_str(self, ignore_discard=True, ignore_expires=True):
        """Return cookies as a string of "\\n"-separated "Set-Cookie3" headers.

        Cookies marked discard are left out unless ignore_discard is true;
        cookies that are expired at the time of the call are left out
        unless ignore_expires is true.  The result ends with a newline
        whenever at least one cookie is written.

        """
        now = time.time()
        r = []
        for cookie in self:
            if not ignore_discard and cookie.discard:
                continue
            if not ignore_expires and cookie.is_expired(now):
                continue
            r.append("Set-Cookie3: %s" % lwp_cookie_str(cookie))
        return "\n".join(r+[""])

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies to filename in Set-Cookie3 format.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  ignore_discard and ignore_expires are forwarded to
        as_lwp_str().  The file is opened in "w" mode, so any existing
        content is replaced.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        with open(filename, "w") as f:
            # There really isn't an LWP Cookies 2.0 format, but this indicates
            # that there is extra information in here (domain_dot and
            # port_spec) while still being compatible with libwww-perl, I hope.
            f.write("#LWP-Cookies-2.0\n")
            f.write(self.as_lwp_str(ignore_discard, ignore_expires))

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read Set-Cookie3 lines from the open text file f into this jar.

        The first line must match self.magic_re (defined outside this
        class), otherwise LoadError is raised.  Lines that do not start
        with "Set-Cookie3:" are skipped.  filename is used only in error
        messages.  Cookies marked discard / already expired are dropped
        unless ignore_discard / ignore_expires is true.

        OSError propagates unchanged; any other exception raised while
        parsing is reported via _warn_unhandled_exception() and converted
        to LoadError.

        """
        magic = f.readline()
        if not self.magic_re.search(magic):
            msg = ("%r does not look like a Set-Cookie3 (LWP) format "
                   "file" % filename)
            raise LoadError(msg)

        now = time.time()

        header = "Set-Cookie3:"
        boolean_attrs = ("port_spec", "path_spec", "domain_dot",
                         "secure", "discard")
        value_attrs = ("version",
                       "port", "path", "domain",
                       "expires",
                       "comment", "commenturl")

        # Bound before the try block so that the LoadError message below
        # can always be formatted, even if the very first readline() fails.
        line = ""
        try:
            while 1:
                line = f.readline()
                if line == "": break
                if not line.startswith(header):
                    continue
                line = line[len(header):].strip()

                for data in split_header_words([line]):
                    name, value = data[0]
                    standard = {}
                    rest = {}
                    for k in boolean_attrs:
                        standard[k] = False
                    for k, v in data[1:]:
                        if k is not None:
                            lc = k.lower()
                        else:
                            lc = None
                        # don't lose case distinction for unknown fields
                        if (lc in value_attrs) or (lc in boolean_attrs):
                            k = lc
                        if k in boolean_attrs:
                            # a bare flag (no value) means "true"
                            if v is None: v = True
                            standard[k] = v
                        elif k in value_attrs:
                            standard[k] = v
                        else:
                            rest[k] = v

                    h = standard.get
                    expires = h("expires")
                    discard = h("discard")
                    if expires is not None:
                        expires = iso2time(expires)
                    # no usable expiry date: treat as a session cookie
                    if expires is None:
                        discard = True
                    domain = h("domain")
                    domain_specified = domain.startswith(".")
                    c = Cookie(h("version"), name, value,
                               h("port"), h("port_spec"),
                               domain, domain_specified, h("domain_dot"),
                               h("path"), h("path_spec"),
                               h("secure"),
                               expires,
                               discard,
                               h("comment"),
                               h("commenturl"),
                               rest)
                    if not ignore_discard and c.discard:
                        continue
                    if not ignore_expires and c.is_expired(now):
                        continue
                    self.set_cookie(c)
        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Set-Cookie3 format file %r: %r" %
                            (filename, line))
1959n/a
1960n/a
class MozillaCookieJar(FileCookieJar):
    """

    WARNING: you may want to backup your browser's cookies file if you use
    this class to save cookies.  I *think* it works, but there have been
    bugs in the past!

    This class differs from CookieJar only in the format it uses to save and
    load cookies to and from a file.  This class uses the Mozilla/Netscape
    `cookies.txt' format.  lynx uses this file format, too.

    Don't expect cookies saved while the browser is running to be noticed by
    the browser (in fact, Mozilla on unix will overwrite your saved cookies if
    you change them on disk while it's running; on Windows, you probably can't
    save at all while the browser is running).

    Note that the Mozilla/Netscape format will downgrade RFC2965 cookies to
    Netscape cookies on saving.

    In particular, the cookie version and port number information is lost,
    together with information about whether or not Path, Port and Discard were
    specified by the Set-Cookie2 (or Set-Cookie) header, and whether or not the
    domain as set in the HTTP header started with a dot (yes, I'm aware some
    domains in Netscape files start with a dot and some don't -- trust me, you
    really don't want to know any more about this).

    Note that though Mozilla and Netscape use the same format, they use
    slightly different headers.  The class saves cookies using the Netscape
    header by default (Mozilla can cope with that).

    """
    # Pattern the first line of a cookies file must match to be accepted.
    magic_re = re.compile("#( Netscape)? HTTP Cookie File")
    # Text written at the top of every file produced by save().
    header = """\
# Netscape HTTP Cookie File
# http://curl.haxx.se/rfc/cookie_spec.html
# This is a generated file! Do not edit.

"""

    def _really_load(self, f, filename, ignore_discard, ignore_expires):
        """Read tab-separated cookie lines from the open text file f.

        The first line must match magic_re, otherwise LoadError is raised.
        Blank lines and lines starting with "#" or "$" are skipped; every
        other line must hold exactly seven tab-separated fields (domain,
        domain-specified flag, path, secure flag, expires, name, value).
        filename is used only in error messages.  Cookies marked discard /
        already expired are dropped unless ignore_discard / ignore_expires
        is true.

        OSError propagates unchanged; any other exception raised while
        parsing is reported via _warn_unhandled_exception() and converted
        to LoadError.

        """
        now = time.time()

        magic = f.readline()
        if not self.magic_re.search(magic):
            raise LoadError(
                "%r does not look like a Netscape format cookies file" %
                filename)

        # Bound before the try block so that the LoadError message below
        # can always be formatted, even if the very first readline() fails.
        line = ""
        try:
            while 1:
                line = f.readline()
                if line == "": break

                # last field may be absent, so keep any trailing tab
                if line.endswith("\n"): line = line[:-1]

                # skip comments and blank lines XXX what is $ for?
                if (line.strip().startswith(("#", "$")) or
                    line.strip() == ""):
                    continue

                domain, domain_specified, path, secure, expires, name, value = \
                        line.split("\t")
                secure = (secure == "TRUE")
                domain_specified = (domain_specified == "TRUE")
                if name == "":
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = value
                    value = None

                initial_dot = domain.startswith(".")
                if domain_specified != initial_dot:
                    # Explicit check rather than an assert statement, so
                    # the validation still runs under "python -O".  The
                    # error is turned into LoadError by the handler below.
                    raise ValueError(
                        "domain-specified flag does not match the domain's "
                        "leading dot")

                discard = False
                if expires == "":
                    expires = None
                    discard = True

                # assume path_specified is false
                c = Cookie(0, name, value,
                           None, False,
                           domain, domain_specified, initial_dot,
                           path, False,
                           secure,
                           expires,
                           discard,
                           None,
                           None,
                           {})
                if not ignore_discard and c.discard:
                    continue
                if not ignore_expires and c.is_expired(now):
                    continue
                self.set_cookie(c)

        except OSError:
            raise
        except Exception:
            _warn_unhandled_exception()
            raise LoadError("invalid Netscape format cookies file %r: %r" %
                            (filename, line))

    def save(self, filename=None, ignore_discard=False, ignore_expires=False):
        """Write the cookies to filename in the cookies.txt format.

        filename defaults to self.filename; ValueError is raised if neither
        is set.  Cookies marked discard / already expired are left out
        unless ignore_discard / ignore_expires is true.  The file is opened
        in "w" mode, so any existing content is replaced.

        """
        if filename is None:
            if self.filename is not None: filename = self.filename
            else: raise ValueError(MISSING_FILENAME_TEXT)

        with open(filename, "w") as f:
            f.write(self.header)
            now = time.time()
            for cookie in self:
                if not ignore_discard and cookie.discard:
                    continue
                if not ignore_expires and cookie.is_expired(now):
                    continue
                secure = "TRUE" if cookie.secure else "FALSE"
                initial_dot = ("TRUE" if cookie.domain.startswith(".")
                               else "FALSE")
                if cookie.expires is not None:
                    expires = str(cookie.expires)
                else:
                    expires = ""
                if cookie.value is None:
                    # cookies.txt regards 'Set-Cookie: foo' as a cookie
                    # with no name, whereas http.cookiejar regards it as a
                    # cookie with no value.
                    name = ""
                    value = cookie.name
                else:
                    name = cookie.name
                    value = cookie.value
                f.write(
                    "\t".join([cookie.domain, initial_dot, cookie.path,
                               secure, expires, name, value])+
                    "\n")