Core Development > Code coverage > Lib/urllib/request.py

Python code coverage for Lib/urllib/request.py

#countcontent
1n/a"""An extensible library for opening URLs using a variety of protocols
2n/a
3n/aThe simplest way to use this module is to call the urlopen function,
4n/awhich accepts a string containing a URL or a Request object (described
5n/abelow). It opens the URL and returns the results as file-like
6n/aobject; the returned object has some extra methods described below.
7n/a
8n/aThe OpenerDirector manages a collection of Handler objects that do
9n/aall the actual work. Each Handler implements a particular protocol or
10n/aoption. The OpenerDirector is a composite object that invokes the
11n/aHandlers needed to open the requested URL. For example, the
12n/aHTTPHandler performs HTTP GET and POST requests and deals with
13n/anon-error returns. The HTTPRedirectHandler automatically deals with
14n/aHTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler
15n/adeals with digest authentication.
16n/a
17n/aurlopen(url, data=None) -- Basic usage is the same as original
18n/aurllib. pass the url and optionally data to post to an HTTP URL, and
19n/aget a file-like object back. One difference is that you can also pass
20n/aa Request instance instead of URL. Raises a URLError (subclass of
21n/aOSError); for HTTP errors, raises an HTTPError, which can also be
22n/atreated as a valid response.
23n/a
24n/abuild_opener -- Function that creates a new OpenerDirector instance.
25n/aWill install the default handlers. Accepts one or more Handlers as
26n/aarguments, either instances or Handler classes that it will
27n/ainstantiate. If one of the arguments is a subclass of the default
28n/ahandler, the argument will be installed instead of the default.
29n/a
30n/ainstall_opener -- Installs a new opener as the default opener.
31n/a
32n/aobjects of interest:
33n/a
34n/aOpenerDirector -- Sets up the User Agent as the Python-urllib client and manages
35n/athe Handler classes, while dealing with requests and responses.
36n/a
37n/aRequest -- An object that encapsulates the state of a request. The
38n/astate can be as simple as the URL. It can also include extra HTTP
39n/aheaders, e.g. a User-Agent.
40n/a
41n/aBaseHandler --
42n/a
43n/ainternals:
44n/aBaseHandler and parent
45n/a_call_chain conventions
46n/a
47n/aExample usage:
48n/a
49n/aimport urllib.request
50n/a
51n/a# set up authentication info
52n/aauthinfo = urllib.request.HTTPBasicAuthHandler()
53n/aauthinfo.add_password(realm='PDQ Application',
54n/a uri='https://mahler:8092/site-updates.py',
55n/a user='klem',
56n/a passwd='geheim$parole')
57n/a
58n/aproxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"})
59n/a
60n/a# build a new opener that adds authentication and caching FTP handlers
61n/aopener = urllib.request.build_opener(proxy_support, authinfo,
62n/a urllib.request.CacheFTPHandler)
63n/a
64n/a# install it
65n/aurllib.request.install_opener(opener)
66n/a
67n/af = urllib.request.urlopen('http://www.python.org/')
68n/a"""
69n/a
70n/a# XXX issues:
71n/a# If an authentication error handler that tries to perform
72n/a# authentication for some reason but fails, how should the error be
73n/a# signalled? The client needs to know the HTTP error code. But if
74n/a# the handler knows what the problem was, e.g., that it didn't know
75n/a# the hash algorithm that was requested in the challenge, it would be
76n/a# good to pass that information along to the client, too.
77n/a# ftp errors aren't handled cleanly
78n/a# check digest against correct (i.e. non-apache) implementation
79n/a
80n/a# Possible extensions:
81n/a# complex proxies XXX not sure what exactly was meant by this
82n/a# abstract factory for opener
83n/a
84n/aimport base64
85n/aimport bisect
86n/aimport email
87n/aimport hashlib
88n/aimport http.client
89n/aimport io
90n/aimport os
91n/aimport posixpath
92n/aimport re
93n/aimport socket
94n/aimport string
95n/aimport sys
96n/aimport time
97n/aimport tempfile
98n/aimport contextlib
99n/aimport warnings
100n/a
101n/a
102n/afrom urllib.error import URLError, HTTPError, ContentTooShortError
103n/afrom urllib.parse import (
104n/a urlparse, urlsplit, urljoin, unwrap, quote, unquote,
105n/a splittype, splithost, splitport, splituser, splitpasswd,
106n/a splitattr, splitquery, splitvalue, splittag, to_bytes,
107n/a unquote_to_bytes, urlunparse)
108n/afrom urllib.response import addinfourl, addclosehook
109n/a
110n/a# check for SSL
111n/atry:
112n/a import ssl
113n/aexcept ImportError:
114n/a _have_ssl = False
115n/aelse:
116n/a _have_ssl = True
117n/a
118n/a__all__ = [
119n/a # Classes
120n/a 'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
121n/a 'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
122n/a 'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
123n/a 'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
124n/a 'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
125n/a 'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
126n/a 'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
127n/a 'UnknownHandler', 'HTTPErrorProcessor',
128n/a # Functions
129n/a 'urlopen', 'install_opener', 'build_opener',
130n/a 'pathname2url', 'url2pathname', 'getproxies',
131n/a # Legacy interface
132n/a 'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
133n/a]
134n/a
135n/a# used in User-Agent header sent
136n/a__version__ = '%d.%d' % sys.version_info[:2]
137n/a
_opener = None
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    '''Open the URL url, which can be either a string or a Request object.

    *data* must be an object specifying additional data to be sent to
    the server, or None if no such data is needed.  See Request for
    details.

    urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    header in its HTTP requests.

    The optional *timeout* parameter specifies a timeout in seconds for
    blocking operations like the connection attempt (if not specified, the
    global default timeout setting will be used). This only works for HTTP,
    HTTPS and FTP connections.

    If *context* is specified, it must be a ssl.SSLContext instance describing
    the various SSL options. See HTTPSConnection for more details.

    The optional *cafile* and *capath* parameters specify a set of trusted CA
    certificates for HTTPS requests. cafile should point to a single file
    containing a bundle of CA certificates, whereas capath should point to a
    directory of hashed certificate files. More information can be found in
    ssl.SSLContext.load_verify_locations().

    The *cadefault* parameter is ignored.

    This function always returns an object which can work as a context
    manager and has methods such as

    * geturl() - return the URL of the resource retrieved, commonly used to
      determine if a redirect was followed

    * info() - return the meta-information of the page, such as headers, in the
      form of an email.message_from_string() instance (see Quick Reference to
      HTTP Headers)

    * getcode() - return the HTTP status code of the response.  Raises URLError
      on errors.

    For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    object slightly modified. In addition to the three new methods above, the
    msg attribute contains the same information as the reason attribute ---
    the reason phrase returned by the server --- instead of the response
    headers as it is specified in the documentation for HTTPResponse.

    For FTP, file, and data URLs and requests explicitly handled by legacy
    URLopener and FancyURLopener classes, this function returns a
    urllib.response.addinfourl object.

    Note that None may be returned if no handler handles the request (though
    the default installed global OpenerDirector uses UnknownHandler to ensure
    this never happens).

    In addition, if proxy settings are detected (for example, when a *_proxy
    environment variable like http_proxy is set), ProxyHandler is default
    installed and makes sure the requests are handled through the proxy.

    '''
    global _opener
    if cafile or capath or cadefault:
        # ``warnings`` is already imported at module level; the previous
        # local re-import was redundant.  Also fixed the "cpath" typo in
        # the message text: the parameter is named *capath*.
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        # Build a verifying context from the supplied CA material.
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # Lazily create and cache the module-wide default opener.
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
223n/a
def install_opener(opener):
    """Set *opener* as the module-wide default used by urlopen()."""
    global _opener
    _opener = opener
227n/a
# Temp files created by urlretrieve(); deleted again by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            # No destination given: write to a named temp file and remember
            # it so urlcleanup() can remove it later.
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8        # copy in 8 KiB blocks
            size = -1          # -1 means "total size unknown"
            read = 0
            blocknum = 0
            # Message header lookup is case-insensitive, so the mixed-case
            # access below hits the same header as the membership test.
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # Initial callback (block 0) before any data is read.
            if reporthook:
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    # The server promised more bytes than we actually received.
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
291n/a
def urlcleanup():
    """Clean up temporary files from urlretrieve calls."""
    global _opener
    for temp_path in _url_tempfiles:
        try:
            os.unlink(temp_path)
        except OSError:
            # Best effort: the file may already be gone.
            pass
    del _url_tempfiles[:]
    # Drop the cached default opener as well.
    if _opener:
        _opener = None
304n/a
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    parsed_host = urlparse(request.full_url)[1]
    if not parsed_host:
        # URL carried no authority component; fall back to the Host header.
        parsed_host = request.get_header("Host", "")

    # Strip a trailing :port, if present, then normalize case.
    return _cut_port_re.sub("", parsed_host, 1).lower()
322n/a
class Request:
    """Encapsulate one URL request: the URL itself, an optional request
    body (*data*), headers, and bookkeeping used by cookie and redirect
    handling."""

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # Note: the mutable default for ``headers`` is safe here because it
        # is only iterated, never mutated.
        self.full_url = url                 # property setter: unwraps + parses
        self.headers = {}                   # normal (redirectable) headers
        self.unredirected_hdrs = {}         # headers dropped on redirect
        self._data = None
        self.data = data                    # property setter: may clear Content-length
        self._tunnel_host = None            # set by set_proxy() for https
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            # RFC 2965 request-host, used by cookie processing.
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            # Only set when given so get_method() can fall back to its
            # GET/POST default via getattr().
            self.method = method

    @property
    def full_url(self):
        # Re-attach the fragment that the setter split off.
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        self._full_url, self.fragment = splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        # Routes through the setter so Content-length is cleared too.
        self.data = None

    def _parse(self):
        # Split the URL into type (scheme), host and selector (path part);
        # a URL without a scheme is rejected.
        self.type, rest = splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        return self.full_url

    def set_proxy(self, host, type):
        # For https, remember the real host for CONNECT tunnelling instead
        # of rewriting type/selector.
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type= type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        # True once set_proxy() has rewritten the selector.
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        # Unredirected headers first; regular headers win on key clashes.
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
432n/a
class OpenerDirector:
    """Manage a chain of handlers and use them to open URLs.

    Handlers advertise their capabilities through method names of the
    form ``<protocol>_open``, ``<protocol>_request``,
    ``<protocol>_response`` and ``<protocol>_error_<code>``;
    add_handler() discovers these by name and files each handler into
    the matching lookup table.
    """
    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        # Default headers added to every request.
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}        # {protocol: [handlers with <proto>_open]}
        self.handle_error = {}       # {protocol: {code: [handlers]}}
        self.process_response = {}   # {protocol: [handlers with <proto>_response]}
        self.process_request = {}    # {protocol: [handlers with <proto>_request]}

    def add_handler(self, handler):
        """Register *handler*, indexing it by the methods it defines."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # Split "<protocol>_<condition>" at the first underscore,
            # e.g. "http_error_404" -> protocol "http", condition "error_404".
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # kind is what follows "error_": an int status code when it
                # parses ("404" -> 404), otherwise a tag such as "default".
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # Keep each handler list ordered by handler_order (via __lt__).
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could. Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result
        # Implicit None: no handler in the chain produced a result.

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl* (a URL string or a Request) and return a response."""
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # Dispatch order: default_open, then <protocol>_open, then
        # unknown_open as a last resort.
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error handlers."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # Nothing handled the specific code; fall back to
            # http_error_default with the original arguments.
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
570n/a
571n/a# XXX probably also want an abstract factory that knows when it makes
572n/a# sense to skip a superclass in favor of a subclass and when it might
573n/a# make sense to include both
574n/a
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    # Drop every default whose job is taken over by a caller-supplied
    # handler (an instance of it, or a subclass of it).
    skip = set()
    for default in default_classes:
        for supplied in handlers:
            if isinstance(supplied, type):
                if issubclass(supplied, default):
                    skip.add(default)
            elif isinstance(supplied, default):
                skip.add(default)
    for default in skip:
        default_classes.remove(default)

    # Install the surviving defaults first, then the caller's handlers
    # (instantiating any bare classes on the way).
    for default in default_classes:
        opener.add_handler(default())

    for supplied in handlers:
        if isinstance(supplied, type):
            supplied = supplied()
        opener.add_handler(supplied)
    return opener
610n/a
class BaseHandler:
    """Common base class for handlers: parent wiring plus chain ordering."""

    # Position in the handler chain; lower values sort (and run) earlier.
    handler_order = 500

    def add_parent(self, parent):
        """Record the OpenerDirector this handler belongs to."""
        self.parent = parent

    def close(self):
        # Only exists for backwards compatibility
        pass

    def __lt__(self, other):
        """Order handlers by handler_order."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Preserve the old behavior of inserting custom classes that
            # are unaware of handler_order after the default ones.
            return True
        return self.handler_order < other_order
628n/a
629n/a
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""

    # Runs after all other response processing.
    handler_order = 1000

    def http_response(self, request, response):
        """Return 2xx responses unchanged; route others to error handlers."""
        code, msg, hdrs = response.code, response.msg, response.info()

        # According to RFC 2616, a "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted;
        # everything else goes through the opener's error machinery.
        if 200 <= code < 300:
            return response
        return self.parent.error(
            'http', request, response, code, msg, hdrs)

    https_response = http_response
646n/a
class HTTPDefaultErrorHandler(BaseHandler):
    """Last-resort handler: turn any unhandled HTTP error response into
    an HTTPError exception."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # HTTPError also behaves as a response object, so callers may
        # catch it and still read the body via ``fp``.
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
650n/a
class HTTPRedirectHandler(BaseHandler):
    """Follow 301/302/303/307 redirects, with loop detection."""

    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10

    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received. If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect. Otherwise, raise HTTPError if no-one
        else should try to handle this url. Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # Only follow the method/code combinations listed here; anything
        # else is surfaced as an HTTPError.
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case). In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space. This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # The redirected request has no body, so headers describing the
        # old body must not be carried over.
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)

    # Implementation note: To avoid the server sending us into an
    # infinite loop, the request object needs to track what URLs we
    # have already seen. Do this by adding a handler-specific
    # attribute to the Request object.
    def http_error_302(self, req, fp, code, msg, headers):
        # Some servers (incorrectly) return multiple Location headers
        # (so probably same goes for URI). Use first header.
        if "location" in headers:
            newurl = headers["location"]
        elif "uri" in headers:
            newurl = headers["uri"]
        else:
            # No redirect target supplied; let other handlers try.
            return

        # fix a possible malformed URL
        urlparts = urlparse(newurl)

        # For security reasons we don't allow redirection to anything other
        # than http, https or ftp.

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(
                newurl, code,
                "%s - Redirection to url '%s' is not allowed" % (msg, newurl),
                headers, fp)

        # A target like "http://host" gets an explicit "/" path.
        if not urlparts.path and urlparts.netloc:
            urlparts = list(urlparts)
            urlparts[2] = "/"
        newurl = urlunparse(urlparts)

        # http.client.parse_headers() decodes as ISO-8859-1. Recover the
        # original bytes and percent-encode non-ASCII bytes, and any special
        # characters such as the space.
        newurl = quote(
            newurl, encoding="iso-8859-1", safe=string.punctuation)
        # Resolve relative redirect targets against the original URL.
        newurl = urljoin(req.full_url, newurl)

        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return

        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                raise HTTPError(req.full_url, code,
                                self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1

        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()

        return self.parent.open(new, timeout=req.timeout)

    # All four 30x codes share the same implementation.
    http_error_301 = http_error_303 = http_error_307 = http_error_302

    inf_msg = "The HTTP server returned a redirect error that would " \
              "lead to an infinite loop.\n" \
              "The last 30x error message was:\n"
762n/a
763n/a
764n/adef _parse_proxy(proxy):
765n/a """Return (scheme, user, password, host/port) given a URL or an authority.
766n/a
767n/a If a URL is supplied, it must have an authority (host:port) component.
768n/a According to RFC 3986, having an authority component means the URL must
769n/a have two slashes after the scheme.
770n/a """
771n/a scheme, r_scheme = splittype(proxy)
772n/a if not r_scheme.startswith("/"):
773n/a # authority
774n/a scheme = None
775n/a authority = proxy
776n/a else:
777n/a # URL
778n/a if not r_scheme.startswith("//"):
779n/a raise ValueError("proxy URL with no authority: %r" % proxy)
780n/a # We have an authority, so for RFC 3986-compliant URLs (by ss 3.
781n/a # and 3.3.), path is empty or starts with '/'
782n/a end = r_scheme.find("/", 2)
783n/a if end == -1:
784n/a end = None
785n/a authority = r_scheme[2:end]
786n/a userinfo, hostport = splituser(authority)
787n/a if userinfo is not None:
788n/a user, password = splitpasswd(userinfo)
789n/a else:
790n/a user = password = None
791n/a return scheme, user, password, hostport
792n/a
class ProxyHandler(BaseHandler):
    """Route requests through the proxies in a {scheme: proxy_url} mapping."""
    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            # Fall back to the environment/platform proxy configuration.
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Dynamically create one <scheme>_open method per configured
        # scheme; the lambda's default arguments bind the loop's current
        # url/type values at definition time.
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            # Bare "host:port" proxy spec: assume the request's scheme.
            proxy_type = orig_type

        # Respect the platform's proxy-bypass (no_proxy) configuration.
        if req.host and proxy_bypass(req.host):
            return None

        if user and password:
            # Send proxy credentials preemptively as Basic auth.
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
834n/a
class HTTPPasswordMgr:
    """Map (realm, URI-prefix) pairs to (user, password) credentials."""

    def __init__(self):
        # {realm: {(reduced_uri, ...): (user, password)}}
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register credentials for one URI or a sequence of URIs."""
        # uri could be a single URI or a sequence
        if isinstance(uri, str):
            uri = [uri]
        realm_map = self.passwd.setdefault(realm, {})
        # Store the URI set both with and without the default port so
        # lookups succeed for either spelling.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uri)
            realm_map[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for the closest match, or (None, None)."""
        candidates = self.passwd.get(realm, {})
        for default_port in (True, False):
            reduced_authuri = self.reduce_uri(authuri, default_port)
            for uris, authinfo in candidates.items():
                if any(self.is_suburi(uri, reduced_authuri) for uri in uris):
                    return authinfo
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # Full URI: scheme + authority (+ optional path).
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # Bare host or host:port.
            scheme = None
            authority = uri
            path = '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            # Make the default port explicit for http/https.
            dport = {"http": 80,
                     "https": 443,
                     }.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree

        Both args must be URIs in reduced form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = posixpath.commonprefix((base[1], test[1]))
        return len(prefix) == len(base[1])
897n/a
898n/a
899n/aclass HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
900n/a
901n/a def find_user_password(self, realm, authuri):
902n/a user, password = HTTPPasswordMgr.find_user_password(self, realm,
903n/a authuri)
904n/a if user is not None:
905n/a return user, password
906n/a return HTTPPasswordMgr.find_user_password(self, None, authuri)
907n/a
908n/a
909n/aclass HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
910n/a
911n/a def __init__(self, *args, **kwargs):
912n/a self.authenticated = {}
913n/a super().__init__(*args, **kwargs)
914n/a
915n/a def add_password(self, realm, uri, user, passwd, is_authenticated=False):
916n/a self.update_authenticated(uri, is_authenticated)
917n/a # Add a default for prior auth requests
918n/a if realm is not None:
919n/a super().add_password(None, uri, user, passwd)
920n/a super().add_password(realm, uri, user, passwd)
921n/a
922n/a def update_authenticated(self, uri, is_authenticated=False):
923n/a # uri could be a single URI or a sequence
924n/a if isinstance(uri, str):
925n/a uri = [uri]
926n/a
927n/a for default_port in True, False:
928n/a for u in uri:
929n/a reduced_uri = self.reduce_uri(u, default_port)
930n/a self.authenticated[reduced_uri] = is_authenticated
931n/a
932n/a def is_authenticated(self, authuri):
933n/a for default_port in True, False:
934n/a reduced_authuri = self.reduce_uri(authuri, default_port)
935n/a for uri in self.authenticated:
936n/a if self.is_suburi(uri, reduced_authuri):
937n/a return self.authenticated[uri]
938n/a
939n/a
940n/aclass AbstractBasicAuthHandler:
941n/a
942n/a # XXX this allows for multiple auth-schemes, but will stupidly pick
943n/a # the last one with a realm specified.
944n/a
945n/a # allow for double- and single-quoted realm values
946n/a # (single quotes are a violation of the RFC, but appear in the wild)
947n/a rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
948n/a 'realm=(["\']?)([^"\']*)\\2', re.I)
949n/a
950n/a # XXX could pre-emptively send auth info already accepted (RFC 2617,
951n/a # end of section 2, and section 1.2 immediately after "credentials"
952n/a # production).
953n/a
954n/a def __init__(self, password_mgr=None):
955n/a if password_mgr is None:
956n/a password_mgr = HTTPPasswordMgr()
957n/a self.passwd = password_mgr
958n/a self.add_password = self.passwd.add_password
959n/a
960n/a def http_error_auth_reqed(self, authreq, host, req, headers):
961n/a # host may be an authority (without userinfo) or a URL with an
962n/a # authority
963n/a # XXX could be multiple headers
964n/a authreq = headers.get(authreq, None)
965n/a
966n/a if authreq:
967n/a scheme = authreq.split()[0]
968n/a if scheme.lower() != 'basic':
969n/a raise ValueError("AbstractBasicAuthHandler does not"
970n/a " support the following scheme: '%s'" %
971n/a scheme)
972n/a else:
973n/a mo = AbstractBasicAuthHandler.rx.search(authreq)
974n/a if mo:
975n/a scheme, quote, realm = mo.groups()
976n/a if quote not in ['"',"'"]:
977n/a warnings.warn("Basic Auth Realm was unquoted",
978n/a UserWarning, 2)
979n/a if scheme.lower() == 'basic':
980n/a return self.retry_http_basic_auth(host, req, realm)
981n/a
982n/a def retry_http_basic_auth(self, host, req, realm):
983n/a user, pw = self.passwd.find_user_password(realm, host)
984n/a if pw is not None:
985n/a raw = "%s:%s" % (user, pw)
986n/a auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
987n/a if req.get_header(self.auth_header, None) == auth:
988n/a return None
989n/a req.add_unredirected_header(self.auth_header, auth)
990n/a return self.parent.open(req, timeout=req.timeout)
991n/a else:
992n/a return None
993n/a
994n/a def http_request(self, req):
995n/a if (not hasattr(self.passwd, 'is_authenticated') or
996n/a not self.passwd.is_authenticated(req.full_url)):
997n/a return req
998n/a
999n/a if not req.has_header('Authorization'):
1000n/a user, passwd = self.passwd.find_user_password(None, req.full_url)
1001n/a credentials = '{0}:{1}'.format(user, passwd).encode()
1002n/a auth_str = base64.standard_b64encode(credentials).decode()
1003n/a req.add_unredirected_header('Authorization',
1004n/a 'Basic {}'.format(auth_str.strip()))
1005n/a return req
1006n/a
1007n/a def http_response(self, req, response):
1008n/a if hasattr(self.passwd, 'is_authenticated'):
1009n/a if 200 <= response.code < 300:
1010n/a self.passwd.update_authenticated(req.full_url, True)
1011n/a else:
1012n/a self.passwd.update_authenticated(req.full_url, False)
1013n/a return response
1014n/a
1015n/a https_request = http_request
1016n/a https_response = http_response
1017n/a
1018n/a
1019n/a
1020n/aclass HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
1021n/a
1022n/a auth_header = 'Authorization'
1023n/a
1024n/a def http_error_401(self, req, fp, code, msg, headers):
1025n/a url = req.full_url
1026n/a response = self.http_error_auth_reqed('www-authenticate',
1027n/a url, req, headers)
1028n/a return response
1029n/a
1030n/a
1031n/aclass ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
1032n/a
1033n/a auth_header = 'Proxy-authorization'
1034n/a
1035n/a def http_error_407(self, req, fp, code, msg, headers):
1036n/a # http_error_auth_reqed requires that there is no userinfo component in
1037n/a # authority. Assume there isn't one, since urllib.request does not (and
1038n/a # should not, RFC 3986 s. 3.2.1) support requests for URLs containing
1039n/a # userinfo.
1040n/a authority = req.host
1041n/a response = self.http_error_auth_reqed('proxy-authenticate',
1042n/a authority, req, headers)
1043n/a return response
1044n/a
1045n/a
1046n/a# Return n random bytes.
1047n/a_randombytes = os.urandom
1048n/a
1049n/a
1050n/aclass AbstractDigestAuthHandler:
1051n/a # Digest authentication is specified in RFC 2617.
1052n/a
1053n/a # XXX The client does not inspect the Authentication-Info header
1054n/a # in a successful response.
1055n/a
1056n/a # XXX It should be possible to test this implementation against
1057n/a # a mock server that just generates a static set of challenges.
1058n/a
1059n/a # XXX qop="auth-int" supports is shaky
1060n/a
1061n/a def __init__(self, passwd=None):
1062n/a if passwd is None:
1063n/a passwd = HTTPPasswordMgr()
1064n/a self.passwd = passwd
1065n/a self.add_password = self.passwd.add_password
1066n/a self.retried = 0
1067n/a self.nonce_count = 0
1068n/a self.last_nonce = None
1069n/a
1070n/a def reset_retry_count(self):
1071n/a self.retried = 0
1072n/a
1073n/a def http_error_auth_reqed(self, auth_header, host, req, headers):
1074n/a authreq = headers.get(auth_header, None)
1075n/a if self.retried > 5:
1076n/a # Don't fail endlessly - if we failed once, we'll probably
1077n/a # fail a second time. Hm. Unless the Password Manager is
1078n/a # prompting for the information. Crap. This isn't great
1079n/a # but it's better than the current 'repeat until recursion
1080n/a # depth exceeded' approach <wink>
1081n/a raise HTTPError(req.full_url, 401, "digest auth failed",
1082n/a headers, None)
1083n/a else:
1084n/a self.retried += 1
1085n/a if authreq:
1086n/a scheme = authreq.split()[0]
1087n/a if scheme.lower() == 'digest':
1088n/a return self.retry_http_digest_auth(req, authreq)
1089n/a elif scheme.lower() != 'basic':
1090n/a raise ValueError("AbstractDigestAuthHandler does not support"
1091n/a " the following scheme: '%s'" % scheme)
1092n/a
1093n/a def retry_http_digest_auth(self, req, auth):
1094n/a token, challenge = auth.split(' ', 1)
1095n/a chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
1096n/a auth = self.get_authorization(req, chal)
1097n/a if auth:
1098n/a auth_val = 'Digest %s' % auth
1099n/a if req.headers.get(self.auth_header, None) == auth_val:
1100n/a return None
1101n/a req.add_unredirected_header(self.auth_header, auth_val)
1102n/a resp = self.parent.open(req, timeout=req.timeout)
1103n/a return resp
1104n/a
1105n/a def get_cnonce(self, nonce):
1106n/a # The cnonce-value is an opaque
1107n/a # quoted string value provided by the client and used by both client
1108n/a # and server to avoid chosen plaintext attacks, to provide mutual
1109n/a # authentication, and to provide some message integrity protection.
1110n/a # This isn't a fabulous effort, but it's probably Good Enough.
1111n/a s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
1112n/a b = s.encode("ascii") + _randombytes(8)
1113n/a dig = hashlib.sha1(b).hexdigest()
1114n/a return dig[:16]
1115n/a
1116n/a def get_authorization(self, req, chal):
1117n/a try:
1118n/a realm = chal['realm']
1119n/a nonce = chal['nonce']
1120n/a qop = chal.get('qop')
1121n/a algorithm = chal.get('algorithm', 'MD5')
1122n/a # mod_digest doesn't send an opaque, even though it isn't
1123n/a # supposed to be optional
1124n/a opaque = chal.get('opaque', None)
1125n/a except KeyError:
1126n/a return None
1127n/a
1128n/a H, KD = self.get_algorithm_impls(algorithm)
1129n/a if H is None:
1130n/a return None
1131n/a
1132n/a user, pw = self.passwd.find_user_password(realm, req.full_url)
1133n/a if user is None:
1134n/a return None
1135n/a
1136n/a # XXX not implemented yet
1137n/a if req.data is not None:
1138n/a entdig = self.get_entity_digest(req.data, chal)
1139n/a else:
1140n/a entdig = None
1141n/a
1142n/a A1 = "%s:%s:%s" % (user, realm, pw)
1143n/a A2 = "%s:%s" % (req.get_method(),
1144n/a # XXX selector: what about proxies and full urls
1145n/a req.selector)
1146n/a if qop == 'auth':
1147n/a if nonce == self.last_nonce:
1148n/a self.nonce_count += 1
1149n/a else:
1150n/a self.nonce_count = 1
1151n/a self.last_nonce = nonce
1152n/a ncvalue = '%08x' % self.nonce_count
1153n/a cnonce = self.get_cnonce(nonce)
1154n/a noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
1155n/a respdig = KD(H(A1), noncebit)
1156n/a elif qop is None:
1157n/a respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
1158n/a else:
1159n/a # XXX handle auth-int.
1160n/a raise URLError("qop '%s' is not supported." % qop)
1161n/a
1162n/a # XXX should the partial digests be encoded too?
1163n/a
1164n/a base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
1165n/a 'response="%s"' % (user, realm, nonce, req.selector,
1166n/a respdig)
1167n/a if opaque:
1168n/a base += ', opaque="%s"' % opaque
1169n/a if entdig:
1170n/a base += ', digest="%s"' % entdig
1171n/a base += ', algorithm="%s"' % algorithm
1172n/a if qop:
1173n/a base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
1174n/a return base
1175n/a
1176n/a def get_algorithm_impls(self, algorithm):
1177n/a # lambdas assume digest modules are imported at the top level
1178n/a if algorithm == 'MD5':
1179n/a H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
1180n/a elif algorithm == 'SHA':
1181n/a H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
1182n/a # XXX MD5-sess
1183n/a else:
1184n/a raise ValueError("Unsupported digest authentication "
1185n/a "algorithm %r" % algorithm)
1186n/a KD = lambda s, d: H("%s:%s" % (s, d))
1187n/a return H, KD
1188n/a
1189n/a def get_entity_digest(self, data, chal):
1190n/a # XXX not implemented yet
1191n/a return None
1192n/a
1193n/a
1194n/aclass HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
1195n/a """An authentication protocol defined by RFC 2069
1196n/a
1197n/a Digest authentication improves on basic authentication because it
1198n/a does not transmit passwords in the clear.
1199n/a """
1200n/a
1201n/a auth_header = 'Authorization'
1202n/a handler_order = 490 # before Basic auth
1203n/a
1204n/a def http_error_401(self, req, fp, code, msg, headers):
1205n/a host = urlparse(req.full_url)[1]
1206n/a retry = self.http_error_auth_reqed('www-authenticate',
1207n/a host, req, headers)
1208n/a self.reset_retry_count()
1209n/a return retry
1210n/a
1211n/a
1212n/aclass ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
1213n/a
1214n/a auth_header = 'Proxy-Authorization'
1215n/a handler_order = 490 # before Basic auth
1216n/a
1217n/a def http_error_407(self, req, fp, code, msg, headers):
1218n/a host = req.host
1219n/a retry = self.http_error_auth_reqed('proxy-authenticate',
1220n/a host, req, headers)
1221n/a self.reset_retry_count()
1222n/a return retry
1223n/a
1224n/aclass AbstractHTTPHandler(BaseHandler):
1225n/a
1226n/a def __init__(self, debuglevel=0):
1227n/a self._debuglevel = debuglevel
1228n/a
1229n/a def set_http_debuglevel(self, level):
1230n/a self._debuglevel = level
1231n/a
1232n/a def _get_content_length(self, request):
1233n/a return http.client.HTTPConnection._get_content_length(
1234n/a request.data,
1235n/a request.get_method())
1236n/a
1237n/a def do_request_(self, request):
1238n/a host = request.host
1239n/a if not host:
1240n/a raise URLError('no host given')
1241n/a
1242n/a if request.data is not None: # POST
1243n/a data = request.data
1244n/a if isinstance(data, str):
1245n/a msg = "POST data should be bytes, an iterable of bytes, " \
1246n/a "or a file object. It cannot be of type str."
1247n/a raise TypeError(msg)
1248n/a if not request.has_header('Content-type'):
1249n/a request.add_unredirected_header(
1250n/a 'Content-type',
1251n/a 'application/x-www-form-urlencoded')
1252n/a if (not request.has_header('Content-length')
1253n/a and not request.has_header('Transfer-encoding')):
1254n/a content_length = self._get_content_length(request)
1255n/a if content_length is not None:
1256n/a request.add_unredirected_header(
1257n/a 'Content-length', str(content_length))
1258n/a else:
1259n/a request.add_unredirected_header(
1260n/a 'Transfer-encoding', 'chunked')
1261n/a
1262n/a sel_host = host
1263n/a if request.has_proxy():
1264n/a scheme, sel = splittype(request.selector)
1265n/a sel_host, sel_path = splithost(sel)
1266n/a if not request.has_header('Host'):
1267n/a request.add_unredirected_header('Host', sel_host)
1268n/a for name, value in self.parent.addheaders:
1269n/a name = name.capitalize()
1270n/a if not request.has_header(name):
1271n/a request.add_unredirected_header(name, value)
1272n/a
1273n/a return request
1274n/a
1275n/a def do_open(self, http_class, req, **http_conn_args):
1276n/a """Return an HTTPResponse object for the request, using http_class.
1277n/a
1278n/a http_class must implement the HTTPConnection API from http.client.
1279n/a """
1280n/a host = req.host
1281n/a if not host:
1282n/a raise URLError('no host given')
1283n/a
1284n/a # will parse host:port
1285n/a h = http_class(host, timeout=req.timeout, **http_conn_args)
1286n/a h.set_debuglevel(self._debuglevel)
1287n/a
1288n/a headers = dict(req.unredirected_hdrs)
1289n/a headers.update(dict((k, v) for k, v in req.headers.items()
1290n/a if k not in headers))
1291n/a
1292n/a # TODO(jhylton): Should this be redesigned to handle
1293n/a # persistent connections?
1294n/a
1295n/a # We want to make an HTTP/1.1 request, but the addinfourl
1296n/a # class isn't prepared to deal with a persistent connection.
1297n/a # It will try to read all remaining data from the socket,
1298n/a # which will block while the server waits for the next request.
1299n/a # So make sure the connection gets closed after the (only)
1300n/a # request.
1301n/a headers["Connection"] = "close"
1302n/a headers = dict((name.title(), val) for name, val in headers.items())
1303n/a
1304n/a if req._tunnel_host:
1305n/a tunnel_headers = {}
1306n/a proxy_auth_hdr = "Proxy-Authorization"
1307n/a if proxy_auth_hdr in headers:
1308n/a tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
1309n/a # Proxy-Authorization should not be sent to origin
1310n/a # server.
1311n/a del headers[proxy_auth_hdr]
1312n/a h.set_tunnel(req._tunnel_host, headers=tunnel_headers)
1313n/a
1314n/a try:
1315n/a try:
1316n/a h.request(req.get_method(), req.selector, req.data, headers,
1317n/a encode_chunked=req.has_header('Transfer-encoding'))
1318n/a except OSError as err: # timeout error
1319n/a raise URLError(err)
1320n/a r = h.getresponse()
1321n/a except:
1322n/a h.close()
1323n/a raise
1324n/a
1325n/a # If the server does not send us a 'Connection: close' header,
1326n/a # HTTPConnection assumes the socket should be left open. Manually
1327n/a # mark the socket to be closed when this response object goes away.
1328n/a if h.sock:
1329n/a h.sock.close()
1330n/a h.sock = None
1331n/a
1332n/a r.url = req.get_full_url()
1333n/a # This line replaces the .msg attribute of the HTTPResponse
1334n/a # with .headers, because urllib clients expect the response to
1335n/a # have the reason in .msg. It would be good to mark this
1336n/a # attribute is deprecated and get then to use info() or
1337n/a # .headers.
1338n/a r.msg = r.reason
1339n/a return r
1340n/a
1341n/a
1342n/aclass HTTPHandler(AbstractHTTPHandler):
1343n/a
1344n/a def http_open(self, req):
1345n/a return self.do_open(http.client.HTTPConnection, req)
1346n/a
1347n/a http_request = AbstractHTTPHandler.do_request_
1348n/a
1349n/aif hasattr(http.client, 'HTTPSConnection'):
1350n/a
1351n/a class HTTPSHandler(AbstractHTTPHandler):
1352n/a
1353n/a def __init__(self, debuglevel=0, context=None, check_hostname=None):
1354n/a AbstractHTTPHandler.__init__(self, debuglevel)
1355n/a self._context = context
1356n/a self._check_hostname = check_hostname
1357n/a
1358n/a def https_open(self, req):
1359n/a return self.do_open(http.client.HTTPSConnection, req,
1360n/a context=self._context, check_hostname=self._check_hostname)
1361n/a
1362n/a https_request = AbstractHTTPHandler.do_request_
1363n/a
1364n/a __all__.append('HTTPSHandler')
1365n/a
1366n/aclass HTTPCookieProcessor(BaseHandler):
1367n/a def __init__(self, cookiejar=None):
1368n/a import http.cookiejar
1369n/a if cookiejar is None:
1370n/a cookiejar = http.cookiejar.CookieJar()
1371n/a self.cookiejar = cookiejar
1372n/a
1373n/a def http_request(self, request):
1374n/a self.cookiejar.add_cookie_header(request)
1375n/a return request
1376n/a
1377n/a def http_response(self, request, response):
1378n/a self.cookiejar.extract_cookies(response, request)
1379n/a return response
1380n/a
1381n/a https_request = http_request
1382n/a https_response = http_response
1383n/a
1384n/aclass UnknownHandler(BaseHandler):
1385n/a def unknown_open(self, req):
1386n/a type = req.type
1387n/a raise URLError('unknown url type: %s' % type)
1388n/a
1389n/adef parse_keqv_list(l):
1390n/a """Parse list of key=value strings where keys are not duplicated."""
1391n/a parsed = {}
1392n/a for elt in l:
1393n/a k, v = elt.split('=', 1)
1394n/a if v[0] == '"' and v[-1] == '"':
1395n/a v = v[1:-1]
1396n/a parsed[k] = v
1397n/a return parsed
1398n/a
1399n/adef parse_http_list(s):
1400n/a """Parse lists as described by RFC 2068 Section 2.
1401n/a
1402n/a In particular, parse comma-separated lists where the elements of
1403n/a the list may include quoted-strings. A quoted-string could
1404n/a contain a comma. A non-quoted string could have quotes in the
1405n/a middle. Neither commas nor quotes count if they are escaped.
1406n/a Only double-quotes count, not single-quotes.
1407n/a """
1408n/a res = []
1409n/a part = ''
1410n/a
1411n/a escape = quote = False
1412n/a for cur in s:
1413n/a if escape:
1414n/a part += cur
1415n/a escape = False
1416n/a continue
1417n/a if quote:
1418n/a if cur == '\\':
1419n/a escape = True
1420n/a continue
1421n/a elif cur == '"':
1422n/a quote = False
1423n/a part += cur
1424n/a continue
1425n/a
1426n/a if cur == ',':
1427n/a res.append(part)
1428n/a part = ''
1429n/a continue
1430n/a
1431n/a if cur == '"':
1432n/a quote = True
1433n/a
1434n/a part += cur
1435n/a
1436n/a # append last part
1437n/a if part:
1438n/a res.append(part)
1439n/a
1440n/a return [part.strip() for part in res]
1441n/a
1442n/aclass FileHandler(BaseHandler):
1443n/a # Use local file or FTP depending on form of URL
1444n/a def file_open(self, req):
1445n/a url = req.selector
1446n/a if url[:2] == '//' and url[2:3] != '/' and (req.host and
1447n/a req.host != 'localhost'):
1448n/a if not req.host in self.get_names():
1449n/a raise URLError("file:// scheme is supported only on localhost")
1450n/a else:
1451n/a return self.open_local_file(req)
1452n/a
1453n/a # names for the localhost
1454n/a names = None
1455n/a def get_names(self):
1456n/a if FileHandler.names is None:
1457n/a try:
1458n/a FileHandler.names = tuple(
1459n/a socket.gethostbyname_ex('localhost')[2] +
1460n/a socket.gethostbyname_ex(socket.gethostname())[2])
1461n/a except socket.gaierror:
1462n/a FileHandler.names = (socket.gethostbyname('localhost'),)
1463n/a return FileHandler.names
1464n/a
1465n/a # not entirely sure what the rules are here
1466n/a def open_local_file(self, req):
1467n/a import email.utils
1468n/a import mimetypes
1469n/a host = req.host
1470n/a filename = req.selector
1471n/a localfile = url2pathname(filename)
1472n/a try:
1473n/a stats = os.stat(localfile)
1474n/a size = stats.st_size
1475n/a modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
1476n/a mtype = mimetypes.guess_type(filename)[0]
1477n/a headers = email.message_from_string(
1478n/a 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' %
1479n/a (mtype or 'text/plain', size, modified))
1480n/a if host:
1481n/a host, port = splitport(host)
1482n/a if not host or \
1483n/a (not port and _safe_gethostbyname(host) in self.get_names()):
1484n/a if host:
1485n/a origurl = 'file://' + host + filename
1486n/a else:
1487n/a origurl = 'file://' + filename
1488n/a return addinfourl(open(localfile, 'rb'), headers, origurl)
1489n/a except OSError as exp:
1490n/a # users shouldn't expect OSErrors coming from urlopen()
1491n/a raise URLError(exp)
1492n/a raise URLError('file not on local host')
1493n/a
1494n/adef _safe_gethostbyname(host):
1495n/a try:
1496n/a return socket.gethostbyname(host)
1497n/a except socket.gaierror:
1498n/a return None
1499n/a
1500n/aclass FTPHandler(BaseHandler):
1501n/a def ftp_open(self, req):
1502n/a import ftplib
1503n/a import mimetypes
1504n/a host = req.host
1505n/a if not host:
1506n/a raise URLError('ftp error: no host given')
1507n/a host, port = splitport(host)
1508n/a if port is None:
1509n/a port = ftplib.FTP_PORT
1510n/a else:
1511n/a port = int(port)
1512n/a
1513n/a # username/password handling
1514n/a user, host = splituser(host)
1515n/a if user:
1516n/a user, passwd = splitpasswd(user)
1517n/a else:
1518n/a passwd = None
1519n/a host = unquote(host)
1520n/a user = user or ''
1521n/a passwd = passwd or ''
1522n/a
1523n/a try:
1524n/a host = socket.gethostbyname(host)
1525n/a except OSError as msg:
1526n/a raise URLError(msg)
1527n/a path, attrs = splitattr(req.selector)
1528n/a dirs = path.split('/')
1529n/a dirs = list(map(unquote, dirs))
1530n/a dirs, file = dirs[:-1], dirs[-1]
1531n/a if dirs and not dirs[0]:
1532n/a dirs = dirs[1:]
1533n/a try:
1534n/a fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout)
1535n/a type = file and 'I' or 'D'
1536n/a for attr in attrs:
1537n/a attr, value = splitvalue(attr)
1538n/a if attr.lower() == 'type' and \
1539n/a value in ('a', 'A', 'i', 'I', 'd', 'D'):
1540n/a type = value.upper()
1541n/a fp, retrlen = fw.retrfile(file, type)
1542n/a headers = ""
1543n/a mtype = mimetypes.guess_type(req.full_url)[0]
1544n/a if mtype:
1545n/a headers += "Content-type: %s\n" % mtype
1546n/a if retrlen is not None and retrlen >= 0:
1547n/a headers += "Content-length: %d\n" % retrlen
1548n/a headers = email.message_from_string(headers)
1549n/a return addinfourl(fp, headers, req.full_url)
1550n/a except ftplib.all_errors as exp:
1551n/a exc = URLError('ftp error: %r' % exp)
1552n/a raise exc.with_traceback(sys.exc_info()[2])
1553n/a
1554n/a def connect_ftp(self, user, passwd, host, port, dirs, timeout):
1555n/a return ftpwrapper(user, passwd, host, port, dirs, timeout,
1556n/a persistent=False)
1557n/a
1558n/aclass CacheFTPHandler(FTPHandler):
1559n/a # XXX would be nice to have pluggable cache strategies
1560n/a # XXX this stuff is definitely not thread safe
1561n/a def __init__(self):
1562n/a self.cache = {}
1563n/a self.timeout = {}
1564n/a self.soonest = 0
1565n/a self.delay = 60
1566n/a self.max_conns = 16
1567n/a
1568n/a def setTimeout(self, t):
1569n/a self.delay = t
1570n/a
1571n/a def setMaxConns(self, m):
1572n/a self.max_conns = m
1573n/a
1574n/a def connect_ftp(self, user, passwd, host, port, dirs, timeout):
1575n/a key = user, host, port, '/'.join(dirs), timeout
1576n/a if key in self.cache:
1577n/a self.timeout[key] = time.time() + self.delay
1578n/a else:
1579n/a self.cache[key] = ftpwrapper(user, passwd, host, port,
1580n/a dirs, timeout)
1581n/a self.timeout[key] = time.time() + self.delay
1582n/a self.check_cache()
1583n/a return self.cache[key]
1584n/a
1585n/a def check_cache(self):
1586n/a # first check for old ones
1587n/a t = time.time()
1588n/a if self.soonest <= t:
1589n/a for k, v in list(self.timeout.items()):
1590n/a if v < t:
1591n/a self.cache[k].close()
1592n/a del self.cache[k]
1593n/a del self.timeout[k]
1594n/a self.soonest = min(list(self.timeout.values()))
1595n/a
1596n/a # then check the size
1597n/a if len(self.cache) == self.max_conns:
1598n/a for k, v in list(self.timeout.items()):
1599n/a if v == self.soonest:
1600n/a del self.cache[k]
1601n/a del self.timeout[k]
1602n/a break
1603n/a self.soonest = min(list(self.timeout.values()))
1604n/a
1605n/a def clear_cache(self):
1606n/a for conn in self.cache.values():
1607n/a conn.close()
1608n/a self.cache.clear()
1609n/a self.timeout.clear()
1610n/a
1611n/aclass DataHandler(BaseHandler):
1612n/a def data_open(self, req):
1613n/a # data URLs as specified in RFC 2397.
1614n/a #
1615n/a # ignores POSTed data
1616n/a #
1617n/a # syntax:
1618n/a # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
1619n/a # mediatype := [ type "/" subtype ] *( ";" parameter )
1620n/a # data := *urlchar
1621n/a # parameter := attribute "=" value
1622n/a url = req.full_url
1623n/a
1624n/a scheme, data = url.split(":",1)
1625n/a mediatype, data = data.split(",",1)
1626n/a
1627n/a # even base64 encoded data URLs might be quoted so unquote in any case:
1628n/a data = unquote_to_bytes(data)
1629n/a if mediatype.endswith(";base64"):
1630n/a data = base64.decodebytes(data)
1631n/a mediatype = mediatype[:-7]
1632n/a
1633n/a if not mediatype:
1634n/a mediatype = "text/plain;charset=US-ASCII"
1635n/a
1636n/a headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" %
1637n/a (mediatype, len(data)))
1638n/a
1639n/a return addinfourl(io.BytesIO(data), headers, url)
1640n/a
1641n/a
1642n/a# Code move from the old urllib module
1643n/a
1644n/aMAXFTPCACHE = 10 # Trim the ftp cache beyond this size
1645n/a
1646n/a# Helper for non-unix systems
1647n/aif os.name == 'nt':
1648n/a from nturl2path import url2pathname, pathname2url
1649n/aelse:
1650n/a def url2pathname(pathname):
1651n/a """OS-specific conversion from a relative URL of the 'file' scheme
1652n/a to a file system path; not recommended for general use."""
1653n/a return unquote(pathname)
1654n/a
1655n/a def pathname2url(pathname):
1656n/a """OS-specific conversion from a file system path to a relative URL
1657n/a of the 'file' scheme; not recommended for general use."""
1658n/a return quote(pathname)
1659n/a
1660n/a# This really consists of two pieces:
1661n/a# (1) a class which handles opening of all sorts of URLs
1662n/a# (plus assorted utilities etc.)
1663n/a# (2) a set of functions for parsing URLs
1664n/a# XXX Should these be separated out into different modules?
1665n/a
1666n/a
1667n/aftpcache = {}
1668n/aclass URLopener:
1669n/a """Class to open URLs.
1670n/a This is a class rather than just a subroutine because we may need
1671n/a more than one set of global protocol-specific options.
1672n/a Note -- this is a base class for those who don't want the
1673n/a automatic handling of errors type 302 (relocated) and 401
1674n/a (authorization needed)."""
1675n/a
1676n/a __tempfiles = None
1677n/a
1678n/a version = "Python-urllib/%s" % __version__
1679n/a
1680n/a # Constructor
1681n/a def __init__(self, proxies=None, **x509):
1682n/a msg = "%(class)s style of invoking requests is deprecated. " \
1683n/a "Use newer urlopen functions/methods" % {'class': self.__class__.__name__}
1684n/a warnings.warn(msg, DeprecationWarning, stacklevel=3)
1685n/a if proxies is None:
1686n/a proxies = getproxies()
1687n/a assert hasattr(proxies, 'keys'), "proxies must be a mapping"
1688n/a self.proxies = proxies
1689n/a self.key_file = x509.get('key_file')
1690n/a self.cert_file = x509.get('cert_file')
1691n/a self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')]
1692n/a self.__tempfiles = []
1693n/a self.__unlink = os.unlink # See cleanup()
1694n/a self.tempcache = None
1695n/a # Undocumented feature: if you assign {} to tempcache,
1696n/a # it is used to cache files retrieved with
1697n/a # self.retrieve(). This is not enabled by default
1698n/a # since it does not work for changing documents (and I
1699n/a # haven't got the logic to check expiration headers
1700n/a # yet).
1701n/a self.ftpcache = ftpcache
1702n/a # Undocumented feature: you can use a different
1703n/a # ftp cache by assigning to the .ftpcache member;
1704n/a # in case you want logically independent URL openers
1705n/a # XXX This is not threadsafe. Bah.
1706n/a
1707n/a def __del__(self):
1708n/a self.close()
1709n/a
1710n/a def close(self):
1711n/a self.cleanup()
1712n/a
1713n/a def cleanup(self):
1714n/a # This code sometimes runs when the rest of this module
1715n/a # has already been deleted, so it can't use any globals
1716n/a # or import anything.
1717n/a if self.__tempfiles:
1718n/a for file in self.__tempfiles:
1719n/a try:
1720n/a self.__unlink(file)
1721n/a except OSError:
1722n/a pass
1723n/a del self.__tempfiles[:]
1724n/a if self.tempcache:
1725n/a self.tempcache.clear()
1726n/a
1727n/a def addheader(self, *args):
1728n/a """Add a header to be used by the HTTP interface only
1729n/a e.g. u.addheader('Accept', 'sound/basic')"""
1730n/a self.addheaders.append(args)
1731n/a
1732n/a # External interface
1733n/a def open(self, fullurl, data=None):
1734n/a """Use URLopener().open(file) instead of open(file, 'r')."""
1735n/a fullurl = unwrap(to_bytes(fullurl))
1736n/a fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
1737n/a if self.tempcache and fullurl in self.tempcache:
1738n/a filename, headers = self.tempcache[fullurl]
1739n/a fp = open(filename, 'rb')
1740n/a return addinfourl(fp, headers, fullurl)
1741n/a urltype, url = splittype(fullurl)
1742n/a if not urltype:
1743n/a urltype = 'file'
1744n/a if urltype in self.proxies:
1745n/a proxy = self.proxies[urltype]
1746n/a urltype, proxyhost = splittype(proxy)
1747n/a host, selector = splithost(proxyhost)
1748n/a url = (host, fullurl) # Signal special case to open_*()
1749n/a else:
1750n/a proxy = None
1751n/a name = 'open_' + urltype
1752n/a self.type = urltype
1753n/a name = name.replace('-', '_')
1754n/a if not hasattr(self, name):
1755n/a if proxy:
1756n/a return self.open_unknown_proxy(proxy, fullurl, data)
1757n/a else:
1758n/a return self.open_unknown(fullurl, data)
1759n/a try:
1760n/a if data is None:
1761n/a return getattr(self, name)(url)
1762n/a else:
1763n/a return getattr(self, name)(url, data)
1764n/a except (HTTPError, URLError):
1765n/a raise
1766n/a except OSError as msg:
1767n/a raise OSError('socket error', msg).with_traceback(sys.exc_info()[2])
1768n/a
1769n/a def open_unknown(self, fullurl, data=None):
1770n/a """Overridable interface to open unknown URL type."""
1771n/a type, url = splittype(fullurl)
1772n/a raise OSError('url error', 'unknown url type', type)
1773n/a
1774n/a def open_unknown_proxy(self, proxy, fullurl, data=None):
1775n/a """Overridable interface to open unknown URL type."""
1776n/a type, url = splittype(fullurl)
1777n/a raise OSError('url error', 'invalid proxy for %s' % type, proxy)
1778n/a
1779n/a # External interface
1780n/a def retrieve(self, url, filename=None, reporthook=None, data=None):
1781n/a """retrieve(url) returns (filename, headers) for a local object
1782n/a or (tempfilename, headers) for a remote object."""
1783n/a url = unwrap(to_bytes(url))
1784n/a if self.tempcache and url in self.tempcache:
1785n/a return self.tempcache[url]
1786n/a type, url1 = splittype(url)
1787n/a if filename is None and (not type or type == 'file'):
1788n/a try:
1789n/a fp = self.open_local_file(url1)
1790n/a hdrs = fp.info()
1791n/a fp.close()
1792n/a return url2pathname(splithost(url1)[1]), hdrs
1793n/a except OSError as msg:
1794n/a pass
1795n/a fp = self.open(url, data)
1796n/a try:
1797n/a headers = fp.info()
1798n/a if filename:
1799n/a tfp = open(filename, 'wb')
1800n/a else:
1801n/a import tempfile
1802n/a garbage, path = splittype(url)
1803n/a garbage, path = splithost(path or "")
1804n/a path, garbage = splitquery(path or "")
1805n/a path, garbage = splitattr(path or "")
1806n/a suffix = os.path.splitext(path)[1]
1807n/a (fd, filename) = tempfile.mkstemp(suffix)
1808n/a self.__tempfiles.append(filename)
1809n/a tfp = os.fdopen(fd, 'wb')
1810n/a try:
1811n/a result = filename, headers
1812n/a if self.tempcache is not None:
1813n/a self.tempcache[url] = result
1814n/a bs = 1024*8
1815n/a size = -1
1816n/a read = 0
1817n/a blocknum = 0
1818n/a if "content-length" in headers:
1819n/a size = int(headers["Content-Length"])
1820n/a if reporthook:
1821n/a reporthook(blocknum, bs, size)
1822n/a while 1:
1823n/a block = fp.read(bs)
1824n/a if not block:
1825n/a break
1826n/a read += len(block)
1827n/a tfp.write(block)
1828n/a blocknum += 1
1829n/a if reporthook:
1830n/a reporthook(blocknum, bs, size)
1831n/a finally:
1832n/a tfp.close()
1833n/a finally:
1834n/a fp.close()
1835n/a
1836n/a # raise exception if actual size does not match content-length header
1837n/a if size >= 0 and read < size:
1838n/a raise ContentTooShortError(
1839n/a "retrieval incomplete: got only %i out of %i bytes"
1840n/a % (read, size), result)
1841n/a
1842n/a return result
1843n/a
1844n/a # Each method named open_<type> knows how to open that type of URL
1845n/a
1846n/a def _open_generic_http(self, connection_factory, url, data):
1847n/a """Make an HTTP connection using connection_class.
1848n/a
1849n/a This is an internal method that should be called from
1850n/a open_http() or open_https().
1851n/a
1852n/a Arguments:
1853n/a - connection_factory should take a host name and return an
1854n/a HTTPConnection instance.
1855n/a - url is the url to retrieval or a host, relative-path pair.
1856n/a - data is payload for a POST request or None.
1857n/a """
1858n/a
1859n/a user_passwd = None
1860n/a proxy_passwd= None
1861n/a if isinstance(url, str):
1862n/a host, selector = splithost(url)
1863n/a if host:
1864n/a user_passwd, host = splituser(host)
1865n/a host = unquote(host)
1866n/a realhost = host
1867n/a else:
1868n/a host, selector = url
1869n/a # check whether the proxy contains authorization information
1870n/a proxy_passwd, host = splituser(host)
1871n/a # now we proceed with the url we want to obtain
1872n/a urltype, rest = splittype(selector)
1873n/a url = rest
1874n/a user_passwd = None
1875n/a if urltype.lower() != 'http':
1876n/a realhost = None
1877n/a else:
1878n/a realhost, rest = splithost(rest)
1879n/a if realhost:
1880n/a user_passwd, realhost = splituser(realhost)
1881n/a if user_passwd:
1882n/a selector = "%s://%s%s" % (urltype, realhost, rest)
1883n/a if proxy_bypass(realhost):
1884n/a host = realhost
1885n/a
1886n/a if not host: raise OSError('http error', 'no host given')
1887n/a
1888n/a if proxy_passwd:
1889n/a proxy_passwd = unquote(proxy_passwd)
1890n/a proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii')
1891n/a else:
1892n/a proxy_auth = None
1893n/a
1894n/a if user_passwd:
1895n/a user_passwd = unquote(user_passwd)
1896n/a auth = base64.b64encode(user_passwd.encode()).decode('ascii')
1897n/a else:
1898n/a auth = None
1899n/a http_conn = connection_factory(host)
1900n/a headers = {}
1901n/a if proxy_auth:
1902n/a headers["Proxy-Authorization"] = "Basic %s" % proxy_auth
1903n/a if auth:
1904n/a headers["Authorization"] = "Basic %s" % auth
1905n/a if realhost:
1906n/a headers["Host"] = realhost
1907n/a
1908n/a # Add Connection:close as we don't support persistent connections yet.
1909n/a # This helps in closing the socket and avoiding ResourceWarning
1910n/a
1911n/a headers["Connection"] = "close"
1912n/a
1913n/a for header, value in self.addheaders:
1914n/a headers[header] = value
1915n/a
1916n/a if data is not None:
1917n/a headers["Content-Type"] = "application/x-www-form-urlencoded"
1918n/a http_conn.request("POST", selector, data, headers)
1919n/a else:
1920n/a http_conn.request("GET", selector, headers=headers)
1921n/a
1922n/a try:
1923n/a response = http_conn.getresponse()
1924n/a except http.client.BadStatusLine:
1925n/a # something went wrong with the HTTP status line
1926n/a raise URLError("http protocol error: bad status line")
1927n/a
1928n/a # According to RFC 2616, "2xx" code indicates that the client's
1929n/a # request was successfully received, understood, and accepted.
1930n/a if 200 <= response.status < 300:
1931n/a return addinfourl(response, response.msg, "http:" + url,
1932n/a response.status)
1933n/a else:
1934n/a return self.http_error(
1935n/a url, response.fp,
1936n/a response.status, response.reason, response.msg, data)
1937n/a
1938n/a def open_http(self, url, data=None):
1939n/a """Use HTTP protocol."""
1940n/a return self._open_generic_http(http.client.HTTPConnection, url, data)
1941n/a
1942n/a def http_error(self, url, fp, errcode, errmsg, headers, data=None):
1943n/a """Handle http errors.
1944n/a
1945n/a Derived class can override this, or provide specific handlers
1946n/a named http_error_DDD where DDD is the 3-digit error code."""
1947n/a # First check if there's a specific handler for this error
1948n/a name = 'http_error_%d' % errcode
1949n/a if hasattr(self, name):
1950n/a method = getattr(self, name)
1951n/a if data is None:
1952n/a result = method(url, fp, errcode, errmsg, headers)
1953n/a else:
1954n/a result = method(url, fp, errcode, errmsg, headers, data)
1955n/a if result: return result
1956n/a return self.http_error_default(url, fp, errcode, errmsg, headers)
1957n/a
1958n/a def http_error_default(self, url, fp, errcode, errmsg, headers):
1959n/a """Default error handler: close the connection and raise OSError."""
1960n/a fp.close()
1961n/a raise HTTPError(url, errcode, errmsg, headers, None)
1962n/a
1963n/a if _have_ssl:
1964n/a def _https_connection(self, host):
1965n/a return http.client.HTTPSConnection(host,
1966n/a key_file=self.key_file,
1967n/a cert_file=self.cert_file)
1968n/a
1969n/a def open_https(self, url, data=None):
1970n/a """Use HTTPS protocol."""
1971n/a return self._open_generic_http(self._https_connection, url, data)
1972n/a
1973n/a def open_file(self, url):
1974n/a """Use local file or FTP depending on form of URL."""
1975n/a if not isinstance(url, str):
1976n/a raise URLError('file error: proxy support for file protocol currently not implemented')
1977n/a if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
1978n/a raise ValueError("file:// scheme is supported only on localhost")
1979n/a else:
1980n/a return self.open_local_file(url)
1981n/a
1982n/a def open_local_file(self, url):
1983n/a """Use local file."""
1984n/a import email.utils
1985n/a import mimetypes
1986n/a host, file = splithost(url)
1987n/a localname = url2pathname(file)
1988n/a try:
1989n/a stats = os.stat(localname)
1990n/a except OSError as e:
1991n/a raise URLError(e.strerror, e.filename)
1992n/a size = stats.st_size
1993n/a modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
1994n/a mtype = mimetypes.guess_type(url)[0]
1995n/a headers = email.message_from_string(
1996n/a 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
1997n/a (mtype or 'text/plain', size, modified))
1998n/a if not host:
1999n/a urlfile = file
2000n/a if file[:1] == '/':
2001n/a urlfile = 'file://' + file
2002n/a return addinfourl(open(localname, 'rb'), headers, urlfile)
2003n/a host, port = splitport(host)
2004n/a if (not port
2005n/a and socket.gethostbyname(host) in ((localhost(),) + thishost())):
2006n/a urlfile = file
2007n/a if file[:1] == '/':
2008n/a urlfile = 'file://' + file
2009n/a elif file[:2] == './':
2010n/a raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
2011n/a return addinfourl(open(localname, 'rb'), headers, urlfile)
2012n/a raise URLError('local file error: not on local host')
2013n/a
2014n/a def open_ftp(self, url):
2015n/a """Use FTP protocol."""
2016n/a if not isinstance(url, str):
2017n/a raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
2018n/a import mimetypes
2019n/a host, path = splithost(url)
2020n/a if not host: raise URLError('ftp error: no host given')
2021n/a host, port = splitport(host)
2022n/a user, host = splituser(host)
2023n/a if user: user, passwd = splitpasswd(user)
2024n/a else: passwd = None
2025n/a host = unquote(host)
2026n/a user = unquote(user or '')
2027n/a passwd = unquote(passwd or '')
2028n/a host = socket.gethostbyname(host)
2029n/a if not port:
2030n/a import ftplib
2031n/a port = ftplib.FTP_PORT
2032n/a else:
2033n/a port = int(port)
2034n/a path, attrs = splitattr(path)
2035n/a path = unquote(path)
2036n/a dirs = path.split('/')
2037n/a dirs, file = dirs[:-1], dirs[-1]
2038n/a if dirs and not dirs[0]: dirs = dirs[1:]
2039n/a if dirs and not dirs[0]: dirs[0] = '/'
2040n/a key = user, host, port, '/'.join(dirs)
2041n/a # XXX thread unsafe!
2042n/a if len(self.ftpcache) > MAXFTPCACHE:
2043n/a # Prune the cache, rather arbitrarily
2044n/a for k in list(self.ftpcache):
2045n/a if k != key:
2046n/a v = self.ftpcache[k]
2047n/a del self.ftpcache[k]
2048n/a v.close()
2049n/a try:
2050n/a if key not in self.ftpcache:
2051n/a self.ftpcache[key] = \
2052n/a ftpwrapper(user, passwd, host, port, dirs)
2053n/a if not file: type = 'D'
2054n/a else: type = 'I'
2055n/a for attr in attrs:
2056n/a attr, value = splitvalue(attr)
2057n/a if attr.lower() == 'type' and \
2058n/a value in ('a', 'A', 'i', 'I', 'd', 'D'):
2059n/a type = value.upper()
2060n/a (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
2061n/a mtype = mimetypes.guess_type("ftp:" + url)[0]
2062n/a headers = ""
2063n/a if mtype:
2064n/a headers += "Content-Type: %s\n" % mtype
2065n/a if retrlen is not None and retrlen >= 0:
2066n/a headers += "Content-Length: %d\n" % retrlen
2067n/a headers = email.message_from_string(headers)
2068n/a return addinfourl(fp, headers, "ftp:" + url)
2069n/a except ftperrors() as exp:
2070n/a raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])
2071n/a
2072n/a def open_data(self, url, data=None):
2073n/a """Use "data" URL."""
2074n/a if not isinstance(url, str):
2075n/a raise URLError('data error: proxy support for data protocol currently not implemented')
2076n/a # ignore POSTed data
2077n/a #
2078n/a # syntax of data URLs:
2079n/a # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
2080n/a # mediatype := [ type "/" subtype ] *( ";" parameter )
2081n/a # data := *urlchar
2082n/a # parameter := attribute "=" value
2083n/a try:
2084n/a [type, data] = url.split(',', 1)
2085n/a except ValueError:
2086n/a raise OSError('data error', 'bad data URL')
2087n/a if not type:
2088n/a type = 'text/plain;charset=US-ASCII'
2089n/a semi = type.rfind(';')
2090n/a if semi >= 0 and '=' not in type[semi:]:
2091n/a encoding = type[semi+1:]
2092n/a type = type[:semi]
2093n/a else:
2094n/a encoding = ''
2095n/a msg = []
2096n/a msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
2097n/a time.gmtime(time.time())))
2098n/a msg.append('Content-type: %s' % type)
2099n/a if encoding == 'base64':
2100n/a # XXX is this encoding/decoding ok?
2101n/a data = base64.decodebytes(data.encode('ascii')).decode('latin-1')
2102n/a else:
2103n/a data = unquote(data)
2104n/a msg.append('Content-Length: %d' % len(data))
2105n/a msg.append('')
2106n/a msg.append(data)
2107n/a msg = '\n'.join(msg)
2108n/a headers = email.message_from_string(msg)
2109n/a f = io.StringIO(msg)
2110n/a #f.fileno = None # needed for addinfourl
2111n/a return addinfourl(f, headers, url)
2112n/a
2113n/a
2114n/aclass FancyURLopener(URLopener):
2115n/a """Derived class with handlers for errors we can handle (perhaps)."""
2116n/a
2117n/a def __init__(self, *args, **kwargs):
2118n/a URLopener.__init__(self, *args, **kwargs)
2119n/a self.auth_cache = {}
2120n/a self.tries = 0
2121n/a self.maxtries = 10
2122n/a
2123n/a def http_error_default(self, url, fp, errcode, errmsg, headers):
2124n/a """Default error handling -- don't raise an exception."""
2125n/a return addinfourl(fp, headers, "http:" + url, errcode)
2126n/a
2127n/a def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
2128n/a """Error 302 -- relocated (temporarily)."""
2129n/a self.tries += 1
2130n/a try:
2131n/a if self.maxtries and self.tries >= self.maxtries:
2132n/a if hasattr(self, "http_error_500"):
2133n/a meth = self.http_error_500
2134n/a else:
2135n/a meth = self.http_error_default
2136n/a return meth(url, fp, 500,
2137n/a "Internal Server Error: Redirect Recursion",
2138n/a headers)
2139n/a result = self.redirect_internal(url, fp, errcode, errmsg,
2140n/a headers, data)
2141n/a return result
2142n/a finally:
2143n/a self.tries = 0
2144n/a
2145n/a def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
2146n/a if 'location' in headers:
2147n/a newurl = headers['location']
2148n/a elif 'uri' in headers:
2149n/a newurl = headers['uri']
2150n/a else:
2151n/a return
2152n/a fp.close()
2153n/a
2154n/a # In case the server sent a relative URL, join with original:
2155n/a newurl = urljoin(self.type + ":" + url, newurl)
2156n/a
2157n/a urlparts = urlparse(newurl)
2158n/a
2159n/a # For security reasons, we don't allow redirection to anything other
2160n/a # than http, https and ftp.
2161n/a
2162n/a # We are using newer HTTPError with older redirect_internal method
2163n/a # This older method will get deprecated in 3.3
2164n/a
2165n/a if urlparts.scheme not in ('http', 'https', 'ftp', ''):
2166n/a raise HTTPError(newurl, errcode,
2167n/a errmsg +
2168n/a " Redirection to url '%s' is not allowed." % newurl,
2169n/a headers, fp)
2170n/a
2171n/a return self.open(newurl)
2172n/a
2173n/a def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
2174n/a """Error 301 -- also relocated (permanently)."""
2175n/a return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2176n/a
2177n/a def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
2178n/a """Error 303 -- also relocated (essentially identical to 302)."""
2179n/a return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2180n/a
2181n/a def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
2182n/a """Error 307 -- relocated, but turn POST into error."""
2183n/a if data is None:
2184n/a return self.http_error_302(url, fp, errcode, errmsg, headers, data)
2185n/a else:
2186n/a return self.http_error_default(url, fp, errcode, errmsg, headers)
2187n/a
2188n/a def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
2189n/a retry=False):
2190n/a """Error 401 -- authentication required.
2191n/a This function supports Basic authentication only."""
2192n/a if 'www-authenticate' not in headers:
2193n/a URLopener.http_error_default(self, url, fp,
2194n/a errcode, errmsg, headers)
2195n/a stuff = headers['www-authenticate']
2196n/a match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2197n/a if not match:
2198n/a URLopener.http_error_default(self, url, fp,
2199n/a errcode, errmsg, headers)
2200n/a scheme, realm = match.groups()
2201n/a if scheme.lower() != 'basic':
2202n/a URLopener.http_error_default(self, url, fp,
2203n/a errcode, errmsg, headers)
2204n/a if not retry:
2205n/a URLopener.http_error_default(self, url, fp, errcode, errmsg,
2206n/a headers)
2207n/a name = 'retry_' + self.type + '_basic_auth'
2208n/a if data is None:
2209n/a return getattr(self,name)(url, realm)
2210n/a else:
2211n/a return getattr(self,name)(url, realm, data)
2212n/a
2213n/a def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
2214n/a retry=False):
2215n/a """Error 407 -- proxy authentication required.
2216n/a This function supports Basic authentication only."""
2217n/a if 'proxy-authenticate' not in headers:
2218n/a URLopener.http_error_default(self, url, fp,
2219n/a errcode, errmsg, headers)
2220n/a stuff = headers['proxy-authenticate']
2221n/a match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
2222n/a if not match:
2223n/a URLopener.http_error_default(self, url, fp,
2224n/a errcode, errmsg, headers)
2225n/a scheme, realm = match.groups()
2226n/a if scheme.lower() != 'basic':
2227n/a URLopener.http_error_default(self, url, fp,
2228n/a errcode, errmsg, headers)
2229n/a if not retry:
2230n/a URLopener.http_error_default(self, url, fp, errcode, errmsg,
2231n/a headers)
2232n/a name = 'retry_proxy_' + self.type + '_basic_auth'
2233n/a if data is None:
2234n/a return getattr(self,name)(url, realm)
2235n/a else:
2236n/a return getattr(self,name)(url, realm, data)
2237n/a
2238n/a def retry_proxy_http_basic_auth(self, url, realm, data=None):
2239n/a host, selector = splithost(url)
2240n/a newurl = 'http://' + host + selector
2241n/a proxy = self.proxies['http']
2242n/a urltype, proxyhost = splittype(proxy)
2243n/a proxyhost, proxyselector = splithost(proxyhost)
2244n/a i = proxyhost.find('@') + 1
2245n/a proxyhost = proxyhost[i:]
2246n/a user, passwd = self.get_user_passwd(proxyhost, realm, i)
2247n/a if not (user or passwd): return None
2248n/a proxyhost = "%s:%s@%s" % (quote(user, safe=''),
2249n/a quote(passwd, safe=''), proxyhost)
2250n/a self.proxies['http'] = 'http://' + proxyhost + proxyselector
2251n/a if data is None:
2252n/a return self.open(newurl)
2253n/a else:
2254n/a return self.open(newurl, data)
2255n/a
2256n/a def retry_proxy_https_basic_auth(self, url, realm, data=None):
2257n/a host, selector = splithost(url)
2258n/a newurl = 'https://' + host + selector
2259n/a proxy = self.proxies['https']
2260n/a urltype, proxyhost = splittype(proxy)
2261n/a proxyhost, proxyselector = splithost(proxyhost)
2262n/a i = proxyhost.find('@') + 1
2263n/a proxyhost = proxyhost[i:]
2264n/a user, passwd = self.get_user_passwd(proxyhost, realm, i)
2265n/a if not (user or passwd): return None
2266n/a proxyhost = "%s:%s@%s" % (quote(user, safe=''),
2267n/a quote(passwd, safe=''), proxyhost)
2268n/a self.proxies['https'] = 'https://' + proxyhost + proxyselector
2269n/a if data is None:
2270n/a return self.open(newurl)
2271n/a else:
2272n/a return self.open(newurl, data)
2273n/a
2274n/a def retry_http_basic_auth(self, url, realm, data=None):
2275n/a host, selector = splithost(url)
2276n/a i = host.find('@') + 1
2277n/a host = host[i:]
2278n/a user, passwd = self.get_user_passwd(host, realm, i)
2279n/a if not (user or passwd): return None
2280n/a host = "%s:%s@%s" % (quote(user, safe=''),
2281n/a quote(passwd, safe=''), host)
2282n/a newurl = 'http://' + host + selector
2283n/a if data is None:
2284n/a return self.open(newurl)
2285n/a else:
2286n/a return self.open(newurl, data)
2287n/a
2288n/a def retry_https_basic_auth(self, url, realm, data=None):
2289n/a host, selector = splithost(url)
2290n/a i = host.find('@') + 1
2291n/a host = host[i:]
2292n/a user, passwd = self.get_user_passwd(host, realm, i)
2293n/a if not (user or passwd): return None
2294n/a host = "%s:%s@%s" % (quote(user, safe=''),
2295n/a quote(passwd, safe=''), host)
2296n/a newurl = 'https://' + host + selector
2297n/a if data is None:
2298n/a return self.open(newurl)
2299n/a else:
2300n/a return self.open(newurl, data)
2301n/a
2302n/a def get_user_passwd(self, host, realm, clear_cache=0):
2303n/a key = realm + '@' + host.lower()
2304n/a if key in self.auth_cache:
2305n/a if clear_cache:
2306n/a del self.auth_cache[key]
2307n/a else:
2308n/a return self.auth_cache[key]
2309n/a user, passwd = self.prompt_user_passwd(host, realm)
2310n/a if user or passwd: self.auth_cache[key] = (user, passwd)
2311n/a return user, passwd
2312n/a
2313n/a def prompt_user_passwd(self, host, realm):
2314n/a """Override this in a GUI environment!"""
2315n/a import getpass
2316n/a try:
2317n/a user = input("Enter username for %s at %s: " % (realm, host))
2318n/a passwd = getpass.getpass("Enter password for %s in %s at %s: " %
2319n/a (user, realm, host))
2320n/a return user, passwd
2321n/a except KeyboardInterrupt:
2322n/a print()
2323n/a return None, None
2324n/a
2325n/a
2326n/a# Utility functions
2327n/a
2328n/a_localhost = None
2329n/adef localhost():
2330n/a """Return the IP address of the magic hostname 'localhost'."""
2331n/a global _localhost
2332n/a if _localhost is None:
2333n/a _localhost = socket.gethostbyname('localhost')
2334n/a return _localhost
2335n/a
2336n/a_thishost = None
2337n/adef thishost():
2338n/a """Return the IP addresses of the current host."""
2339n/a global _thishost
2340n/a if _thishost is None:
2341n/a try:
2342n/a _thishost = tuple(socket.gethostbyname_ex(socket.gethostname())[2])
2343n/a except socket.gaierror:
2344n/a _thishost = tuple(socket.gethostbyname_ex('localhost')[2])
2345n/a return _thishost
2346n/a
2347n/a_ftperrors = None
2348n/adef ftperrors():
2349n/a """Return the set of errors raised by the FTP class."""
2350n/a global _ftperrors
2351n/a if _ftperrors is None:
2352n/a import ftplib
2353n/a _ftperrors = ftplib.all_errors
2354n/a return _ftperrors
2355n/a
2356n/a_noheaders = None
2357n/adef noheaders():
2358n/a """Return an empty email Message object."""
2359n/a global _noheaders
2360n/a if _noheaders is None:
2361n/a _noheaders = email.message_from_string("")
2362n/a return _noheaders
2363n/a
2364n/a
2365n/a# Utility classes
2366n/a
2367n/aclass ftpwrapper:
2368n/a """Class used by open_ftp() for cache of open FTP connections."""
2369n/a
2370n/a def __init__(self, user, passwd, host, port, dirs, timeout=None,
2371n/a persistent=True):
2372n/a self.user = user
2373n/a self.passwd = passwd
2374n/a self.host = host
2375n/a self.port = port
2376n/a self.dirs = dirs
2377n/a self.timeout = timeout
2378n/a self.refcount = 0
2379n/a self.keepalive = persistent
2380n/a try:
2381n/a self.init()
2382n/a except:
2383n/a self.close()
2384n/a raise
2385n/a
2386n/a def init(self):
2387n/a import ftplib
2388n/a self.busy = 0
2389n/a self.ftp = ftplib.FTP()
2390n/a self.ftp.connect(self.host, self.port, self.timeout)
2391n/a self.ftp.login(self.user, self.passwd)
2392n/a _target = '/'.join(self.dirs)
2393n/a self.ftp.cwd(_target)
2394n/a
2395n/a def retrfile(self, file, type):
2396n/a import ftplib
2397n/a self.endtransfer()
2398n/a if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
2399n/a else: cmd = 'TYPE ' + type; isdir = 0
2400n/a try:
2401n/a self.ftp.voidcmd(cmd)
2402n/a except ftplib.all_errors:
2403n/a self.init()
2404n/a self.ftp.voidcmd(cmd)
2405n/a conn = None
2406n/a if file and not isdir:
2407n/a # Try to retrieve as a file
2408n/a try:
2409n/a cmd = 'RETR ' + file
2410n/a conn, retrlen = self.ftp.ntransfercmd(cmd)
2411n/a except ftplib.error_perm as reason:
2412n/a if str(reason)[:3] != '550':
2413n/a raise URLError('ftp error: %r' % reason).with_traceback(
2414n/a sys.exc_info()[2])
2415n/a if not conn:
2416n/a # Set transfer mode to ASCII!
2417n/a self.ftp.voidcmd('TYPE A')
2418n/a # Try a directory listing. Verify that directory exists.
2419n/a if file:
2420n/a pwd = self.ftp.pwd()
2421n/a try:
2422n/a try:
2423n/a self.ftp.cwd(file)
2424n/a except ftplib.error_perm as reason:
2425n/a raise URLError('ftp error: %r' % reason) from reason
2426n/a finally:
2427n/a self.ftp.cwd(pwd)
2428n/a cmd = 'LIST ' + file
2429n/a else:
2430n/a cmd = 'LIST'
2431n/a conn, retrlen = self.ftp.ntransfercmd(cmd)
2432n/a self.busy = 1
2433n/a
2434n/a ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
2435n/a self.refcount += 1
2436n/a conn.close()
2437n/a # Pass back both a suitably decorated object and a retrieval length
2438n/a return (ftpobj, retrlen)
2439n/a
2440n/a def endtransfer(self):
2441n/a self.busy = 0
2442n/a
2443n/a def close(self):
2444n/a self.keepalive = False
2445n/a if self.refcount <= 0:
2446n/a self.real_close()
2447n/a
2448n/a def file_close(self):
2449n/a self.endtransfer()
2450n/a self.refcount -= 1
2451n/a if self.refcount <= 0 and not self.keepalive:
2452n/a self.real_close()
2453n/a
2454n/a def real_close(self):
2455n/a self.endtransfer()
2456n/a try:
2457n/a self.ftp.close()
2458n/a except ftperrors():
2459n/a pass
2460n/a
2461n/a# Proxy handling
2462n/adef getproxies_environment():
2463n/a """Return a dictionary of scheme -> proxy server URL mappings.
2464n/a
2465n/a Scan the environment for variables named <scheme>_proxy;
2466n/a this seems to be the standard convention. If you need a
2467n/a different way, you can pass a proxies dictionary to the
2468n/a [Fancy]URLopener constructor.
2469n/a
2470n/a """
2471n/a proxies = {}
2472n/a # in order to prefer lowercase variables, process environment in
2473n/a # two passes: first matches any, second pass matches lowercase only
2474n/a for name, value in os.environ.items():
2475n/a name = name.lower()
2476n/a if value and name[-6:] == '_proxy':
2477n/a proxies[name[:-6]] = value
2478n/a # CVE-2016-1000110 - If we are running as CGI script, forget HTTP_PROXY
2479n/a # (non-all-lowercase) as it may be set from the web server by a "Proxy:"
2480n/a # header from the client
2481n/a # If "proxy" is lowercase, it will still be used thanks to the next block
2482n/a if 'REQUEST_METHOD' in os.environ:
2483n/a proxies.pop('http', None)
2484n/a for name, value in os.environ.items():
2485n/a if name[-6:] == '_proxy':
2486n/a name = name.lower()
2487n/a if value:
2488n/a proxies[name[:-6]] = value
2489n/a else:
2490n/a proxies.pop(name[:-6], None)
2491n/a return proxies
2492n/a
2493n/adef proxy_bypass_environment(host, proxies=None):
2494n/a """Test if proxies should not be used for a particular host.
2495n/a
2496n/a Checks the proxy dict for the value of no_proxy, which should
2497n/a be a list of comma separated DNS suffixes, or '*' for all hosts.
2498n/a
2499n/a """
2500n/a if proxies is None:
2501n/a proxies = getproxies_environment()
2502n/a # don't bypass, if no_proxy isn't specified
2503n/a try:
2504n/a no_proxy = proxies['no']
2505n/a except KeyError:
2506n/a return 0
2507n/a # '*' is special case for always bypass
2508n/a if no_proxy == '*':
2509n/a return 1
2510n/a # strip port off host
2511n/a hostonly, port = splitport(host)
2512n/a # check if the host ends with any of the DNS suffixes
2513n/a no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
2514n/a for name in no_proxy_list:
2515n/a if name:
2516n/a name = name.lstrip('.') # ignore leading dots
2517n/a name = re.escape(name)
2518n/a pattern = r'(.+\.)?%s$' % name
2519n/a if (re.match(pattern, hostonly, re.I)
2520n/a or re.match(pattern, host, re.I)):
2521n/a return 1
2522n/a # otherwise, don't bypass
2523n/a return 0
2524n/a
2525n/a
2526n/a# This code tests an OSX specific data structure but is testable on all
2527n/a# platforms
2528n/adef _proxy_bypass_macosx_sysconf(host, proxy_settings):
2529n/a """
2530n/a Return True iff this host shouldn't be accessed using a proxy
2531n/a
2532n/a This function uses the MacOSX framework SystemConfiguration
2533n/a to fetch the proxy information.
2534n/a
2535n/a proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
2536n/a { 'exclude_simple': bool,
2537n/a 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
2538n/a }
2539n/a """
2540n/a from fnmatch import fnmatch
2541n/a
2542n/a hostonly, port = splitport(host)
2543n/a
2544n/a def ip2num(ipAddr):
2545n/a parts = ipAddr.split('.')
2546n/a parts = list(map(int, parts))
2547n/a if len(parts) != 4:
2548n/a parts = (parts + [0, 0, 0, 0])[:4]
2549n/a return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]
2550n/a
2551n/a # Check for simple host names:
2552n/a if '.' not in host:
2553n/a if proxy_settings['exclude_simple']:
2554n/a return True
2555n/a
2556n/a hostIP = None
2557n/a
2558n/a for value in proxy_settings.get('exceptions', ()):
2559n/a # Items in the list are strings like these: *.local, 169.254/16
2560n/a if not value: continue
2561n/a
2562n/a m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
2563n/a if m is not None:
2564n/a if hostIP is None:
2565n/a try:
2566n/a hostIP = socket.gethostbyname(hostonly)
2567n/a hostIP = ip2num(hostIP)
2568n/a except OSError:
2569n/a continue
2570n/a
2571n/a base = ip2num(m.group(1))
2572n/a mask = m.group(2)
2573n/a if mask is None:
2574n/a mask = 8 * (m.group(1).count('.') + 1)
2575n/a else:
2576n/a mask = int(mask[1:])
2577n/a mask = 32 - mask
2578n/a
2579n/a if (hostIP >> mask) == (base >> mask):
2580n/a return True
2581n/a
2582n/a elif fnmatch(host, value):
2583n/a return True
2584n/a
2585n/a return False
2586n/a
2587n/a
2588n/aif sys.platform == 'darwin':
2589n/a from _scproxy import _get_proxy_settings, _get_proxies
2590n/a
2591n/a def proxy_bypass_macosx_sysconf(host):
2592n/a proxy_settings = _get_proxy_settings()
2593n/a return _proxy_bypass_macosx_sysconf(host, proxy_settings)
2594n/a
2595n/a def getproxies_macosx_sysconf():
2596n/a """Return a dictionary of scheme -> proxy server URL mappings.
2597n/a
2598n/a This function uses the MacOSX framework SystemConfiguration
2599n/a to fetch the proxy information.
2600n/a """
2601n/a return _get_proxies()
2602n/a
2603n/a
2604n/a
2605n/a def proxy_bypass(host):
2606n/a """Return True, if host should be bypassed.
2607n/a
2608n/a Checks proxy settings gathered from the environment, if specified,
2609n/a or from the MacOSX framework SystemConfiguration.
2610n/a
2611n/a """
2612n/a proxies = getproxies_environment()
2613n/a if proxies:
2614n/a return proxy_bypass_environment(host, proxies)
2615n/a else:
2616n/a return proxy_bypass_macosx_sysconf(host)
2617n/a
2618n/a def getproxies():
2619n/a return getproxies_environment() or getproxies_macosx_sysconf()
2620n/a
2621n/a
2622n/aelif os.name == 'nt':
2623n/a def getproxies_registry():
2624n/a """Return a dictionary of scheme -> proxy server URL mappings.
2625n/a
2626n/a Win32 uses the registry to store proxies.
2627n/a
2628n/a """
2629n/a proxies = {}
2630n/a try:
2631n/a import winreg
2632n/a except ImportError:
2633n/a # Std module, so should be around - but you never know!
2634n/a return proxies
2635n/a try:
2636n/a internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
2637n/a r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
2638n/a proxyEnable = winreg.QueryValueEx(internetSettings,
2639n/a 'ProxyEnable')[0]
2640n/a if proxyEnable:
2641n/a # Returned as Unicode but problems if not converted to ASCII
2642n/a proxyServer = str(winreg.QueryValueEx(internetSettings,
2643n/a 'ProxyServer')[0])
2644n/a if '=' in proxyServer:
2645n/a # Per-protocol settings
2646n/a for p in proxyServer.split(';'):
2647n/a protocol, address = p.split('=', 1)
2648n/a # See if address has a type:// prefix
2649n/a if not re.match('^([^/:]+)://', address):
2650n/a address = '%s://%s' % (protocol, address)
2651n/a proxies[protocol] = address
2652n/a else:
2653n/a # Use one setting for all protocols
2654n/a if proxyServer[:5] == 'http:':
2655n/a proxies['http'] = proxyServer
2656n/a else:
2657n/a proxies['http'] = 'http://%s' % proxyServer
2658n/a proxies['https'] = 'https://%s' % proxyServer
2659n/a proxies['ftp'] = 'ftp://%s' % proxyServer
2660n/a internetSettings.Close()
2661n/a except (OSError, ValueError, TypeError):
2662n/a # Either registry key not found etc, or the value in an
2663n/a # unexpected format.
2664n/a # proxies already set up to be empty so nothing to do
2665n/a pass
2666n/a return proxies
2667n/a
2668n/a def getproxies():
2669n/a """Return a dictionary of scheme -> proxy server URL mappings.
2670n/a
2671n/a Returns settings gathered from the environment, if specified,
2672n/a or the registry.
2673n/a
2674n/a """
2675n/a return getproxies_environment() or getproxies_registry()
2676n/a
2677n/a def proxy_bypass_registry(host):
2678n/a try:
2679n/a import winreg
2680n/a except ImportError:
2681n/a # Std modules, so should be around - but you never know!
2682n/a return 0
2683n/a try:
2684n/a internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
2685n/a r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
2686n/a proxyEnable = winreg.QueryValueEx(internetSettings,
2687n/a 'ProxyEnable')[0]
2688n/a proxyOverride = str(winreg.QueryValueEx(internetSettings,
2689n/a 'ProxyOverride')[0])
2690n/a # ^^^^ Returned as Unicode but problems if not converted to ASCII
2691n/a except OSError:
2692n/a return 0
2693n/a if not proxyEnable or not proxyOverride:
2694n/a return 0
2695n/a # try to make a host list from name and IP address.
2696n/a rawHost, port = splitport(host)
2697n/a host = [rawHost]
2698n/a try:
2699n/a addr = socket.gethostbyname(rawHost)
2700n/a if addr != rawHost:
2701n/a host.append(addr)
2702n/a except OSError:
2703n/a pass
2704n/a try:
2705n/a fqdn = socket.getfqdn(rawHost)
2706n/a if fqdn != rawHost:
2707n/a host.append(fqdn)
2708n/a except OSError:
2709n/a pass
2710n/a # make a check value list from the registry entry: replace the
2711n/a # '<local>' string by the localhost entry and the corresponding
2712n/a # canonical entry.
2713n/a proxyOverride = proxyOverride.split(';')
2714n/a # now check if we match one of the registry values.
2715n/a for test in proxyOverride:
2716n/a if test == '<local>':
2717n/a if '.' not in rawHost:
2718n/a return 1
2719n/a test = test.replace(".", r"\.") # mask dots
2720n/a test = test.replace("*", r".*") # change glob sequence
2721n/a test = test.replace("?", r".") # change glob char
2722n/a for val in host:
2723n/a if re.match(test, val, re.I):
2724n/a return 1
2725n/a return 0
2726n/a
2727n/a def proxy_bypass(host):
2728n/a """Return True, if host should be bypassed.
2729n/a
2730n/a Checks proxy settings gathered from the environment, if specified,
2731n/a or the registry.
2732n/a
2733n/a """
2734n/a proxies = getproxies_environment()
2735n/a if proxies:
2736n/a return proxy_bypass_environment(host, proxies)
2737n/a else:
2738n/a return proxy_bypass_registry(host)
2739n/a
2740n/aelse:
2741n/a # By default use environment variables
2742n/a getproxies = getproxies_environment
2743n/a proxy_bypass = proxy_bypass_environment