1 | n/a | """An extensible library for opening URLs using a variety of protocols |
---|
2 | n/a | |
---|
3 | n/a | The simplest way to use this module is to call the urlopen function, |
---|
4 | n/a | which accepts a string containing a URL or a Request object (described |
---|
5 | n/a | below). It opens the URL and returns the results as file-like |
---|
6 | n/a | object; the returned object has some extra methods described below. |
---|
7 | n/a | |
---|
8 | n/a | The OpenerDirector manages a collection of Handler objects that do |
---|
9 | n/a | all the actual work. Each Handler implements a particular protocol or |
---|
10 | n/a | option. The OpenerDirector is a composite object that invokes the |
---|
11 | n/a | Handlers needed to open the requested URL. For example, the |
---|
12 | n/a | HTTPHandler performs HTTP GET and POST requests and deals with |
---|
13 | n/a | non-error returns. The HTTPRedirectHandler automatically deals with |
---|
14 | n/a | HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler |
---|
15 | n/a | deals with digest authentication. |
---|
16 | n/a | |
---|
17 | n/a | urlopen(url, data=None) -- Basic usage is the same as original |
---|
18 | n/a | urllib. pass the url and optionally data to post to an HTTP URL, and |
---|
19 | n/a | get a file-like object back. One difference is that you can also pass |
---|
20 | n/a | a Request instance instead of URL. Raises a URLError (subclass of |
---|
21 | n/a | OSError); for HTTP errors, raises an HTTPError, which can also be |
---|
22 | n/a | treated as a valid response. |
---|
23 | n/a | |
---|
24 | n/a | build_opener -- Function that creates a new OpenerDirector instance. |
---|
25 | n/a | Will install the default handlers. Accepts one or more Handlers as |
---|
26 | n/a | arguments, either instances or Handler classes that it will |
---|
27 | n/a | instantiate. If one of the arguments is a subclass of the default |
---|
28 | n/a | handler, the argument will be installed instead of the default. |
---|
29 | n/a | |
---|
30 | n/a | install_opener -- Installs a new opener as the default opener. |
---|
31 | n/a | |
---|
32 | n/a | objects of interest: |
---|
33 | n/a | |
---|
34 | n/a | OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages |
---|
35 | n/a | the Handler classes, while dealing with requests and responses. |
---|
36 | n/a | |
---|
37 | n/a | Request -- An object that encapsulates the state of a request. The |
---|
38 | n/a | state can be as simple as the URL. It can also include extra HTTP |
---|
39 | n/a | headers, e.g. a User-Agent. |
---|
40 | n/a | |
---|
41 | n/a | BaseHandler -- |
---|
42 | n/a | |
---|
43 | n/a | internals: |
---|
44 | n/a | BaseHandler and parent |
---|
45 | n/a | _call_chain conventions |
---|
46 | n/a | |
---|
47 | n/a | Example usage: |
---|
48 | n/a | |
---|
49 | n/a | import urllib.request |
---|
50 | n/a | |
---|
51 | n/a | # set up authentication info |
---|
52 | n/a | authinfo = urllib.request.HTTPBasicAuthHandler() |
---|
53 | n/a | authinfo.add_password(realm='PDQ Application', |
---|
54 | n/a | uri='https://mahler:8092/site-updates.py', |
---|
55 | n/a | user='klem', |
---|
56 | n/a | passwd='geheim$parole') |
---|
57 | n/a | |
---|
58 | n/a | proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"}) |
---|
59 | n/a | |
---|
60 | n/a | # build a new opener that adds authentication and caching FTP handlers |
---|
61 | n/a | opener = urllib.request.build_opener(proxy_support, authinfo, |
---|
62 | n/a | urllib.request.CacheFTPHandler) |
---|
63 | n/a | |
---|
64 | n/a | # install it |
---|
65 | n/a | urllib.request.install_opener(opener) |
---|
66 | n/a | |
---|
67 | n/a | f = urllib.request.urlopen('http://www.python.org/') |
---|
68 | n/a | """ |
---|
69 | n/a | |
---|
70 | n/a | # XXX issues: |
---|
71 | n/a | # If an authentication error handler that tries to perform |
---|
72 | n/a | # authentication for some reason but fails, how should the error be |
---|
73 | n/a | # signalled? The client needs to know the HTTP error code. But if |
---|
74 | n/a | # the handler knows that the problem was, e.g., that it didn't know |
---|
75 | n/a | # the hash algorithm requested in the challenge, it would be good to |
---|
76 | n/a | # pass that information along to the client, too. |
---|
77 | n/a | # ftp errors aren't handled cleanly |
---|
78 | n/a | # check digest against correct (i.e. non-apache) implementation |
---|
79 | n/a | |
---|
80 | n/a | # Possible extensions: |
---|
81 | n/a | # complex proxies XXX not sure what exactly was meant by this |
---|
82 | n/a | # abstract factory for opener |
---|
83 | n/a | |
---|
84 | n/a | import base64 |
---|
85 | n/a | import bisect |
---|
86 | n/a | import email |
---|
87 | n/a | import hashlib |
---|
88 | n/a | import http.client |
---|
89 | n/a | import io |
---|
90 | n/a | import os |
---|
91 | n/a | import posixpath |
---|
92 | n/a | import re |
---|
93 | n/a | import socket |
---|
94 | n/a | import string |
---|
95 | n/a | import sys |
---|
96 | n/a | import time |
---|
97 | n/a | import tempfile |
---|
98 | n/a | import contextlib |
---|
99 | n/a | import warnings |
---|
100 | n/a | |
---|
101 | n/a | |
---|
102 | n/a | from urllib.error import URLError, HTTPError, ContentTooShortError |
---|
103 | n/a | from urllib.parse import ( |
---|
104 | n/a | urlparse, urlsplit, urljoin, unwrap, quote, unquote, |
---|
105 | n/a | splittype, splithost, splitport, splituser, splitpasswd, |
---|
106 | n/a | splitattr, splitquery, splitvalue, splittag, to_bytes, |
---|
107 | n/a | unquote_to_bytes, urlunparse) |
---|
108 | n/a | from urllib.response import addinfourl, addclosehook |
---|
109 | n/a | |
---|
# check for SSL; when the interpreter was built without it, HTTPS-specific
# code paths below guard on _have_ssl instead of failing at import time
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
---|
117 | n/a | |
---|
# Names exported by "from urllib.request import *"; several entries
# (auth handlers, URLopener, ...) are defined later in this module.
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]
---|
134 | n/a | |
---|
# used in User-Agent header sent
__version__ = '%d.%d' % sys.version_info[:2]

# module-wide default opener: created lazily by urlopen(), replaced by
# install_opener(), discarded by urlcleanup()
_opener = None
---|
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    '''Open the URL url, which can be either a string or a Request object.

    *data* must be an object specifying additional data to be sent to
    the server, or None if no such data is needed.  See Request for
    details.

    urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    header in its HTTP requests.

    The optional *timeout* parameter specifies a timeout in seconds for
    blocking operations like the connection attempt (if not specified, the
    global default timeout setting will be used).  This only works for HTTP,
    HTTPS and FTP connections.

    If *context* is specified, it must be a ssl.SSLContext instance describing
    the various SSL options.  See HTTPSConnection for more details.

    The optional *cafile* and *capath* parameters specify a set of trusted CA
    certificates for HTTPS requests.  cafile should point to a single file
    containing a bundle of CA certificates, whereas capath should point to a
    directory of hashed certificate files.  More information can be found in
    ssl.SSLContext.load_verify_locations().

    The *cadefault* parameter is ignored.

    This function always returns an object which can work as a context
    manager and has methods such as

    * geturl() - return the URL of the resource retrieved, commonly used to
      determine if a redirect was followed

    * info() - return the meta-information of the page, such as headers, in the
      form of an email.message_from_string() instance (see Quick Reference to
      HTTP Headers)

    * getcode() - return the HTTP status code of the response.  Raises URLError
      on errors.

    For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    object slightly modified.  In addition to the three new methods above, the
    msg attribute contains the same information as the reason attribute ---
    the reason phrase returned by the server --- instead of the response
    headers as it is specified in the documentation for HTTPResponse.

    For FTP, file, and data URLs and requests explicitly handled by legacy
    URLopener and FancyURLopener classes, this function returns a
    urllib.response.addinfourl object.

    Note that None may be returned if no handler handles the request (though
    the default installed global OpenerDirector uses UnknownHandler to ensure
    this never happens).

    In addition, if proxy settings are detected (for example, when a *_proxy
    environment variable like http_proxy is set), ProxyHandler is default
    installed and makes sure the requests are handled through the proxy.

    '''
    global _opener
    if cafile or capath or cadefault:
        # warnings is already imported at module level; the message names
        # the actual parameter "capath" (was misspelled "cpath").
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # a one-off opener is built; the module default _opener is untouched
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # lazily create and cache the module-wide default opener
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
---|
223 | n/a | |
---|
def install_opener(opener):
    # Make *opener* the module-wide default used by subsequent urlopen()
    # calls (when they are not given an explicit context/cafile/capath).
    global _opener
    _opener = opener
---|
227 | n/a | |
---|
# Paths of the NamedTemporaryFiles created below; deleted by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            # delete=False: the caller gets the path back; cleanup is
            # deferred to urlcleanup() via _url_tempfiles
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            # NOTE(review): mixed-case lookup assumes the message object
            # returned by fp.info() is case-insensitive — confirm.
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # reporthook is given the nominal block size *bs*, not the
            # number of bytes actually read in the last block
            if reporthook:
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    # a short read is an error: report how much arrived via the exception,
    # which also carries the (filename, headers) result
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
---|
291 | n/a | |
---|
def urlcleanup():
    """Remove temporary files left behind by urlretrieve() calls and
    discard the cached default opener."""
    global _opener
    for path in _url_tempfiles:
        # best-effort: a file may already be gone or unremovable
        with contextlib.suppress(OSError):
            os.unlink(path)
    _url_tempfiles.clear()

    if _opener:
        _opener = None
---|
304 | n/a | |
---|
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    host = urlparse(request.full_url)[1]
    if not host:
        # no netloc in the URL itself; fall back to the Host header
        host = request.get_header("Host", "")

    # strip an explicit :port suffix, if present
    host = _cut_port_re.sub("", host, 1)
    return host.lower()
---|
322 | n/a | |
---|
class Request:
    """Encapsulate the state of a single URL request.

    At minimum this is the URL itself; it may also carry POST data,
    extra HTTP headers, and redirect/proxy bookkeeping attributes.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # NOTE(review): the mutable default for *headers* is safe here
        # because the dict is only iterated below, never stored or mutated.
        self.full_url = url          # property setter: unwraps, splits fragment, parses
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data             # property setter: may drop Content-length
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            # set only when given, so get_method() can infer from *data*
            self.method = method

    @property
    def full_url(self):
        # re-attach the fragment that the setter split off
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        self._full_url, self.fragment = splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        # route through the setter so Content-length is cleared too
        self.data = None

    def _parse(self):
        # populate self.type (scheme), self.host and self.selector from
        # the unfragmented URL; raises ValueError when no scheme is found
        self.type, rest = splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        return self.full_url

    def set_proxy(self, host, type):
        # for an https request, the first call records the real host for
        # tunnelling and leaves type/selector unchanged; otherwise the
        # request is rewritten to address the proxy directly
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type= type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        # True once set_proxy() has rewritten the selector to the full URL
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        # regular headers take precedence over unredirected ones
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
---|
432 | n/a | |
---|
class OpenerDirector:
    """Manage a chain of handlers and use them to open URLs.

    Handlers register their capabilities by method-name convention (see
    add_handler); open() then routes each request through the matching
    <protocol>_request / <protocol>_open / <protocol>_response methods.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}        # protocol -> [handlers with <proto>_open]
        self.handle_error = {}       # protocol -> {code -> [handlers]}
        self.process_response = {}   # protocol -> [handlers with <proto>_response]
        self.process_request = {}    # protocol -> [handlers with <proto>_request]

    def add_handler(self, handler):
        """Register *handler*'s protocol methods in the dispatch tables."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # split "<protocol>_<condition>" at the first underscore, e.g.
            # "http_open" -> ("http", "open"),
            # "http_error_404" -> ("http", "error_404")
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "<proto>_error_<kind>": kind becomes an int status code
                # when possible, otherwise stays a string (e.g. "default")
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # keep each list sorted by handler_order (BaseHandler.__lt__)
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl*, running request/response processors around _open()."""
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # try default_open handlers, then protocol-specific ones, and
        # finally the unknown_open fallback
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error_* handlers."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # fall back to the catch-all http_error_default handlers
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
---|
570 | n/a | |
---|
571 | n/a | # XXX probably also want an abstract factory that knows when it makes |
---|
572 | n/a | # sense to skip a superclass in favor of a subclass and when it might |
---|
573 | n/a | # make sense to include both |
---|
574 | n/a | |
---|
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    def _supersedes(check, klass):
        # a user handler replaces default *klass* when it is that class,
        # a subclass of it, or an instance of it
        if isinstance(check, type):
            return issubclass(check, klass)
        return isinstance(check, klass)

    superseded = {klass for klass in default_classes
                  if any(_supersedes(check, klass) for check in handlers)}
    for klass in superseded:
        default_classes.remove(klass)

    # defaults first, then the caller-supplied handlers (classes are
    # instantiated on the fly)
    for klass in default_classes:
        opener.add_handler(klass())

    for handler in handlers:
        if isinstance(handler, type):
            handler = handler()
        opener.add_handler(handler)
    return opener
---|
610 | n/a | |
---|
class BaseHandler:
    """Common base for protocol handlers managed by OpenerDirector."""

    # position in the handler chain; lower values run earlier
    handler_order = 500

    def add_parent(self, parent):
        """Record the OpenerDirector this handler belongs to."""
        self.parent = parent

    def close(self):
        """No-op; only exists for backwards compatibility."""
        pass

    def __lt__(self, other):
        """Order handlers by handler_order for bisect.insort."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
---|
628 | n/a | |
---|
629 | n/a | |
---|
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # 2xx means the request was successfully received, understood,
        # and accepted (RFC 2616); anything else is routed through the
        # opener's error chain, which may substitute a new response.
        if 200 <= code < 300:
            return response
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
---|
646 | n/a | |
---|
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback handler: raise HTTPError for any unhandled HTTP error."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # HTTPError can also be treated as a valid response (see the
        # module docstring), so callers may catch it and read *fp*.
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
---|
650 | n/a | |
---|
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirect responses."""
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10
---|
658 | n/a | |
---|
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # only GET/HEAD on 301/302/303/307 and POST on 301/302/303 are
        # auto-redirected; everything else (e.g. 307 + POST) raises
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # the new request carries no body (req.data is not copied), so the
        # body-describing headers are dropped as well
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)
---|
693 | n/a | # Implementation note: To avoid the server sending us into an |
---|
694 | n/a | # infinite loop, the request object needs to track what URLs we |
---|
695 | n/a | # have already seen. Do this by adding a handler-specific |
---|
696 | n/a | # attribute to the Request object. |
---|
697 | n/a | def http_error_302(self, req, fp, code, msg, headers): |
---|
698 | n/a | # Some servers (incorrectly) return multiple Location headers |
---|
699 | n/a | # (so probably same goes for URI). Use first header. |
---|
700 | n/a | if "location" in headers: |
---|
701 | n/a | newurl = headers["location"] |
---|
702 | n/a | elif "uri" in headers: |
---|
703 | n/a | newurl = headers["uri"] |
---|
704 | n/a | else: |
---|
705 | n/a | return |
---|
706 | n/a | |
---|
707 | n/a | # fix a possible malformed URL |
---|
708 | n/a | urlparts = urlparse(newurl) |
---|
709 | n/a | |
---|
710 | n/a | # For security reasons we don't allow redirection to anything other |
---|
711 | n/a | # than http, https or ftp. |
---|
712 | n/a | |
---|
713 | n/a | if urlparts.scheme not in ('http', 'https', 'ftp', ''): |
---|
714 | n/a | raise HTTPError( |
---|
715 | n/a | newurl, code, |
---|
716 | n/a | "%s - Redirection to url '%s' is not allowed" % (msg, newurl), |
---|
717 | n/a | headers, fp) |
---|
718 | n/a | |
---|
719 | n/a | if not urlparts.path and urlparts.netloc: |
---|
720 | n/a | urlparts = list(urlparts) |
---|
721 | n/a | urlparts[2] = "/" |
---|
722 | n/a | newurl = urlunparse(urlparts) |
---|
723 | n/a | |
---|
724 | n/a | # http.client.parse_headers() decodes as ISO-8859-1. Recover the |
---|
725 | n/a | # original bytes and percent-encode non-ASCII bytes, and any special |
---|
726 | n/a | # characters such as the space. |
---|
727 | n/a | newurl = quote( |
---|
728 | n/a | newurl, encoding="iso-8859-1", safe=string.punctuation) |
---|
729 | n/a | newurl = urljoin(req.full_url, newurl) |
---|
730 | n/a | |
---|
731 | n/a | # XXX Probably want to forget about the state of the current |
---|
732 | n/a | # request, although that might interact poorly with other |
---|
733 | n/a | # handlers that also use handler-specific request attributes |
---|
734 | n/a | new = self.redirect_request(req, fp, code, msg, headers, newurl) |
---|
735 | n/a | if new is None: |
---|
736 | n/a | return |
---|
737 | n/a | |
---|
738 | n/a | # loop detection |
---|
739 | n/a | # .redirect_dict has a key url if url was previously visited. |
---|
740 | n/a | if hasattr(req, 'redirect_dict'): |
---|
741 | n/a | visited = new.redirect_dict = req.redirect_dict |
---|
742 | n/a | if (visited.get(newurl, 0) >= self.max_repeats or |
---|
743 | n/a | len(visited) >= self.max_redirections): |
---|
744 | n/a | raise HTTPError(req.full_url, code, |
---|
745 | n/a | self.inf_msg + msg, headers, fp) |
---|
746 | n/a | else: |
---|
747 | n/a | visited = new.redirect_dict = req.redirect_dict = {} |
---|
748 | n/a | visited[newurl] = visited.get(newurl, 0) + 1 |
---|
749 | n/a | |
---|
750 | n/a | # Don't close the fp until we are sure that we won't use it |
---|
751 | n/a | # with HTTPError. |
---|
752 | n/a | fp.read() |
---|
753 | n/a | fp.close() |
---|
754 | n/a | |
---|
755 | n/a | return self.parent.open(new, timeout=req.timeout) |
---|
756 | n/a | |
---|
757 | n/a | http_error_301 = http_error_303 = http_error_307 = http_error_302 |
---|
758 | n/a | |
---|
759 | n/a | inf_msg = "The HTTP server returned a redirect error that would " \ |
---|
760 | n/a | "lead to an infinite loop.\n" \ |
---|
761 | n/a | "The last 30x error message was:\n" |
---|
762 | n/a | |
---|
763 | n/a | |
---|
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme.
    """
    scheme, rest = splittype(proxy)
    if rest.startswith("/"):
        # A full URL: require the RFC 3986 "//" authority marker.
        if not rest.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # Per RFC 3986 ss 3. and 3.3., the path is empty or begins with
        # '/', so the authority ends at the first '/' after the slashes.
        slash = rest.find("/", 2)
        authority = rest[2:] if slash == -1 else rest[2:slash]
    else:
        # A bare authority: no scheme applies.
        scheme = None
        authority = proxy
    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
---|
792 | n/a | |
---|
class ProxyHandler(BaseHandler):
    """Route requests through proxies given as a {scheme: proxy-url} map."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            # Fall back to the platform's proxy configuration
            # (environment variables / system settings via getproxies()).
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Create one <scheme>_open method per configured scheme so the
        # OpenerDirector dispatches those schemes through proxy_open.
        # The lambda's default arguments bind the *current* url/type,
        # avoiding the late-binding closure pitfall.
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Rewrite *req* to go through *proxy*.

        Returns None to let downstream handlers process the (now
        proxied) request, or a response when the request is re-opened
        under the proxy's own scheme.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            # Proxy given as a bare authority: assume the request's scheme.
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            # Host matches the platform's bypass list: no proxying.
            return None

        if user and password:
            # Credentials embedded in the proxy URL become a
            # Proxy-Authorization: Basic header.
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
---|
834 | n/a | |
---|
class HTTPPasswordMgr:
    """Store (user, password) credentials keyed by realm and URI prefix."""

    def __init__(self):
        # Maps realm -> {tuple-of-reduced-URIs: (user, password)}.
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *realm* at *uri* (str or sequence)."""
        uris = [uri] if isinstance(uri, str) else uri
        by_realm = self.passwd.setdefault(realm, {})
        # Store the URIs reduced both with and without the scheme's
        # default port, so lookups match either spelling.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uris)
            by_realm[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for the best match, or (None, None)."""
        candidates = self.passwd.get(realm, {})
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uri_tuple, credentials in candidates.items():
                if any(self.is_suburi(u, reduced) for u in uri_tuple):
                    return credentials
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # A full URI: keep its authority and path (default path "/").
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # A bare host or host:port.
            scheme, authority, path = None, uri, '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80, "https": 443}.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree.

        Both args must be URIs in reduced (authority, path) form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = posixpath.commonprefix((base[1], test[1]))
        return len(prefix) == len(base[1])
---|
897 | n/a | |
---|
898 | n/a | |
---|
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the default (None) realm."""

    def find_user_password(self, realm, authuri):
        """Look up credentials for *realm*, falling back to the default."""
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            # No realm-specific entry; try credentials registered under
            # the default (None) realm.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
---|
907 | n/a | |
---|
908 | n/a | |
---|
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also records which URIs accepted prior auth."""

    def __init__(self, *args, **kwargs):
        # Maps reduced (authority, path) -> bool "already authenticated".
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Register credentials and record the prior-auth flag for *uri*."""
        self.update_authenticated(uri, is_authenticated)
        # Also register under the default (None) realm so preemptive
        # (prior-auth) requests can find the credentials without a realm.
        if realm is not None:
            super().add_password(None, uri, user, passwd)
        super().add_password(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Set the prior-auth flag for *uri* (a str or a sequence of str)."""
        uris = [uri] if isinstance(uri, str) else uri
        # Record both port spellings, mirroring add_password().
        for default_port in (True, False):
            for u in uris:
                reduced = self.reduce_uri(u, default_port)
                self.authenticated[reduced] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the recorded flag for the first matching URI, else None."""
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uri, flag in self.authenticated.items():
                if self.is_suburi(uri, reduced):
                    return flag
        # Implicitly returns None when no registered URI matches.
---|
939 | n/a | |
---|
class AbstractBasicAuthHandler:
    """Shared machinery for HTTP Basic auth against servers and proxies."""

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    # NOTE(review): backtracking realm regexes on attacker-controlled
    # WWW-Authenticate headers have been a ReDoS concern in some CPython
    # releases -- verify against the security fixes for the target version.
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        # Expose the password manager's registration method directly.
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if authreq:
            # Reject any non-Basic challenge scheme outright.
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    # NB: 'quote' here is the regex quote-character group
                    # and shadows urllib.parse.quote within this method.
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"',"'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue *req* with an Authorization header, or return None."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            # If the same credentials were already sent and rejected,
            # give up instead of looping.
            if req.get_header(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        # Preemptive auth: applies only when the password manager tracks
        # prior-auth state (HTTPPasswordMgrWithPriorAuth) and this URL
        # has authenticated successfully before.
        if (not hasattr(self.passwd, 'is_authenticated') or
                not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        # Record whether this URL accepted our credentials so later
        # requests can authenticate preemptively.
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response
---|
1017 | n/a | |
---|
1018 | n/a | |
---|
1019 | n/a | |
---|
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against the origin server (HTTP 401)."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # Delegate to the shared Basic-auth machinery, keyed on the
        # WWW-Authenticate challenge header.
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
---|
1029 | n/a | |
---|
1030 | n/a | |
---|
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against a proxy (HTTP 407)."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo
        # component in authority.  Assume there isn't one, since
        # urllib.request does not (and should not, RFC 3986 s. 3.2.1)
        # support requests for URLs containing userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
---|
1044 | n/a | |
---|
1045 | n/a | |
---|
# Return n random bytes.
# os.urandom is used here because the bytes feed digest-auth cnonce
# generation (get_cnonce), where unpredictability matters.
_randombytes = os.urandom
---|
1048 | n/a | |
---|
1049 | n/a | |
---|
class AbstractDigestAuthHandler:
    """Shared machinery for HTTP Digest auth to servers and proxies."""

    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        # Number of failed challenge round-trips for the current request.
        self.retried = 0
        # nonce-count bookkeeping for the qop="auth" response digest.
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        # Called by the concrete handlers once a 401/407 exchange finishes.
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # Basic challenges are left to the Basic handler; any
                # other scheme is unsupported.
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-issue *req* with a computed Digest Authorization header."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            # The same credentials were already sent and rejected once:
            # stop retrying.
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization header value from challenge *chal*.

        Returns None when the challenge is malformed or no credentials
        are registered for the realm; raises URLError for an unsupported
        qop and ValueError for an unsupported algorithm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop == 'auth':
            # Per-nonce counter: reuse of a server nonce increments nc,
            # a new nonce resets it (RFC 2617 s. 3.2.2).
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            # Only qop == 'auth' reaches here (other values raised above),
            # so ncvalue/cnonce are guaranteed to be bound.
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) digest callables for *algorithm* ('MD5' or 'SHA')."""
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
---|
1192 | n/a | |
---|
1193 | n/a | |
---|
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069.

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # The password manager is keyed on the netloc part of the URL.
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        # The challenge round-trip is done; allow future 401s to retry.
        self.reset_retry_count()
        return response
---|
1210 | n/a | |
---|
1211 | n/a | |
---|
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (HTTP 407)."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        # Proxies are identified by the request's host, not the full URL.
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        # The challenge round-trip is done; allow future 407s to retry.
        self.reset_retry_count()
        return response
---|
1223 | n/a | |
---|
class AbstractHTTPHandler(BaseHandler):
    """Shared request preparation and connection logic for HTTP(S)."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        # Forwarded to http.client's connection debug output.
        self._debuglevel = level

    def _get_content_length(self, request):
        # Reuse http.client's sizing logic: returns None for unsized
        # iterables/file objects, a byte count otherwise.
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Fill in required headers (Content-type, Content-length or
        Transfer-encoding, Host, opener-wide addheaders) before sending."""
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if (not request.has_header('Content-length')
                    and not request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is not None:
                    request.add_unredirected_header(
                        'Content-length', str(content_length))
                else:
                    # Unknown body size (e.g. a generator): stream chunked.
                    request.add_unredirected_header(
                        'Transfer-encoding', 'chunked')

        sel_host = host
        if request.has_proxy():
            # When proxied, the selector is a full URL; the Host header
            # must still name the origin server.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take priority over normal ones.
        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            # CONNECT tunneling (e.g. HTTPS through an HTTP proxy).
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            # Any failure (including the URLError above) must not leak
            # the connection.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open.  Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
---|
1340 | n/a | |
---|
1341 | n/a | |
---|
1342 | n/a | class HTTPHandler(AbstractHTTPHandler): |
---|
1343 | n/a | |
---|
1344 | n/a | def http_open(self, req): |
---|
1345 | n/a | return self.do_open(http.client.HTTPConnection, req) |
---|
1346 | n/a | |
---|
1347 | n/a | http_request = AbstractHTTPHandler.do_request_ |
---|
1348 | n/a | |
---|
1349 | n/a | if hasattr(http.client, 'HTTPSConnection'): |
---|
1350 | n/a | |
---|
1351 | n/a | class HTTPSHandler(AbstractHTTPHandler): |
---|
1352 | n/a | |
---|
1353 | n/a | def __init__(self, debuglevel=0, context=None, check_hostname=None): |
---|
1354 | n/a | AbstractHTTPHandler.__init__(self, debuglevel) |
---|
1355 | n/a | self._context = context |
---|
1356 | n/a | self._check_hostname = check_hostname |
---|
1357 | n/a | |
---|
1358 | n/a | def https_open(self, req): |
---|
1359 | n/a | return self.do_open(http.client.HTTPSConnection, req, |
---|
1360 | n/a | context=self._context, check_hostname=self._check_hostname) |
---|
1361 | n/a | |
---|
1362 | n/a | https_request = AbstractHTTPHandler.do_request_ |
---|
1363 | n/a | |
---|
1364 | n/a | __all__.append('HTTPSHandler') |
---|
1365 | n/a | |
---|
1366 | n/a | class HTTPCookieProcessor(BaseHandler): |
---|
1367 | n/a | def __init__(self, cookiejar=None): |
---|
1368 | n/a | import http.cookiejar |
---|
1369 | n/a | if cookiejar is None: |
---|
1370 | n/a | cookiejar = http.cookiejar.CookieJar() |
---|
1371 | n/a | self.cookiejar = cookiejar |
---|
1372 | n/a | |
---|
1373 | n/a | def http_request(self, request): |
---|
1374 | n/a | self.cookiejar.add_cookie_header(request) |
---|
1375 | n/a | return request |
---|
1376 | n/a | |
---|
1377 | n/a | def http_response(self, request, response): |
---|
1378 | n/a | self.cookiejar.extract_cookies(response, request) |
---|
1379 | n/a | return response |
---|
1380 | n/a | |
---|
1381 | n/a | https_request = http_request |
---|
1382 | n/a | https_response = http_response |
---|
1383 | n/a | |
---|
1384 | n/a | class UnknownHandler(BaseHandler): |
---|
1385 | n/a | def unknown_open(self, req): |
---|
1386 | n/a | type = req.type |
---|
1387 | n/a | raise URLError('unknown url type: %s' % type) |
---|
1388 | n/a | |
---|
1389 | n/a | def parse_keqv_list(l): |
---|
1390 | n/a | """Parse list of key=value strings where keys are not duplicated.""" |
---|
1391 | n/a | parsed = {} |
---|
1392 | n/a | for elt in l: |
---|
1393 | n/a | k, v = elt.split('=', 1) |
---|
1394 | n/a | if v[0] == '"' and v[-1] == '"': |
---|
1395 | n/a | v = v[1:-1] |
---|
1396 | n/a | parsed[k] = v |
---|
1397 | n/a | return parsed |
---|
1398 | n/a | |
---|
1399 | n/a | def parse_http_list(s): |
---|
1400 | n/a | """Parse lists as described by RFC 2068 Section 2. |
---|
1401 | n/a | |
---|
1402 | n/a | In particular, parse comma-separated lists where the elements of |
---|
1403 | n/a | the list may include quoted-strings. A quoted-string could |
---|
1404 | n/a | contain a comma. A non-quoted string could have quotes in the |
---|
1405 | n/a | middle. Neither commas nor quotes count if they are escaped. |
---|
1406 | n/a | Only double-quotes count, not single-quotes. |
---|
1407 | n/a | """ |
---|
1408 | n/a | res = [] |
---|
1409 | n/a | part = '' |
---|
1410 | n/a | |
---|
1411 | n/a | escape = quote = False |
---|
1412 | n/a | for cur in s: |
---|
1413 | n/a | if escape: |
---|
1414 | n/a | part += cur |
---|
1415 | n/a | escape = False |
---|
1416 | n/a | continue |
---|
1417 | n/a | if quote: |
---|
1418 | n/a | if cur == '\\': |
---|
1419 | n/a | escape = True |
---|
1420 | n/a | continue |
---|
1421 | n/a | elif cur == '"': |
---|
1422 | n/a | quote = False |
---|
1423 | n/a | part += cur |
---|
1424 | n/a | continue |
---|
1425 | n/a | |
---|
1426 | n/a | if cur == ',': |
---|
1427 | n/a | res.append(part) |
---|
1428 | n/a | part = '' |
---|
1429 | n/a | continue |
---|
1430 | n/a | |
---|
1431 | n/a | if cur == '"': |
---|
1432 | n/a | quote = True |
---|
1433 | n/a | |
---|
1434 | n/a | part += cur |
---|
1435 | n/a | |
---|
1436 | n/a | # append last part |
---|
1437 | n/a | if part: |
---|
1438 | n/a | res.append(part) |
---|
1439 | n/a | |
---|
1440 | n/a | return [part.strip() for part in res] |
---|
1441 | n/a | |
---|
1442 | n/a | class FileHandler(BaseHandler): |
---|
1443 | n/a | # Use local file or FTP depending on form of URL |
---|
1444 | n/a | def file_open(self, req): |
---|
1445 | n/a | url = req.selector |
---|
1446 | n/a | if url[:2] == '//' and url[2:3] != '/' and (req.host and |
---|
1447 | n/a | req.host != 'localhost'): |
---|
1448 | n/a | if not req.host in self.get_names(): |
---|
1449 | n/a | raise URLError("file:// scheme is supported only on localhost") |
---|
1450 | n/a | else: |
---|
1451 | n/a | return self.open_local_file(req) |
---|
1452 | n/a | |
---|
1453 | n/a | # names for the localhost |
---|
1454 | n/a | names = None |
---|
1455 | n/a | def get_names(self): |
---|
1456 | n/a | if FileHandler.names is None: |
---|
1457 | n/a | try: |
---|
1458 | n/a | FileHandler.names = tuple( |
---|
1459 | n/a | socket.gethostbyname_ex('localhost')[2] + |
---|
1460 | n/a | socket.gethostbyname_ex(socket.gethostname())[2]) |
---|
1461 | n/a | except socket.gaierror: |
---|
1462 | n/a | FileHandler.names = (socket.gethostbyname('localhost'),) |
---|
1463 | n/a | return FileHandler.names |
---|
1464 | n/a | |
---|
1465 | n/a | # not entirely sure what the rules are here |
---|
1466 | n/a | def open_local_file(self, req): |
---|
1467 | n/a | import email.utils |
---|
1468 | n/a | import mimetypes |
---|
1469 | n/a | host = req.host |
---|
1470 | n/a | filename = req.selector |
---|
1471 | n/a | localfile = url2pathname(filename) |
---|
1472 | n/a | try: |
---|
1473 | n/a | stats = os.stat(localfile) |
---|
1474 | n/a | size = stats.st_size |
---|
1475 | n/a | modified = email.utils.formatdate(stats.st_mtime, usegmt=True) |
---|
1476 | n/a | mtype = mimetypes.guess_type(filename)[0] |
---|
1477 | n/a | headers = email.message_from_string( |
---|
1478 | n/a | 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % |
---|
1479 | n/a | (mtype or 'text/plain', size, modified)) |
---|
1480 | n/a | if host: |
---|
1481 | n/a | host, port = splitport(host) |
---|
1482 | n/a | if not host or \ |
---|
1483 | n/a | (not port and _safe_gethostbyname(host) in self.get_names()): |
---|
1484 | n/a | if host: |
---|
1485 | n/a | origurl = 'file://' + host + filename |
---|
1486 | n/a | else: |
---|
1487 | n/a | origurl = 'file://' + filename |
---|
1488 | n/a | return addinfourl(open(localfile, 'rb'), headers, origurl) |
---|
1489 | n/a | except OSError as exp: |
---|
1490 | n/a | # users shouldn't expect OSErrors coming from urlopen() |
---|
1491 | n/a | raise URLError(exp) |
---|
1492 | n/a | raise URLError('file not on local host') |
---|
1493 | n/a | |
---|
1494 | n/a | def _safe_gethostbyname(host): |
---|
1495 | n/a | try: |
---|
1496 | n/a | return socket.gethostbyname(host) |
---|
1497 | n/a | except socket.gaierror: |
---|
1498 | n/a | return None |
---|
1499 | n/a | |
---|
1500 | n/a | class FTPHandler(BaseHandler): |
---|
1501 | n/a | def ftp_open(self, req): |
---|
1502 | n/a | import ftplib |
---|
1503 | n/a | import mimetypes |
---|
1504 | n/a | host = req.host |
---|
1505 | n/a | if not host: |
---|
1506 | n/a | raise URLError('ftp error: no host given') |
---|
1507 | n/a | host, port = splitport(host) |
---|
1508 | n/a | if port is None: |
---|
1509 | n/a | port = ftplib.FTP_PORT |
---|
1510 | n/a | else: |
---|
1511 | n/a | port = int(port) |
---|
1512 | n/a | |
---|
1513 | n/a | # username/password handling |
---|
1514 | n/a | user, host = splituser(host) |
---|
1515 | n/a | if user: |
---|
1516 | n/a | user, passwd = splitpasswd(user) |
---|
1517 | n/a | else: |
---|
1518 | n/a | passwd = None |
---|
1519 | n/a | host = unquote(host) |
---|
1520 | n/a | user = user or '' |
---|
1521 | n/a | passwd = passwd or '' |
---|
1522 | n/a | |
---|
1523 | n/a | try: |
---|
1524 | n/a | host = socket.gethostbyname(host) |
---|
1525 | n/a | except OSError as msg: |
---|
1526 | n/a | raise URLError(msg) |
---|
1527 | n/a | path, attrs = splitattr(req.selector) |
---|
1528 | n/a | dirs = path.split('/') |
---|
1529 | n/a | dirs = list(map(unquote, dirs)) |
---|
1530 | n/a | dirs, file = dirs[:-1], dirs[-1] |
---|
1531 | n/a | if dirs and not dirs[0]: |
---|
1532 | n/a | dirs = dirs[1:] |
---|
1533 | n/a | try: |
---|
1534 | n/a | fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout) |
---|
1535 | n/a | type = file and 'I' or 'D' |
---|
1536 | n/a | for attr in attrs: |
---|
1537 | n/a | attr, value = splitvalue(attr) |
---|
1538 | n/a | if attr.lower() == 'type' and \ |
---|
1539 | n/a | value in ('a', 'A', 'i', 'I', 'd', 'D'): |
---|
1540 | n/a | type = value.upper() |
---|
1541 | n/a | fp, retrlen = fw.retrfile(file, type) |
---|
1542 | n/a | headers = "" |
---|
1543 | n/a | mtype = mimetypes.guess_type(req.full_url)[0] |
---|
1544 | n/a | if mtype: |
---|
1545 | n/a | headers += "Content-type: %s\n" % mtype |
---|
1546 | n/a | if retrlen is not None and retrlen >= 0: |
---|
1547 | n/a | headers += "Content-length: %d\n" % retrlen |
---|
1548 | n/a | headers = email.message_from_string(headers) |
---|
1549 | n/a | return addinfourl(fp, headers, req.full_url) |
---|
1550 | n/a | except ftplib.all_errors as exp: |
---|
1551 | n/a | exc = URLError('ftp error: %r' % exp) |
---|
1552 | n/a | raise exc.with_traceback(sys.exc_info()[2]) |
---|
1553 | n/a | |
---|
1554 | n/a | def connect_ftp(self, user, passwd, host, port, dirs, timeout): |
---|
1555 | n/a | return ftpwrapper(user, passwd, host, port, dirs, timeout, |
---|
1556 | n/a | persistent=False) |
---|
1557 | n/a | |
---|
1558 | n/a | class CacheFTPHandler(FTPHandler): |
---|
1559 | n/a | # XXX would be nice to have pluggable cache strategies |
---|
1560 | n/a | # XXX this stuff is definitely not thread safe |
---|
1561 | n/a | def __init__(self): |
---|
1562 | n/a | self.cache = {} |
---|
1563 | n/a | self.timeout = {} |
---|
1564 | n/a | self.soonest = 0 |
---|
1565 | n/a | self.delay = 60 |
---|
1566 | n/a | self.max_conns = 16 |
---|
1567 | n/a | |
---|
1568 | n/a | def setTimeout(self, t): |
---|
1569 | n/a | self.delay = t |
---|
1570 | n/a | |
---|
1571 | n/a | def setMaxConns(self, m): |
---|
1572 | n/a | self.max_conns = m |
---|
1573 | n/a | |
---|
1574 | n/a | def connect_ftp(self, user, passwd, host, port, dirs, timeout): |
---|
1575 | n/a | key = user, host, port, '/'.join(dirs), timeout |
---|
1576 | n/a | if key in self.cache: |
---|
1577 | n/a | self.timeout[key] = time.time() + self.delay |
---|
1578 | n/a | else: |
---|
1579 | n/a | self.cache[key] = ftpwrapper(user, passwd, host, port, |
---|
1580 | n/a | dirs, timeout) |
---|
1581 | n/a | self.timeout[key] = time.time() + self.delay |
---|
1582 | n/a | self.check_cache() |
---|
1583 | n/a | return self.cache[key] |
---|
1584 | n/a | |
---|
1585 | n/a | def check_cache(self): |
---|
1586 | n/a | # first check for old ones |
---|
1587 | n/a | t = time.time() |
---|
1588 | n/a | if self.soonest <= t: |
---|
1589 | n/a | for k, v in list(self.timeout.items()): |
---|
1590 | n/a | if v < t: |
---|
1591 | n/a | self.cache[k].close() |
---|
1592 | n/a | del self.cache[k] |
---|
1593 | n/a | del self.timeout[k] |
---|
1594 | n/a | self.soonest = min(list(self.timeout.values())) |
---|
1595 | n/a | |
---|
1596 | n/a | # then check the size |
---|
1597 | n/a | if len(self.cache) == self.max_conns: |
---|
1598 | n/a | for k, v in list(self.timeout.items()): |
---|
1599 | n/a | if v == self.soonest: |
---|
1600 | n/a | del self.cache[k] |
---|
1601 | n/a | del self.timeout[k] |
---|
1602 | n/a | break |
---|
1603 | n/a | self.soonest = min(list(self.timeout.values())) |
---|
1604 | n/a | |
---|
1605 | n/a | def clear_cache(self): |
---|
1606 | n/a | for conn in self.cache.values(): |
---|
1607 | n/a | conn.close() |
---|
1608 | n/a | self.cache.clear() |
---|
1609 | n/a | self.timeout.clear() |
---|
1610 | n/a | |
---|
1611 | n/a | class DataHandler(BaseHandler): |
---|
1612 | n/a | def data_open(self, req): |
---|
1613 | n/a | # data URLs as specified in RFC 2397. |
---|
1614 | n/a | # |
---|
1615 | n/a | # ignores POSTed data |
---|
1616 | n/a | # |
---|
1617 | n/a | # syntax: |
---|
1618 | n/a | # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data |
---|
1619 | n/a | # mediatype := [ type "/" subtype ] *( ";" parameter ) |
---|
1620 | n/a | # data := *urlchar |
---|
1621 | n/a | # parameter := attribute "=" value |
---|
1622 | n/a | url = req.full_url |
---|
1623 | n/a | |
---|
1624 | n/a | scheme, data = url.split(":",1) |
---|
1625 | n/a | mediatype, data = data.split(",",1) |
---|
1626 | n/a | |
---|
1627 | n/a | # even base64 encoded data URLs might be quoted so unquote in any case: |
---|
1628 | n/a | data = unquote_to_bytes(data) |
---|
1629 | n/a | if mediatype.endswith(";base64"): |
---|
1630 | n/a | data = base64.decodebytes(data) |
---|
1631 | n/a | mediatype = mediatype[:-7] |
---|
1632 | n/a | |
---|
1633 | n/a | if not mediatype: |
---|
1634 | n/a | mediatype = "text/plain;charset=US-ASCII" |
---|
1635 | n/a | |
---|
1636 | n/a | headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" % |
---|
1637 | n/a | (mediatype, len(data))) |
---|
1638 | n/a | |
---|
1639 | n/a | return addinfourl(io.BytesIO(data), headers, url) |
---|
1640 | n/a | |
---|
1641 | n/a | |
---|
1642 | n/a | # Code move from the old urllib module |
---|
1643 | n/a | |
---|
1644 | n/a | MAXFTPCACHE = 10 # Trim the ftp cache beyond this size |
---|
1645 | n/a | |
---|
1646 | n/a | # Helper for non-unix systems |
---|
1647 | n/a | if os.name == 'nt': |
---|
1648 | n/a | from nturl2path import url2pathname, pathname2url |
---|
1649 | n/a | else: |
---|
1650 | n/a | def url2pathname(pathname): |
---|
1651 | n/a | """OS-specific conversion from a relative URL of the 'file' scheme |
---|
1652 | n/a | to a file system path; not recommended for general use.""" |
---|
1653 | n/a | return unquote(pathname) |
---|
1654 | n/a | |
---|
1655 | n/a | def pathname2url(pathname): |
---|
1656 | n/a | """OS-specific conversion from a file system path to a relative URL |
---|
1657 | n/a | of the 'file' scheme; not recommended for general use.""" |
---|
1658 | n/a | return quote(pathname) |
---|
1659 | n/a | |
---|
1660 | n/a | # This really consists of two pieces: |
---|
1661 | n/a | # (1) a class which handles opening of all sorts of URLs |
---|
1662 | n/a | # (plus assorted utilities etc.) |
---|
1663 | n/a | # (2) a set of functions for parsing URLs |
---|
1664 | n/a | # XXX Should these be separated out into different modules? |
---|
1665 | n/a | |
---|
1666 | n/a | |
---|
1667 | n/a | ftpcache = {} |
---|
1668 | n/a | class URLopener: |
---|
1669 | n/a | """Class to open URLs. |
---|
1670 | n/a | This is a class rather than just a subroutine because we may need |
---|
1671 | n/a | more than one set of global protocol-specific options. |
---|
1672 | n/a | Note -- this is a base class for those who don't want the |
---|
1673 | n/a | automatic handling of errors type 302 (relocated) and 401 |
---|
1674 | n/a | (authorization needed).""" |
---|
1675 | n/a | |
---|
1676 | n/a | __tempfiles = None |
---|
1677 | n/a | |
---|
1678 | n/a | version = "Python-urllib/%s" % __version__ |
---|
1679 | n/a | |
---|
1680 | n/a | # Constructor |
---|
1681 | n/a | def __init__(self, proxies=None, **x509): |
---|
1682 | n/a | msg = "%(class)s style of invoking requests is deprecated. " \ |
---|
1683 | n/a | "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} |
---|
1684 | n/a | warnings.warn(msg, DeprecationWarning, stacklevel=3) |
---|
1685 | n/a | if proxies is None: |
---|
1686 | n/a | proxies = getproxies() |
---|
1687 | n/a | assert hasattr(proxies, 'keys'), "proxies must be a mapping" |
---|
1688 | n/a | self.proxies = proxies |
---|
1689 | n/a | self.key_file = x509.get('key_file') |
---|
1690 | n/a | self.cert_file = x509.get('cert_file') |
---|
1691 | n/a | self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')] |
---|
1692 | n/a | self.__tempfiles = [] |
---|
1693 | n/a | self.__unlink = os.unlink # See cleanup() |
---|
1694 | n/a | self.tempcache = None |
---|
1695 | n/a | # Undocumented feature: if you assign {} to tempcache, |
---|
1696 | n/a | # it is used to cache files retrieved with |
---|
1697 | n/a | # self.retrieve(). This is not enabled by default |
---|
1698 | n/a | # since it does not work for changing documents (and I |
---|
1699 | n/a | # haven't got the logic to check expiration headers |
---|
1700 | n/a | # yet). |
---|
1701 | n/a | self.ftpcache = ftpcache |
---|
1702 | n/a | # Undocumented feature: you can use a different |
---|
1703 | n/a | # ftp cache by assigning to the .ftpcache member; |
---|
1704 | n/a | # in case you want logically independent URL openers |
---|
1705 | n/a | # XXX This is not threadsafe. Bah. |
---|
1706 | n/a | |
---|
1707 | n/a | def __del__(self): |
---|
1708 | n/a | self.close() |
---|
1709 | n/a | |
---|
1710 | n/a | def close(self): |
---|
1711 | n/a | self.cleanup() |
---|
1712 | n/a | |
---|
1713 | n/a | def cleanup(self): |
---|
1714 | n/a | # This code sometimes runs when the rest of this module |
---|
1715 | n/a | # has already been deleted, so it can't use any globals |
---|
1716 | n/a | # or import anything. |
---|
1717 | n/a | if self.__tempfiles: |
---|
1718 | n/a | for file in self.__tempfiles: |
---|
1719 | n/a | try: |
---|
1720 | n/a | self.__unlink(file) |
---|
1721 | n/a | except OSError: |
---|
1722 | n/a | pass |
---|
1723 | n/a | del self.__tempfiles[:] |
---|
1724 | n/a | if self.tempcache: |
---|
1725 | n/a | self.tempcache.clear() |
---|
1726 | n/a | |
---|
1727 | n/a | def addheader(self, *args): |
---|
1728 | n/a | """Add a header to be used by the HTTP interface only |
---|
1729 | n/a | e.g. u.addheader('Accept', 'sound/basic')""" |
---|
1730 | n/a | self.addheaders.append(args) |
---|
1731 | n/a | |
---|
1732 | n/a | # External interface |
---|
1733 | n/a | def open(self, fullurl, data=None): |
---|
1734 | n/a | """Use URLopener().open(file) instead of open(file, 'r').""" |
---|
1735 | n/a | fullurl = unwrap(to_bytes(fullurl)) |
---|
1736 | n/a | fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") |
---|
1737 | n/a | if self.tempcache and fullurl in self.tempcache: |
---|
1738 | n/a | filename, headers = self.tempcache[fullurl] |
---|
1739 | n/a | fp = open(filename, 'rb') |
---|
1740 | n/a | return addinfourl(fp, headers, fullurl) |
---|
1741 | n/a | urltype, url = splittype(fullurl) |
---|
1742 | n/a | if not urltype: |
---|
1743 | n/a | urltype = 'file' |
---|
1744 | n/a | if urltype in self.proxies: |
---|
1745 | n/a | proxy = self.proxies[urltype] |
---|
1746 | n/a | urltype, proxyhost = splittype(proxy) |
---|
1747 | n/a | host, selector = splithost(proxyhost) |
---|
1748 | n/a | url = (host, fullurl) # Signal special case to open_*() |
---|
1749 | n/a | else: |
---|
1750 | n/a | proxy = None |
---|
1751 | n/a | name = 'open_' + urltype |
---|
1752 | n/a | self.type = urltype |
---|
1753 | n/a | name = name.replace('-', '_') |
---|
1754 | n/a | if not hasattr(self, name): |
---|
1755 | n/a | if proxy: |
---|
1756 | n/a | return self.open_unknown_proxy(proxy, fullurl, data) |
---|
1757 | n/a | else: |
---|
1758 | n/a | return self.open_unknown(fullurl, data) |
---|
1759 | n/a | try: |
---|
1760 | n/a | if data is None: |
---|
1761 | n/a | return getattr(self, name)(url) |
---|
1762 | n/a | else: |
---|
1763 | n/a | return getattr(self, name)(url, data) |
---|
1764 | n/a | except (HTTPError, URLError): |
---|
1765 | n/a | raise |
---|
1766 | n/a | except OSError as msg: |
---|
1767 | n/a | raise OSError('socket error', msg).with_traceback(sys.exc_info()[2]) |
---|
1768 | n/a | |
---|
1769 | n/a | def open_unknown(self, fullurl, data=None): |
---|
1770 | n/a | """Overridable interface to open unknown URL type.""" |
---|
1771 | n/a | type, url = splittype(fullurl) |
---|
1772 | n/a | raise OSError('url error', 'unknown url type', type) |
---|
1773 | n/a | |
---|
1774 | n/a | def open_unknown_proxy(self, proxy, fullurl, data=None): |
---|
1775 | n/a | """Overridable interface to open unknown URL type.""" |
---|
1776 | n/a | type, url = splittype(fullurl) |
---|
1777 | n/a | raise OSError('url error', 'invalid proxy for %s' % type, proxy) |
---|
1778 | n/a | |
---|
1779 | n/a | # External interface |
---|
1780 | n/a | def retrieve(self, url, filename=None, reporthook=None, data=None): |
---|
1781 | n/a | """retrieve(url) returns (filename, headers) for a local object |
---|
1782 | n/a | or (tempfilename, headers) for a remote object.""" |
---|
1783 | n/a | url = unwrap(to_bytes(url)) |
---|
1784 | n/a | if self.tempcache and url in self.tempcache: |
---|
1785 | n/a | return self.tempcache[url] |
---|
1786 | n/a | type, url1 = splittype(url) |
---|
1787 | n/a | if filename is None and (not type or type == 'file'): |
---|
1788 | n/a | try: |
---|
1789 | n/a | fp = self.open_local_file(url1) |
---|
1790 | n/a | hdrs = fp.info() |
---|
1791 | n/a | fp.close() |
---|
1792 | n/a | return url2pathname(splithost(url1)[1]), hdrs |
---|
1793 | n/a | except OSError as msg: |
---|
1794 | n/a | pass |
---|
1795 | n/a | fp = self.open(url, data) |
---|
1796 | n/a | try: |
---|
1797 | n/a | headers = fp.info() |
---|
1798 | n/a | if filename: |
---|
1799 | n/a | tfp = open(filename, 'wb') |
---|
1800 | n/a | else: |
---|
1801 | n/a | import tempfile |
---|
1802 | n/a | garbage, path = splittype(url) |
---|
1803 | n/a | garbage, path = splithost(path or "") |
---|
1804 | n/a | path, garbage = splitquery(path or "") |
---|
1805 | n/a | path, garbage = splitattr(path or "") |
---|
1806 | n/a | suffix = os.path.splitext(path)[1] |
---|
1807 | n/a | (fd, filename) = tempfile.mkstemp(suffix) |
---|
1808 | n/a | self.__tempfiles.append(filename) |
---|
1809 | n/a | tfp = os.fdopen(fd, 'wb') |
---|
1810 | n/a | try: |
---|
1811 | n/a | result = filename, headers |
---|
1812 | n/a | if self.tempcache is not None: |
---|
1813 | n/a | self.tempcache[url] = result |
---|
1814 | n/a | bs = 1024*8 |
---|
1815 | n/a | size = -1 |
---|
1816 | n/a | read = 0 |
---|
1817 | n/a | blocknum = 0 |
---|
1818 | n/a | if "content-length" in headers: |
---|
1819 | n/a | size = int(headers["Content-Length"]) |
---|
1820 | n/a | if reporthook: |
---|
1821 | n/a | reporthook(blocknum, bs, size) |
---|
1822 | n/a | while 1: |
---|
1823 | n/a | block = fp.read(bs) |
---|
1824 | n/a | if not block: |
---|
1825 | n/a | break |
---|
1826 | n/a | read += len(block) |
---|
1827 | n/a | tfp.write(block) |
---|
1828 | n/a | blocknum += 1 |
---|
1829 | n/a | if reporthook: |
---|
1830 | n/a | reporthook(blocknum, bs, size) |
---|
1831 | n/a | finally: |
---|
1832 | n/a | tfp.close() |
---|
1833 | n/a | finally: |
---|
1834 | n/a | fp.close() |
---|
1835 | n/a | |
---|
1836 | n/a | # raise exception if actual size does not match content-length header |
---|
1837 | n/a | if size >= 0 and read < size: |
---|
1838 | n/a | raise ContentTooShortError( |
---|
1839 | n/a | "retrieval incomplete: got only %i out of %i bytes" |
---|
1840 | n/a | % (read, size), result) |
---|
1841 | n/a | |
---|
1842 | n/a | return result |
---|
1843 | n/a | |
---|
1844 | n/a | # Each method named open_<type> knows how to open that type of URL |
---|
1845 | n/a | |
---|
1846 | n/a | def _open_generic_http(self, connection_factory, url, data): |
---|
1847 | n/a | """Make an HTTP connection using connection_class. |
---|
1848 | n/a | |
---|
1849 | n/a | This is an internal method that should be called from |
---|
1850 | n/a | open_http() or open_https(). |
---|
1851 | n/a | |
---|
1852 | n/a | Arguments: |
---|
1853 | n/a | - connection_factory should take a host name and return an |
---|
1854 | n/a | HTTPConnection instance. |
---|
1855 | n/a | - url is the url to retrieval or a host, relative-path pair. |
---|
1856 | n/a | - data is payload for a POST request or None. |
---|
1857 | n/a | """ |
---|
1858 | n/a | |
---|
1859 | n/a | user_passwd = None |
---|
1860 | n/a | proxy_passwd= None |
---|
1861 | n/a | if isinstance(url, str): |
---|
1862 | n/a | host, selector = splithost(url) |
---|
1863 | n/a | if host: |
---|
1864 | n/a | user_passwd, host = splituser(host) |
---|
1865 | n/a | host = unquote(host) |
---|
1866 | n/a | realhost = host |
---|
1867 | n/a | else: |
---|
1868 | n/a | host, selector = url |
---|
1869 | n/a | # check whether the proxy contains authorization information |
---|
1870 | n/a | proxy_passwd, host = splituser(host) |
---|
1871 | n/a | # now we proceed with the url we want to obtain |
---|
1872 | n/a | urltype, rest = splittype(selector) |
---|
1873 | n/a | url = rest |
---|
1874 | n/a | user_passwd = None |
---|
1875 | n/a | if urltype.lower() != 'http': |
---|
1876 | n/a | realhost = None |
---|
1877 | n/a | else: |
---|
1878 | n/a | realhost, rest = splithost(rest) |
---|
1879 | n/a | if realhost: |
---|
1880 | n/a | user_passwd, realhost = splituser(realhost) |
---|
1881 | n/a | if user_passwd: |
---|
1882 | n/a | selector = "%s://%s%s" % (urltype, realhost, rest) |
---|
1883 | n/a | if proxy_bypass(realhost): |
---|
1884 | n/a | host = realhost |
---|
1885 | n/a | |
---|
1886 | n/a | if not host: raise OSError('http error', 'no host given') |
---|
1887 | n/a | |
---|
1888 | n/a | if proxy_passwd: |
---|
1889 | n/a | proxy_passwd = unquote(proxy_passwd) |
---|
1890 | n/a | proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii') |
---|
1891 | n/a | else: |
---|
1892 | n/a | proxy_auth = None |
---|
1893 | n/a | |
---|
1894 | n/a | if user_passwd: |
---|
1895 | n/a | user_passwd = unquote(user_passwd) |
---|
1896 | n/a | auth = base64.b64encode(user_passwd.encode()).decode('ascii') |
---|
1897 | n/a | else: |
---|
1898 | n/a | auth = None |
---|
1899 | n/a | http_conn = connection_factory(host) |
---|
1900 | n/a | headers = {} |
---|
1901 | n/a | if proxy_auth: |
---|
1902 | n/a | headers["Proxy-Authorization"] = "Basic %s" % proxy_auth |
---|
1903 | n/a | if auth: |
---|
1904 | n/a | headers["Authorization"] = "Basic %s" % auth |
---|
1905 | n/a | if realhost: |
---|
1906 | n/a | headers["Host"] = realhost |
---|
1907 | n/a | |
---|
1908 | n/a | # Add Connection:close as we don't support persistent connections yet. |
---|
1909 | n/a | # This helps in closing the socket and avoiding ResourceWarning |
---|
1910 | n/a | |
---|
1911 | n/a | headers["Connection"] = "close" |
---|
1912 | n/a | |
---|
1913 | n/a | for header, value in self.addheaders: |
---|
1914 | n/a | headers[header] = value |
---|
1915 | n/a | |
---|
1916 | n/a | if data is not None: |
---|
1917 | n/a | headers["Content-Type"] = "application/x-www-form-urlencoded" |
---|
1918 | n/a | http_conn.request("POST", selector, data, headers) |
---|
1919 | n/a | else: |
---|
1920 | n/a | http_conn.request("GET", selector, headers=headers) |
---|
1921 | n/a | |
---|
1922 | n/a | try: |
---|
1923 | n/a | response = http_conn.getresponse() |
---|
1924 | n/a | except http.client.BadStatusLine: |
---|
1925 | n/a | # something went wrong with the HTTP status line |
---|
1926 | n/a | raise URLError("http protocol error: bad status line") |
---|
1927 | n/a | |
---|
1928 | n/a | # According to RFC 2616, "2xx" code indicates that the client's |
---|
1929 | n/a | # request was successfully received, understood, and accepted. |
---|
1930 | n/a | if 200 <= response.status < 300: |
---|
1931 | n/a | return addinfourl(response, response.msg, "http:" + url, |
---|
1932 | n/a | response.status) |
---|
1933 | n/a | else: |
---|
1934 | n/a | return self.http_error( |
---|
1935 | n/a | url, response.fp, |
---|
1936 | n/a | response.status, response.reason, response.msg, data) |
---|
1937 | n/a | |
---|
1938 | n/a | def open_http(self, url, data=None): |
---|
1939 | n/a | """Use HTTP protocol.""" |
---|
1940 | n/a | return self._open_generic_http(http.client.HTTPConnection, url, data) |
---|
1941 | n/a | |
---|
1942 | n/a | def http_error(self, url, fp, errcode, errmsg, headers, data=None): |
---|
1943 | n/a | """Handle http errors. |
---|
1944 | n/a | |
---|
1945 | n/a | Derived class can override this, or provide specific handlers |
---|
1946 | n/a | named http_error_DDD where DDD is the 3-digit error code.""" |
---|
1947 | n/a | # First check if there's a specific handler for this error |
---|
1948 | n/a | name = 'http_error_%d' % errcode |
---|
1949 | n/a | if hasattr(self, name): |
---|
1950 | n/a | method = getattr(self, name) |
---|
1951 | n/a | if data is None: |
---|
1952 | n/a | result = method(url, fp, errcode, errmsg, headers) |
---|
1953 | n/a | else: |
---|
1954 | n/a | result = method(url, fp, errcode, errmsg, headers, data) |
---|
1955 | n/a | if result: return result |
---|
1956 | n/a | return self.http_error_default(url, fp, errcode, errmsg, headers) |
---|
1957 | n/a | |
---|
1958 | n/a | def http_error_default(self, url, fp, errcode, errmsg, headers): |
---|
1959 | n/a | """Default error handler: close the connection and raise OSError.""" |
---|
1960 | n/a | fp.close() |
---|
1961 | n/a | raise HTTPError(url, errcode, errmsg, headers, None) |
---|
1962 | n/a | |
---|
1963 | n/a | if _have_ssl: |
---|
1964 | n/a | def _https_connection(self, host): |
---|
1965 | n/a | return http.client.HTTPSConnection(host, |
---|
1966 | n/a | key_file=self.key_file, |
---|
1967 | n/a | cert_file=self.cert_file) |
---|
1968 | n/a | |
---|
1969 | n/a | def open_https(self, url, data=None): |
---|
1970 | n/a | """Use HTTPS protocol.""" |
---|
1971 | n/a | return self._open_generic_http(self._https_connection, url, data) |
---|
1972 | n/a | |
---|
1973 | n/a | def open_file(self, url): |
---|
1974 | n/a | """Use local file or FTP depending on form of URL.""" |
---|
1975 | n/a | if not isinstance(url, str): |
---|
1976 | n/a | raise URLError('file error: proxy support for file protocol currently not implemented') |
---|
1977 | n/a | if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': |
---|
1978 | n/a | raise ValueError("file:// scheme is supported only on localhost") |
---|
1979 | n/a | else: |
---|
1980 | n/a | return self.open_local_file(url) |
---|
1981 | n/a | |
---|
1982 | n/a | def open_local_file(self, url): |
---|
1983 | n/a | """Use local file.""" |
---|
1984 | n/a | import email.utils |
---|
1985 | n/a | import mimetypes |
---|
1986 | n/a | host, file = splithost(url) |
---|
1987 | n/a | localname = url2pathname(file) |
---|
1988 | n/a | try: |
---|
1989 | n/a | stats = os.stat(localname) |
---|
1990 | n/a | except OSError as e: |
---|
1991 | n/a | raise URLError(e.strerror, e.filename) |
---|
1992 | n/a | size = stats.st_size |
---|
1993 | n/a | modified = email.utils.formatdate(stats.st_mtime, usegmt=True) |
---|
1994 | n/a | mtype = mimetypes.guess_type(url)[0] |
---|
1995 | n/a | headers = email.message_from_string( |
---|
1996 | n/a | 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % |
---|
1997 | n/a | (mtype or 'text/plain', size, modified)) |
---|
1998 | n/a | if not host: |
---|
1999 | n/a | urlfile = file |
---|
2000 | n/a | if file[:1] == '/': |
---|
2001 | n/a | urlfile = 'file://' + file |
---|
2002 | n/a | return addinfourl(open(localname, 'rb'), headers, urlfile) |
---|
2003 | n/a | host, port = splitport(host) |
---|
2004 | n/a | if (not port |
---|
2005 | n/a | and socket.gethostbyname(host) in ((localhost(),) + thishost())): |
---|
2006 | n/a | urlfile = file |
---|
2007 | n/a | if file[:1] == '/': |
---|
2008 | n/a | urlfile = 'file://' + file |
---|
2009 | n/a | elif file[:2] == './': |
---|
2010 | n/a | raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) |
---|
2011 | n/a | return addinfourl(open(localname, 'rb'), headers, urlfile) |
---|
2012 | n/a | raise URLError('local file error: not on local host') |
---|
2013 | n/a | |
---|
    def open_ftp(self, url):
        """Use FTP protocol.

        Parses user/password/host/port/path out of the URL, reuses (or
        creates) a cached ftpwrapper connection, and returns an addinfourl
        for the retrieved file or directory listing.  Raises URLError on
        any FTP-level failure.
        """
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        # Components arrive percent-encoded; decode them before use.
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        # Connections are cached per (user, host, port, directory) so that
        # repeated fetches reuse one control connection.
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing if no file named,
            # binary ('I'mage) otherwise; a ;type= attribute may override.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])
---|
2071 | n/a | |
---|
2072 | n/a | def open_data(self, url, data=None): |
---|
2073 | n/a | """Use "data" URL.""" |
---|
2074 | n/a | if not isinstance(url, str): |
---|
2075 | n/a | raise URLError('data error: proxy support for data protocol currently not implemented') |
---|
2076 | n/a | # ignore POSTed data |
---|
2077 | n/a | # |
---|
2078 | n/a | # syntax of data URLs: |
---|
2079 | n/a | # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data |
---|
2080 | n/a | # mediatype := [ type "/" subtype ] *( ";" parameter ) |
---|
2081 | n/a | # data := *urlchar |
---|
2082 | n/a | # parameter := attribute "=" value |
---|
2083 | n/a | try: |
---|
2084 | n/a | [type, data] = url.split(',', 1) |
---|
2085 | n/a | except ValueError: |
---|
2086 | n/a | raise OSError('data error', 'bad data URL') |
---|
2087 | n/a | if not type: |
---|
2088 | n/a | type = 'text/plain;charset=US-ASCII' |
---|
2089 | n/a | semi = type.rfind(';') |
---|
2090 | n/a | if semi >= 0 and '=' not in type[semi:]: |
---|
2091 | n/a | encoding = type[semi+1:] |
---|
2092 | n/a | type = type[:semi] |
---|
2093 | n/a | else: |
---|
2094 | n/a | encoding = '' |
---|
2095 | n/a | msg = [] |
---|
2096 | n/a | msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', |
---|
2097 | n/a | time.gmtime(time.time()))) |
---|
2098 | n/a | msg.append('Content-type: %s' % type) |
---|
2099 | n/a | if encoding == 'base64': |
---|
2100 | n/a | # XXX is this encoding/decoding ok? |
---|
2101 | n/a | data = base64.decodebytes(data.encode('ascii')).decode('latin-1') |
---|
2102 | n/a | else: |
---|
2103 | n/a | data = unquote(data) |
---|
2104 | n/a | msg.append('Content-Length: %d' % len(data)) |
---|
2105 | n/a | msg.append('') |
---|
2106 | n/a | msg.append(data) |
---|
2107 | n/a | msg = '\n'.join(msg) |
---|
2108 | n/a | headers = email.message_from_string(msg) |
---|
2109 | n/a | f = io.StringIO(msg) |
---|
2110 | n/a | #f.fileno = None # needed for addinfourl |
---|
2111 | n/a | return addinfourl(f, headers, url) |
---|
2112 | n/a | |
---|
2113 | n/a | |
---|
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect following (301/302/303/307) with a recursion guard and
    interactive HTTP Basic authentication (401/407) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" -> (user, password) remembered from prompts.
        self.auth_cache = {}
        # tries / maxtries guard against unbounded redirect recursion.
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                # Too many nested redirects: report a synthetic 500.
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            result = self.redirect_internal(url, fp, errcode, errmsg,
                                            headers, data)
            return result
        finally:
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # The new location comes from Location: (preferred) or URI:.
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # No redirect target given; nothing to do.
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            # 307 must not silently replay a POST; surface the error.
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): the URLopener.http_error_default calls below have no
        # return -- presumably that base method raises; confirm in URLopener.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth
        # depending on the scheme of the original request.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Mirrors http_error_401, but against the proxy's challenge header.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request after embedding user:password credentials
        # into the configured HTTP proxy URL.
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already present in the proxy host.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        # Same as retry_proxy_http_basic_auth, for the HTTPS proxy entry.
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request with user:password embedded in the URL host.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        # Same as retry_http_basic_auth, over https.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        # host is lowercased so cache lookups are case-insensitive;
        # clear_cache forces a fresh prompt (used after a failed attempt).
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Ctrl-C cancels the prompt; report "no credentials".
            print()
            return None, None
---|
2324 | n/a | |
---|
2325 | n/a | |
---|
2326 | n/a | # Utility functions |
---|
2327 | n/a | |
---|
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The resolver result is cached in a module-level global so the
    lookup happens at most once per process.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
---|
2335 | n/a | |
---|
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    Resolved once and cached; falls back to resolving 'localhost' when
    the machine's own hostname cannot be resolved.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
---|
2346 | n/a | |
---|
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported lazily, on first use, and the tuple is cached.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
---|
2355 | n/a | |
---|
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The same (shared) instance is returned on every call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
---|
2363 | n/a | |
---|
2364 | n/a | |
---|
2365 | n/a | # Utility classes |
---|
2366 | n/a | |
---|
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() that are still
        # open; the control connection is torn down only at zero.
        self.refcount = 0
        # When False, the connection closes as soon as refcount hits zero.
        self.keepalive = persistent
        try:
            self.init()
        except:
            # Ensure a half-opened connection is released before re-raising.
            self.close()
            raise

    def init(self):
        # (Re)connect, log in, and change into the target directory.
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Retrieve *file* (or a directory listing) as (fileobj, length).

        type 'd'/'D' forces a listing; otherwise RETR is attempted first
        and a 550 reply falls back to LIST.  length may be None/unknown.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Control connection may have dropped; reconnect once, retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file" -- fall through to LIST.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the original directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # The hook decrements refcount when the caller closes the file.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        # Mark the data channel idle.
        self.busy = 0

    def close(self):
        # Disable keepalive; actually close only if no files are still out.
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Called when a handed-out file object is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        # Unconditionally tear down the control connection.
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
---|
2460 | n/a | |
---|
2461 | n/a | # Proxy handling |
---|
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.
    """
    proxies = {}
    # Pass 1: accept any capitalisation of <scheme>_proxy.
    for raw_name, value in os.environ.items():
        lowered = raw_name.lower()
        if value and lowered.endswith('_proxy'):
            proxies[lowered[:-6]] = value
    # CVE-2016-1000110: when running as a CGI script, HTTP_PROXY may have
    # been injected by the client via a "Proxy:" request header, so drop
    # the 'http' entry gathered above.  A lowercase http_proxy variable is
    # re-applied by the second pass.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Pass 2: variables whose name ends in lowercase '_proxy' win over the
    # first pass; an empty value removes the scheme entirely.
    for raw_name, value in os.environ.items():
        if raw_name.endswith('_proxy'):
            scheme = raw_name.lower()[:-6]
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
---|
2492 | n/a | |
---|
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Checks the proxy dict for the value of no_proxy, which should
    be a list of comma separated DNS suffixes, or '*' for all hosts.
    """
    if proxies is None:
        proxies = getproxies_environment()
    try:
        no_proxy = proxies['no']
    except KeyError:
        # don't bypass, if no_proxy isn't specified
        return 0
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # A suffix matches when the host equals it or ends with '.' + suffix,
    # case-insensitively, with or without the port.
    for entry in no_proxy.split(','):
        suffix = entry.strip().lstrip('.')  # ignore leading dots
        if not suffix:
            continue
        pattern = r'(.+\.)?%s$' % re.escape(suffix)
        if (re.match(pattern, hostonly, re.I)
                or re.match(pattern, host, re.I)):
            return 1
    # otherwise, don't bypass
    return 0
---|
2524 | n/a | |
---|
2525 | n/a | |
---|
# This code tests an OSX specific data structure but is testable on all
# platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }
    """
    from fnmatch import fnmatch

    hostonly, port = splitport(host)

    def ip2num(ipAddr):
        # Pack a (possibly partial) dotted quad into a 32-bit integer,
        # padding missing trailing components with zeros.
        parts = ipAddr.split('.')
        parts = list(map(int, parts))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    # Resolved lazily, only if a numeric exception entry is encountered.
    hostIP = None

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value: continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            # Numeric entry, optionally with a /prefix-length mask.
            if hostIP is None:
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except OSError:
                    # Unresolvable host can't match a numeric entry.
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit mask: cover exactly the components given
                # (e.g. "10.1" acts like 10.1.0.0/16).
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])
            # Convert prefix length to a right-shift count and compare
            # the network portions.
            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            # Wildcard hostname entry (e.g. "*.local").
            return True

    return False
---|
2586 | n/a | |
---|
2587 | n/a | |
---|
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        # Fetch the live SystemConfiguration proxy settings and evaluate
        # them with the platform-independent helper above.
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        # Environment variables take precedence over system configuration.
        return getproxies_environment() or getproxies_macosx_sysconf()
---|
2620 | n/a | |
---|
2621 | n/a | |
---|
elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()
---|
2676 | n/a | |
---|
def proxy_bypass_registry(host):
    """Return 1 if *host* matches the Windows registry ProxyOverride
    list (i.e. the proxy should be bypassed for it), else 0.

    Reads HKCU\\...\\Internet Settings: ProxyEnable and ProxyOverride.
    Returns 0 on any failure to read the registry or when proxying is
    disabled/no override list is configured.
    """
    try:
        import winreg
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        proxyEnable = winreg.QueryValueEx(internetSettings,
                                          'ProxyEnable')[0]
        proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0])
        # ^^^^ Returned as Unicode but problems if not converted to ASCII
    except OSError:
        # Key or value missing: treat as "no bypass configured".
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost, port = splitport(host)
    host = [rawHost]
    try:
        # Also match on the resolved IP address, if different.
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            host.append(addr)
    except OSError:
        pass
    try:
        # And on the fully-qualified name, if different.
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            host.append(fqdn)
    except OSError:
        pass
    # make a check value list from the registry entry: replace the
    # '<local>' string by the localhost entry and the corresponding
    # canonical entry.
    proxyOverride = proxyOverride.split(';')
    # now check if we match one of the registry values.
    for test in proxyOverride:
        if test == '<local>':
            # '<local>' means bypass for plain (dot-less) host names.
            if '.' not in rawHost:
                return 1
        # Translate the IE glob pattern into a regular expression.
        test = test.replace(".", r"\.")     # mask dots
        test = test.replace("*", r".*")     # change glob sequence
        test = test.replace("?", r".")      # change glob char
        for val in host:
            # NOTE(review): re.match anchors only at the start; a
            # trailing-anchor is intentionally absent here (matches
            # IE's prefix-style behavior) -- confirm before changing.
            if re.match(test, val, re.I):
                return 1
    return 0
2726 | n/a | |
---|
def proxy_bypass(host):
    """Return True, if host should be bypassed.

    Checks proxy settings gathered from the environment, if specified,
    or the registry.

    """
    # Environment settings, when present, take precedence over the
    # Windows registry -- mirroring getproxies().
    environment_proxies = getproxies_environment()
    if not environment_proxies:
        return proxy_bypass_registry(host)
    return proxy_bypass_environment(host, environment_proxies)
2739 | n/a | |
---|
else:
    # By default use environment variables
    # (non-Windows platforms have no registry to consult, so the
    # environment-based implementations serve directly).
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment