1 | n/a | """An extensible library for opening URLs using a variety of protocols |
---|
2 | n/a | |
---|
3 | n/a | The simplest way to use this module is to call the urlopen function, |
---|
4 | n/a | which accepts a string containing a URL or a Request object (described |
---|
5 | n/a | below). It opens the URL and returns the results as file-like |
---|
6 | n/a | object; the returned object has some extra methods described below. |
---|
7 | n/a | |
---|
8 | n/a | The OpenerDirector manages a collection of Handler objects that do |
---|
9 | n/a | all the actual work. Each Handler implements a particular protocol or |
---|
10 | n/a | option. The OpenerDirector is a composite object that invokes the |
---|
11 | n/a | Handlers needed to open the requested URL. For example, the |
---|
12 | n/a | HTTPHandler performs HTTP GET and POST requests and deals with |
---|
13 | n/a | non-error returns. The HTTPRedirectHandler automatically deals with |
---|
14 | n/a | HTTP 301, 302, 303 and 307 redirect errors, and the HTTPDigestAuthHandler |
---|
15 | n/a | deals with digest authentication. |
---|
16 | n/a | |
---|
17 | n/a | urlopen(url, data=None) -- Basic usage is the same as original |
---|
18 | n/a | urllib. pass the url and optionally data to post to an HTTP URL, and |
---|
19 | n/a | get a file-like object back. One difference is that you can also pass |
---|
20 | n/a | a Request instance instead of URL. Raises a URLError (subclass of |
---|
21 | n/a | OSError); for HTTP errors, raises an HTTPError, which can also be |
---|
22 | n/a | treated as a valid response. |
---|
23 | n/a | |
---|
24 | n/a | build_opener -- Function that creates a new OpenerDirector instance. |
---|
25 | n/a | Will install the default handlers. Accepts one or more Handlers as |
---|
26 | n/a | arguments, either instances or Handler classes that it will |
---|
27 | n/a | instantiate. If one of the arguments is a subclass of the default |
---|
28 | n/a | handler, the argument will be installed instead of the default. |
---|
29 | n/a | |
---|
30 | n/a | install_opener -- Installs a new opener as the default opener. |
---|
31 | n/a | |
---|
32 | n/a | objects of interest: |
---|
33 | n/a | |
---|
34 | n/a | OpenerDirector -- Sets up the User Agent as the Python-urllib client and manages |
---|
35 | n/a | the Handler classes, while dealing with requests and responses. |
---|
36 | n/a | |
---|
37 | n/a | Request -- An object that encapsulates the state of a request. The |
---|
38 | n/a | state can be as simple as the URL. It can also include extra HTTP |
---|
39 | n/a | headers, e.g. a User-Agent. |
---|
40 | n/a | |
---|
41 | n/a | BaseHandler -- |
---|
42 | n/a | |
---|
43 | n/a | internals: |
---|
44 | n/a | BaseHandler and parent |
---|
45 | n/a | _call_chain conventions |
---|
46 | n/a | |
---|
47 | n/a | Example usage: |
---|
48 | n/a | |
---|
49 | n/a | import urllib.request |
---|
50 | n/a | |
---|
51 | n/a | # set up authentication info |
---|
52 | n/a | authinfo = urllib.request.HTTPBasicAuthHandler() |
---|
53 | n/a | authinfo.add_password(realm='PDQ Application', |
---|
54 | n/a | uri='https://mahler:8092/site-updates.py', |
---|
55 | n/a | user='klem', |
---|
56 | n/a | passwd='geheim$parole') |
---|
57 | n/a | |
---|
58 | n/a | proxy_support = urllib.request.ProxyHandler({"http" : "http://ahad-haam:3128"}) |
---|
59 | n/a | |
---|
60 | n/a | # build a new opener that adds authentication and caching FTP handlers |
---|
61 | n/a | opener = urllib.request.build_opener(proxy_support, authinfo, |
---|
62 | n/a | urllib.request.CacheFTPHandler) |
---|
63 | n/a | |
---|
64 | n/a | # install it |
---|
65 | n/a | urllib.request.install_opener(opener) |
---|
66 | n/a | |
---|
67 | n/a | f = urllib.request.urlopen('http://www.python.org/') |
---|
68 | n/a | """ |
---|
69 | n/a | |
---|
70 | n/a | # XXX issues: |
---|
71 | n/a | # If an authentication error handler that tries to perform |
---|
72 | n/a | # authentication for some reason but fails, how should the error be |
---|
73 | n/a | # signalled? The client needs to know the HTTP error code. But if |
---|
74 | n/a | # the handler knows that the problem was, e.g., that it didn't know |
---|
75 | n/a | # the hash algorithm requested in the challenge, it would be good to |
---|
76 | n/a | # pass that information along to the client, too. |
---|
77 | n/a | # ftp errors aren't handled cleanly |
---|
78 | n/a | # check digest against correct (i.e. non-apache) implementation |
---|
79 | n/a | |
---|
80 | n/a | # Possible extensions: |
---|
81 | n/a | # complex proxies XXX not sure what exactly was meant by this |
---|
82 | n/a | # abstract factory for opener |
---|
83 | n/a | |
---|
84 | n/a | import base64 |
---|
85 | n/a | import bisect |
---|
86 | n/a | import email |
---|
87 | n/a | import hashlib |
---|
88 | n/a | import http.client |
---|
89 | n/a | import io |
---|
90 | n/a | import os |
---|
91 | n/a | import posixpath |
---|
92 | n/a | import re |
---|
93 | n/a | import socket |
---|
94 | n/a | import string |
---|
95 | n/a | import sys |
---|
96 | n/a | import time |
---|
97 | n/a | import tempfile |
---|
98 | n/a | import contextlib |
---|
99 | n/a | import warnings |
---|
100 | n/a | |
---|
101 | n/a | |
---|
102 | n/a | from urllib.error import URLError, HTTPError, ContentTooShortError |
---|
103 | n/a | from urllib.parse import ( |
---|
104 | n/a | urlparse, urlsplit, urljoin, unwrap, quote, unquote, |
---|
105 | n/a | splittype, splithost, splitport, splituser, splitpasswd, |
---|
106 | n/a | splitattr, splitquery, splitvalue, splittag, to_bytes, |
---|
107 | n/a | unquote_to_bytes, urlunparse) |
---|
108 | n/a | from urllib.response import addinfourl, addclosehook |
---|
109 | n/a | |
---|
# check for SSL; when the interpreter was built without it, HTTPS-specific
# code paths below guard on _have_ssl instead of failing at import time
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
---|
117 | n/a | |
---|
# Names exported by "from urllib.request import *"; several entries
# (auth handlers, URLopener, ...) are defined later in this module.
__all__ = [
    # Classes
    'Request', 'OpenerDirector', 'BaseHandler', 'HTTPDefaultErrorHandler',
    'HTTPRedirectHandler', 'HTTPCookieProcessor', 'ProxyHandler',
    'HTTPPasswordMgr', 'HTTPPasswordMgrWithDefaultRealm',
    'HTTPPasswordMgrWithPriorAuth', 'AbstractBasicAuthHandler',
    'HTTPBasicAuthHandler', 'ProxyBasicAuthHandler', 'AbstractDigestAuthHandler',
    'HTTPDigestAuthHandler', 'ProxyDigestAuthHandler', 'HTTPHandler',
    'FileHandler', 'FTPHandler', 'CacheFTPHandler', 'DataHandler',
    'UnknownHandler', 'HTTPErrorProcessor',
    # Functions
    'urlopen', 'install_opener', 'build_opener',
    'pathname2url', 'url2pathname', 'getproxies',
    # Legacy interface
    'urlretrieve', 'urlcleanup', 'URLopener', 'FancyURLopener',
]
---|
134 | n/a | |
---|
# used in User-Agent header sent
__version__ = '%d.%d' % sys.version_info[:2]

# module-wide default opener: created lazily by urlopen(), replaced by
# install_opener(), discarded by urlcleanup()
_opener = None
---|
def urlopen(url, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            *, cafile=None, capath=None, cadefault=False, context=None):
    '''Open the URL url, which can be either a string or a Request object.

    *data* must be an object specifying additional data to be sent to
    the server, or None if no such data is needed.  See Request for
    details.

    urllib.request module uses HTTP/1.1 and includes a "Connection:close"
    header in its HTTP requests.

    The optional *timeout* parameter specifies a timeout in seconds for
    blocking operations like the connection attempt (if not specified, the
    global default timeout setting will be used).  This only works for HTTP,
    HTTPS and FTP connections.

    If *context* is specified, it must be a ssl.SSLContext instance describing
    the various SSL options.  See HTTPSConnection for more details.

    The optional *cafile* and *capath* parameters specify a set of trusted CA
    certificates for HTTPS requests.  cafile should point to a single file
    containing a bundle of CA certificates, whereas capath should point to a
    directory of hashed certificate files.  More information can be found in
    ssl.SSLContext.load_verify_locations().

    The *cadefault* parameter is ignored.

    This function always returns an object which can work as a context
    manager and has methods such as

    * geturl() - return the URL of the resource retrieved, commonly used to
      determine if a redirect was followed

    * info() - return the meta-information of the page, such as headers, in the
      form of an email.message_from_string() instance (see Quick Reference to
      HTTP Headers)

    * getcode() - return the HTTP status code of the response.  Raises URLError
      on errors.

    For HTTP and HTTPS URLs, this function returns a http.client.HTTPResponse
    object slightly modified.  In addition to the three new methods above, the
    msg attribute contains the same information as the reason attribute ---
    the reason phrase returned by the server --- instead of the response
    headers as it is specified in the documentation for HTTPResponse.

    For FTP, file, and data URLs and requests explicitly handled by legacy
    URLopener and FancyURLopener classes, this function returns a
    urllib.response.addinfourl object.

    Note that None may be returned if no handler handles the request (though
    the default installed global OpenerDirector uses UnknownHandler to ensure
    this never happens).

    In addition, if proxy settings are detected (for example, when a *_proxy
    environment variable like http_proxy is set), ProxyHandler is default
    installed and makes sure the requests are handled through the proxy.

    '''
    global _opener
    if cafile or capath or cadefault:
        # warnings is already imported at module level; the message names
        # the actual parameter "capath" (was misspelled "cpath").
        warnings.warn("cafile, capath and cadefault are deprecated, use a "
                      "custom context instead.", DeprecationWarning, 2)
        if context is not None:
            raise ValueError(
                "You can't pass both context and any of cafile, capath, and "
                "cadefault"
            )
        if not _have_ssl:
            raise ValueError('SSL support not available')
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH,
                                             cafile=cafile,
                                             capath=capath)
        # a one-off opener is built; the module default _opener is untouched
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif context:
        https_handler = HTTPSHandler(context=context)
        opener = build_opener(https_handler)
    elif _opener is None:
        # lazily create and cache the module-wide default opener
        _opener = opener = build_opener()
    else:
        opener = _opener
    return opener.open(url, data, timeout)
---|
223 | n/a | |
---|
def install_opener(opener):
    # Make *opener* the module-wide default used by subsequent urlopen()
    # calls (when they are not given an explicit context/cafile/capath).
    global _opener
    _opener = opener
---|
227 | n/a | |
---|
# Paths of the NamedTemporaryFiles created below; deleted by urlcleanup().
_url_tempfiles = []
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """
    Retrieve a URL into a temporary location on disk.

    Requires a URL argument. If a filename is passed, it is used as
    the temporary file location. The reporthook argument should be
    a callable that accepts a block number, a read size, and the
    total file size of the URL target. The data argument should be
    valid URL encoded data.

    If a filename is passed and the URL points to a local resource,
    the result is a copy from local file to new file.

    Returns a tuple containing the path to the newly created
    data file as well as the resulting HTTPMessage object.
    """
    url_type, path = splittype(url)

    with contextlib.closing(urlopen(url, data)) as fp:
        headers = fp.info()

        # Just return the local path and the "headers" for file://
        # URLs. No sense in performing a copy unless requested.
        if url_type == "file" and not filename:
            return os.path.normpath(path), headers

        # Handle temporary file setup.
        if filename:
            tfp = open(filename, 'wb')
        else:
            # delete=False: the caller gets the path back; cleanup is
            # deferred to urlcleanup() via _url_tempfiles
            tfp = tempfile.NamedTemporaryFile(delete=False)
            filename = tfp.name
            _url_tempfiles.append(filename)

        with tfp:
            result = filename, headers
            bs = 1024*8
            size = -1
            read = 0
            blocknum = 0
            # NOTE(review): mixed-case lookup assumes the message object
            # returned by fp.info() is case-insensitive — confirm.
            if "content-length" in headers:
                size = int(headers["Content-Length"])

            # reporthook is given the nominal block size *bs*, not the
            # number of bytes actually read in the last block
            if reporthook:
                reporthook(blocknum, bs, size)

            while True:
                block = fp.read(bs)
                if not block:
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)

    # a short read is an error: report how much arrived via the exception,
    # which also carries the (filename, headers) result
    if size >= 0 and read < size:
        raise ContentTooShortError(
            "retrieval incomplete: got only %i out of %i bytes"
            % (read, size), result)

    return result
---|
291 | n/a | |
---|
def urlcleanup():
    """Remove temporary files left behind by urlretrieve() calls and
    discard the cached default opener."""
    global _opener
    for path in _url_tempfiles:
        # best-effort: a file may already be gone or unremovable
        with contextlib.suppress(OSError):
            os.unlink(path)
    _url_tempfiles.clear()

    if _opener:
        _opener = None
---|
304 | n/a | |
---|
# copied from cookielib.py
_cut_port_re = re.compile(r":\d+$", re.ASCII)
def request_host(request):
    """Return request-host, as defined by RFC 2965.

    Variation from RFC: returned value is lowercased, for convenient
    comparison.

    """
    host = urlparse(request.full_url)[1]
    if not host:
        # no netloc in the URL itself; fall back to the Host header
        host = request.get_header("Host", "")

    # strip an explicit :port suffix, if present
    host = _cut_port_re.sub("", host, 1)
    return host.lower()
---|
322 | n/a | |
---|
class Request:
    """Encapsulate the state of a single URL request.

    At minimum this is the URL itself; it may also carry POST data,
    extra HTTP headers, and redirect/proxy bookkeeping attributes.
    """

    def __init__(self, url, data=None, headers={},
                 origin_req_host=None, unverifiable=False,
                 method=None):
        # NOTE(review): the mutable default for *headers* is safe here
        # because the dict is only iterated below, never stored or mutated.
        self.full_url = url          # property setter: unwraps, splits fragment, parses
        self.headers = {}
        self.unredirected_hdrs = {}
        self._data = None
        self.data = data             # property setter: may drop Content-length
        self._tunnel_host = None
        for key, value in headers.items():
            self.add_header(key, value)
        if origin_req_host is None:
            origin_req_host = request_host(self)
        self.origin_req_host = origin_req_host
        self.unverifiable = unverifiable
        if method:
            # set only when given, so get_method() can infer from *data*
            self.method = method

    @property
    def full_url(self):
        # re-attach the fragment that the setter split off
        if self.fragment:
            return '{}#{}'.format(self._full_url, self.fragment)
        return self._full_url

    @full_url.setter
    def full_url(self, url):
        # unwrap('<URL:type://host/path>') --> 'type://host/path'
        self._full_url = unwrap(url)
        self._full_url, self.fragment = splittag(self._full_url)
        self._parse()

    @full_url.deleter
    def full_url(self):
        self._full_url = None
        self.fragment = None
        self.selector = ''

    @property
    def data(self):
        return self._data

    @data.setter
    def data(self, data):
        if data != self._data:
            self._data = data
            # issue 16464
            # if we change data we need to remove content-length header
            # (cause it's most probably calculated for previous value)
            if self.has_header("Content-length"):
                self.remove_header("Content-length")

    @data.deleter
    def data(self):
        # route through the setter so Content-length is cleared too
        self.data = None

    def _parse(self):
        # populate self.type (scheme), self.host and self.selector from
        # the unfragmented URL; raises ValueError when no scheme is found
        self.type, rest = splittype(self._full_url)
        if self.type is None:
            raise ValueError("unknown url type: %r" % self.full_url)
        self.host, self.selector = splithost(rest)
        if self.host:
            self.host = unquote(self.host)

    def get_method(self):
        """Return a string indicating the HTTP request method."""
        default_method = "POST" if self.data is not None else "GET"
        return getattr(self, 'method', default_method)

    def get_full_url(self):
        return self.full_url

    def set_proxy(self, host, type):
        # for an https request, the first call records the real host for
        # tunnelling and leaves type/selector unchanged; otherwise the
        # request is rewritten to address the proxy directly
        if self.type == 'https' and not self._tunnel_host:
            self._tunnel_host = self.host
        else:
            self.type= type
            self.selector = self.full_url
        self.host = host

    def has_proxy(self):
        # True once set_proxy() has rewritten the selector to the full URL
        return self.selector == self.full_url

    def add_header(self, key, val):
        # useful for something like authentication
        self.headers[key.capitalize()] = val

    def add_unredirected_header(self, key, val):
        # will not be added to a redirected request
        self.unredirected_hdrs[key.capitalize()] = val

    def has_header(self, header_name):
        return (header_name in self.headers or
                header_name in self.unredirected_hdrs)

    def get_header(self, header_name, default=None):
        return self.headers.get(
            header_name,
            self.unredirected_hdrs.get(header_name, default))

    def remove_header(self, header_name):
        self.headers.pop(header_name, None)
        self.unredirected_hdrs.pop(header_name, None)

    def header_items(self):
        # regular headers take precedence over unredirected ones
        hdrs = self.unredirected_hdrs.copy()
        hdrs.update(self.headers)
        return list(hdrs.items())
---|
432 | n/a | |
---|
class OpenerDirector:
    """Manage a chain of handlers and use them to open URLs.

    Handlers register their capabilities by method-name convention (see
    add_handler); open() then routes each request through the matching
    <protocol>_request / <protocol>_open / <protocol>_response methods.
    """

    def __init__(self):
        client_version = "Python-urllib/%s" % __version__
        self.addheaders = [('User-agent', client_version)]
        # self.handlers is retained only for backward compatibility
        self.handlers = []
        # manage the individual handlers
        self.handle_open = {}        # protocol -> [handlers with <proto>_open]
        self.handle_error = {}       # protocol -> {code -> [handlers]}
        self.process_response = {}   # protocol -> [handlers with <proto>_response]
        self.process_request = {}    # protocol -> [handlers with <proto>_request]

    def add_handler(self, handler):
        """Register *handler*'s protocol methods in the dispatch tables."""
        if not hasattr(handler, "add_parent"):
            raise TypeError("expected BaseHandler instance, got %r" %
                            type(handler))

        added = False
        for meth in dir(handler):
            if meth in ["redirect_request", "do_open", "proxy_open"]:
                # oops, coincidental match
                continue

            # split "<protocol>_<condition>" at the first underscore, e.g.
            # "http_open" -> ("http", "open"),
            # "http_error_404" -> ("http", "error_404")
            i = meth.find("_")
            protocol = meth[:i]
            condition = meth[i+1:]

            if condition.startswith("error"):
                # "<proto>_error_<kind>": kind becomes an int status code
                # when possible, otherwise stays a string (e.g. "default")
                j = condition.find("_") + i + 1
                kind = meth[j+1:]
                try:
                    kind = int(kind)
                except ValueError:
                    pass
                lookup = self.handle_error.get(protocol, {})
                self.handle_error[protocol] = lookup
            elif condition == "open":
                kind = protocol
                lookup = self.handle_open
            elif condition == "response":
                kind = protocol
                lookup = self.process_response
            elif condition == "request":
                kind = protocol
                lookup = self.process_request
            else:
                continue

            # keep each list sorted by handler_order (BaseHandler.__lt__)
            handlers = lookup.setdefault(kind, [])
            if handlers:
                bisect.insort(handlers, handler)
            else:
                handlers.append(handler)
            added = True

        if added:
            bisect.insort(self.handlers, handler)
            handler.add_parent(self)

    def close(self):
        # Only exists for backwards compatibility.
        pass

    def _call_chain(self, chain, kind, meth_name, *args):
        # Handlers raise an exception if no one else should try to handle
        # the request, or return None if they can't but another handler
        # could.  Otherwise, they return the response.
        handlers = chain.get(kind, ())
        for handler in handlers:
            func = getattr(handler, meth_name)
            result = func(*args)
            if result is not None:
                return result

    def open(self, fullurl, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Open *fullurl*, running request/response processors around _open()."""
        # accept a URL or a Request object
        if isinstance(fullurl, str):
            req = Request(fullurl, data)
        else:
            req = fullurl
            if data is not None:
                req.data = data

        req.timeout = timeout
        protocol = req.type

        # pre-process request
        meth_name = protocol+"_request"
        for processor in self.process_request.get(protocol, []):
            meth = getattr(processor, meth_name)
            req = meth(req)

        response = self._open(req, data)

        # post-process response
        meth_name = protocol+"_response"
        for processor in self.process_response.get(protocol, []):
            meth = getattr(processor, meth_name)
            response = meth(req, response)

        return response

    def _open(self, req, data=None):
        # try default_open handlers, then protocol-specific ones, and
        # finally the unknown_open fallback
        result = self._call_chain(self.handle_open, 'default',
                                  'default_open', req)
        if result:
            return result

        protocol = req.type
        result = self._call_chain(self.handle_open, protocol, protocol +
                                  '_open', req)
        if result:
            return result

        return self._call_chain(self.handle_open, 'unknown',
                                'unknown_open', req)

    def error(self, proto, *args):
        """Dispatch an error to the registered <proto>_error_* handlers."""
        if proto in ('http', 'https'):
            # XXX http[s] protocols are special-cased
            dict = self.handle_error['http'] # https is not different than http
            proto = args[2]  # YUCK!
            meth_name = 'http_error_%s' % proto
            http_err = 1
            orig_args = args
        else:
            dict = self.handle_error
            meth_name = proto + '_error'
            http_err = 0
        args = (dict, proto, meth_name) + args
        result = self._call_chain(*args)
        if result:
            return result

        if http_err:
            # fall back to the catch-all http_error_default handlers
            args = (dict, 'default', 'http_error_default') + orig_args
            return self._call_chain(*args)
---|
570 | n/a | |
---|
571 | n/a | # XXX probably also want an abstract factory that knows when it makes |
---|
572 | n/a | # sense to skip a superclass in favor of a subclass and when it might |
---|
573 | n/a | # make sense to include both |
---|
574 | n/a | |
---|
def build_opener(*handlers):
    """Create an opener object from a list of handlers.

    The opener will use several default handlers, including support
    for HTTP, FTP and when applicable HTTPS.

    If any of the handlers passed as arguments are subclasses of the
    default handlers, the default handlers will not be used.
    """
    opener = OpenerDirector()
    default_classes = [ProxyHandler, UnknownHandler, HTTPHandler,
                       HTTPDefaultErrorHandler, HTTPRedirectHandler,
                       FTPHandler, FileHandler, HTTPErrorProcessor,
                       DataHandler]
    if hasattr(http.client, "HTTPSConnection"):
        default_classes.append(HTTPSHandler)

    def _supersedes(check, klass):
        # a user handler replaces default *klass* when it is that class,
        # a subclass of it, or an instance of it
        if isinstance(check, type):
            return issubclass(check, klass)
        return isinstance(check, klass)

    superseded = {klass for klass in default_classes
                  if any(_supersedes(check, klass) for check in handlers)}
    for klass in superseded:
        default_classes.remove(klass)

    # defaults first, then the caller-supplied handlers (classes are
    # instantiated on the fly)
    for klass in default_classes:
        opener.add_handler(klass())

    for handler in handlers:
        if isinstance(handler, type):
            handler = handler()
        opener.add_handler(handler)
    return opener
---|
610 | n/a | |
---|
class BaseHandler:
    """Common base for protocol handlers managed by OpenerDirector."""

    # position in the handler chain; lower values run earlier
    handler_order = 500

    def add_parent(self, parent):
        """Record the OpenerDirector this handler belongs to."""
        self.parent = parent

    def close(self):
        """No-op; only exists for backwards compatibility."""
        pass

    def __lt__(self, other):
        """Order handlers by handler_order for bisect.insort."""
        try:
            other_order = other.handler_order
        except AttributeError:
            # Try to preserve the old behavior of having custom classes
            # inserted after default ones (works only for custom user
            # classes which are not aware of handler_order).
            return True
        return self.handler_order < other_order
---|
628 | n/a | |
---|
629 | n/a | |
---|
class HTTPErrorProcessor(BaseHandler):
    """Process HTTP error responses."""
    handler_order = 1000  # after all other processing

    def http_response(self, request, response):
        code, msg, hdrs = response.code, response.msg, response.info()

        # 2xx means the request was successfully received, understood,
        # and accepted (RFC 2616); anything else is routed through the
        # opener's error chain, which may substitute a new response.
        if 200 <= code < 300:
            return response
        return self.parent.error('http', request, response, code, msg, hdrs)

    https_response = http_response
---|
646 | n/a | |
---|
class HTTPDefaultErrorHandler(BaseHandler):
    """Fallback handler: raise HTTPError for any unhandled HTTP error."""

    def http_error_default(self, req, fp, code, msg, hdrs):
        # HTTPError can also be treated as a valid response (see the
        # module docstring), so callers may catch it and read *fp*.
        raise HTTPError(req.full_url, code, msg, hdrs, fp)
---|
650 | n/a | |
---|
class HTTPRedirectHandler(BaseHandler):
    """Follow HTTP 301, 302, 303 and 307 redirect responses."""
    # maximum number of redirections to any single URL
    # this is needed because of the state that cookies introduce
    max_repeats = 4
    # maximum total number of redirections (regardless of URL) before
    # assuming we're in a loop
    max_redirections = 10
---|
658 | n/a | |
---|
    def redirect_request(self, req, fp, code, msg, headers, newurl):
        """Return a Request or None in response to a redirect.

        This is called by the http_error_30x methods when a
        redirection response is received.  If a redirection should
        take place, return a new Request to allow http_error_30x to
        perform the redirect.  Otherwise, raise HTTPError if no-one
        else should try to handle this url.  Return None if you can't
        but another Handler might.
        """
        m = req.get_method()
        # only GET/HEAD on 301/302/303/307 and POST on 301/302/303 are
        # auto-redirected; everything else (e.g. 307 + POST) raises
        if (not (code in (301, 302, 303, 307) and m in ("GET", "HEAD")
            or code in (301, 302, 303) and m == "POST")):
            raise HTTPError(req.full_url, code, msg, headers, fp)

        # Strictly (according to RFC 2616), 301 or 302 in response to
        # a POST MUST NOT cause a redirection without confirmation
        # from the user (of urllib.request, in this case).  In practice,
        # essentially all clients do redirect in this case, so we do
        # the same.

        # Be conciliant with URIs containing a space.  This is mainly
        # redundant with the more complete encoding done in http_error_302(),
        # but it is kept for compatibility with other callers.
        newurl = newurl.replace(' ', '%20')

        # the new request carries no body (req.data is not copied), so the
        # body-describing headers are dropped as well
        CONTENT_HEADERS = ("content-length", "content-type")
        newheaders = dict((k, v) for k, v in req.headers.items()
                          if k.lower() not in CONTENT_HEADERS)
        return Request(newurl,
                       headers=newheaders,
                       origin_req_host=req.origin_req_host,
                       unverifiable=True)
---|
693 | n/a | # Implementation note: To avoid the server sending us into an |
---|
694 | n/a | # infinite loop, the request object needs to track what URLs we |
---|
695 | n/a | # have already seen. Do this by adding a handler-specific |
---|
696 | n/a | # attribute to the Request object. |
---|
697 | n/a | def http_error_302(self, req, fp, code, msg, headers): |
---|
698 | n/a | # Some servers (incorrectly) return multiple Location headers |
---|
699 | n/a | # (so probably same goes for URI). Use first header. |
---|
700 | n/a | if "location" in headers: |
---|
701 | n/a | newurl = headers["location"] |
---|
702 | n/a | elif "uri" in headers: |
---|
703 | n/a | newurl = headers["uri"] |
---|
704 | n/a | else: |
---|
705 | n/a | return |
---|
706 | n/a | |
---|
707 | n/a | # fix a possible malformed URL |
---|
708 | n/a | urlparts = urlparse(newurl) |
---|
709 | n/a | |
---|
710 | n/a | # For security reasons we don't allow redirection to anything other |
---|
711 | n/a | # than http, https or ftp. |
---|
712 | n/a | |
---|
713 | n/a | if urlparts.scheme not in ('http', 'https', 'ftp', ''): |
---|
714 | n/a | raise HTTPError( |
---|
715 | n/a | newurl, code, |
---|
716 | n/a | "%s - Redirection to url '%s' is not allowed" % (msg, newurl), |
---|
717 | n/a | headers, fp) |
---|
718 | n/a | |
---|
719 | n/a | if not urlparts.path and urlparts.netloc: |
---|
720 | n/a | urlparts = list(urlparts) |
---|
721 | n/a | urlparts[2] = "/" |
---|
722 | n/a | newurl = urlunparse(urlparts) |
---|
723 | n/a | |
---|
724 | n/a | # http.client.parse_headers() decodes as ISO-8859-1. Recover the |
---|
725 | n/a | # original bytes and percent-encode non-ASCII bytes, and any special |
---|
726 | n/a | # characters such as the space. |
---|
727 | n/a | newurl = quote( |
---|
728 | n/a | newurl, encoding="iso-8859-1", safe=string.punctuation) |
---|
729 | n/a | newurl = urljoin(req.full_url, newurl) |
---|
730 | n/a | |
---|
731 | n/a | # XXX Probably want to forget about the state of the current |
---|
732 | n/a | # request, although that might interact poorly with other |
---|
733 | n/a | # handlers that also use handler-specific request attributes |
---|
734 | n/a | new = self.redirect_request(req, fp, code, msg, headers, newurl) |
---|
735 | n/a | if new is None: |
---|
736 | n/a | return |
---|
737 | n/a | |
---|
738 | n/a | # loop detection |
---|
739 | n/a | # .redirect_dict has a key url if url was previously visited. |
---|
740 | n/a | if hasattr(req, 'redirect_dict'): |
---|
741 | n/a | visited = new.redirect_dict = req.redirect_dict |
---|
742 | n/a | if (visited.get(newurl, 0) >= self.max_repeats or |
---|
743 | n/a | len(visited) >= self.max_redirections): |
---|
744 | n/a | raise HTTPError(req.full_url, code, |
---|
745 | n/a | self.inf_msg + msg, headers, fp) |
---|
746 | n/a | else: |
---|
747 | n/a | visited = new.redirect_dict = req.redirect_dict = {} |
---|
748 | n/a | visited[newurl] = visited.get(newurl, 0) + 1 |
---|
749 | n/a | |
---|
750 | n/a | # Don't close the fp until we are sure that we won't use it |
---|
751 | n/a | # with HTTPError. |
---|
752 | n/a | fp.read() |
---|
753 | n/a | fp.close() |
---|
754 | n/a | |
---|
755 | n/a | return self.parent.open(new, timeout=req.timeout) |
---|
756 | n/a | |
---|
757 | n/a | http_error_301 = http_error_303 = http_error_307 = http_error_302 |
---|
758 | n/a | |
---|
759 | n/a | inf_msg = "The HTTP server returned a redirect error that would " \ |
---|
760 | n/a | "lead to an infinite loop.\n" \ |
---|
761 | n/a | "The last 30x error message was:\n" |
---|
762 | n/a | |
---|
763 | n/a | |
---|
def _parse_proxy(proxy):
    """Return (scheme, user, password, host/port) given a URL or an authority.

    If a URL is supplied, it must have an authority (host:port) component.
    According to RFC 3986, having an authority component means the URL must
    have two slashes after the scheme.
    """
    scheme, rest = splittype(proxy)
    if rest.startswith("/"):
        # A full URL: require the RFC 3986 "//" authority marker.
        if not rest.startswith("//"):
            raise ValueError("proxy URL with no authority: %r" % proxy)
        # Per RFC 3986 ss 3. and 3.3., the path is empty or begins with
        # '/', so the authority ends at the first '/' after the slashes.
        slash = rest.find("/", 2)
        authority = rest[2:] if slash == -1 else rest[2:slash]
    else:
        # A bare authority: no scheme applies.
        scheme = None
        authority = proxy
    userinfo, hostport = splituser(authority)
    if userinfo is None:
        user = password = None
    else:
        user, password = splitpasswd(userinfo)
    return scheme, user, password, hostport
---|
792 | n/a | |
---|
class ProxyHandler(BaseHandler):
    """Route requests through proxies given as a {scheme: proxy-url} map."""

    # Proxies must be in front
    handler_order = 100

    def __init__(self, proxies=None):
        if proxies is None:
            # Fall back to the platform's proxy configuration
            # (environment variables / system settings via getproxies()).
            proxies = getproxies()
        assert hasattr(proxies, 'keys'), "proxies must be a mapping"
        self.proxies = proxies
        # Create one <scheme>_open method per configured scheme so the
        # OpenerDirector dispatches those schemes through proxy_open.
        # The lambda's default arguments bind the *current* url/type,
        # avoiding the late-binding closure pitfall.
        for type, url in proxies.items():
            setattr(self, '%s_open' % type,
                    lambda r, proxy=url, type=type, meth=self.proxy_open:
                        meth(r, proxy, type))

    def proxy_open(self, req, proxy, type):
        """Rewrite *req* to go through *proxy*.

        Returns None to let downstream handlers process the (now
        proxied) request, or a response when the request is re-opened
        under the proxy's own scheme.
        """
        orig_type = req.type
        proxy_type, user, password, hostport = _parse_proxy(proxy)
        if proxy_type is None:
            # Proxy given as a bare authority: assume the request's scheme.
            proxy_type = orig_type

        if req.host and proxy_bypass(req.host):
            # Host matches the platform's bypass list: no proxying.
            return None

        if user and password:
            # Credentials embedded in the proxy URL become a
            # Proxy-Authorization: Basic header.
            user_pass = '%s:%s' % (unquote(user),
                                   unquote(password))
            creds = base64.b64encode(user_pass.encode()).decode("ascii")
            req.add_header('Proxy-authorization', 'Basic ' + creds)
        hostport = unquote(hostport)
        req.set_proxy(hostport, proxy_type)
        if orig_type == proxy_type or orig_type == 'https':
            # let other handlers take care of it
            return None
        else:
            # need to start over, because the other handlers don't
            # grok the proxy's URL type
            # e.g. if we have a constructor arg proxies like so:
            # {'http': 'ftp://proxy.example.com'}, we may end up turning
            # a request for http://acme.example.com/a into one for
            # ftp://proxy.example.com/a
            return self.parent.open(req, timeout=req.timeout)
---|
834 | n/a | |
---|
class HTTPPasswordMgr:
    """Store (user, password) credentials keyed by realm and URI prefix."""

    def __init__(self):
        # Maps realm -> {tuple-of-reduced-URIs: (user, password)}.
        self.passwd = {}

    def add_password(self, realm, uri, user, passwd):
        """Register *user*/*passwd* for *realm* at *uri* (str or sequence)."""
        uris = [uri] if isinstance(uri, str) else uri
        by_realm = self.passwd.setdefault(realm, {})
        # Store the URIs reduced both with and without the scheme's
        # default port, so lookups match either spelling.
        for default_port in (True, False):
            key = tuple(self.reduce_uri(u, default_port) for u in uris)
            by_realm[key] = (user, passwd)

    def find_user_password(self, realm, authuri):
        """Return (user, password) for the best match, or (None, None)."""
        candidates = self.passwd.get(realm, {})
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uri_tuple, credentials in candidates.items():
                if any(self.is_suburi(u, reduced) for u in uri_tuple):
                    return credentials
        return None, None

    def reduce_uri(self, uri, default_port=True):
        """Accept authority or URI and extract only the authority and path."""
        # note HTTP URLs do not have a userinfo component
        parts = urlsplit(uri)
        if parts[1]:
            # A full URI: keep its authority and path (default path "/").
            scheme = parts[0]
            authority = parts[1]
            path = parts[2] or '/'
        else:
            # A bare host or host:port.
            scheme, authority, path = None, uri, '/'
        host, port = splitport(authority)
        if default_port and port is None and scheme is not None:
            dport = {"http": 80, "https": 443}.get(scheme)
            if dport is not None:
                authority = "%s:%d" % (host, dport)
        return authority, path

    def is_suburi(self, base, test):
        """Check if test is below base in a URI tree.

        Both args must be URIs in reduced (authority, path) form.
        """
        if base == test:
            return True
        if base[0] != test[0]:
            return False
        prefix = posixpath.commonprefix((base[1], test[1]))
        return len(prefix) == len(base[1])
---|
897 | n/a | |
---|
898 | n/a | |
---|
class HTTPPasswordMgrWithDefaultRealm(HTTPPasswordMgr):
    """Password manager that falls back to the default (None) realm."""

    def find_user_password(self, realm, authuri):
        """Look up credentials for *realm*, falling back to the default."""
        user, password = HTTPPasswordMgr.find_user_password(
            self, realm, authuri)
        if user is None:
            # No realm-specific entry; try credentials registered under
            # the default (None) realm.
            return HTTPPasswordMgr.find_user_password(self, None, authuri)
        return user, password
---|
907 | n/a | |
---|
908 | n/a | |
---|
class HTTPPasswordMgrWithPriorAuth(HTTPPasswordMgrWithDefaultRealm):
    """Password manager that also records which URIs accepted prior auth."""

    def __init__(self, *args, **kwargs):
        # Maps reduced (authority, path) -> bool "already authenticated".
        self.authenticated = {}
        super().__init__(*args, **kwargs)

    def add_password(self, realm, uri, user, passwd, is_authenticated=False):
        """Register credentials and record the prior-auth flag for *uri*."""
        self.update_authenticated(uri, is_authenticated)
        # Also register under the default (None) realm so preemptive
        # (prior-auth) requests can find the credentials without a realm.
        if realm is not None:
            super().add_password(None, uri, user, passwd)
        super().add_password(realm, uri, user, passwd)

    def update_authenticated(self, uri, is_authenticated=False):
        """Set the prior-auth flag for *uri* (a str or a sequence of str)."""
        uris = [uri] if isinstance(uri, str) else uri
        # Record both port spellings, mirroring add_password().
        for default_port in (True, False):
            for u in uris:
                reduced = self.reduce_uri(u, default_port)
                self.authenticated[reduced] = is_authenticated

    def is_authenticated(self, authuri):
        """Return the recorded flag for the first matching URI, else None."""
        for default_port in (True, False):
            reduced = self.reduce_uri(authuri, default_port)
            for uri, flag in self.authenticated.items():
                if self.is_suburi(uri, reduced):
                    return flag
        # Implicitly returns None when no registered URI matches.
---|
939 | n/a | |
---|
class AbstractBasicAuthHandler:
    """Shared machinery for HTTP Basic auth against servers and proxies."""

    # XXX this allows for multiple auth-schemes, but will stupidly pick
    # the last one with a realm specified.

    # allow for double- and single-quoted realm values
    # (single quotes are a violation of the RFC, but appear in the wild)
    # NOTE(review): backtracking realm regexes on attacker-controlled
    # WWW-Authenticate headers have been a ReDoS concern in some CPython
    # releases -- verify against the security fixes for the target version.
    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
                    'realm=(["\']?)([^"\']*)\\2', re.I)

    # XXX could pre-emptively send auth info already accepted (RFC 2617,
    # end of section 2, and section 1.2 immediately after "credentials"
    # production).

    def __init__(self, password_mgr=None):
        if password_mgr is None:
            password_mgr = HTTPPasswordMgr()
        self.passwd = password_mgr
        # Expose the password manager's registration method directly.
        self.add_password = self.passwd.add_password

    def http_error_auth_reqed(self, authreq, host, req, headers):
        # host may be an authority (without userinfo) or a URL with an
        # authority
        # XXX could be multiple headers
        authreq = headers.get(authreq, None)

        if authreq:
            # Reject any non-Basic challenge scheme outright.
            scheme = authreq.split()[0]
            if scheme.lower() != 'basic':
                raise ValueError("AbstractBasicAuthHandler does not"
                                 " support the following scheme: '%s'" %
                                 scheme)
            else:
                mo = AbstractBasicAuthHandler.rx.search(authreq)
                if mo:
                    # NB: 'quote' here is the regex quote-character group
                    # and shadows urllib.parse.quote within this method.
                    scheme, quote, realm = mo.groups()
                    if quote not in ['"',"'"]:
                        warnings.warn("Basic Auth Realm was unquoted",
                                      UserWarning, 2)
                    if scheme.lower() == 'basic':
                        return self.retry_http_basic_auth(host, req, realm)

    def retry_http_basic_auth(self, host, req, realm):
        """Re-issue *req* with an Authorization header, or return None."""
        user, pw = self.passwd.find_user_password(realm, host)
        if pw is not None:
            raw = "%s:%s" % (user, pw)
            auth = "Basic " + base64.b64encode(raw.encode()).decode("ascii")
            # If the same credentials were already sent and rejected,
            # give up instead of looping.
            if req.get_header(self.auth_header, None) == auth:
                return None
            req.add_unredirected_header(self.auth_header, auth)
            return self.parent.open(req, timeout=req.timeout)
        else:
            return None

    def http_request(self, req):
        # Preemptive auth: applies only when the password manager tracks
        # prior-auth state (HTTPPasswordMgrWithPriorAuth) and this URL
        # has authenticated successfully before.
        if (not hasattr(self.passwd, 'is_authenticated') or
                not self.passwd.is_authenticated(req.full_url)):
            return req

        if not req.has_header('Authorization'):
            user, passwd = self.passwd.find_user_password(None, req.full_url)
            credentials = '{0}:{1}'.format(user, passwd).encode()
            auth_str = base64.standard_b64encode(credentials).decode()
            req.add_unredirected_header('Authorization',
                                        'Basic {}'.format(auth_str.strip()))
        return req

    def http_response(self, req, response):
        # Record whether this URL accepted our credentials so later
        # requests can authenticate preemptively.
        if hasattr(self.passwd, 'is_authenticated'):
            if 200 <= response.code < 300:
                self.passwd.update_authenticated(req.full_url, True)
            else:
                self.passwd.update_authenticated(req.full_url, False)
        return response

    https_request = http_request
    https_response = http_response
---|
1017 | n/a | |
---|
1018 | n/a | |
---|
1019 | n/a | |
---|
class HTTPBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against the origin server (HTTP 401)."""

    auth_header = 'Authorization'

    def http_error_401(self, req, fp, code, msg, headers):
        # Delegate to the shared Basic-auth machinery, keyed on the
        # WWW-Authenticate challenge header.
        return self.http_error_auth_reqed('www-authenticate',
                                          req.full_url, req, headers)
---|
1029 | n/a | |
---|
1030 | n/a | |
---|
class ProxyBasicAuthHandler(AbstractBasicAuthHandler, BaseHandler):
    """Basic authentication against a proxy (HTTP 407)."""

    auth_header = 'Proxy-authorization'

    def http_error_407(self, req, fp, code, msg, headers):
        # http_error_auth_reqed requires that there is no userinfo
        # component in authority.  Assume there isn't one, since
        # urllib.request does not (and should not, RFC 3986 s. 3.2.1)
        # support requests for URLs containing userinfo.
        return self.http_error_auth_reqed('proxy-authenticate',
                                          req.host, req, headers)
---|
1044 | n/a | |
---|
1045 | n/a | |
---|
# Return n random bytes.
# os.urandom is used here because the bytes feed digest-auth cnonce
# generation (get_cnonce), where unpredictability matters.
_randombytes = os.urandom
---|
1048 | n/a | |
---|
1049 | n/a | |
---|
class AbstractDigestAuthHandler:
    """Shared machinery for HTTP Digest auth to servers and proxies."""

    # Digest authentication is specified in RFC 2617.

    # XXX The client does not inspect the Authentication-Info header
    # in a successful response.

    # XXX It should be possible to test this implementation against
    # a mock server that just generates a static set of challenges.

    # XXX qop="auth-int" supports is shaky

    def __init__(self, passwd=None):
        if passwd is None:
            passwd = HTTPPasswordMgr()
        self.passwd = passwd
        self.add_password = self.passwd.add_password
        # Number of failed challenge round-trips for the current request.
        self.retried = 0
        # nonce-count bookkeeping for the qop="auth" response digest.
        self.nonce_count = 0
        self.last_nonce = None

    def reset_retry_count(self):
        # Called by the concrete handlers once a 401/407 exchange finishes.
        self.retried = 0

    def http_error_auth_reqed(self, auth_header, host, req, headers):
        authreq = headers.get(auth_header, None)
        if self.retried > 5:
            # Don't fail endlessly - if we failed once, we'll probably
            # fail a second time. Hm. Unless the Password Manager is
            # prompting for the information. Crap. This isn't great
            # but it's better than the current 'repeat until recursion
            # depth exceeded' approach <wink>
            raise HTTPError(req.full_url, 401, "digest auth failed",
                            headers, None)
        else:
            self.retried += 1
        if authreq:
            scheme = authreq.split()[0]
            if scheme.lower() == 'digest':
                return self.retry_http_digest_auth(req, authreq)
            elif scheme.lower() != 'basic':
                # Basic challenges are left to the Basic handler; any
                # other scheme is unsupported.
                raise ValueError("AbstractDigestAuthHandler does not support"
                                 " the following scheme: '%s'" % scheme)

    def retry_http_digest_auth(self, req, auth):
        """Re-issue *req* with a computed Digest Authorization header."""
        token, challenge = auth.split(' ', 1)
        chal = parse_keqv_list(filter(None, parse_http_list(challenge)))
        auth = self.get_authorization(req, chal)
        if auth:
            auth_val = 'Digest %s' % auth
            # The same credentials were already sent and rejected once:
            # stop retrying.
            if req.headers.get(self.auth_header, None) == auth_val:
                return None
            req.add_unredirected_header(self.auth_header, auth_val)
            resp = self.parent.open(req, timeout=req.timeout)
            return resp

    def get_cnonce(self, nonce):
        # The cnonce-value is an opaque
        # quoted string value provided by the client and used by both client
        # and server to avoid chosen plaintext attacks, to provide mutual
        # authentication, and to provide some message integrity protection.
        # This isn't a fabulous effort, but it's probably Good Enough.
        s = "%s:%s:%s:" % (self.nonce_count, nonce, time.ctime())
        b = s.encode("ascii") + _randombytes(8)
        dig = hashlib.sha1(b).hexdigest()
        return dig[:16]

    def get_authorization(self, req, chal):
        """Build the Digest Authorization header value from challenge *chal*.

        Returns None when the challenge is malformed or no credentials
        are registered for the realm; raises URLError for an unsupported
        qop and ValueError for an unsupported algorithm.
        """
        try:
            realm = chal['realm']
            nonce = chal['nonce']
            qop = chal.get('qop')
            algorithm = chal.get('algorithm', 'MD5')
            # mod_digest doesn't send an opaque, even though it isn't
            # supposed to be optional
            opaque = chal.get('opaque', None)
        except KeyError:
            return None

        H, KD = self.get_algorithm_impls(algorithm)
        if H is None:
            return None

        user, pw = self.passwd.find_user_password(realm, req.full_url)
        if user is None:
            return None

        # XXX not implemented yet
        if req.data is not None:
            entdig = self.get_entity_digest(req.data, chal)
        else:
            entdig = None

        A1 = "%s:%s:%s" % (user, realm, pw)
        A2 = "%s:%s" % (req.get_method(),
                        # XXX selector: what about proxies and full urls
                        req.selector)
        if qop == 'auth':
            # Per-nonce counter: reuse of a server nonce increments nc,
            # a new nonce resets it (RFC 2617 s. 3.2.2).
            if nonce == self.last_nonce:
                self.nonce_count += 1
            else:
                self.nonce_count = 1
                self.last_nonce = nonce
            ncvalue = '%08x' % self.nonce_count
            cnonce = self.get_cnonce(nonce)
            noncebit = "%s:%s:%s:%s:%s" % (nonce, ncvalue, cnonce, qop, H(A2))
            respdig = KD(H(A1), noncebit)
        elif qop is None:
            respdig = KD(H(A1), "%s:%s" % (nonce, H(A2)))
        else:
            # XXX handle auth-int.
            raise URLError("qop '%s' is not supported." % qop)

        # XXX should the partial digests be encoded too?

        base = 'username="%s", realm="%s", nonce="%s", uri="%s", ' \
               'response="%s"' % (user, realm, nonce, req.selector,
                                  respdig)
        if opaque:
            base += ', opaque="%s"' % opaque
        if entdig:
            base += ', digest="%s"' % entdig
        base += ', algorithm="%s"' % algorithm
        if qop:
            # Only qop == 'auth' reaches here (other values raised above),
            # so ncvalue/cnonce are guaranteed to be bound.
            base += ', qop=auth, nc=%s, cnonce="%s"' % (ncvalue, cnonce)
        return base

    def get_algorithm_impls(self, algorithm):
        """Return (H, KD) digest callables for *algorithm* ('MD5' or 'SHA')."""
        # lambdas assume digest modules are imported at the top level
        if algorithm == 'MD5':
            H = lambda x: hashlib.md5(x.encode("ascii")).hexdigest()
        elif algorithm == 'SHA':
            H = lambda x: hashlib.sha1(x.encode("ascii")).hexdigest()
        # XXX MD5-sess
        else:
            raise ValueError("Unsupported digest authentication "
                             "algorithm %r" % algorithm)
        KD = lambda s, d: H("%s:%s" % (s, d))
        return H, KD

    def get_entity_digest(self, data, chal):
        # XXX not implemented yet
        return None
---|
1192 | n/a | |
---|
1193 | n/a | |
---|
class HTTPDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """An authentication protocol defined by RFC 2069.

    Digest authentication improves on basic authentication because it
    does not transmit passwords in the clear.
    """

    auth_header = 'Authorization'
    handler_order = 490  # before Basic auth

    def http_error_401(self, req, fp, code, msg, headers):
        # The password manager is keyed on the netloc part of the URL.
        netloc = urlparse(req.full_url)[1]
        response = self.http_error_auth_reqed('www-authenticate',
                                              netloc, req, headers)
        # The challenge round-trip is done; allow future 401s to retry.
        self.reset_retry_count()
        return response
---|
1210 | n/a | |
---|
1211 | n/a | |
---|
class ProxyDigestAuthHandler(BaseHandler, AbstractDigestAuthHandler):
    """Digest authentication against a proxy (HTTP 407)."""

    auth_header = 'Proxy-Authorization'
    handler_order = 490  # before Basic auth

    def http_error_407(self, req, fp, code, msg, headers):
        # Proxies are identified by the request's host, not the full URL.
        response = self.http_error_auth_reqed('proxy-authenticate',
                                              req.host, req, headers)
        # The challenge round-trip is done; allow future 407s to retry.
        self.reset_retry_count()
        return response
---|
1223 | n/a | |
---|
class AbstractHTTPHandler(BaseHandler):
    """Shared request preparation and connection logic for HTTP(S)."""

    def __init__(self, debuglevel=0):
        self._debuglevel = debuglevel

    def set_http_debuglevel(self, level):
        # Forwarded to http.client's connection debug output.
        self._debuglevel = level

    def _get_content_length(self, request):
        # Reuse http.client's sizing logic: returns None for unsized
        # iterables/file objects, a byte count otherwise.
        return http.client.HTTPConnection._get_content_length(
            request.data,
            request.get_method())

    def do_request_(self, request):
        """Fill in required headers (Content-type, Content-length or
        Transfer-encoding, Host, opener-wide addheaders) before sending."""
        host = request.host
        if not host:
            raise URLError('no host given')

        if request.data is not None:  # POST
            data = request.data
            if isinstance(data, str):
                msg = "POST data should be bytes, an iterable of bytes, " \
                      "or a file object. It cannot be of type str."
                raise TypeError(msg)
            if not request.has_header('Content-type'):
                request.add_unredirected_header(
                    'Content-type',
                    'application/x-www-form-urlencoded')
            if (not request.has_header('Content-length')
                    and not request.has_header('Transfer-encoding')):
                content_length = self._get_content_length(request)
                if content_length is not None:
                    request.add_unredirected_header(
                        'Content-length', str(content_length))
                else:
                    # Unknown body size (e.g. a generator): stream chunked.
                    request.add_unredirected_header(
                        'Transfer-encoding', 'chunked')

        sel_host = host
        if request.has_proxy():
            # When proxied, the selector is a full URL; the Host header
            # must still name the origin server.
            scheme, sel = splittype(request.selector)
            sel_host, sel_path = splithost(sel)
        if not request.has_header('Host'):
            request.add_unredirected_header('Host', sel_host)
        for name, value in self.parent.addheaders:
            name = name.capitalize()
            if not request.has_header(name):
                request.add_unredirected_header(name, value)

        return request

    def do_open(self, http_class, req, **http_conn_args):
        """Return an HTTPResponse object for the request, using http_class.

        http_class must implement the HTTPConnection API from http.client.
        """
        host = req.host
        if not host:
            raise URLError('no host given')

        # will parse host:port
        h = http_class(host, timeout=req.timeout, **http_conn_args)
        h.set_debuglevel(self._debuglevel)

        # Unredirected headers take priority over normal ones.
        headers = dict(req.unredirected_hdrs)
        headers.update(dict((k, v) for k, v in req.headers.items()
                            if k not in headers))

        # TODO(jhylton): Should this be redesigned to handle
        # persistent connections?

        # We want to make an HTTP/1.1 request, but the addinfourl
        # class isn't prepared to deal with a persistent connection.
        # It will try to read all remaining data from the socket,
        # which will block while the server waits for the next request.
        # So make sure the connection gets closed after the (only)
        # request.
        headers["Connection"] = "close"
        headers = dict((name.title(), val) for name, val in headers.items())

        if req._tunnel_host:
            # CONNECT tunneling (e.g. HTTPS through an HTTP proxy).
            tunnel_headers = {}
            proxy_auth_hdr = "Proxy-Authorization"
            if proxy_auth_hdr in headers:
                tunnel_headers[proxy_auth_hdr] = headers[proxy_auth_hdr]
                # Proxy-Authorization should not be sent to origin
                # server.
                del headers[proxy_auth_hdr]
            h.set_tunnel(req._tunnel_host, headers=tunnel_headers)

        try:
            try:
                h.request(req.get_method(), req.selector, req.data, headers,
                          encode_chunked=req.has_header('Transfer-encoding'))
            except OSError as err:  # timeout error
                raise URLError(err)
            r = h.getresponse()
        except:
            # Any failure (including the URLError above) must not leak
            # the connection.
            h.close()
            raise

        # If the server does not send us a 'Connection: close' header,
        # HTTPConnection assumes the socket should be left open.  Manually
        # mark the socket to be closed when this response object goes away.
        if h.sock:
            h.sock.close()
            h.sock = None

        r.url = req.get_full_url()
        # This line replaces the .msg attribute of the HTTPResponse
        # with .headers, because urllib clients expect the response to
        # have the reason in .msg.  It would be good to mark this
        # attribute is deprecated and get then to use info() or
        # .headers.
        r.msg = r.reason
        return r
---|
1340 | n/a | |
---|
1341 | n/a | |
---|
1342 | n/a | class HTTPHandler(AbstractHTTPHandler): |
---|
1343 | n/a | |
---|
1344 | n/a | def http_open(self, req): |
---|
1345 | n/a | return self.do_open(http.client.HTTPConnection, req) |
---|
1346 | n/a | |
---|
1347 | n/a | http_request = AbstractHTTPHandler.do_request_ |
---|
1348 | n/a | |
---|
1349 | n/a | if hasattr(http.client, 'HTTPSConnection'): |
---|
1350 | n/a | |
---|
1351 | n/a | class HTTPSHandler(AbstractHTTPHandler): |
---|
1352 | n/a | |
---|
1353 | n/a | def __init__(self, debuglevel=0, context=None, check_hostname=None): |
---|
1354 | n/a | AbstractHTTPHandler.__init__(self, debuglevel) |
---|
1355 | n/a | self._context = context |
---|
1356 | n/a | self._check_hostname = check_hostname |
---|
1357 | n/a | |
---|
1358 | n/a | def https_open(self, req): |
---|
1359 | n/a | return self.do_open(http.client.HTTPSConnection, req, |
---|
1360 | n/a | context=self._context, check_hostname=self._check_hostname) |
---|
1361 | n/a | |
---|
1362 | n/a | https_request = AbstractHTTPHandler.do_request_ |
---|
1363 | n/a | |
---|
1364 | n/a | __all__.append('HTTPSHandler') |
---|
1365 | n/a | |
---|
1366 | n/a | class HTTPCookieProcessor(BaseHandler): |
---|
1367 | n/a | def __init__(self, cookiejar=None): |
---|
1368 | n/a | import http.cookiejar |
---|
1369 | n/a | if cookiejar is None: |
---|
1370 | n/a | cookiejar = http.cookiejar.CookieJar() |
---|
1371 | n/a | self.cookiejar = cookiejar |
---|
1372 | n/a | |
---|
1373 | n/a | def http_request(self, request): |
---|
1374 | n/a | self.cookiejar.add_cookie_header(request) |
---|
1375 | n/a | return request |
---|
1376 | n/a | |
---|
1377 | n/a | def http_response(self, request, response): |
---|
1378 | n/a | self.cookiejar.extract_cookies(response, request) |
---|
1379 | n/a | return response |
---|
1380 | n/a | |
---|
1381 | n/a | https_request = http_request |
---|
1382 | n/a | https_response = http_response |
---|
1383 | n/a | |
---|
1384 | n/a | class UnknownHandler(BaseHandler): |
---|
1385 | n/a | def unknown_open(self, req): |
---|
1386 | n/a | type = req.type |
---|
1387 | n/a | raise URLError('unknown url type: %s' % type) |
---|
1388 | n/a | |
---|
1389 | n/a | def parse_keqv_list(l): |
---|
1390 | n/a | """Parse list of key=value strings where keys are not duplicated.""" |
---|
1391 | n/a | parsed = {} |
---|
1392 | n/a | for elt in l: |
---|
1393 | n/a | k, v = elt.split('=', 1) |
---|
1394 | n/a | if v[0] == '"' and v[-1] == '"': |
---|
1395 | n/a | v = v[1:-1] |
---|
1396 | n/a | parsed[k] = v |
---|
1397 | n/a | return parsed |
---|
1398 | n/a | |
---|
1399 | n/a | def parse_http_list(s): |
---|
1400 | n/a | """Parse lists as described by RFC 2068 Section 2. |
---|
1401 | n/a | |
---|
1402 | n/a | In particular, parse comma-separated lists where the elements of |
---|
1403 | n/a | the list may include quoted-strings. A quoted-string could |
---|
1404 | n/a | contain a comma. A non-quoted string could have quotes in the |
---|
1405 | n/a | middle. Neither commas nor quotes count if they are escaped. |
---|
1406 | n/a | Only double-quotes count, not single-quotes. |
---|
1407 | n/a | """ |
---|
1408 | n/a | res = [] |
---|
1409 | n/a | part = '' |
---|
1410 | n/a | |
---|
1411 | n/a | escape = quote = False |
---|
1412 | n/a | for cur in s: |
---|
1413 | n/a | if escape: |
---|
1414 | n/a | part += cur |
---|
1415 | n/a | escape = False |
---|
1416 | n/a | continue |
---|
1417 | n/a | if quote: |
---|
1418 | n/a | if cur == '\\': |
---|
1419 | n/a | escape = True |
---|
1420 | n/a | continue |
---|
1421 | n/a | elif cur == '"': |
---|
1422 | n/a | quote = False |
---|
1423 | n/a | part += cur |
---|
1424 | n/a | continue |
---|
1425 | n/a | |
---|
1426 | n/a | if cur == ',': |
---|
1427 | n/a | res.append(part) |
---|
1428 | n/a | part = '' |
---|
1429 | n/a | continue |
---|
1430 | n/a | |
---|
1431 | n/a | if cur == '"': |
---|
1432 | n/a | quote = True |
---|
1433 | n/a | |
---|
1434 | n/a | part += cur |
---|
1435 | n/a | |
---|
1436 | n/a | # append last part |
---|
1437 | n/a | if part: |
---|
1438 | n/a | res.append(part) |
---|
1439 | n/a | |
---|
1440 | n/a | return [part.strip() for part in res] |
---|
1441 | n/a | |
---|
1442 | n/a | class FileHandler(BaseHandler): |
---|
1443 | n/a | # Use local file or FTP depending on form of URL |
---|
1444 | n/a | def file_open(self, req): |
---|
1445 | n/a | url = req.selector |
---|
1446 | n/a | if url[:2] == '//' and url[2:3] != '/' and (req.host and |
---|
1447 | n/a | req.host != 'localhost'): |
---|
1448 | n/a | if not req.host in self.get_names(): |
---|
1449 | n/a | raise URLError("file:// scheme is supported only on localhost") |
---|
1450 | n/a | else: |
---|
1451 | n/a | return self.open_local_file(req) |
---|
1452 | n/a | |
---|
1453 | n/a | # names for the localhost |
---|
1454 | n/a | names = None |
---|
1455 | n/a | def get_names(self): |
---|
1456 | n/a | if FileHandler.names is None: |
---|
1457 | n/a | try: |
---|
1458 | n/a | FileHandler.names = tuple( |
---|
1459 | n/a | socket.gethostbyname_ex('localhost')[2] + |
---|
1460 | n/a | socket.gethostbyname_ex(socket.gethostname())[2]) |
---|
1461 | n/a | except socket.gaierror: |
---|
1462 | n/a | FileHandler.names = (socket.gethostbyname('localhost'),) |
---|
1463 | n/a | return FileHandler.names |
---|
1464 | n/a | |
---|
1465 | n/a | # not entirely sure what the rules are here |
---|
1466 | n/a | def open_local_file(self, req): |
---|
1467 | n/a | import email.utils |
---|
1468 | n/a | import mimetypes |
---|
1469 | n/a | host = req.host |
---|
1470 | n/a | filename = req.selector |
---|
1471 | n/a | localfile = url2pathname(filename) |
---|
1472 | n/a | try: |
---|
1473 | n/a | stats = os.stat(localfile) |
---|
1474 | n/a | size = stats.st_size |
---|
1475 | n/a | modified = email.utils.formatdate(stats.st_mtime, usegmt=True) |
---|
1476 | n/a | mtype = mimetypes.guess_type(filename)[0] |
---|
1477 | n/a | headers = email.message_from_string( |
---|
1478 | n/a | 'Content-type: %s\nContent-length: %d\nLast-modified: %s\n' % |
---|
1479 | n/a | (mtype or 'text/plain', size, modified)) |
---|
1480 | n/a | if host: |
---|
1481 | n/a | host, port = splitport(host) |
---|
1482 | n/a | if not host or \ |
---|
1483 | n/a | (not port and _safe_gethostbyname(host) in self.get_names()): |
---|
1484 | n/a | if host: |
---|
1485 | n/a | origurl = 'file://' + host + filename |
---|
1486 | n/a | else: |
---|
1487 | n/a | origurl = 'file://' + filename |
---|
1488 | n/a | return addinfourl(open(localfile, 'rb'), headers, origurl) |
---|
1489 | n/a | except OSError as exp: |
---|
1490 | n/a | # users shouldn't expect OSErrors coming from urlopen() |
---|
1491 | n/a | raise URLError(exp) |
---|
1492 | n/a | raise URLError('file not on local host') |
---|
1493 | n/a | |
---|
1494 | n/a | def _safe_gethostbyname(host): |
---|
1495 | n/a | try: |
---|
1496 | n/a | return socket.gethostbyname(host) |
---|
1497 | n/a | except socket.gaierror: |
---|
1498 | n/a | return None |
---|
1499 | n/a | |
---|
1500 | n/a | class FTPHandler(BaseHandler): |
---|
1501 | n/a | def ftp_open(self, req): |
---|
1502 | n/a | import ftplib |
---|
1503 | n/a | import mimetypes |
---|
1504 | n/a | host = req.host |
---|
1505 | n/a | if not host: |
---|
1506 | n/a | raise URLError('ftp error: no host given') |
---|
1507 | n/a | host, port = splitport(host) |
---|
1508 | n/a | if port is None: |
---|
1509 | n/a | port = ftplib.FTP_PORT |
---|
1510 | n/a | else: |
---|
1511 | n/a | port = int(port) |
---|
1512 | n/a | |
---|
1513 | n/a | # username/password handling |
---|
1514 | n/a | user, host = splituser(host) |
---|
1515 | n/a | if user: |
---|
1516 | n/a | user, passwd = splitpasswd(user) |
---|
1517 | n/a | else: |
---|
1518 | n/a | passwd = None |
---|
1519 | n/a | host = unquote(host) |
---|
1520 | n/a | user = user or '' |
---|
1521 | n/a | passwd = passwd or '' |
---|
1522 | n/a | |
---|
1523 | n/a | try: |
---|
1524 | n/a | host = socket.gethostbyname(host) |
---|
1525 | n/a | except OSError as msg: |
---|
1526 | n/a | raise URLError(msg) |
---|
1527 | n/a | path, attrs = splitattr(req.selector) |
---|
1528 | n/a | dirs = path.split('/') |
---|
1529 | n/a | dirs = list(map(unquote, dirs)) |
---|
1530 | n/a | dirs, file = dirs[:-1], dirs[-1] |
---|
1531 | n/a | if dirs and not dirs[0]: |
---|
1532 | n/a | dirs = dirs[1:] |
---|
1533 | n/a | try: |
---|
1534 | n/a | fw = self.connect_ftp(user, passwd, host, port, dirs, req.timeout) |
---|
1535 | n/a | type = file and 'I' or 'D' |
---|
1536 | n/a | for attr in attrs: |
---|
1537 | n/a | attr, value = splitvalue(attr) |
---|
1538 | n/a | if attr.lower() == 'type' and \ |
---|
1539 | n/a | value in ('a', 'A', 'i', 'I', 'd', 'D'): |
---|
1540 | n/a | type = value.upper() |
---|
1541 | n/a | fp, retrlen = fw.retrfile(file, type) |
---|
1542 | n/a | headers = "" |
---|
1543 | n/a | mtype = mimetypes.guess_type(req.full_url)[0] |
---|
1544 | n/a | if mtype: |
---|
1545 | n/a | headers += "Content-type: %s\n" % mtype |
---|
1546 | n/a | if retrlen is not None and retrlen >= 0: |
---|
1547 | n/a | headers += "Content-length: %d\n" % retrlen |
---|
1548 | n/a | headers = email.message_from_string(headers) |
---|
1549 | n/a | return addinfourl(fp, headers, req.full_url) |
---|
1550 | n/a | except ftplib.all_errors as exp: |
---|
1551 | n/a | exc = URLError('ftp error: %r' % exp) |
---|
1552 | n/a | raise exc.with_traceback(sys.exc_info()[2]) |
---|
1553 | n/a | |
---|
1554 | n/a | def connect_ftp(self, user, passwd, host, port, dirs, timeout): |
---|
1555 | n/a | return ftpwrapper(user, passwd, host, port, dirs, timeout, |
---|
1556 | n/a | persistent=False) |
---|
1557 | n/a | |
---|
1558 | n/a | class CacheFTPHandler(FTPHandler): |
---|
1559 | n/a | # XXX would be nice to have pluggable cache strategies |
---|
1560 | n/a | # XXX this stuff is definitely not thread safe |
---|
1561 | n/a | def __init__(self): |
---|
1562 | n/a | self.cache = {} |
---|
1563 | n/a | self.timeout = {} |
---|
1564 | n/a | self.soonest = 0 |
---|
1565 | n/a | self.delay = 60 |
---|
1566 | n/a | self.max_conns = 16 |
---|
1567 | n/a | |
---|
1568 | n/a | def setTimeout(self, t): |
---|
1569 | n/a | self.delay = t |
---|
1570 | n/a | |
---|
1571 | n/a | def setMaxConns(self, m): |
---|
1572 | n/a | self.max_conns = m |
---|
1573 | n/a | |
---|
1574 | n/a | def connect_ftp(self, user, passwd, host, port, dirs, timeout): |
---|
1575 | n/a | key = user, host, port, '/'.join(dirs), timeout |
---|
1576 | n/a | if key in self.cache: |
---|
1577 | n/a | self.timeout[key] = time.time() + self.delay |
---|
1578 | n/a | else: |
---|
1579 | n/a | self.cache[key] = ftpwrapper(user, passwd, host, port, |
---|
1580 | n/a | dirs, timeout) |
---|
1581 | n/a | self.timeout[key] = time.time() + self.delay |
---|
1582 | n/a | self.check_cache() |
---|
1583 | n/a | return self.cache[key] |
---|
1584 | n/a | |
---|
1585 | n/a | def check_cache(self): |
---|
1586 | n/a | # first check for old ones |
---|
1587 | n/a | t = time.time() |
---|
1588 | n/a | if self.soonest <= t: |
---|
1589 | n/a | for k, v in list(self.timeout.items()): |
---|
1590 | n/a | if v < t: |
---|
1591 | n/a | self.cache[k].close() |
---|
1592 | n/a | del self.cache[k] |
---|
1593 | n/a | del self.timeout[k] |
---|
1594 | n/a | self.soonest = min(list(self.timeout.values())) |
---|
1595 | n/a | |
---|
1596 | n/a | # then check the size |
---|
1597 | n/a | if len(self.cache) == self.max_conns: |
---|
1598 | n/a | for k, v in list(self.timeout.items()): |
---|
1599 | n/a | if v == self.soonest: |
---|
1600 | n/a | del self.cache[k] |
---|
1601 | n/a | del self.timeout[k] |
---|
1602 | n/a | break |
---|
1603 | n/a | self.soonest = min(list(self.timeout.values())) |
---|
1604 | n/a | |
---|
1605 | n/a | def clear_cache(self): |
---|
1606 | n/a | for conn in self.cache.values(): |
---|
1607 | n/a | conn.close() |
---|
1608 | n/a | self.cache.clear() |
---|
1609 | n/a | self.timeout.clear() |
---|
1610 | n/a | |
---|
1611 | n/a | class DataHandler(BaseHandler): |
---|
1612 | n/a | def data_open(self, req): |
---|
1613 | n/a | # data URLs as specified in RFC 2397. |
---|
1614 | n/a | # |
---|
1615 | n/a | # ignores POSTed data |
---|
1616 | n/a | # |
---|
1617 | n/a | # syntax: |
---|
1618 | n/a | # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data |
---|
1619 | n/a | # mediatype := [ type "/" subtype ] *( ";" parameter ) |
---|
1620 | n/a | # data := *urlchar |
---|
1621 | n/a | # parameter := attribute "=" value |
---|
1622 | n/a | url = req.full_url |
---|
1623 | n/a | |
---|
1624 | n/a | scheme, data = url.split(":",1) |
---|
1625 | n/a | mediatype, data = data.split(",",1) |
---|
1626 | n/a | |
---|
1627 | n/a | # even base64 encoded data URLs might be quoted so unquote in any case: |
---|
1628 | n/a | data = unquote_to_bytes(data) |
---|
1629 | n/a | if mediatype.endswith(";base64"): |
---|
1630 | n/a | data = base64.decodebytes(data) |
---|
1631 | n/a | mediatype = mediatype[:-7] |
---|
1632 | n/a | |
---|
1633 | n/a | if not mediatype: |
---|
1634 | n/a | mediatype = "text/plain;charset=US-ASCII" |
---|
1635 | n/a | |
---|
1636 | n/a | headers = email.message_from_string("Content-type: %s\nContent-length: %d\n" % |
---|
1637 | n/a | (mediatype, len(data))) |
---|
1638 | n/a | |
---|
1639 | n/a | return addinfourl(io.BytesIO(data), headers, url) |
---|
1640 | n/a | |
---|
1641 | n/a | |
---|
1642 | n/a | # Code move from the old urllib module |
---|
1643 | n/a | |
---|
1644 | n/a | MAXFTPCACHE = 10 # Trim the ftp cache beyond this size |
---|
1645 | n/a | |
---|
1646 | n/a | # Helper for non-unix systems |
---|
1647 | n/a | if os.name == 'nt': |
---|
1648 | n/a | from nturl2path import url2pathname, pathname2url |
---|
1649 | n/a | else: |
---|
1650 | n/a | def url2pathname(pathname): |
---|
1651 | n/a | """OS-specific conversion from a relative URL of the 'file' scheme |
---|
1652 | n/a | to a file system path; not recommended for general use.""" |
---|
1653 | n/a | return unquote(pathname) |
---|
1654 | n/a | |
---|
1655 | n/a | def pathname2url(pathname): |
---|
1656 | n/a | """OS-specific conversion from a file system path to a relative URL |
---|
1657 | n/a | of the 'file' scheme; not recommended for general use.""" |
---|
1658 | n/a | return quote(pathname) |
---|
1659 | n/a | |
---|
1660 | n/a | # This really consists of two pieces: |
---|
1661 | n/a | # (1) a class which handles opening of all sorts of URLs |
---|
1662 | n/a | # (plus assorted utilities etc.) |
---|
1663 | n/a | # (2) a set of functions for parsing URLs |
---|
1664 | n/a | # XXX Should these be separated out into different modules? |
---|
1665 | n/a | |
---|
1666 | n/a | |
---|
1667 | n/a | ftpcache = {} |
---|
1668 | n/a | class URLopener: |
---|
1669 | n/a | """Class to open URLs. |
---|
1670 | n/a | This is a class rather than just a subroutine because we may need |
---|
1671 | n/a | more than one set of global protocol-specific options. |
---|
1672 | n/a | Note -- this is a base class for those who don't want the |
---|
1673 | n/a | automatic handling of errors type 302 (relocated) and 401 |
---|
1674 | n/a | (authorization needed).""" |
---|
1675 | n/a | |
---|
1676 | n/a | __tempfiles = None |
---|
1677 | n/a | |
---|
1678 | n/a | version = "Python-urllib/%s" % __version__ |
---|
1679 | n/a | |
---|
1680 | n/a | # Constructor |
---|
1681 | n/a | def __init__(self, proxies=None, **x509): |
---|
1682 | n/a | msg = "%(class)s style of invoking requests is deprecated. " \ |
---|
1683 | n/a | "Use newer urlopen functions/methods" % {'class': self.__class__.__name__} |
---|
1684 | n/a | warnings.warn(msg, DeprecationWarning, stacklevel=3) |
---|
1685 | n/a | if proxies is None: |
---|
1686 | n/a | proxies = getproxies() |
---|
1687 | n/a | assert hasattr(proxies, 'keys'), "proxies must be a mapping" |
---|
1688 | n/a | self.proxies = proxies |
---|
1689 | n/a | self.key_file = x509.get('key_file') |
---|
1690 | n/a | self.cert_file = x509.get('cert_file') |
---|
1691 | n/a | self.addheaders = [('User-Agent', self.version), ('Accept', '*/*')] |
---|
1692 | n/a | self.__tempfiles = [] |
---|
1693 | n/a | self.__unlink = os.unlink # See cleanup() |
---|
1694 | n/a | self.tempcache = None |
---|
1695 | n/a | # Undocumented feature: if you assign {} to tempcache, |
---|
1696 | n/a | # it is used to cache files retrieved with |
---|
1697 | n/a | # self.retrieve(). This is not enabled by default |
---|
1698 | n/a | # since it does not work for changing documents (and I |
---|
1699 | n/a | # haven't got the logic to check expiration headers |
---|
1700 | n/a | # yet). |
---|
1701 | n/a | self.ftpcache = ftpcache |
---|
1702 | n/a | # Undocumented feature: you can use a different |
---|
1703 | n/a | # ftp cache by assigning to the .ftpcache member; |
---|
1704 | n/a | # in case you want logically independent URL openers |
---|
1705 | n/a | # XXX This is not threadsafe. Bah. |
---|
1706 | n/a | |
---|
1707 | n/a | def __del__(self): |
---|
1708 | n/a | self.close() |
---|
1709 | n/a | |
---|
1710 | n/a | def close(self): |
---|
1711 | n/a | self.cleanup() |
---|
1712 | n/a | |
---|
1713 | n/a | def cleanup(self): |
---|
1714 | n/a | # This code sometimes runs when the rest of this module |
---|
1715 | n/a | # has already been deleted, so it can't use any globals |
---|
1716 | n/a | # or import anything. |
---|
1717 | n/a | if self.__tempfiles: |
---|
1718 | n/a | for file in self.__tempfiles: |
---|
1719 | n/a | try: |
---|
1720 | n/a | self.__unlink(file) |
---|
1721 | n/a | except OSError: |
---|
1722 | n/a | pass |
---|
1723 | n/a | del self.__tempfiles[:] |
---|
1724 | n/a | if self.tempcache: |
---|
1725 | n/a | self.tempcache.clear() |
---|
1726 | n/a | |
---|
1727 | n/a | def addheader(self, *args): |
---|
1728 | n/a | """Add a header to be used by the HTTP interface only |
---|
1729 | n/a | e.g. u.addheader('Accept', 'sound/basic')""" |
---|
1730 | n/a | self.addheaders.append(args) |
---|
1731 | n/a | |
---|
1732 | n/a | # External interface |
---|
1733 | n/a | def open(self, fullurl, data=None): |
---|
1734 | n/a | """Use URLopener().open(file) instead of open(file, 'r').""" |
---|
1735 | n/a | fullurl = unwrap(to_bytes(fullurl)) |
---|
1736 | n/a | fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") |
---|
1737 | n/a | if self.tempcache and fullurl in self.tempcache: |
---|
1738 | n/a | filename, headers = self.tempcache[fullurl] |
---|
1739 | n/a | fp = open(filename, 'rb') |
---|
1740 | n/a | return addinfourl(fp, headers, fullurl) |
---|
1741 | n/a | urltype, url = splittype(fullurl) |
---|
1742 | n/a | if not urltype: |
---|
1743 | n/a | urltype = 'file' |
---|
1744 | n/a | if urltype in self.proxies: |
---|
1745 | n/a | proxy = self.proxies[urltype] |
---|
1746 | n/a | urltype, proxyhost = splittype(proxy) |
---|
1747 | n/a | host, selector = splithost(proxyhost) |
---|
1748 | n/a | url = (host, fullurl) # Signal special case to open_*() |
---|
1749 | n/a | else: |
---|
1750 | n/a | proxy = None |
---|
1751 | n/a | name = 'open_' + urltype |
---|
1752 | n/a | self.type = urltype |
---|
1753 | n/a | name = name.replace('-', '_') |
---|
1754 | n/a | if not hasattr(self, name): |
---|
1755 | n/a | if proxy: |
---|
1756 | n/a | return self.open_unknown_proxy(proxy, fullurl, data) |
---|
1757 | n/a | else: |
---|
1758 | n/a | return self.open_unknown(fullurl, data) |
---|
1759 | n/a | try: |
---|
1760 | n/a | if data is None: |
---|
1761 | n/a | return getattr(self, name)(url) |
---|
1762 | n/a | else: |
---|
1763 | n/a | return getattr(self, name)(url, data) |
---|
1764 | n/a | except (HTTPError, URLError): |
---|
1765 | n/a | raise |
---|
1766 | n/a | except OSError as msg: |
---|
1767 | n/a | raise OSError('socket error', msg).with_traceback(sys.exc_info()[2]) |
---|
1768 | n/a | |
---|
1769 | n/a | def open_unknown(self, fullurl, data=None): |
---|
1770 | n/a | """Overridable interface to open unknown URL type.""" |
---|
1771 | n/a | type, url = splittype(fullurl) |
---|
1772 | n/a | raise OSError('url error', 'unknown url type', type) |
---|
1773 | n/a | |
---|
1774 | n/a | def open_unknown_proxy(self, proxy, fullurl, data=None): |
---|
1775 | n/a | """Overridable interface to open unknown URL type.""" |
---|
1776 | n/a | type, url = splittype(fullurl) |
---|
1777 | n/a | raise OSError('url error', 'invalid proxy for %s' % type, proxy) |
---|
1778 | n/a | |
---|
1779 | n/a | # External interface |
---|
1780 | n/a | def retrieve(self, url, filename=None, reporthook=None, data=None): |
---|
1781 | n/a | """retrieve(url) returns (filename, headers) for a local object |
---|
1782 | n/a | or (tempfilename, headers) for a remote object.""" |
---|
1783 | n/a | url = unwrap(to_bytes(url)) |
---|
1784 | n/a | if self.tempcache and url in self.tempcache: |
---|
1785 | n/a | return self.tempcache[url] |
---|
1786 | n/a | type, url1 = splittype(url) |
---|
1787 | n/a | if filename is None and (not type or type == 'file'): |
---|
1788 | n/a | try: |
---|
1789 | n/a | fp = self.open_local_file(url1) |
---|
1790 | n/a | hdrs = fp.info() |
---|
1791 | n/a | fp.close() |
---|
1792 | n/a | return url2pathname(splithost(url1)[1]), hdrs |
---|
1793 | n/a | except OSError as msg: |
---|
1794 | n/a | pass |
---|
1795 | n/a | fp = self.open(url, data) |
---|
1796 | n/a | try: |
---|
1797 | n/a | headers = fp.info() |
---|
1798 | n/a | if filename: |
---|
1799 | n/a | tfp = open(filename, 'wb') |
---|
1800 | n/a | else: |
---|
1801 | n/a | import tempfile |
---|
1802 | n/a | garbage, path = splittype(url) |
---|
1803 | n/a | garbage, path = splithost(path or "") |
---|
1804 | n/a | path, garbage = splitquery(path or "") |
---|
1805 | n/a | path, garbage = splitattr(path or "") |
---|
1806 | n/a | suffix = os.path.splitext(path)[1] |
---|
1807 | n/a | (fd, filename) = tempfile.mkstemp(suffix) |
---|
1808 | n/a | self.__tempfiles.append(filename) |
---|
1809 | n/a | tfp = os.fdopen(fd, 'wb') |
---|
1810 | n/a | try: |
---|
1811 | n/a | result = filename, headers |
---|
1812 | n/a | if self.tempcache is not None: |
---|
1813 | n/a | self.tempcache[url] = result |
---|
1814 | n/a | bs = 1024*8 |
---|
1815 | n/a | size = -1 |
---|
1816 | n/a | read = 0 |
---|
1817 | n/a | blocknum = 0 |
---|
1818 | n/a | if "content-length" in headers: |
---|
1819 | n/a | size = int(headers["Content-Length"]) |
---|
1820 | n/a | if reporthook: |
---|
1821 | n/a | reporthook(blocknum, bs, size) |
---|
1822 | n/a | while 1: |
---|
1823 | n/a | block = fp.read(bs) |
---|
1824 | n/a | if not block: |
---|
1825 | n/a | break |
---|
1826 | n/a | read += len(block) |
---|
1827 | n/a | tfp.write(block) |
---|
1828 | n/a | blocknum += 1 |
---|
1829 | n/a | if reporthook: |
---|
1830 | n/a | reporthook(blocknum, bs, size) |
---|
1831 | n/a | finally: |
---|
1832 | n/a | tfp.close() |
---|
1833 | n/a | finally: |
---|
1834 | n/a | fp.close() |
---|
1835 | n/a | |
---|
1836 | n/a | # raise exception if actual size does not match content-length header |
---|
1837 | n/a | if size >= 0 and read < size: |
---|
1838 | n/a | raise ContentTooShortError( |
---|
1839 | n/a | "retrieval incomplete: got only %i out of %i bytes" |
---|
1840 | n/a | % (read, size), result) |
---|
1841 | n/a | |
---|
1842 | n/a | return result |
---|
1843 | n/a | |
---|
1844 | n/a | # Each method named open_<type> knows how to open that type of URL |
---|
1845 | n/a | |
---|
1846 | n/a | def _open_generic_http(self, connection_factory, url, data): |
---|
1847 | n/a | """Make an HTTP connection using connection_class. |
---|
1848 | n/a | |
---|
1849 | n/a | This is an internal method that should be called from |
---|
1850 | n/a | open_http() or open_https(). |
---|
1851 | n/a | |
---|
1852 | n/a | Arguments: |
---|
1853 | n/a | - connection_factory should take a host name and return an |
---|
1854 | n/a | HTTPConnection instance. |
---|
1855 | n/a | - url is the url to retrieval or a host, relative-path pair. |
---|
1856 | n/a | - data is payload for a POST request or None. |
---|
1857 | n/a | """ |
---|
1858 | n/a | |
---|
1859 | n/a | user_passwd = None |
---|
1860 | n/a | proxy_passwd= None |
---|
1861 | n/a | if isinstance(url, str): |
---|
1862 | n/a | host, selector = splithost(url) |
---|
1863 | n/a | if host: |
---|
1864 | n/a | user_passwd, host = splituser(host) |
---|
1865 | n/a | host = unquote(host) |
---|
1866 | n/a | realhost = host |
---|
1867 | n/a | else: |
---|
1868 | n/a | host, selector = url |
---|
1869 | n/a | # check whether the proxy contains authorization information |
---|
1870 | n/a | proxy_passwd, host = splituser(host) |
---|
1871 | n/a | # now we proceed with the url we want to obtain |
---|
1872 | n/a | urltype, rest = splittype(selector) |
---|
1873 | n/a | url = rest |
---|
1874 | n/a | user_passwd = None |
---|
1875 | n/a | if urltype.lower() != 'http': |
---|
1876 | n/a | realhost = None |
---|
1877 | n/a | else: |
---|
1878 | n/a | realhost, rest = splithost(rest) |
---|
1879 | n/a | if realhost: |
---|
1880 | n/a | user_passwd, realhost = splituser(realhost) |
---|
1881 | n/a | if user_passwd: |
---|
1882 | n/a | selector = "%s://%s%s" % (urltype, realhost, rest) |
---|
1883 | n/a | if proxy_bypass(realhost): |
---|
1884 | n/a | host = realhost |
---|
1885 | n/a | |
---|
1886 | n/a | if not host: raise OSError('http error', 'no host given') |
---|
1887 | n/a | |
---|
1888 | n/a | if proxy_passwd: |
---|
1889 | n/a | proxy_passwd = unquote(proxy_passwd) |
---|
1890 | n/a | proxy_auth = base64.b64encode(proxy_passwd.encode()).decode('ascii') |
---|
1891 | n/a | else: |
---|
1892 | n/a | proxy_auth = None |
---|
1893 | n/a | |
---|
1894 | n/a | if user_passwd: |
---|
1895 | n/a | user_passwd = unquote(user_passwd) |
---|
1896 | n/a | auth = base64.b64encode(user_passwd.encode()).decode('ascii') |
---|
1897 | n/a | else: |
---|
1898 | n/a | auth = None |
---|
1899 | n/a | http_conn = connection_factory(host) |
---|
1900 | n/a | headers = {} |
---|
1901 | n/a | if proxy_auth: |
---|
1902 | n/a | headers["Proxy-Authorization"] = "Basic %s" % proxy_auth |
---|
1903 | n/a | if auth: |
---|
1904 | n/a | headers["Authorization"] = "Basic %s" % auth |
---|
1905 | n/a | if realhost: |
---|
1906 | n/a | headers["Host"] = realhost |
---|
1907 | n/a | |
---|
1908 | n/a | # Add Connection:close as we don't support persistent connections yet. |
---|
1909 | n/a | # This helps in closing the socket and avoiding ResourceWarning |
---|
1910 | n/a | |
---|
1911 | n/a | headers["Connection"] = "close" |
---|
1912 | n/a | |
---|
1913 | n/a | for header, value in self.addheaders: |
---|
1914 | n/a | headers[header] = value |
---|
1915 | n/a | |
---|
1916 | n/a | if data is not None: |
---|
1917 | n/a | headers["Content-Type"] = "application/x-www-form-urlencoded" |
---|
1918 | n/a | http_conn.request("POST", selector, data, headers) |
---|
1919 | n/a | else: |
---|
1920 | n/a | http_conn.request("GET", selector, headers=headers) |
---|
1921 | n/a | |
---|
1922 | n/a | try: |
---|
1923 | n/a | response = http_conn.getresponse() |
---|
1924 | n/a | except http.client.BadStatusLine: |
---|
1925 | n/a | # something went wrong with the HTTP status line |
---|
1926 | n/a | raise URLError("http protocol error: bad status line") |
---|
1927 | n/a | |
---|
1928 | n/a | # According to RFC 2616, "2xx" code indicates that the client's |
---|
1929 | n/a | # request was successfully received, understood, and accepted. |
---|
1930 | n/a | if 200 <= response.status < 300: |
---|
1931 | n/a | return addinfourl(response, response.msg, "http:" + url, |
---|
1932 | n/a | response.status) |
---|
1933 | n/a | else: |
---|
1934 | n/a | return self.http_error( |
---|
1935 | n/a | url, response.fp, |
---|
1936 | n/a | response.status, response.reason, response.msg, data) |
---|
1937 | n/a | |
---|
1938 | n/a | def open_http(self, url, data=None): |
---|
1939 | n/a | """Use HTTP protocol.""" |
---|
1940 | n/a | return self._open_generic_http(http.client.HTTPConnection, url, data) |
---|
1941 | n/a | |
---|
1942 | n/a | def http_error(self, url, fp, errcode, errmsg, headers, data=None): |
---|
1943 | n/a | """Handle http errors. |
---|
1944 | n/a | |
---|
1945 | n/a | Derived class can override this, or provide specific handlers |
---|
1946 | n/a | named http_error_DDD where DDD is the 3-digit error code.""" |
---|
1947 | n/a | # First check if there's a specific handler for this error |
---|
1948 | n/a | name = 'http_error_%d' % errcode |
---|
1949 | n/a | if hasattr(self, name): |
---|
1950 | n/a | method = getattr(self, name) |
---|
1951 | n/a | if data is None: |
---|
1952 | n/a | result = method(url, fp, errcode, errmsg, headers) |
---|
1953 | n/a | else: |
---|
1954 | n/a | result = method(url, fp, errcode, errmsg, headers, data) |
---|
1955 | n/a | if result: return result |
---|
1956 | n/a | return self.http_error_default(url, fp, errcode, errmsg, headers) |
---|
1957 | n/a | |
---|
1958 | n/a | def http_error_default(self, url, fp, errcode, errmsg, headers): |
---|
1959 | n/a | """Default error handler: close the connection and raise OSError.""" |
---|
1960 | n/a | fp.close() |
---|
1961 | n/a | raise HTTPError(url, errcode, errmsg, headers, None) |
---|
1962 | n/a | |
---|
1963 | n/a | if _have_ssl: |
---|
1964 | n/a | def _https_connection(self, host): |
---|
1965 | n/a | return http.client.HTTPSConnection(host, |
---|
1966 | n/a | key_file=self.key_file, |
---|
1967 | n/a | cert_file=self.cert_file) |
---|
1968 | n/a | |
---|
1969 | n/a | def open_https(self, url, data=None): |
---|
1970 | n/a | """Use HTTPS protocol.""" |
---|
1971 | n/a | return self._open_generic_http(self._https_connection, url, data) |
---|
1972 | n/a | |
---|
1973 | n/a | def open_file(self, url): |
---|
1974 | n/a | """Use local file or FTP depending on form of URL.""" |
---|
1975 | n/a | if not isinstance(url, str): |
---|
1976 | n/a | raise URLError('file error: proxy support for file protocol currently not implemented') |
---|
1977 | n/a | if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/': |
---|
1978 | n/a | raise ValueError("file:// scheme is supported only on localhost") |
---|
1979 | n/a | else: |
---|
1980 | n/a | return self.open_local_file(url) |
---|
1981 | n/a | |
---|
1982 | n/a | def open_local_file(self, url): |
---|
1983 | n/a | """Use local file.""" |
---|
1984 | n/a | import email.utils |
---|
1985 | n/a | import mimetypes |
---|
1986 | n/a | host, file = splithost(url) |
---|
1987 | n/a | localname = url2pathname(file) |
---|
1988 | n/a | try: |
---|
1989 | n/a | stats = os.stat(localname) |
---|
1990 | n/a | except OSError as e: |
---|
1991 | n/a | raise URLError(e.strerror, e.filename) |
---|
1992 | n/a | size = stats.st_size |
---|
1993 | n/a | modified = email.utils.formatdate(stats.st_mtime, usegmt=True) |
---|
1994 | n/a | mtype = mimetypes.guess_type(url)[0] |
---|
1995 | n/a | headers = email.message_from_string( |
---|
1996 | n/a | 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' % |
---|
1997 | n/a | (mtype or 'text/plain', size, modified)) |
---|
1998 | n/a | if not host: |
---|
1999 | n/a | urlfile = file |
---|
2000 | n/a | if file[:1] == '/': |
---|
2001 | n/a | urlfile = 'file://' + file |
---|
2002 | n/a | return addinfourl(open(localname, 'rb'), headers, urlfile) |
---|
2003 | n/a | host, port = splitport(host) |
---|
2004 | n/a | if (not port |
---|
2005 | n/a | and socket.gethostbyname(host) in ((localhost(),) + thishost())): |
---|
2006 | n/a | urlfile = file |
---|
2007 | n/a | if file[:1] == '/': |
---|
2008 | n/a | urlfile = 'file://' + file |
---|
2009 | n/a | elif file[:2] == './': |
---|
2010 | n/a | raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) |
---|
2011 | n/a | return addinfourl(open(localname, 'rb'), headers, urlfile) |
---|
2012 | n/a | raise URLError('local file error: not on local host') |
---|
2013 | n/a | |
---|
    def open_ftp(self, url):
        """Use FTP protocol.

        Parses user/password/host/port/path out of the URL, reuses (or
        creates) a cached ftpwrapper connection, and returns an addinfourl
        for the retrieved file or directory listing.  Raises URLError on
        any FTP-level failure.
        """
        if not isinstance(url, str):
            raise URLError('ftp error: proxy support for ftp protocol currently not implemented')
        import mimetypes
        host, path = splithost(url)
        if not host: raise URLError('ftp error: no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        # Components arrive percent-encoded; decode them before use.
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        # Connections are cached per (user, host, port, directory) so that
        # repeated fetches reuse one control connection.
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in list(self.ftpcache):
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if key not in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing if no file named,
            # binary ('I'mage) otherwise; a ;type= attribute may override.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = email.message_from_string(headers)
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors() as exp:
            raise URLError('ftp error %r' % exp).with_traceback(sys.exc_info()[2])
---|
2071 | n/a | |
---|
2072 | n/a | def open_data(self, url, data=None): |
---|
2073 | n/a | """Use "data" URL.""" |
---|
2074 | n/a | if not isinstance(url, str): |
---|
2075 | n/a | raise URLError('data error: proxy support for data protocol currently not implemented') |
---|
2076 | n/a | # ignore POSTed data |
---|
2077 | n/a | # |
---|
2078 | n/a | # syntax of data URLs: |
---|
2079 | n/a | # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data |
---|
2080 | n/a | # mediatype := [ type "/" subtype ] *( ";" parameter ) |
---|
2081 | n/a | # data := *urlchar |
---|
2082 | n/a | # parameter := attribute "=" value |
---|
2083 | n/a | try: |
---|
2084 | n/a | [type, data] = url.split(',', 1) |
---|
2085 | n/a | except ValueError: |
---|
2086 | n/a | raise OSError('data error', 'bad data URL') |
---|
2087 | n/a | if not type: |
---|
2088 | n/a | type = 'text/plain;charset=US-ASCII' |
---|
2089 | n/a | semi = type.rfind(';') |
---|
2090 | n/a | if semi >= 0 and '=' not in type[semi:]: |
---|
2091 | n/a | encoding = type[semi+1:] |
---|
2092 | n/a | type = type[:semi] |
---|
2093 | n/a | else: |
---|
2094 | n/a | encoding = '' |
---|
2095 | n/a | msg = [] |
---|
2096 | n/a | msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', |
---|
2097 | n/a | time.gmtime(time.time()))) |
---|
2098 | n/a | msg.append('Content-type: %s' % type) |
---|
2099 | n/a | if encoding == 'base64': |
---|
2100 | n/a | # XXX is this encoding/decoding ok? |
---|
2101 | n/a | data = base64.decodebytes(data.encode('ascii')).decode('latin-1') |
---|
2102 | n/a | else: |
---|
2103 | n/a | data = unquote(data) |
---|
2104 | n/a | msg.append('Content-Length: %d' % len(data)) |
---|
2105 | n/a | msg.append('') |
---|
2106 | n/a | msg.append(data) |
---|
2107 | n/a | msg = '\n'.join(msg) |
---|
2108 | n/a | headers = email.message_from_string(msg) |
---|
2109 | n/a | f = io.StringIO(msg) |
---|
2110 | n/a | #f.fileno = None # needed for addinfourl |
---|
2111 | n/a | return addinfourl(f, headers, url) |
---|
2112 | n/a | |
---|
2113 | n/a | |
---|
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect following (301/302/303/307) with a recursion guard and
    interactive HTTP Basic authentication (401/407) on top of URLopener.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        # Maps "realm@host" -> (user, password) remembered from prompts.
        self.auth_cache = {}
        # tries / maxtries guard against unbounded redirect recursion.
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily)."""
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                # Too many nested redirects: report a synthetic 500.
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            result = self.redirect_internal(url, fp, errcode, errmsg,
                                            headers, data)
            return result
        finally:
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        # The new location comes from Location: (preferred) or URI:.
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            # No redirect target given; nothing to do.
            return
        fp.close()

        # In case the server sent a relative URL, join with original:
        newurl = urljoin(self.type + ":" + url, newurl)

        urlparts = urlparse(newurl)

        # For security reasons, we don't allow redirection to anything other
        # than http, https and ftp.

        # We are using newer HTTPError with older redirect_internal method
        # This older method will get deprecated in 3.3

        if urlparts.scheme not in ('http', 'https', 'ftp', ''):
            raise HTTPError(newurl, errcode,
                            errmsg +
                            " Redirection to url '%s' is not allowed." % newurl,
                            headers, fp)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            # 307 must not silently replay a POST; surface the error.
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # NOTE(review): the URLopener.http_error_default calls below have no
        # return -- presumably that base method raises; confirm in URLopener.
        if 'www-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth
        # depending on the scheme of the original request.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None,
                       retry=False):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Mirrors http_error_401, but against the proxy's challenge header.
        if 'proxy-authenticate' not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        if not retry:
            URLopener.http_error_default(self, url, fp, errcode, errmsg,
                                         headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request after embedding user:password credentials
        # into the configured HTTP proxy URL.
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # Strip any credentials already present in the proxy host.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        # Same as retry_proxy_http_basic_auth, for the HTTPS proxy entry.
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = "%s:%s@%s" % (quote(user, safe=''),
                                  quote(passwd, safe=''), proxyhost)
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        # Re-issue the request with user:password embedded in the URL host.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        # Same as retry_http_basic_auth, over https.
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = "%s:%s@%s" % (quote(user, safe=''),
                             quote(passwd, safe=''), host)
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        # host is lowercased so cache lookups are case-insensitive;
        # clear_cache forces a fresh prompt (used after a failed attempt).
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = input("Enter username for %s at %s: " % (realm, host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            # Ctrl-C cancels the prompt; report "no credentials".
            print()
            return None, None
---|
2324 | n/a | |
---|
2325 | n/a | |
---|
2326 | n/a | # Utility functions |
---|
2327 | n/a | |
---|
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The resolver result is cached in a module-level global so the
    lookup happens at most once per process.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
---|
2335 | n/a | |
---|
_thishost = None
def thishost():
    """Return the IP addresses of the current host.

    Resolved once and cached; falls back to resolving 'localhost' when
    the machine's own hostname cannot be resolved.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addresses = socket.gethostbyname_ex(socket.gethostname())[2]
    except socket.gaierror:
        addresses = socket.gethostbyname_ex('localhost')[2]
    _thishost = tuple(addresses)
    return _thishost
---|
2346 | n/a | |
---|
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported lazily, on first use, and the tuple is cached.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
---|
2355 | n/a | |
---|
_noheaders = None
def noheaders():
    """Return an empty email Message object.

    The same (shared) instance is returned on every call.
    """
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    _noheaders = email.message_from_string("")
    return _noheaders
---|
2363 | n/a | |
---|
2364 | n/a | |
---|
2365 | n/a | # Utility classes |
---|
2366 | n/a | |
---|
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs, timeout=None,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of file objects handed out by retrfile() that are still
        # open; the control connection is torn down only at zero.
        self.refcount = 0
        # When False, the connection closes as soon as refcount hits zero.
        self.keepalive = persistent
        try:
            self.init()
        except:
            # Ensure a half-opened connection is released before re-raising.
            self.close()
            raise

    def init(self):
        # (Re)connect, log in, and change into the target directory.
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Retrieve *file* (or a directory listing) as (fileobj, length).

        type 'd'/'D' forces a listing; otherwise RETR is attempted first
        and a 550 reply falls back to LIST.  length may be None/unknown.
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # Control connection may have dropped; reconnect once, retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm as reason:
                # 550 means "not a plain file" -- fall through to LIST.
                if str(reason)[:3] != '550':
                    raise URLError('ftp error: %r' % reason).with_traceback(
                        sys.exc_info()[2])
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm as reason:
                        raise URLError('ftp error: %r' % reason) from reason
                finally:
                    # Always return to the original directory.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1

        # The hook decrements refcount when the caller closes the file.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        # Mark the data channel idle.
        self.busy = 0

    def close(self):
        # Disable keepalive; actually close only if no files are still out.
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        # Called when a handed-out file object is closed.
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        # Unconditionally tear down the control connection.
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
---|
2460 | n/a | |
---|
2461 | n/a | # Proxy handling |
---|
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.
    """
    proxies = {}
    # Pass 1: accept any capitalisation of <scheme>_proxy.
    for raw_name, value in os.environ.items():
        lowered = raw_name.lower()
        if value and lowered.endswith('_proxy'):
            proxies[lowered[:-6]] = value
    # CVE-2016-1000110: when running as a CGI script, HTTP_PROXY may have
    # been injected by the client via a "Proxy:" request header, so drop
    # the 'http' entry gathered above.  A lowercase http_proxy variable is
    # re-applied by the second pass.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    # Pass 2: variables whose name ends in lowercase '_proxy' win over the
    # first pass; an empty value removes the scheme entirely.
    for raw_name, value in os.environ.items():
        if raw_name.endswith('_proxy'):
            scheme = raw_name.lower()[:-6]
            if value:
                proxies[scheme] = value
            else:
                proxies.pop(scheme, None)
    return proxies
---|
2492 | n/a | |
---|
def proxy_bypass_environment(host, proxies=None):
    """Test if proxies should not be used for a particular host.

    Checks the proxy dict for the value of no_proxy, which should
    be a list of comma separated DNS suffixes, or '*' for all hosts.
    """
    if proxies is None:
        proxies = getproxies_environment()
    try:
        no_proxy = proxies['no']
    except KeyError:
        # don't bypass, if no_proxy isn't specified
        return 0
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # A suffix matches when the host equals it or ends with '.' + suffix,
    # case-insensitively, with or without the port.
    for entry in no_proxy.split(','):
        suffix = entry.strip().lstrip('.')  # ignore leading dots
        if not suffix:
            continue
        pattern = r'(.+\.)?%s$' % re.escape(suffix)
        if (re.match(pattern, hostonly, re.I)
                or re.match(pattern, host, re.I)):
            return 1
    # otherwise, don't bypass
    return 0
---|
2524 | n/a | |
---|
2525 | n/a | |
---|
# This code tests an OSX specific data structure but is testable on all
# platforms
def _proxy_bypass_macosx_sysconf(host, proxy_settings):
    """
    Return True iff this host shouldn't be accessed using a proxy

    This function uses the MacOSX framework SystemConfiguration
    to fetch the proxy information.

    proxy_settings come from _scproxy._get_proxy_settings or get mocked ie:
    { 'exclude_simple': bool,
      'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.1', '10.0/16']
    }
    """
    from fnmatch import fnmatch

    hostonly, port = splitport(host)

    def ip2num(ipAddr):
        # Pack a (possibly partial) dotted quad into a 32-bit integer,
        # padding missing trailing components with zeros.
        parts = ipAddr.split('.')
        parts = list(map(int, parts))
        if len(parts) != 4:
            parts = (parts + [0, 0, 0, 0])[:4]
        return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

    # Check for simple host names:
    if '.' not in host:
        if proxy_settings['exclude_simple']:
            return True

    # Resolved lazily, only if a numeric exception entry is encountered.
    hostIP = None

    for value in proxy_settings.get('exceptions', ()):
        # Items in the list are strings like these: *.local, 169.254/16
        if not value: continue

        m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
        if m is not None:
            # Numeric entry, optionally with a /prefix-length mask.
            if hostIP is None:
                try:
                    hostIP = socket.gethostbyname(hostonly)
                    hostIP = ip2num(hostIP)
                except OSError:
                    # Unresolvable host can't match a numeric entry.
                    continue

            base = ip2num(m.group(1))
            mask = m.group(2)
            if mask is None:
                # No explicit mask: cover exactly the components given
                # (e.g. "10.1" acts like 10.1.0.0/16).
                mask = 8 * (m.group(1).count('.') + 1)
            else:
                mask = int(mask[1:])
            # Convert prefix length to a right-shift count and compare
            # the network portions.
            mask = 32 - mask

            if (hostIP >> mask) == (base >> mask):
                return True

        elif fnmatch(host, value):
            # Wildcard hostname entry (e.g. "*.local").
            return True

    return False
---|
2586 | n/a | |
---|
2587 | n/a | |
---|
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        # Fetch the live SystemConfiguration proxy settings and evaluate
        # them with the platform-independent helper above.
        proxy_settings = _get_proxy_settings()
        return _proxy_bypass_macosx_sysconf(host, proxy_settings)

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Return True, if host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or from the MacOSX framework SystemConfiguration.

        """
        proxies = getproxies_environment()
        if proxies:
            return proxy_bypass_environment(host, proxies)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        # Environment variables take precedence over system configuration.
        return getproxies_environment() or getproxies_macosx_sysconf()
---|
2620 | n/a | |
---|
2621 | n/a | |
---|
elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = winreg.QueryValueEx(internetSettings,
                                              'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(winreg.QueryValueEx(internetSettings,
                                                      'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (OSError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()
---|
2676 | n/a | |
---|
def proxy_bypass_registry(host):
    """Return 1 if *host* matches the Windows registry ProxyOverride
    list (i.e. the proxy should be bypassed for it), else 0.

    Reads HKCU\\...\\Internet Settings: ProxyEnable and ProxyOverride.
    Returns 0 on any failure to read the registry or when proxying is
    disabled/no override list is configured.
    """
    try:
        import winreg
    except ImportError:
        # Std modules, so should be around - but you never know!
        return 0
    try:
        internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
            r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
        proxyEnable = winreg.QueryValueEx(internetSettings,
                                          'ProxyEnable')[0]
        proxyOverride = str(winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0])
        # ^^^^ Returned as Unicode but problems if not converted to ASCII
    except OSError:
        # Key or value missing: treat as "no bypass configured".
        return 0
    if not proxyEnable or not proxyOverride:
        return 0
    # try to make a host list from name and IP address.
    rawHost, port = splitport(host)
    host = [rawHost]
    try:
        # Also match on the resolved IP address, if different.
        addr = socket.gethostbyname(rawHost)
        if addr != rawHost:
            host.append(addr)
    except OSError:
        pass
    try:
        # And on the fully-qualified name, if different.
        fqdn = socket.getfqdn(rawHost)
        if fqdn != rawHost:
            host.append(fqdn)
    except OSError:
        pass
    # make a check value list from the registry entry: replace the
    # '<local>' string by the localhost entry and the corresponding
    # canonical entry.
    proxyOverride = proxyOverride.split(';')
    # now check if we match one of the registry values.
    for test in proxyOverride:
        if test == '<local>':
            # '<local>' means bypass for plain (dot-less) host names.
            if '.' not in rawHost:
                return 1
        # Translate the IE glob pattern into a regular expression.
        test = test.replace(".", r"\.")     # mask dots
        test = test.replace("*", r".*")     # change glob sequence
        test = test.replace("?", r".")      # change glob char
        for val in host:
            # NOTE(review): re.match anchors only at the start; a
            # trailing-anchor is intentionally absent here (matches
            # IE's prefix-style behavior) -- confirm before changing.
            if re.match(test, val, re.I):
                return 1
    return 0
2726 | n/a | |
---|
def proxy_bypass(host):
    """Return True, if host should be bypassed.

    Checks proxy settings gathered from the environment, if specified,
    or the registry.

    """
    # Environment settings, when present, take precedence over the
    # Windows registry -- mirroring getproxies().
    environment_proxies = getproxies_environment()
    if not environment_proxies:
        return proxy_bypass_registry(host)
    return proxy_bypass_environment(host, environment_proxies)
2739 | n/a | |
---|
else:
    # By default use environment variables
    # (non-Windows platforms have no registry to consult, so the
    # environment-based implementations serve directly).
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment