| 1 | n/a | """Parse (absolute and relative) URLs. |
|---|
| 2 | n/a | |
|---|
| 3 | n/a | urlparse module is based upon the following RFC specifications. |
|---|
| 4 | n/a | |
|---|
| 5 | n/a | RFC 3986 (STD66): "Uniform Resource Identifiers" by T. Berners-Lee, R. Fielding |
|---|
| 6 | n/a | and L. Masinter, January 2005. |
|---|
| 7 | n/a | |
|---|
| 8 | n/a | RFC 2732 : "Format for Literal IPv6 Addresses in URL's by R.Hinden, B.Carpenter |
|---|
| 9 | n/a | and L.Masinter, December 1999. |
|---|
| 10 | n/a | |
|---|
| 11 | n/a | RFC 2396: "Uniform Resource Identifiers (URI)": Generic Syntax by T. |
|---|
| 12 | n/a | Berners-Lee, R. Fielding, and L. Masinter, August 1998. |
|---|
| 13 | n/a | |
|---|
| 14 | n/a | RFC 2368: "The mailto URL scheme", by P.Hoffman , L Masinter, J. Zawinski, July 1998. |
|---|
| 15 | n/a | |
|---|
| 16 | n/a | RFC 1808: "Relative Uniform Resource Locators", by R. Fielding, UC Irvine, June |
|---|
| 17 | n/a | 1995. |
|---|
| 18 | n/a | |
|---|
| 19 | n/a | RFC 1738: "Uniform Resource Locators (URL)" by T. Berners-Lee, L. Masinter, M. |
|---|
| 20 | n/a | McCahill, December 1994 |
|---|
| 21 | n/a | |
|---|
| 22 | n/a | RFC 3986 is considered the current standard and any future changes to |
|---|
| 23 | n/a | urlparse module should conform with it. The urlparse module is |
|---|
| 24 | n/a | currently not entirely compliant with this RFC due to defacto |
|---|
| 25 | n/a | scenarios for parsing, and for backward compatibility purposes, some |
|---|
| 26 | n/a | parsing quirks from older RFCs are retained. The testcases in |
|---|
| 27 | n/a | test_urlparse.py provides a good indicator of parsing behavior. |
|---|
| 28 | n/a | """ |
|---|

import re
import sys
import collections

__all__ = ["urlparse", "urlunparse", "urljoin", "urldefrag",
           "urlsplit", "urlunsplit", "urlencode", "parse_qs",
           "parse_qsl", "quote", "quote_plus", "quote_from_bytes",
           "unquote", "unquote_plus", "unquote_to_bytes",
           "DefragResult", "ParseResult", "SplitResult",
           "DefragResultBytes", "ParseResultBytes", "SplitResultBytes"]

# A classification of schemes ('' means apply by default)
uses_relative = ['ftp', 'http', 'gopher', 'nntp', 'imap',
                 'wais', 'file', 'https', 'shttp', 'mms',
                 'prospero', 'rtsp', 'rtspu', '', 'sftp',
                 'svn', 'svn+ssh', 'ws', 'wss']
uses_netloc = ['ftp', 'http', 'gopher', 'nntp', 'telnet',
               'imap', 'wais', 'file', 'mms', 'https', 'shttp',
               'snews', 'prospero', 'rtsp', 'rtspu', 'rsync', '',
               'svn', 'svn+ssh', 'sftp', 'nfs', 'git', 'git+ssh',
               'ws', 'wss']
uses_params = ['ftp', 'hdl', 'prospero', 'http', 'imap',
               'https', 'shttp', 'rtsp', 'rtspu', 'sip', 'sips',
               'mms', '', 'sftp', 'tel']

# These are not actually used anymore, but should stay for backwards
# compatibility.  (They are undocumented, but have a public-looking name.)
non_hierarchical = ['gopher', 'hdl', 'mailto', 'news',
                    'telnet', 'wais', 'imap', 'snews', 'sip', 'sips']
uses_query = ['http', 'wais', 'imap', 'https', 'shttp', 'mms',
              'gopher', 'rtsp', 'rtspu', 'sip', 'sips', '']
uses_fragment = ['ftp', 'hdl', 'http', 'gopher', 'news',
                 'nntp', 'wais', 'https', 'shttp', 'snews',
                 'file', 'prospero', '']

# Characters valid in scheme names
scheme_chars = ('abcdefghijklmnopqrstuvwxyz'
                'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                '0123456789'
                '+-.')

# XXX: Consider replacing with functools.lru_cache
MAX_CACHE_SIZE = 20
_parse_cache = {}

def clear_cache():
    """Clear the parse cache and the quoters cache."""
    _parse_cache.clear()
    _safe_quoters.clear()


# Helpers for bytes handling
# For 3.2, we deliberately require applications that
# handle improperly quoted URLs to do their own
# decoding and encoding. If valid use cases are
# presented, we may relax this by using latin-1
# decoding internally for 3.3
_implicit_encoding = 'ascii'
_implicit_errors = 'strict'

def _noop(obj):
    return obj

def _encode_result(obj, encoding=_implicit_encoding,
                        errors=_implicit_errors):
    return obj.encode(encoding, errors)

def _decode_args(args, encoding=_implicit_encoding,
                       errors=_implicit_errors):
    return tuple(x.decode(encoding, errors) if x else '' for x in args)

def _coerce_args(*args):
    # Invokes decode if necessary to create str args
    # and returns the coerced inputs along with
    # an appropriate result coercion function
    #  - noop for str inputs
    #  - encoding function otherwise
    str_input = isinstance(args[0], str)
    for arg in args[1:]:
        # We special-case the empty string to support the
        # "scheme=''" default argument to some functions
        if arg and isinstance(arg, str) != str_input:
            raise TypeError("Cannot mix str and non-str arguments")
    if str_input:
        return args + (_noop,)
    return _decode_args(args) + (_encode_result,)

# Result objects are more helpful than simple tuples
class _ResultMixinStr(object):
    """Standard approach to encoding parsed results from str to bytes"""
    __slots__ = ()

    def encode(self, encoding='ascii', errors='strict'):
        return self._encoded_counterpart(*(x.encode(encoding, errors) for x in self))


class _ResultMixinBytes(object):
    """Standard approach to decoding parsed results from bytes to str"""
    __slots__ = ()

    def decode(self, encoding='ascii', errors='strict'):
        return self._decoded_counterpart(*(x.decode(encoding, errors) for x in self))


class _NetlocResultMixinBase(object):
    """Shared methods for the parsed result objects containing a netloc element"""
    __slots__ = ()

    @property
    def username(self):
        return self._userinfo[0]

    @property
    def password(self):
        return self._userinfo[1]

    @property
    def hostname(self):
        hostname = self._hostinfo[0]
        if not hostname:
            hostname = None
        elif hostname is not None:
            hostname = hostname.lower()
        return hostname

    @property
    def port(self):
        port = self._hostinfo[1]
        if port is not None:
            port = int(port, 10)
            if not (0 <= port <= 65535):
                raise ValueError("Port out of range 0-65535")
        return port


class _NetlocResultMixinStr(_NetlocResultMixinBase, _ResultMixinStr):
    __slots__ = ()

    @property
    def _userinfo(self):
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition('@')
        if have_info:
            username, have_password, password = userinfo.partition(':')
            if not have_password:
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition('@')
        _, have_open_br, bracketed = hostinfo.partition('[')
        if have_open_br:
            hostname, _, port = bracketed.partition(']')
            _, _, port = port.partition(':')
        else:
            hostname, _, port = hostinfo.partition(':')
        if not port:
            port = None
        return hostname, port


class _NetlocResultMixinBytes(_NetlocResultMixinBase, _ResultMixinBytes):
    __slots__ = ()

    @property
    def _userinfo(self):
        netloc = self.netloc
        userinfo, have_info, hostinfo = netloc.rpartition(b'@')
        if have_info:
            username, have_password, password = userinfo.partition(b':')
            if not have_password:
                password = None
        else:
            username = password = None
        return username, password

    @property
    def _hostinfo(self):
        netloc = self.netloc
        _, _, hostinfo = netloc.rpartition(b'@')
        _, have_open_br, bracketed = hostinfo.partition(b'[')
        if have_open_br:
            hostname, _, port = bracketed.partition(b']')
            _, _, port = port.partition(b':')
        else:
            hostname, _, port = hostinfo.partition(b':')
        if not port:
            port = None
        return hostname, port


from collections import namedtuple

_DefragResultBase = namedtuple('DefragResult', 'url fragment')
_SplitResultBase = namedtuple(
    'SplitResult', 'scheme netloc path query fragment')
_ParseResultBase = namedtuple(
    'ParseResult', 'scheme netloc path params query fragment')

_DefragResultBase.__doc__ = """
DefragResult(url, fragment)

A 2-tuple that contains the url without fragment identifier and the fragment
identifier as a separate argument.
"""

_DefragResultBase.url.__doc__ = """The URL with no fragment identifier."""

_DefragResultBase.fragment.__doc__ = """
Fragment identifier separated from URL, that allows indirect identification of a
secondary resource by reference to a primary resource and additional identifying
information.
"""

_SplitResultBase.__doc__ = """
SplitResult(scheme, netloc, path, query, fragment)

A 5-tuple that contains the different components of a URL. Similar to
ParseResult, but does not split params.
"""

_SplitResultBase.scheme.__doc__ = """Specifies URL scheme for the request."""

_SplitResultBase.netloc.__doc__ = """
Network location where the request is made to.
"""

_SplitResultBase.path.__doc__ = """
The hierarchical path, such as the path to a file to download.
"""

_SplitResultBase.query.__doc__ = """
The query component, that contains non-hierarchical data, that along with data
in path component, identifies a resource in the scope of URI's scheme and
network location.
"""

_SplitResultBase.fragment.__doc__ = """
Fragment identifier, that allows indirect identification of a secondary resource
by reference to a primary resource and additional identifying information.
"""

_ParseResultBase.__doc__ = """
ParseResult(scheme, netloc, path, params, query, fragment)

A 6-tuple that contains components of a parsed URL.
"""

_ParseResultBase.scheme.__doc__ = _SplitResultBase.scheme.__doc__
_ParseResultBase.netloc.__doc__ = _SplitResultBase.netloc.__doc__
_ParseResultBase.path.__doc__ = _SplitResultBase.path.__doc__
_ParseResultBase.params.__doc__ = """
Parameters for last path element used to dereference the URI in order to provide
access to perform some operation on the resource.
"""

_ParseResultBase.query.__doc__ = _SplitResultBase.query.__doc__
_ParseResultBase.fragment.__doc__ = _SplitResultBase.fragment.__doc__


# For backwards compatibility, alias _NetlocResultMixinStr
# ResultBase is no longer part of the documented API, but it is
# retained since deprecating it isn't worth the hassle
ResultBase = _NetlocResultMixinStr

# Structured result objects for string data
class DefragResult(_DefragResultBase, _ResultMixinStr):
    __slots__ = ()
    def geturl(self):
        if self.fragment:
            return self.url + '#' + self.fragment
        else:
            return self.url

class SplitResult(_SplitResultBase, _NetlocResultMixinStr):
    __slots__ = ()
    def geturl(self):
        return urlunsplit(self)

class ParseResult(_ParseResultBase, _NetlocResultMixinStr):
    __slots__ = ()
    def geturl(self):
        return urlunparse(self)

# Structured result objects for bytes data
class DefragResultBytes(_DefragResultBase, _ResultMixinBytes):
    __slots__ = ()
    def geturl(self):
        if self.fragment:
            return self.url + b'#' + self.fragment
        else:
            return self.url

class SplitResultBytes(_SplitResultBase, _NetlocResultMixinBytes):
    __slots__ = ()
    def geturl(self):
        return urlunsplit(self)

class ParseResultBytes(_ParseResultBase, _NetlocResultMixinBytes):
    __slots__ = ()
    def geturl(self):
        return urlunparse(self)

# Set up the encode/decode result pairs
def _fix_result_transcoding():
    _result_pairs = (
        (DefragResult, DefragResultBytes),
        (SplitResult, SplitResultBytes),
        (ParseResult, ParseResultBytes),
    )
    for _decoded, _encoded in _result_pairs:
        _decoded._encoded_counterpart = _encoded
        _encoded._decoded_counterpart = _decoded

_fix_result_transcoding()
del _fix_result_transcoding

def urlparse(url, scheme='', allow_fragments=True):
    """Parse a URL into 6 components:
    <scheme>://<netloc>/<path>;<params>?<query>#<fragment>
    Return a 6-tuple: (scheme, netloc, path, params, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    splitresult = urlsplit(url, scheme, allow_fragments)
    scheme, netloc, url, query, fragment = splitresult
    if scheme in uses_params and ';' in url:
        url, params = _splitparams(url)
    else:
        params = ''
    result = ParseResult(scheme, netloc, url, params, query, fragment)
    return _coerce_result(result)
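
# Illustrative usage (editor's example, not part of the original module;
# example.com values are placeholders).  urlparse() yields all six
# components, and the result object exposes the netloc convenience
# attributes from _NetlocResultMixinBase:
#
#   >>> p = urlparse('http://user:pwd@example.com:8042/a/b;type=a?name=ferret#nose')
#   >>> p.scheme, p.netloc, p.path, p.params, p.query, p.fragment
#   ('http', 'user:pwd@example.com:8042', '/a/b', 'type=a', 'name=ferret', 'nose')
#   >>> p.hostname, p.port, p.username, p.password
#   ('example.com', 8042, 'user', 'pwd')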

def _splitparams(url):
    if '/' in url:
        i = url.find(';', url.rfind('/'))
        if i < 0:
            return url, ''
    else:
        i = url.find(';')
    return url[:i], url[i+1:]

def _splitnetloc(url, start=0):
    delim = len(url)   # position of end of domain part of url, default is end
    for c in '/?#':    # look for delimiters; the order is NOT important
        wdelim = url.find(c, start)        # find first of this delim
        if wdelim >= 0:                    # if found
            delim = min(delim, wdelim)     # use earliest delim position
    return url[start:delim], url[delim:]   # return (domain, rest)

def urlsplit(url, scheme='', allow_fragments=True):
    """Parse a URL into 5 components:
    <scheme>://<netloc>/<path>?<query>#<fragment>
    Return a 5-tuple: (scheme, netloc, path, query, fragment).
    Note that we don't break the components up in smaller bits
    (e.g. netloc is a single string) and we don't expand % escapes."""
    url, scheme, _coerce_result = _coerce_args(url, scheme)
    allow_fragments = bool(allow_fragments)
    key = url, scheme, allow_fragments, type(url), type(scheme)
    cached = _parse_cache.get(key, None)
    if cached:
        return _coerce_result(cached)
    if len(_parse_cache) >= MAX_CACHE_SIZE: # avoid runaway growth
        clear_cache()
    netloc = query = fragment = ''
    i = url.find(':')
    if i > 0:
        if url[:i] == 'http': # optimize the common case
            scheme = url[:i].lower()
            url = url[i+1:]
            if url[:2] == '//':
                netloc, url = _splitnetloc(url, 2)
                if (('[' in netloc and ']' not in netloc) or
                        (']' in netloc and '[' not in netloc)):
                    raise ValueError("Invalid IPv6 URL")
            if allow_fragments and '#' in url:
                url, fragment = url.split('#', 1)
            if '?' in url:
                url, query = url.split('?', 1)
            v = SplitResult(scheme, netloc, url, query, fragment)
            _parse_cache[key] = v
            return _coerce_result(v)
        for c in url[:i]:
            if c not in scheme_chars:
                break
        else:
            # make sure "url" is not actually a port number (in which case
            # "scheme" is really part of the path)
            rest = url[i+1:]
            if not rest or any(c not in '0123456789' for c in rest):
                # not a port number
                scheme, url = url[:i].lower(), rest

    if url[:2] == '//':
        netloc, url = _splitnetloc(url, 2)
        if (('[' in netloc and ']' not in netloc) or
                (']' in netloc and '[' not in netloc)):
            raise ValueError("Invalid IPv6 URL")
    if allow_fragments and '#' in url:
        url, fragment = url.split('#', 1)
    if '?' in url:
        url, query = url.split('?', 1)
    v = SplitResult(scheme, netloc, url, query, fragment)
    _parse_cache[key] = v
    return _coerce_result(v)
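
# Illustrative usage (editor's example, not part of the original module).
# urlsplit() is like urlparse() but keeps any ;params attached to the path,
# and it validates bracketed IPv6 netlocs:
#
#   >>> s = urlsplit('https://example.com/a/b;c?x=1#top')
#   >>> s.path, s.query, s.fragment
#   ('/a/b;c', 'x=1', 'top')
#   >>> urlsplit('//[::1]:8080/').hostname, urlsplit('//[::1]:8080/').port
#   ('::1', 8080)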

def urlunparse(components):
    """Put a parsed URL back together again.  This may result in a
    slightly different, but equivalent URL, if the URL that was parsed
    originally had redundant delimiters, e.g. a ? with an empty query
    (the draft states that these are equivalent)."""
    scheme, netloc, url, params, query, fragment, _coerce_result = (
                                                  _coerce_args(*components))
    if params:
        url = "%s;%s" % (url, params)
    return _coerce_result(urlunsplit((scheme, netloc, url, query, fragment)))

def urlunsplit(components):
    """Combine the elements of a tuple as returned by urlsplit() into a
    complete URL as a string. The data argument can be any five-item iterable.
    This may result in a slightly different, but equivalent URL, if the URL that
    was parsed originally had unnecessary delimiters (for example, a ? with an
    empty query; the RFC states that these are equivalent)."""
    scheme, netloc, url, query, fragment, _coerce_result = (
                                          _coerce_args(*components))
    if netloc or (scheme and scheme in uses_netloc and url[:2] != '//'):
        if url and url[:1] != '/': url = '/' + url
        url = '//' + (netloc or '') + url
    if scheme:
        url = scheme + ':' + url
    if query:
        url = url + '?' + query
    if fragment:
        url = url + '#' + fragment
    return _coerce_result(url)
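
# Illustrative usage (editor's example, not part of the original module).
# urlsplit()/urlunsplit() round-trip a URL, up to redundant delimiters such
# as a '?' with an empty query:
#
#   >>> urlunsplit(urlsplit('http://example.com/path?q=1#f'))
#   'http://example.com/path?q=1#f'
#   >>> urlunsplit(urlsplit('http://example.com/path?'))
#   'http://example.com/path'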

def urljoin(base, url, allow_fragments=True):
    """Join a base URL and a possibly relative URL to form an absolute
    interpretation of the latter."""
    if not base:
        return url
    if not url:
        return base

    base, url, _coerce_result = _coerce_args(base, url)
    bscheme, bnetloc, bpath, bparams, bquery, bfragment = \
            urlparse(base, '', allow_fragments)
    scheme, netloc, path, params, query, fragment = \
            urlparse(url, bscheme, allow_fragments)

    if scheme != bscheme or scheme not in uses_relative:
        return _coerce_result(url)
    if scheme in uses_netloc:
        if netloc:
            return _coerce_result(urlunparse((scheme, netloc, path,
                                              params, query, fragment)))
        netloc = bnetloc

    if not path and not params:
        path = bpath
        params = bparams
        if not query:
            query = bquery
        return _coerce_result(urlunparse((scheme, netloc, path,
                                          params, query, fragment)))

    base_parts = bpath.split('/')
    if base_parts[-1] != '':
        # the last item is not a directory, so will not be taken into account
        # in resolving the relative path
        del base_parts[-1]

    # per RFC 3986, ignore the base path entirely when the given path is
    # absolute (starts with '/')
    if path[:1] == '/':
        segments = path.split('/')
    else:
        segments = base_parts + path.split('/')
        # filter out elements that would cause redundant slashes on re-joining
        # the resolved_path
        segments[1:-1] = filter(None, segments[1:-1])

    resolved_path = []

    for seg in segments:
        if seg == '..':
            try:
                resolved_path.pop()
            except IndexError:
                # ignore any '..' segments that would otherwise cause an
                # IndexError when popped from resolved_path, as RFC 3986
                # resolution requires
                pass
        elif seg == '.':
            continue
        else:
            resolved_path.append(seg)

    if segments[-1] in ('.', '..'):
        # do some post-processing here. if the last segment was a relative dir,
        # then we need to append the trailing '/'
        resolved_path.append('')

    return _coerce_result(urlunparse((scheme, netloc, '/'.join(
        resolved_path) or '/', params, query, fragment)))
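
# Illustrative usage (editor's example, not part of the original module).
# urljoin() resolves the second argument against the first, following the
# merge/remove-dot-segments rules implemented above:
#
#   >>> urljoin('http://example.com/a/b/c', 'd')
#   'http://example.com/a/b/d'
#   >>> urljoin('http://example.com/a/b/c', '../x')
#   'http://example.com/a/x'
#   >>> urljoin('http://example.com/a/b/c', '/y')
#   'http://example.com/y'
#   >>> urljoin('http://example.com/a', 'https://other.org/z')
#   'https://other.org/z'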


def urldefrag(url):
    """Removes any existing fragment from URL.

    Returns a tuple of the defragmented URL and the fragment.  If
    the URL contained no fragments, the second element is the
    empty string.
    """
    url, _coerce_result = _coerce_args(url)
    if '#' in url:
        s, n, p, a, q, frag = urlparse(url)
        defrag = urlunparse((s, n, p, a, q, ''))
    else:
        frag = ''
        defrag = url
    return _coerce_result(DefragResult(defrag, frag))
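
# Illustrative usage (editor's example, not part of the original module):
#
#   >>> urldefrag('http://example.com/page#section2')
#   DefragResult(url='http://example.com/page', fragment='section2')
#   >>> urldefrag('http://example.com/page').fragment
#   ''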

_hexdig = '0123456789ABCDEFabcdef'
_hextobyte = None

def unquote_to_bytes(string):
    """unquote_to_bytes('abc%20def') -> b'abc def'."""
    # Note: strings are encoded as UTF-8. This is only an issue if it contains
    # unescaped non-ASCII characters, which URIs should not.
    if not string:
        # Is it a string-like object?
        string.split
        return b''
    if isinstance(string, str):
        string = string.encode('utf-8')
    bits = string.split(b'%')
    if len(bits) == 1:
        return string
    res = [bits[0]]
    append = res.append
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    global _hextobyte
    if _hextobyte is None:
        _hextobyte = {(a + b).encode(): bytes.fromhex(a + b)
                      for a in _hexdig for b in _hexdig}
    for item in bits[1:]:
        try:
            append(_hextobyte[item[:2]])
            append(item[2:])
        except KeyError:
            append(b'%')
            append(item)
    return b''.join(res)

_asciire = re.compile('([\x00-\x7f]+)')

def unquote(string, encoding='utf-8', errors='replace'):
    """Replace %xx escapes by their single-character equivalent. The optional
    encoding and errors parameters specify how to decode percent-encoded
    sequences into Unicode characters, as accepted by the bytes.decode()
    method.
    By default, percent-encoded sequences are decoded with UTF-8, and invalid
    sequences are replaced by a placeholder character.

    unquote('abc%20def') -> 'abc def'.
    """
    if '%' not in string:
        string.split
        return string
    if encoding is None:
        encoding = 'utf-8'
    if errors is None:
        errors = 'replace'
    bits = _asciire.split(string)
    res = [bits[0]]
    append = res.append
    for i in range(1, len(bits), 2):
        append(unquote_to_bytes(bits[i]).decode(encoding, errors))
        append(bits[i + 1])
    return ''.join(res)
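
# Illustrative usage (editor's example, not part of the original module).
# unquote_to_bytes() yields raw bytes; unquote() decodes them, UTF-8 by
# default, with invalid sequences replaced:
#
#   >>> unquote_to_bytes('abc%20def%C3%A9')
#   b'abc def\xc3\xa9'
#   >>> unquote('abc%20def%C3%A9')
#   'abc defé'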

def parse_qs(qs, keep_blank_values=False, strict_parsing=False,
             encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.
        A true value indicates that blanks should be retained as
        blank strings.  The default false value indicates that
        blank values are to be ignored and treated as if they were
        not included.

    strict_parsing: flag indicating what to do with parsing errors.
        If false (the default), errors are silently ignored.
        If true, errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.
    """
    parsed_result = {}
    pairs = parse_qsl(qs, keep_blank_values, strict_parsing,
                      encoding=encoding, errors=errors)
    for name, value in pairs:
        if name in parsed_result:
            parsed_result[name].append(value)
        else:
            parsed_result[name] = [value]
    return parsed_result

def parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
              encoding='utf-8', errors='replace'):
    """Parse a query given as a string argument.

    Arguments:

    qs: percent-encoded query string to be parsed

    keep_blank_values: flag indicating whether blank values in
        percent-encoded queries should be treated as blank strings.  A
        true value indicates that blanks should be retained as blank
        strings.  The default false value indicates that blank values
        are to be ignored and treated as if they were not included.

    strict_parsing: flag indicating what to do with parsing errors. If
        false (the default), errors are silently ignored. If true,
        errors raise a ValueError exception.

    encoding and errors: specify how to decode percent-encoded sequences
        into Unicode characters, as accepted by the bytes.decode() method.

    Returns a list, as G-d intended.
    """
    qs, _coerce_result = _coerce_args(qs)
    pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
    r = []
    for name_value in pairs:
        if not name_value and not strict_parsing:
            continue
        nv = name_value.split('=', 1)
        if len(nv) != 2:
            if strict_parsing:
                raise ValueError("bad query field: %r" % (name_value,))
            # Handle case of a control-name with no equal sign
            if keep_blank_values:
                nv.append('')
            else:
                continue
        if len(nv[1]) or keep_blank_values:
            name = nv[0].replace('+', ' ')
            name = unquote(name, encoding=encoding, errors=errors)
            name = _coerce_result(name)
            value = nv[1].replace('+', ' ')
            value = unquote(value, encoding=encoding, errors=errors)
            value = _coerce_result(value)
            r.append((name, value))
    return r
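
# Illustrative usage (editor's example, not part of the original module).
# parse_qsl() preserves order and duplicates; parse_qs() groups values by
# field name:
#
#   >>> parse_qsl('a=1&a=2&b=%20x')
#   [('a', '1'), ('a', '2'), ('b', ' x')]
#   >>> parse_qs('a=1&a=2&b=%20x')
#   {'a': ['1', '2'], 'b': [' x']}
#   >>> parse_qsl('a=&b=1', keep_blank_values=True)
#   [('a', ''), ('b', '1')]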

def unquote_plus(string, encoding='utf-8', errors='replace'):
    """Like unquote(), but also replace plus signs by spaces, as required for
    unquoting HTML form values.

    unquote_plus('%7e/abc+def') -> '~/abc def'
    """
    string = string.replace('+', ' ')
    return unquote(string, encoding, errors)

_ALWAYS_SAFE = frozenset(b'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                         b'abcdefghijklmnopqrstuvwxyz'
                         b'0123456789'
                         b'_.-')
_ALWAYS_SAFE_BYTES = bytes(_ALWAYS_SAFE)
_safe_quoters = {}

class Quoter(collections.defaultdict):
    """A mapping from bytes (in range(0,256)) to strings.

    String values are percent-encoded byte values, unless the key < 128, and
    in the "safe" set (either the specified safe set, or default set).
    """
    # Keeps a cache internally, using defaultdict, for efficiency (lookups
    # of cached keys don't call Python code at all).
    def __init__(self, safe):
        """safe: bytes object."""
        self.safe = _ALWAYS_SAFE.union(safe)

    def __repr__(self):
        # Without this, will just display as a defaultdict
        return "<%s %r>" % (self.__class__.__name__, dict(self))

    def __missing__(self, b):
        # Handle a cache miss. Store quoted string in cache and return.
        res = chr(b) if b in self.safe else '%{:02X}'.format(b)
        self[b] = res
        return res

def quote(string, safe='/', encoding=None, errors=None):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    string and safe may be either str or bytes objects. encoding and errors
    must not be specified if string is a bytes object.

    The optional encoding and errors parameters specify how to deal with
    non-ASCII characters, as accepted by the str.encode method.
    By default, encoding='utf-8' (characters are encoded with UTF-8), and
    errors='strict' (unsupported characters raise a UnicodeEncodeError).
    """
    if isinstance(string, str):
        if not string:
            return string
        if encoding is None:
            encoding = 'utf-8'
        if errors is None:
            errors = 'strict'
        string = string.encode(encoding, errors)
    else:
        if encoding is not None:
            raise TypeError("quote() doesn't support 'encoding' for bytes")
        if errors is not None:
            raise TypeError("quote() doesn't support 'errors' for bytes")
    return quote_from_bytes(string, safe)

def quote_plus(string, safe='', encoding=None, errors=None):
    """Like quote(), but also replace ' ' with '+', as required for quoting
    HTML form values.  Plus signs in the original string are escaped unless
    they are included in safe.  It also does not have safe default to '/'.
    """
    # Check if ' ' in string, where string may either be a str or bytes.  If
    # there are no spaces, the regular quote will produce the right answer.
    if ((isinstance(string, str) and ' ' not in string) or
            (isinstance(string, bytes) and b' ' not in string)):
        return quote(string, safe, encoding, errors)
    if isinstance(safe, str):
        space = ' '
    else:
        space = b' '
    string = quote(string, safe + space, encoding, errors)
    return string.replace(' ', '+')
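
# Illustrative usage (editor's example, not part of the original module).
# quote() leaves '/' alone by default (path quoting); quote_plus() encodes
# '/' as well and turns spaces into '+' for form data:
#
#   >>> quote('/el niño/')
#   '/el%20ni%C3%B1o/'
#   >>> quote_plus('/el niño/')
#   '%2Fel+ni%C3%B1o%2F'
#   >>> quote('abc def', safe='')
#   'abc%20def'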

def quote_from_bytes(bs, safe='/'):
    """Like quote(), but accepts a bytes object rather than a str, and does
    not perform string-to-bytes encoding.  It always returns an ASCII string.
    quote_from_bytes(b'abc def\x3f') -> 'abc%20def%3f'
    """
    if not isinstance(bs, (bytes, bytearray)):
        raise TypeError("quote_from_bytes() expected bytes")
    if not bs:
        return ''
    if isinstance(safe, str):
        # Normalize 'safe' by converting to bytes and removing non-ASCII chars
        safe = safe.encode('ascii', 'ignore')
    else:
        safe = bytes([c for c in safe if c < 128])
    if not bs.rstrip(_ALWAYS_SAFE_BYTES + safe):
        return bs.decode()
    try:
        quoter = _safe_quoters[safe]
    except KeyError:
        _safe_quoters[safe] = quoter = Quoter(safe).__getitem__
    return ''.join([quoter(char) for char in bs])

def urlencode(query, doseq=False, safe='', encoding=None, errors=None,
              quote_via=quote_plus):
    """Encode a dict or sequence of two-element tuples into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    The components of a query arg may each be either a string or a bytes type.

    The safe, encoding, and errors parameters are passed down to the function
    specified by quote_via (encoding and errors only if a component is a str).
    """

    if hasattr(query, "items"):
        query = query.items()
    else:
        # It's a bother at times that strings and string-like objects are
        # sequences.
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # Zero-length sequences of all types will get here and succeed,
            # but that's a minor nit.  Since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty, va, tb = sys.exc_info()
            raise TypeError("not a valid non-string sequence "
                            "or mapping object").with_traceback(tb)

    l = []
    if not doseq:
        for k, v in query:
            if isinstance(k, bytes):
                k = quote_via(k, safe)
            else:
                k = quote_via(str(k), safe, encoding, errors)

            if isinstance(v, bytes):
                v = quote_via(v, safe)
            else:
                v = quote_via(str(v), safe, encoding, errors)
            l.append(k + '=' + v)
    else:
        for k, v in query:
            if isinstance(k, bytes):
                k = quote_via(k, safe)
            else:
                k = quote_via(str(k), safe, encoding, errors)

            if isinstance(v, bytes):
                v = quote_via(v, safe)
                l.append(k + '=' + v)
            elif isinstance(v, str):
                v = quote_via(v, safe, encoding, errors)
                l.append(k + '=' + v)
            else:
                try:
                    # Is this a sufficient test for sequence-ness?
                    x = len(v)
                except TypeError:
                    # not a sequence
                    v = quote_via(str(v), safe, encoding, errors)
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        if isinstance(elt, bytes):
                            elt = quote_via(elt, safe)
                        else:
                            elt = quote_via(str(elt), safe, encoding, errors)
                        l.append(k + '=' + elt)
    return '&'.join(l)
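
# Illustrative usage (editor's example, not part of the original module).
# With doseq=False a sequence value is quoted as the str() of the whole
# sequence; with doseq=True each element becomes its own parameter.  The
# quote_via and safe parameters control how values are escaped:
#
#   >>> urlencode({'q': 'python url', 'lang': 'en'})
#   'q=python+url&lang=en'
#   >>> urlencode([('tag', ['a', 'b'])], doseq=True)
#   'tag=a&tag=b'
#   >>> urlencode({'path': '/x y'}, quote_via=quote, safe='/')
#   'path=/x%20y'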

def to_bytes(url):
    """to_bytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    # XXX get rid of to_bytes()
    if isinstance(url, str):
        try:
            url = url.encode("ASCII").decode()
        except UnicodeError:
            raise UnicodeError("URL " + repr(url) +
                               " contains non-ASCII characters")
    return url

def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = str(url).strip()
    if url[:1] == '<' and url[-1:] == '>':
        url = url[1:-1].strip()
    if url[:4] == 'URL:': url = url[4:].strip()
    return url

_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
        _typeprog = re.compile('([^/:]+):(.*)', re.DOTALL)

    match = _typeprog.match(url)
    if match:
        scheme, data = match.groups()
        return scheme.lower(), data
    return None, url

_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
        _hostprog = re.compile('//([^/?]*)(.*)', re.DOTALL)

    match = _hostprog.match(url)
    if match:
        host_port, path = match.groups()
        if path and path[0] != '/':
            path = '/' + path
        return host_port, path
    return None, url

def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    user, delim, host = host.rpartition('@')
    return (user if delim else None), host

def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    user, delim, passwd = user.partition(':')
    return user, (passwd if delim else None)

# splittag('/path#tag') --> '/path', 'tag'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'."""
    global _portprog
    if _portprog is None:
        _portprog = re.compile('(.*):([0-9]*)$', re.DOTALL)

    match = _portprog.match(host)
    if match:
        host, port = match.groups()
        if port:
            return host, port
    return host, None
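
# Illustrative usage (editor's example, not part of the original module).
# The split* helpers are the older, lower-level counterparts of urlsplit()
# and the result-object properties; each peels off one piece of a URL:
#
#   >>> splittype('mailto:user@example.com')
#   ('mailto', 'user@example.com')
#   >>> splithost('//www.example.com:80/index.html')
#   ('www.example.com:80', '/index.html')
#   >>> splitport('www.example.com:80')
#   ('www.example.com', '80')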

def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    host, delim, port = host.rpartition(':')
    if not delim:
        host = port
    elif port:
        try:
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport

def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    path, delim, query = url.rpartition('?')
    if delim:
        return path, query
    return url, None

def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    path, delim, tag = url.rpartition('#')
    if delim:
        return path, tag
    return url, None

def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    words = url.split(';')
    return words[0], words[1:]

def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    attr, delim, value = attr.partition('=')
    return attr, (value if delim else None)
|---|