» Core Development > Code coverage > Lib/test/test_urllib.py

Python code coverage for Lib/test/test_urllib.py

# | count | content
1n/a"""Regression tests for what was in Python 2's "urllib" module"""
2n/a
3n/aimport urllib.parse
4n/aimport urllib.request
5n/aimport urllib.error
6n/aimport http.client
7n/aimport email.message
8n/aimport io
9n/aimport unittest
10n/afrom unittest.mock import patch
11n/afrom test import support
12n/aimport os
13n/atry:
14n/a import ssl
15n/aexcept ImportError:
16n/a ssl = None
17n/aimport sys
18n/aimport tempfile
19n/afrom nturl2path import url2pathname, pathname2url
20n/a
21n/afrom base64 import b64encode
22n/aimport collections
23n/a
24n/a
def hexescape(char):
    """Escape char as RFC 2396 specifies.

    Returns ``'%'`` followed by the upper-case hexadecimal value of
    ``ord(char)``, zero-padded to at least two digits (``' '`` -> ``'%20'``,
    ``'\\n'`` -> ``'%0A'``).
    """
    # %02X upper-cases and zero-pads in a single step.
    return "%%%02X" % ord(char)
31n/a
# Shortcut for testing FancyURLopener: the opener shared by every urlopen()
# call that does not pass explicit proxies.
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data]) -> open file-like object

    When *proxies* is given, a fresh ``urllib.request.FancyURLopener`` is
    built for this call only.  Otherwise the module-level ``_urlopener`` is
    created on first use and reused afterwards.  *data* is forwarded to
    ``opener.open()`` only when it is not None.
    """
    global _urlopener
    if proxies is not None:
        opener = urllib.request.FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
50n/a
51n/a
def FancyURLopener():
    """Return a ``urllib.request.FancyURLopener`` built under a warnings check.

    The opener is created inside ``support.check_warnings()`` with a filter
    for the FancyURLopener ``DeprecationWarning`` message.
    """
    deprecation_filter = (
        'FancyURLopener style of invoking requests is deprecated.',
        DeprecationWarning)
    with support.check_warnings(deprecation_filter):
        return urllib.request.FancyURLopener()
57n/a
58n/a
def fakehttp(fakedata):
    """Return an ``HTTPConnection`` subclass that replays *fakedata*.

    The returned class never touches the network: ``connect()`` installs an
    in-memory socket preloaded with *fakedata* (the raw bytes of the server
    reply).  The last payload passed to the socket's ``sendall()`` is stored
    on the class as ``buf``, and the most recently created socket as
    ``fakesock``, so tests can inspect them.
    """
    class FakeSocket(io.BytesIO):
        # Number of outstanding users of this object (the socket itself plus
        # one per makefile() call); the buffer is really closed at zero.
        io_refs = 1

        def sendall(self, data):
            # Record the outgoing request for later verification.
            FakeHTTPConnection.buf = data

        def makefile(self, *args, **kwds):
            # The "file" view is the socket itself; just count the new user.
            self.io_refs += 1
            return self

        def read(self, amt=None):
            # A closed BytesIO would raise ValueError; report EOF instead.
            if self.closed:
                return b""
            return super().read(amt)

        def readline(self, length=None):
            if self.closed:
                return b""
            return super().readline(length)

        def close(self):
            self.io_refs -= 1
            if self.io_refs == 0:
                super().close()

    class FakeHTTPConnection(http.client.HTTPConnection):

        # buffer to store data for verification in urlopen tests.
        buf = None

        def connect(self):
            self.sock = FakeSocket(self.fakedata)
            type(self).fakesock = self.sock

    # Attached after the class body so connect() can read it as an attribute.
    FakeHTTPConnection.fakedata = fakedata

    return FakeHTTPConnection
96n/a
97n/a
class FakeHTTPMixin(object):
    """Mixin that temporarily replaces ``http.client.HTTPConnection``."""

    def fakehttp(self, fakedata):
        """Swap in the class built by the module-level fakehttp(fakedata).

        The class currently installed is remembered on the instance so that
        unfakehttp() can put it back.
        """
        replaced_class = http.client.HTTPConnection
        self._connection_class = replaced_class
        http.client.HTTPConnection = fakehttp(fakedata)

    def unfakehttp(self):
        """Reinstall the connection class saved by fakehttp()."""
        http.client.HTTPConnection = self._connection_class
105n/a
106n/a
class FakeFTPMixin(object):
    """Mixin that temporarily replaces ``urllib.request.ftpwrapper``."""

    def fakeftp(self):
        """Install an inert ftpwrapper stand-in, remembering the current one.

        The stand-in accepts the constructor arguments listed below and
        ignores them, returns an empty ``BytesIO`` and a length of 0 from
        ``retrfile()``, and does nothing on ``close()``.
        """
        class FakeFtpWrapper(object):
            def __init__(self, user, passwd, host, port, dirs, timeout=None,
                         persistent=True):
                pass

            def retrfile(self, file, type):
                return io.BytesIO(), 0

            def close(self):
                pass

        self._ftpwrapper_class = urllib.request.ftpwrapper
        urllib.request.ftpwrapper = FakeFtpWrapper

    def unfakeftp(self):
        """Reinstall the ftpwrapper saved by fakeftp()."""
        urllib.request.ftpwrapper = self._ftpwrapper_class
125n/a
126n/a
class urlopen_FileTests(unittest.TestCase):
    """Test urlopen() opening a temporary file.

    Try to test as much functionality as possible so as to cut down on reliance
    on connecting to the Net for testing.

    """

    def setUp(self):
        # Create a temp file to use for testing
        self.text = bytes("test_urllib: %s\n" % self.__class__.__name__,
                          "ascii")
        # The with-block closes the file even when the write fails.
        with open(support.TESTFN, 'wb') as f:
            f.write(self.text)
        self.pathname = support.TESTFN
        self.returned_obj = urlopen("file:%s" % self.pathname)

    def tearDown(self):
        """Shut down the open object"""
        self.returned_obj.close()
        os.remove(support.TESTFN)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines", "fileno",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.returned_obj, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_read(self):
        self.assertEqual(self.text, self.returned_obj.read())

    def test_readline(self):
        self.assertEqual(self.text, self.returned_obj.readline())
        self.assertEqual(b'', self.returned_obj.readline(),
                         "calling readline() after exhausting the file did not"
                         " return an empty string")

    def test_readlines(self):
        lines_list = self.returned_obj.readlines()
        self.assertEqual(len(lines_list), 1,
                         "readlines() returned the wrong number of lines")
        self.assertEqual(lines_list[0], self.text,
                         "readlines() returned improper text")

    def test_fileno(self):
        file_num = self.returned_obj.fileno()
        self.assertIsInstance(file_num, int, "fileno() did not return an int")
        self.assertEqual(os.read(file_num, len(self.text)), self.text,
                         "Reading on the file descriptor returned by fileno() "
                         "did not return the expected text")

    def test_close(self):
        # Test close() by calling it here and then having it be called again
        # by the tearDown() method for the test
        self.returned_obj.close()

    def test_info(self):
        self.assertIsInstance(self.returned_obj.info(), email.message.Message)

    def test_geturl(self):
        self.assertEqual(self.returned_obj.geturl(), self.pathname)

    def test_getcode(self):
        self.assertIsNone(self.returned_obj.getcode())

    def test_iter(self):
        # Test iterator
        # Don't need to count number of iterations since test would fail the
        # instant it returned anything beyond the first line from the
        # comparison.
        # Use the iterator in the usual implicit way to test for ticket #4608.
        for line in self.returned_obj:
            self.assertEqual(line, self.text)

    def test_relativelocalfile(self):
        # A relative path without a scheme is not a valid URL.
        self.assertRaises(ValueError, urllib.request.urlopen,
                          './' + self.pathname)
208n/a
class ProxyTests(unittest.TestCase):
    """Tests for the environment-variable proxy helpers of urllib.request."""

    def setUp(self):
        # Records changes to env vars
        self.env = support.EnvironmentVarGuard()
        # Delete all proxy related env vars
        proxy_vars = [name for name in list(os.environ)
                      if 'proxy' in name.lower()]
        for name in proxy_vars:
            self.env.unset(name)

    def tearDown(self):
        # Restore all proxy related env vars
        self.env.__exit__()
        del self.env

    def test_getproxies_environment_keep_no_proxies(self):
        self.env.set('NO_PROXY', 'localhost')
        proxies = urllib.request.getproxies_environment()
        # getproxies_environment use lowered case truncated (no '_proxy') keys
        self.assertEqual('localhost', proxies['no'])
        # List of no_proxies with space.
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234')
        bypass = urllib.request.proxy_bypass_environment
        self.assertTrue(bypass('anotherdomain.com'))
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('newdomain.com:1234'))

    def test_proxy_cgi_ignore(self):
        # HTTP_PROXY must be ignored once REQUEST_METHOD is set.
        getproxies = urllib.request.getproxies_environment
        try:
            self.env.set('HTTP_PROXY', 'http://somewhere:3128')
            proxies = getproxies()
            self.assertEqual('http://somewhere:3128', proxies['http'])
            self.env.set('REQUEST_METHOD', 'GET')
            proxies = getproxies()
            self.assertNotIn('http', proxies)
        finally:
            self.env.unset('REQUEST_METHOD')
            self.env.unset('HTTP_PROXY')

    def test_proxy_bypass_environment_host_match(self):
        bypass = urllib.request.proxy_bypass_environment
        self.env.set('NO_PROXY',
                     'localhost, anotherdomain.com, newdomain.com:1234, .d.o.t')
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('LocalHost'))                 # MixedCase
        self.assertTrue(bypass('LOCALHOST'))                 # UPPERCASE
        self.assertTrue(bypass('newdomain.com:1234'))
        self.assertTrue(bypass('foo.d.o.t'))                 # issue 29142
        self.assertTrue(bypass('anotherdomain.com:8888'))
        self.assertTrue(bypass('www.newdomain.com:1234'))
        self.assertFalse(bypass('prelocalhost'))
        self.assertFalse(bypass('newdomain.com'))            # no port
        self.assertFalse(bypass('newdomain.com:1235'))       # wrong port
261n/a
class ProxyTests_withOrderedEnv(unittest.TestCase):
    """Proxy lookups where the order of environment variables matters."""

    def setUp(self):
        # We need to test conditions, where variable order _is_ significant
        self._saved_env = os.environ
        # Monkey patch os.environ, start with empty fake environment
        os.environ = collections.OrderedDict()

    def tearDown(self):
        # Put the real environment mapping back.
        os.environ = self._saved_env

    def test_getproxies_environment_prefer_lowercase(self):
        bypass = urllib.request.proxy_bypass_environment
        # Test lowercase preference with removal
        os.environ['no_proxy'] = ''
        os.environ['No_Proxy'] = 'localhost'
        self.assertFalse(bypass('localhost'))
        self.assertFalse(bypass('arbitrary'))
        os.environ['http_proxy'] = ''
        os.environ['HTTP_PROXY'] = 'http://somewhere:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual({}, proxies)
        # Test lowercase preference of proxy bypass and correct matching
        # including ports
        os.environ['no_proxy'] = 'localhost, noproxy.com, my.proxy:1234'
        os.environ['No_Proxy'] = 'xyz.com'
        self.assertTrue(bypass('localhost'))
        self.assertTrue(bypass('noproxy.com:5678'))
        self.assertTrue(bypass('my.proxy:1234'))
        self.assertFalse(bypass('my.proxy'))
        self.assertFalse(bypass('arbitrary'))
        # Test lowercase preference with replacement
        os.environ['http_proxy'] = 'http://somewhere:3128'
        os.environ['Http_Proxy'] = 'http://somewhereelse:3128'
        proxies = urllib.request.getproxies_environment()
        self.assertEqual('http://somewhere:3128', proxies['http'])
296n/a
class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
    """Test urlopen() opening a fake http connection."""

    def check_read(self, ver):
        # Shared body of the test_read_<version> tests; *ver* is the HTTP
        # version as bytes, e.g. b"1.1".
        self.fakehttp(b"HTTP/" + ver + b" 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_url_fragment(self):
        # Issue #11703: geturl() omits fragments in the original URL.
        url = 'http://docs.python.org/library/urllib.html#OK'
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urllib.request.urlopen(url)
            self.assertEqual(response.geturl(), url)
        finally:
            self.unfakehttp()

    def test_willclose(self):
        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://www.python.org")
            self.assertTrue(response.fp.will_close)
        finally:
            self.unfakehttp()

    def test_read_0_9(self):
        # "0.9" response accepted (but not "simple responses" without
        # a status line)
        self.check_read(b"0.9")

    def test_read_1_0(self):
        self.check_read(b"1.0")

    def test_read_1_1(self):
        self.check_read(b"1.1")

    def test_read_bogus(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 401 Authentication Required
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            self.assertRaises(OSError, urlopen, "http://python.org/")
        finally:
            self.unfakehttp()

    def test_invalid_redirect(self):
        # urlopen() should raise OSError for many error codes.
        self.fakehttp(b'''HTTP/1.1 302 Found
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Location: file://guidocomputer.athome.com:/python/license
Connection: close
Content-Type: text/html; charset=iso-8859-1
''')
        try:
            expected_message = "Redirection to url 'file:"
            with self.assertRaisesRegex(urllib.error.HTTPError,
                                        expected_message):
                urlopen("http://python.org/")
        finally:
            self.unfakehttp()

    def test_redirect_limit_independent(self):
        # Ticket #12923: make sure independent requests each use their
        # own retry limit.
        for _ in range(FancyURLopener().maxtries):
            self.fakehttp(b'''HTTP/1.1 302 Found
Location: file://guidocomputer.athome.com:/python/license
Connection: close
''')
            try:
                self.assertRaises(urllib.error.HTTPError, urlopen,
                                  "http://something")
            finally:
                self.unfakehttp()

    def test_empty_socket(self):
        # urlopen() raises OSError if the underlying socket does not send any
        # data. (#1680230)
        self.fakehttp(b'')
        try:
            self.assertRaises(OSError, urlopen, "http://something")
        finally:
            self.unfakehttp()

    def test_missing_localfile(self):
        # Test for #10836
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('file://localhost/a/file/which/doesnot/exists.py')
        self.assertTrue(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_file_notexists(self):
        # A file: URL opens while the file exists and fails once it is gone.
        fd, tmp_file = tempfile.mkstemp()
        tmp_fileurl = 'file://localhost/' + tmp_file.replace(os.path.sep, '/')
        try:
            self.assertTrue(os.path.exists(tmp_file))
            with urlopen(tmp_fileurl) as fobj:
                self.assertTrue(fobj)
        finally:
            os.close(fd)
            os.unlink(tmp_file)
        self.assertFalse(os.path.exists(tmp_file))
        with self.assertRaises(urllib.error.URLError):
            urlopen(tmp_fileurl)

    def test_ftp_nohost(self):
        test_ftp_url = 'ftp:///path'
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen(test_ftp_url)
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    def test_ftp_nonexisting(self):
        with self.assertRaises(urllib.error.URLError) as caught:
            urlopen('ftp://localhost/a/file/which/doesnot/exists.py')
        self.assertFalse(caught.exception.filename)
        self.assertTrue(caught.exception.reason)

    @patch.object(urllib.request, 'MAXFTPCACHE', 0)
    def test_ftp_cache_pruning(self):
        # With MAXFTPCACHE patched to 0, opening an ftp URL while another
        # entry is cached must not fail.
        self.fakeftp()
        try:
            urllib.request.ftpcache['test'] = urllib.request.ftpwrapper(
                'user', 'pass', 'localhost', 21, [])
            urlopen('ftp://localhost')
        finally:
            self.unfakeftp()

    def test_userpass_inurl(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            response = urlopen("http://user:pass@python.org/")
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            self.assertEqual(response.geturl(), 'http://user:pass@python.org/')
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_userpass_inurl_w_spaces(self):
        self.fakehttp(b"HTTP/1.0 200 OK\r\n\r\nHello!")
        try:
            userpass = "a b:c d"
            url = "http://{}@python.org/".format(userpass)
            # Looked up after self.fakehttp(), so this is the fake class
            # whose ``buf`` records the outgoing request.
            fakehttp_wrapper = http.client.HTTPConnection
            encoded_credentials = b64encode(
                userpass.encode("ASCII")).decode("ASCII")
            authorization = "Authorization: Basic %s\r\n" % encoded_credentials
            response = urlopen(url)
            # The authorization header must be in place
            self.assertIn(authorization, fakehttp_wrapper.buf.decode("UTF-8"))
            self.assertEqual(response.readline(), b"Hello!")
            self.assertEqual(response.readline(), b"")
            # the spaces are quoted in URL so no match
            self.assertNotEqual(response.geturl(), url)
            self.assertEqual(response.getcode(), 200)
        finally:
            self.unfakehttp()

    def test_URLopener_deprecation(self):
        with support.check_warnings(('', DeprecationWarning)):
            urllib.request.URLopener()

    @unittest.skipUnless(ssl, "ssl module required")
    def test_cafile_and_context(self):
        # Passing both cafile and context is rejected with ValueError.
        context = ssl.create_default_context()
        with support.check_warnings(('', DeprecationWarning)):
            with self.assertRaises(ValueError):
                urllib.request.urlopen(
                    "https://localhost", cafile="/nonexistent/path",
                    context=context
                )
478n/a
class urlopen_DataTests(unittest.TestCase):
    """Test urlopen() opening a data URL."""

    def _open(self, url):
        # Open *url* and make sure the response is closed when the test ends.
        response = urllib.request.urlopen(url)
        self.addCleanup(response.close)
        return response

    def setUp(self):
        # text containing URL special- and unicode-characters
        self.text = "test data URLs :;,%=& \u00f6 \u00c4 "
        # 2x1 pixel RGB PNG image with one black and one white pixel
        self.image = (
            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x02\x00\x00\x00'
            b'\x01\x08\x02\x00\x00\x00{@\xe8\xdd\x00\x00\x00\x01sRGB\x00\xae'
            b'\xce\x1c\xe9\x00\x00\x00\x0fIDAT\x08\xd7c```\xf8\xff\xff?\x00'
            b'\x06\x01\x02\xfe\no/\x1e\x00\x00\x00\x00IEND\xaeB`\x82')

        self.text_url = (
            "data:text/plain;charset=UTF-8,test%20data%20URLs%20%3A%3B%2C%25%3"
            "D%26%20%C3%B6%20%C3%84%20")
        self.text_url_base64 = (
            "data:text/plain;charset=ISO-8859-1;base64,dGVzdCBkYXRhIFVSTHMgOjs"
            "sJT0mIPYgxCA%3D")
        # base64 encoded data URL that contains ignorable spaces,
        # such as "\n", " ", "%0A", and "%20".
        # The first line carries the scheme, the media type and the base64
        # of the first 30 bytes of self.image.
        self.image_url = (
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAIAAAABCAIAAAB7\n"
            "QOjdAAAAAXNSR0IArs4c6QAAAA9JREFUCNdj%0AYGBg%2BP//PwAGAQL%2BCm8 "
            "vHgAAAABJRU5ErkJggg%3D%3D%0A%20")

        self.text_url_resp = self._open(self.text_url)
        self.text_url_base64_resp = self._open(self.text_url_base64)
        self.image_url_resp = self._open(self.image_url)

    def test_interface(self):
        # Make sure object returned by urlopen() has the specified methods
        for attr in ("read", "readline", "readlines",
                     "close", "info", "geturl", "getcode", "__iter__"):
            self.assertTrue(hasattr(self.text_url_resp, attr),
                            "object returned by urlopen() lacks %s attribute" %
                            attr)

    def test_info(self):
        self.assertIsInstance(self.text_url_resp.info(), email.message.Message)
        self.assertEqual(self.text_url_base64_resp.info().get_params(),
                         [('text/plain', ''), ('charset', 'ISO-8859-1')])
        self.assertEqual(self.image_url_resp.info()['content-length'],
                         str(len(self.image)))
        # A data URL without a media type falls back to text/plain, US-ASCII.
        self.assertEqual(self._open("data:,").info().get_params(),
                         [('text/plain', ''), ('charset', 'US-ASCII')])

    def test_geturl(self):
        self.assertEqual(self.text_url_resp.geturl(), self.text_url)
        self.assertEqual(self.text_url_base64_resp.geturl(),
                         self.text_url_base64)
        self.assertEqual(self.image_url_resp.geturl(), self.image_url)

    def test_read_text(self):
        charset = dict(self.text_url_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_resp.read().decode(charset), self.text)

    def test_read_text_base64(self):
        charset = dict(
            self.text_url_base64_resp.info().get_params())['charset']
        self.assertEqual(self.text_url_base64_resp.read().decode(charset),
                         self.text)

    def test_read_image(self):
        self.assertEqual(self.image_url_resp.read(), self.image)

    def test_missing_comma(self):
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:text/plain')

    def test_invalid_base64_data(self):
        # missing padding character
        self.assertRaises(ValueError, urllib.request.urlopen,
                          'data:;base64,Cg=')
551n/a
class urlretrieve_FileTests(unittest.TestCase):
    """Test urllib.urlretrieve() on local files"""

    def setUp(self):
        # Create a list of temporary files. Each item in the list is a file
        # name (absolute path or relative to the current working directory).
        # All files in this list will be deleted in the tearDown method. Note,
        # this only helps to makes sure temporary files get deleted, but it
        # does nothing about trying to close files that may still be open. It
        # is the responsibility of the developer to properly close files even
        # when exceptional conditions occur.
        self.tempFiles = []

        # Create a temporary file.
        self.registerFileForCleanUp(support.TESTFN)
        self.text = b'testing urllib.urlretrieve'
        with open(support.TESTFN, 'wb') as FILE:
            FILE.write(self.text)

    def tearDown(self):
        # Delete the temporary files.
        for each in self.tempFiles:
            # Best effort: a registered file may never have been created.
            try:
                os.remove(each)
            except OSError:
                pass

    def constructLocalFileUrl(self, filePath):
        """Return a file:// URL for *filePath*.

        The test is skipped when the absolute path cannot be encoded as UTF-8.
        """
        filePath = os.path.abspath(filePath)
        try:
            filePath.encode("utf-8")
        except UnicodeEncodeError:
            raise unittest.SkipTest("filePath is not encodable to utf8")
        return "file://%s" % urllib.request.pathname2url(filePath)

    def createNewTempFile(self, data=b""):
        """Creates a new temporary file containing the specified data,
        registers the file for deletion during the test fixture tear down, and
        returns the absolute path of the file."""

        newFd, newFilePath = tempfile.mkstemp()
        self.registerFileForCleanUp(newFilePath)
        with os.fdopen(newFd, "wb") as newFile:
            newFile.write(data)
        return newFilePath

    def registerFileForCleanUp(self, fileName):
        """Remember *fileName* so that tearDown() deletes it."""
        self.tempFiles.append(fileName)

    def test_basic(self):
        # Make sure that a local file just gets its own location returned and
        # a headers value is returned.
        result = urllib.request.urlretrieve("file:%s" % support.TESTFN)
        self.assertEqual(result[0], support.TESTFN)
        self.assertIsInstance(result[1], email.message.Message,
                              "did not get an email.message.Message instance "
                              "as second returned value")

    def test_copy(self):
        # Test that setting the filename argument works.
        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        result = urllib.request.urlretrieve(self.constructLocalFileUrl(
            support.TESTFN), second_temp)
        self.assertEqual(second_temp, result[0])
        self.assertTrue(os.path.exists(second_temp), "copy of the file was not "
                                                     "made")
        with open(second_temp, 'rb') as FILE:
            text = FILE.read()
        self.assertEqual(self.text, text)

    def test_reporthook(self):
        # Make sure that the reporthook works.
        expected_block = 0

        def hooktester(block_count, block_read_size, file_size):
            # Block numbers must arrive as 0, 1, 2, ... in order.
            nonlocal expected_block
            self.assertIsInstance(block_count, int)
            self.assertIsInstance(block_read_size, int)
            self.assertIsInstance(file_size, int)
            self.assertEqual(block_count, expected_block)
            expected_block += 1

        second_temp = "%s.2" % support.TESTFN
        self.registerFileForCleanUp(second_temp)
        urllib.request.urlretrieve(
            self.constructLocalFileUrl(support.TESTFN),
            second_temp, hooktester)

    def test_reporthook_0_bytes(self):
        # Test on zero length file. Should call reporthook only 1 time.
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile()
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 1)
        self.assertEqual(report[0][2], 0)

    def test_reporthook_5_bytes(self):
        # Test on 5 byte file. Should call reporthook only 2 times (once when
        # the "network connection" is established and once when the block is
        # read).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 5)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 2)
        self.assertEqual(report[0][2], 5)
        self.assertEqual(report[1][2], 5)

    def test_reporthook_8193_bytes(self):
        # Test on 8193 byte file. Should call reporthook only 3 times (once
        # when the "network connection" is established, once for the next 8192
        # bytes, and once for the last byte).
        report = []

        def hooktester(block_count, block_read_size, file_size):
            report.append((block_count, block_read_size, file_size))

        srcFileName = self.createNewTempFile(b"x" * 8193)
        urllib.request.urlretrieve(self.constructLocalFileUrl(srcFileName),
                                   support.TESTFN, hooktester)
        self.assertEqual(len(report), 3)
        self.assertEqual(report[0][2], 8193)
        self.assertEqual(report[0][1], 8192)
        self.assertEqual(report[1][1], 8192)
        self.assertEqual(report[2][1], 8192)
690n/a
691n/a
class urlretrieve_HttpTests(unittest.TestCase, FakeHTTPMixin):
    """Test urllib.urlretrieve() using fake http connections"""

    def test_short_content_raises_ContentTooShortError(self):
        # The reply announces 100 bytes but delivers far fewer.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')

        def _reporthook(par1, par2, par3):
            pass

        expected_error = urllib.error.ContentTooShortError
        with self.assertRaises(expected_error):
            try:
                urllib.request.urlretrieve('http://example.com/',
                                           reporthook=_reporthook)
            finally:
                self.unfakehttp()

    def test_short_content_raises_ContentTooShortError_without_reporthook(self):
        # Same truncated reply, this time without a reporthook.
        self.fakehttp(b'''HTTP/1.1 200 OK
Date: Wed, 02 Jan 2008 03:03:54 GMT
Server: Apache/1.3.33 (Debian GNU/Linux) mod_ssl/2.8.22 OpenSSL/0.9.7e
Connection: close
Content-Length: 100
Content-Type: text/html; charset=iso-8859-1

FF
''')
        expected_error = urllib.error.ContentTooShortError
        with self.assertRaises(expected_error):
            try:
                urllib.request.urlretrieve('http://example.com/')
            finally:
                self.unfakehttp()
731n/a
732n/a
733n/aclass QuotingTests(unittest.TestCase):
734n/a r"""Tests for urllib.quote() and urllib.quote_plus()
735n/a
736n/a According to RFC 2396 (Uniform Resource Identifiers), to escape a
737n/a character you write it as '%' + <2 character US-ASCII hex value>.
738n/a The Python code of ``'%' + hex(ord(<character>))[2:]`` escapes a
739n/a character properly. Case does not matter on the hex letters.
740n/a
741n/a The various character sets specified are:
742n/a
743n/a Reserved characters : ";/?:@&=+$,"
744n/a Have special meaning in URIs and must be escaped if not being used for
745n/a their special meaning
746n/a Data characters : letters, digits, and "-_.!~*'()"
747n/a Unreserved and do not need to be escaped; can be, though, if desired
748n/a Control characters : 0x00 - 0x1F, 0x7F
749n/a Have no use in URIs so must be escaped
750n/a space : 0x20
751n/a Must be escaped
752n/a Delimiters : '<>#%"'
753n/a Must be escaped
754n/a Unwise : "{}|\^[]`"
755n/a Must be escaped
756n/a
757n/a """
758n/a
759n/a def test_never_quote(self):
760n/a # Make sure quote() does not quote letters, digits, and "_,.-"
761n/a do_not_quote = '' .join(["ABCDEFGHIJKLMNOPQRSTUVWXYZ",
762n/a "abcdefghijklmnopqrstuvwxyz",
763n/a "0123456789",
764n/a "_.-"])
765n/a result = urllib.parse.quote(do_not_quote)
766n/a self.assertEqual(do_not_quote, result,
767n/a "using quote(): %r != %r" % (do_not_quote, result))
768n/a result = urllib.parse.quote_plus(do_not_quote)
769n/a self.assertEqual(do_not_quote, result,
770n/a "using quote_plus(): %r != %r" % (do_not_quote, result))
771n/a
772n/a def test_default_safe(self):
773n/a # Test '/' is default value for 'safe' parameter
774n/a self.assertEqual(urllib.parse.quote.__defaults__[0], '/')
775n/a
776n/a def test_safe(self):
777n/a # Test setting 'safe' parameter does what it should do
778n/a quote_by_default = "<>"
779n/a result = urllib.parse.quote(quote_by_default, safe=quote_by_default)
780n/a self.assertEqual(quote_by_default, result,
781n/a "using quote(): %r != %r" % (quote_by_default, result))
782n/a result = urllib.parse.quote_plus(quote_by_default,
783n/a safe=quote_by_default)
784n/a self.assertEqual(quote_by_default, result,
785n/a "using quote_plus(): %r != %r" %
786n/a (quote_by_default, result))
787n/a # Safe expressed as bytes rather than str
788n/a result = urllib.parse.quote(quote_by_default, safe=b"<>")
789n/a self.assertEqual(quote_by_default, result,
790n/a "using quote(): %r != %r" % (quote_by_default, result))
791n/a # "Safe" non-ASCII characters should have no effect
792n/a # (Since URIs are not allowed to have non-ASCII characters)
793n/a result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="\xfc")
794n/a expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
795n/a self.assertEqual(expect, result,
796n/a "using quote(): %r != %r" %
797n/a (expect, result))
798n/a # Same as above, but using a bytes rather than str
799n/a result = urllib.parse.quote("a\xfcb", encoding="latin-1", safe=b"\xfc")
800n/a expect = urllib.parse.quote("a\xfcb", encoding="latin-1", safe="")
801n/a self.assertEqual(expect, result,
802n/a "using quote(): %r != %r" %
803n/a (expect, result))
804n/a
def test_default_quoting(self):
    """Check that quote() and quote_plus() escape unsafe characters by default.

    Covers the control characters 0x00-0x1F, 0x7F and a set of ASCII
    punctuation.  The space character is left out on purpose because
    there is a separate test for it.
    """
    quoters = (("quote", urllib.parse.quote),
               ("quote_plus", urllib.parse.quote_plus))
    should_quote = ''.join(
        [chr(num) for num in range(32)]       # For 0x00 - 0x1F
        + [r'<>#%"{}|\^[]`', chr(127)])       # punctuation, then 0x7F
    for char in should_quote:
        escaped = hexescape(char)
        for name, quoter in quoters:
            result = quoter(char)
            # %r keeps control characters legible in the failure message.
            self.assertEqual(escaped, result,
                             "using %s(): "
                             "%r should be escaped to %s, not %s" %
                             (name, char, escaped, result))
    # Unsafe characters must also be escaped when mixed with safe ones.
    partial_quote = "ab[]cd"
    expected = "ab%5B%5Dcd"
    for name, quoter in quoters:
        result = quoter(partial_quote)
        self.assertEqual(expected, result,
                         "using %s(): %r != %r" % (name, expected, result))
832n/a
def test_quoting_space(self):
    """Verify how quote() and quote_plus() each encode the space character."""
    # A lone space: quote() percent-encodes it, quote_plus() turns it
    # into '+'.
    quoted = urllib.parse.quote(' ')
    self.assertEqual(quoted, hexescape(' '),
                     "using quote(): %r != %r" % (quoted, hexescape(' ')))
    plussed = urllib.parse.quote_plus(' ')
    self.assertEqual(plussed, '+',
                     "using quote_plus(): %r != +" % plussed)
    # Spaces embedded in a longer string follow the same two rules.
    given = "a b cd e f"
    scenarios = (
        (urllib.parse.quote, hexescape(' '), "using quote(): %r != %r"),
        (urllib.parse.quote_plus, '+', "using quote_plus(): %r != %r"),
    )
    for quoter, replacement, template in scenarios:
        expect = given.replace(' ', replacement)
        result = quoter(given)
        self.assertEqual(expect, result, template % (expect, result))
851n/a
852n/a def test_quoting_plus(self):
853n/a self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma'),
854n/a 'alpha%2Bbeta+gamma')
855n/a self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', '+'),
856n/a 'alpha+beta+gamma')
857n/a # Test with bytes
858n/a self.assertEqual(urllib.parse.quote_plus(b'alpha+beta gamma'),
859n/a 'alpha%2Bbeta+gamma')
860n/a # Test with safe bytes
861n/a self.assertEqual(urllib.parse.quote_plus('alpha+beta gamma', b'+'),
862n/a 'alpha+beta+gamma')
863n/a
864n/a def test_quote_bytes(self):
865n/a # Bytes should quote directly to percent-encoded values
866n/a given = b"\xa2\xd8ab\xff"
867n/a expect = "%A2%D8ab%FF"
868n/a result = urllib.parse.quote(given)
869n/a self.assertEqual(expect, result,
870n/a "using quote(): %r != %r" % (expect, result))
871n/a # Encoding argument should raise type error on bytes input
872n/a self.assertRaises(TypeError, urllib.parse.quote, given,
873n/a encoding="latin-1")
874n/a # quote_from_bytes should work the same
875n/a result = urllib.parse.quote_from_bytes(given)
876n/a self.assertEqual(expect, result,
877n/a "using quote_from_bytes(): %r != %r"
878n/a % (expect, result))
879n/a
880n/a def test_quote_with_unicode(self):
881n/a # Characters in Latin-1 range, encoded by default in UTF-8
882n/a given = "\xa2\xd8ab\xff"
883n/a expect = "%C2%A2%C3%98ab%C3%BF"
884n/a result = urllib.parse.quote(given)
885n/a self.assertEqual(expect, result,
886n/a "using quote(): %r != %r" % (expect, result))
887n/a # Characters in Latin-1 range, encoded by with None (default)
888n/a result = urllib.parse.quote(given, encoding=None, errors=None)
889n/a self.assertEqual(expect, result,
890n/a "using quote(): %r != %r" % (expect, result))
891n/a # Characters in Latin-1 range, encoded with Latin-1
892n/a given = "\xa2\xd8ab\xff"
893n/a expect = "%A2%D8ab%FF"
894n/a result = urllib.parse.quote(given, encoding="latin-1")
895n/a self.assertEqual(expect, result,
896n/a "using quote(): %r != %r" % (expect, result))
897n/a # Characters in BMP, encoded by default in UTF-8
898n/a given = "\u6f22\u5b57" # "Kanji"
899n/a expect = "%E6%BC%A2%E5%AD%97"
900n/a result = urllib.parse.quote(given)
901n/a self.assertEqual(expect, result,
902n/a "using quote(): %r != %r" % (expect, result))
903n/a # Characters in BMP, encoded with Latin-1
904n/a given = "\u6f22\u5b57"
905n/a self.assertRaises(UnicodeEncodeError, urllib.parse.quote, given,
906n/a encoding="latin-1")
907n/a # Characters in BMP, encoded with Latin-1, with replace error handling
908n/a given = "\u6f22\u5b57"
909n/a expect = "%3F%3F" # "??"
910n/a result = urllib.parse.quote(given, encoding="latin-1",
911n/a errors="replace")
912n/a self.assertEqual(expect, result,
913n/a "using quote(): %r != %r" % (expect, result))
914n/a # Characters in BMP, Latin-1, with xmlcharref error handling
915n/a given = "\u6f22\u5b57"
916n/a expect = "%26%2328450%3B%26%2323383%3B" # "&#28450;&#23383;"
917n/a result = urllib.parse.quote(given, encoding="latin-1",
918n/a errors="xmlcharrefreplace")
919n/a self.assertEqual(expect, result,
920n/a "using quote(): %r != %r" % (expect, result))
921n/a
922n/a def test_quote_plus_with_unicode(self):
923n/a # Encoding (latin-1) test for quote_plus
924n/a given = "\xa2\xd8 \xff"
925n/a expect = "%A2%D8+%FF"
926n/a result = urllib.parse.quote_plus(given, encoding="latin-1")
927n/a self.assertEqual(expect, result,
928n/a "using quote_plus(): %r != %r" % (expect, result))
929n/a # Errors test for quote_plus
930n/a given = "ab\u6f22\u5b57 cd"
931n/a expect = "ab%3F%3F+cd"
932n/a result = urllib.parse.quote_plus(given, encoding="latin-1",
933n/a errors="replace")
934n/a self.assertEqual(expect, result,
935n/a "using quote_plus(): %r != %r" % (expect, result))
936n/a
937n/a
938n/aclass UnquotingTests(unittest.TestCase):
939n/a """Tests for unquote() and unquote_plus()
940n/a
941n/a See the doc string for quoting_Tests for details on quoting and such.
942n/a
943n/a """
944n/a
945n/a def test_unquoting(self):
946n/a # Make sure unquoting of all ASCII values works
947n/a escape_list = []
948n/a for num in range(128):
949n/a given = hexescape(chr(num))
950n/a expect = chr(num)
951n/a result = urllib.parse.unquote(given)
952n/a self.assertEqual(expect, result,
953n/a "using unquote(): %r != %r" % (expect, result))
954n/a result = urllib.parse.unquote_plus(given)
955n/a self.assertEqual(expect, result,
956n/a "using unquote_plus(): %r != %r" %
957n/a (expect, result))
958n/a escape_list.append(given)
959n/a escape_string = ''.join(escape_list)
960n/a del escape_list
961n/a result = urllib.parse.unquote(escape_string)
962n/a self.assertEqual(result.count('%'), 1,
963n/a "using unquote(): not all characters escaped: "
964n/a "%s" % result)
965n/a self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, None)
966n/a self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, ())
967n/a with support.check_warnings(('', BytesWarning), quiet=True):
968n/a self.assertRaises((TypeError, AttributeError), urllib.parse.unquote, b'')
969n/a
970n/a def test_unquoting_badpercent(self):
971n/a # Test unquoting on bad percent-escapes
972n/a given = '%xab'
973n/a expect = given
974n/a result = urllib.parse.unquote(given)
975n/a self.assertEqual(expect, result, "using unquote(): %r != %r"
976n/a % (expect, result))
977n/a given = '%x'
978n/a expect = given
979n/a result = urllib.parse.unquote(given)
980n/a self.assertEqual(expect, result, "using unquote(): %r != %r"
981n/a % (expect, result))
982n/a given = '%'
983n/a expect = given
984n/a result = urllib.parse.unquote(given)
985n/a self.assertEqual(expect, result, "using unquote(): %r != %r"
986n/a % (expect, result))
987n/a # unquote_to_bytes
988n/a given = '%xab'
989n/a expect = bytes(given, 'ascii')
990n/a result = urllib.parse.unquote_to_bytes(given)
991n/a self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
992n/a % (expect, result))
993n/a given = '%x'
994n/a expect = bytes(given, 'ascii')
995n/a result = urllib.parse.unquote_to_bytes(given)
996n/a self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
997n/a % (expect, result))
998n/a given = '%'
999n/a expect = bytes(given, 'ascii')
1000n/a result = urllib.parse.unquote_to_bytes(given)
1001n/a self.assertEqual(expect, result, "using unquote_to_bytes(): %r != %r"
1002n/a % (expect, result))
1003n/a self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, None)
1004n/a self.assertRaises((TypeError, AttributeError), urllib.parse.unquote_to_bytes, ())
1005n/a
1006n/a def test_unquoting_mixed_case(self):
1007n/a # Test unquoting on mixed-case hex digits in the percent-escapes
1008n/a given = '%Ab%eA'
1009n/a expect = b'\xab\xea'
1010n/a result = urllib.parse.unquote_to_bytes(given)
1011n/a self.assertEqual(expect, result,
1012n/a "using unquote_to_bytes(): %r != %r"
1013n/a % (expect, result))
1014n/a
1015n/a def test_unquoting_parts(self):
1016n/a # Make sure unquoting works when have non-quoted characters
1017n/a # interspersed
1018n/a given = 'ab%sd' % hexescape('c')
1019n/a expect = "abcd"
1020n/a result = urllib.parse.unquote(given)
1021n/a self.assertEqual(expect, result,
1022n/a "using quote(): %r != %r" % (expect, result))
1023n/a result = urllib.parse.unquote_plus(given)
1024n/a self.assertEqual(expect, result,
1025n/a "using unquote_plus(): %r != %r" % (expect, result))
1026n/a
1027n/a def test_unquoting_plus(self):
1028n/a # Test difference between unquote() and unquote_plus()
1029n/a given = "are+there+spaces..."
1030n/a expect = given
1031n/a result = urllib.parse.unquote(given)
1032n/a self.assertEqual(expect, result,
1033n/a "using unquote(): %r != %r" % (expect, result))
1034n/a expect = given.replace('+', ' ')
1035n/a result = urllib.parse.unquote_plus(given)
1036n/a self.assertEqual(expect, result,
1037n/a "using unquote_plus(): %r != %r" % (expect, result))
1038n/a
1039n/a def test_unquote_to_bytes(self):
1040n/a given = 'br%C3%BCckner_sapporo_20050930.doc'
1041n/a expect = b'br\xc3\xbcckner_sapporo_20050930.doc'
1042n/a result = urllib.parse.unquote_to_bytes(given)
1043n/a self.assertEqual(expect, result,
1044n/a "using unquote_to_bytes(): %r != %r"
1045n/a % (expect, result))
1046n/a # Test on a string with unescaped non-ASCII characters
1047n/a # (Technically an invalid URI; expect those characters to be UTF-8
1048n/a # encoded).
1049n/a result = urllib.parse.unquote_to_bytes("\u6f22%C3%BC")
1050n/a expect = b'\xe6\xbc\xa2\xc3\xbc' # UTF-8 for "\u6f22\u00fc"
1051n/a self.assertEqual(expect, result,
1052n/a "using unquote_to_bytes(): %r != %r"
1053n/a % (expect, result))
1054n/a # Test with a bytes as input
1055n/a given = b'%A2%D8ab%FF'
1056n/a expect = b'\xa2\xd8ab\xff'
1057n/a result = urllib.parse.unquote_to_bytes(given)
1058n/a self.assertEqual(expect, result,
1059n/a "using unquote_to_bytes(): %r != %r"
1060n/a % (expect, result))
1061n/a # Test with a bytes as input, with unescaped non-ASCII bytes
1062n/a # (Technically an invalid URI; expect those bytes to be preserved)
1063n/a given = b'%A2\xd8ab%FF'
1064n/a expect = b'\xa2\xd8ab\xff'
1065n/a result = urllib.parse.unquote_to_bytes(given)
1066n/a self.assertEqual(expect, result,
1067n/a "using unquote_to_bytes(): %r != %r"
1068n/a % (expect, result))
1069n/a
1070n/a def test_unquote_with_unicode(self):
1071n/a # Characters in the Latin-1 range, encoded with UTF-8
1072n/a given = 'br%C3%BCckner_sapporo_20050930.doc'
1073n/a expect = 'br\u00fcckner_sapporo_20050930.doc'
1074n/a result = urllib.parse.unquote(given)
1075n/a self.assertEqual(expect, result,
1076n/a "using unquote(): %r != %r" % (expect, result))
1077n/a # Characters in the Latin-1 range, encoded with None (default)
1078n/a result = urllib.parse.unquote(given, encoding=None, errors=None)
1079n/a self.assertEqual(expect, result,
1080n/a "using unquote(): %r != %r" % (expect, result))
1081n/a
1082n/a # Characters in the Latin-1 range, encoded with Latin-1
1083n/a result = urllib.parse.unquote('br%FCckner_sapporo_20050930.doc',
1084n/a encoding="latin-1")
1085n/a expect = 'br\u00fcckner_sapporo_20050930.doc'
1086n/a self.assertEqual(expect, result,
1087n/a "using unquote(): %r != %r" % (expect, result))
1088n/a
1089n/a # Characters in BMP, encoded with UTF-8
1090n/a given = "%E6%BC%A2%E5%AD%97"
1091n/a expect = "\u6f22\u5b57" # "Kanji"
1092n/a result = urllib.parse.unquote(given)
1093n/a self.assertEqual(expect, result,
1094n/a "using unquote(): %r != %r" % (expect, result))
1095n/a
1096n/a # Decode with UTF-8, invalid sequence
1097n/a given = "%F3%B1"
1098n/a expect = "\ufffd" # Replacement character
1099n/a result = urllib.parse.unquote(given)
1100n/a self.assertEqual(expect, result,
1101n/a "using unquote(): %r != %r" % (expect, result))
1102n/a
1103n/a # Decode with UTF-8, invalid sequence, replace errors
1104n/a result = urllib.parse.unquote(given, errors="replace")
1105n/a self.assertEqual(expect, result,
1106n/a "using unquote(): %r != %r" % (expect, result))
1107n/a
1108n/a # Decode with UTF-8, invalid sequence, ignoring errors
1109n/a given = "%F3%B1"
1110n/a expect = ""
1111n/a result = urllib.parse.unquote(given, errors="ignore")
1112n/a self.assertEqual(expect, result,
1113n/a "using unquote(): %r != %r" % (expect, result))
1114n/a
1115n/a # A mix of non-ASCII and percent-encoded characters, UTF-8
1116n/a result = urllib.parse.unquote("\u6f22%C3%BC")
1117n/a expect = '\u6f22\u00fc'
1118n/a self.assertEqual(expect, result,
1119n/a "using unquote(): %r != %r" % (expect, result))
1120n/a
1121n/a # A mix of non-ASCII and percent-encoded characters, Latin-1
1122n/a # (Note, the string contains non-Latin-1-representable characters)
1123n/a result = urllib.parse.unquote("\u6f22%FC", encoding="latin-1")
1124n/a expect = '\u6f22\u00fc'
1125n/a self.assertEqual(expect, result,
1126n/a "using unquote(): %r != %r" % (expect, result))
1127n/a
1128n/aclass urlencode_Tests(unittest.TestCase):
1129n/a """Tests for urlencode()"""
1130n/a
1131n/a def help_inputtype(self, given, test_type):
1132n/a """Helper method for testing different input types.
1133n/a
1134n/a 'given' must lead to only the pairs:
1135n/a * 1st, 1
1136n/a * 2nd, 2
1137n/a * 3rd, 3
1138n/a
1139n/a Test cannot assume anything about order. Docs make no guarantee and
1140n/a have possible dictionary input.
1141n/a
1142n/a """
1143n/a expect_somewhere = ["1st=1", "2nd=2", "3rd=3"]
1144n/a result = urllib.parse.urlencode(given)
1145n/a for expected in expect_somewhere:
1146n/a self.assertIn(expected, result,
1147n/a "testing %s: %s not found in %s" %
1148n/a (test_type, expected, result))
1149n/a self.assertEqual(result.count('&'), 2,
1150n/a "testing %s: expected 2 '&'s; got %s" %
1151n/a (test_type, result.count('&')))
1152n/a amp_location = result.index('&')
1153n/a on_amp_left = result[amp_location - 1]
1154n/a on_amp_right = result[amp_location + 1]
1155n/a self.assertTrue(on_amp_left.isdigit() and on_amp_right.isdigit(),
1156n/a "testing %s: '&' not located in proper place in %s" %
1157n/a (test_type, result))
1158n/a self.assertEqual(len(result), (5 * 3) + 2, #5 chars per thing and amps
1159n/a "testing %s: "
1160n/a "unexpected number of characters: %s != %s" %
1161n/a (test_type, len(result), (5 * 3) + 2))
1162n/a
1163n/a def test_using_mapping(self):
1164n/a # Test passing in a mapping object as an argument.
1165n/a self.help_inputtype({"1st":'1', "2nd":'2', "3rd":'3'},
1166n/a "using dict as input type")
1167n/a
1168n/a def test_using_sequence(self):
1169n/a # Test passing in a sequence of two-item sequences as an argument.
1170n/a self.help_inputtype([('1st', '1'), ('2nd', '2'), ('3rd', '3')],
1171n/a "using sequence of two-item tuples as input")
1172n/a
1173n/a def test_quoting(self):
1174n/a # Make sure keys and values are quoted using quote_plus()
1175n/a given = {"&":"="}
1176n/a expect = "%s=%s" % (hexescape('&'), hexescape('='))
1177n/a result = urllib.parse.urlencode(given)
1178n/a self.assertEqual(expect, result)
1179n/a given = {"key name":"A bunch of pluses"}
1180n/a expect = "key+name=A+bunch+of+pluses"
1181n/a result = urllib.parse.urlencode(given)
1182n/a self.assertEqual(expect, result)
1183n/a
1184n/a def test_doseq(self):
1185n/a # Test that passing True for 'doseq' parameter works correctly
1186n/a given = {'sequence':['1', '2', '3']}
1187n/a expect = "sequence=%s" % urllib.parse.quote_plus(str(['1', '2', '3']))
1188n/a result = urllib.parse.urlencode(given)
1189n/a self.assertEqual(expect, result)
1190n/a result = urllib.parse.urlencode(given, True)
1191n/a for value in given["sequence"]:
1192n/a expect = "sequence=%s" % value
1193n/a self.assertIn(expect, result)
1194n/a self.assertEqual(result.count('&'), 2,
1195n/a "Expected 2 '&'s, got %s" % result.count('&'))
1196n/a
1197n/a def test_empty_sequence(self):
1198n/a self.assertEqual("", urllib.parse.urlencode({}))
1199n/a self.assertEqual("", urllib.parse.urlencode([]))
1200n/a
1201n/a def test_nonstring_values(self):
1202n/a self.assertEqual("a=1", urllib.parse.urlencode({"a": 1}))
1203n/a self.assertEqual("a=None", urllib.parse.urlencode({"a": None}))
1204n/a
1205n/a def test_nonstring_seq_values(self):
1206n/a self.assertEqual("a=1&a=2", urllib.parse.urlencode({"a": [1, 2]}, True))
1207n/a self.assertEqual("a=None&a=a",
1208n/a urllib.parse.urlencode({"a": [None, "a"]}, True))
1209n/a data = collections.OrderedDict([("a", 1), ("b", 1)])
1210n/a self.assertEqual("a=a&a=b",
1211n/a urllib.parse.urlencode({"a": data}, True))
1212n/a
1213n/a def test_urlencode_encoding(self):
1214n/a # ASCII encoding. Expect %3F with errors="replace'
1215n/a given = (('\u00a0', '\u00c1'),)
1216n/a expect = '%3F=%3F'
1217n/a result = urllib.parse.urlencode(given, encoding="ASCII", errors="replace")
1218n/a self.assertEqual(expect, result)
1219n/a
1220n/a # Default is UTF-8 encoding.
1221n/a given = (('\u00a0', '\u00c1'),)
1222n/a expect = '%C2%A0=%C3%81'
1223n/a result = urllib.parse.urlencode(given)
1224n/a self.assertEqual(expect, result)
1225n/a
1226n/a # Latin-1 encoding.
1227n/a given = (('\u00a0', '\u00c1'),)
1228n/a expect = '%A0=%C1'
1229n/a result = urllib.parse.urlencode(given, encoding="latin-1")
1230n/a self.assertEqual(expect, result)
1231n/a
1232n/a def test_urlencode_encoding_doseq(self):
1233n/a # ASCII Encoding. Expect %3F with errors="replace'
1234n/a given = (('\u00a0', '\u00c1'),)
1235n/a expect = '%3F=%3F'
1236n/a result = urllib.parse.urlencode(given, doseq=True,
1237n/a encoding="ASCII", errors="replace")
1238n/a self.assertEqual(expect, result)
1239n/a
1240n/a # ASCII Encoding. On a sequence of values.
1241n/a given = (("\u00a0", (1, "\u00c1")),)
1242n/a expect = '%3F=1&%3F=%3F'
1243n/a result = urllib.parse.urlencode(given, True,
1244n/a encoding="ASCII", errors="replace")
1245n/a self.assertEqual(expect, result)
1246n/a
1247n/a # Utf-8
1248n/a given = (("\u00a0", "\u00c1"),)
1249n/a expect = '%C2%A0=%C3%81'
1250n/a result = urllib.parse.urlencode(given, True)
1251n/a self.assertEqual(expect, result)
1252n/a
1253n/a given = (("\u00a0", (42, "\u00c1")),)
1254n/a expect = '%C2%A0=42&%C2%A0=%C3%81'
1255n/a result = urllib.parse.urlencode(given, True)
1256n/a self.assertEqual(expect, result)
1257n/a
1258n/a # latin-1
1259n/a given = (("\u00a0", "\u00c1"),)
1260n/a expect = '%A0=%C1'
1261n/a result = urllib.parse.urlencode(given, True, encoding="latin-1")
1262n/a self.assertEqual(expect, result)
1263n/a
1264n/a given = (("\u00a0", (42, "\u00c1")),)
1265n/a expect = '%A0=42&%A0=%C1'
1266n/a result = urllib.parse.urlencode(given, True, encoding="latin-1")
1267n/a self.assertEqual(expect, result)
1268n/a
1269n/a def test_urlencode_bytes(self):
1270n/a given = ((b'\xa0\x24', b'\xc1\x24'),)
1271n/a expect = '%A0%24=%C1%24'
1272n/a result = urllib.parse.urlencode(given)
1273n/a self.assertEqual(expect, result)
1274n/a result = urllib.parse.urlencode(given, True)
1275n/a self.assertEqual(expect, result)
1276n/a
1277n/a # Sequence of values
1278n/a given = ((b'\xa0\x24', (42, b'\xc1\x24')),)
1279n/a expect = '%A0%24=42&%A0%24=%C1%24'
1280n/a result = urllib.parse.urlencode(given, True)
1281n/a self.assertEqual(expect, result)
1282n/a
1283n/a def test_urlencode_encoding_safe_parameter(self):
1284n/a
1285n/a # Send '$' (\x24) as safe character
1286n/a # Default utf-8 encoding
1287n/a
1288n/a given = ((b'\xa0\x24', b'\xc1\x24'),)
1289n/a result = urllib.parse.urlencode(given, safe=":$")
1290n/a expect = '%A0$=%C1$'
1291n/a self.assertEqual(expect, result)
1292n/a
1293n/a given = ((b'\xa0\x24', b'\xc1\x24'),)
1294n/a result = urllib.parse.urlencode(given, doseq=True, safe=":$")
1295n/a expect = '%A0$=%C1$'
1296n/a self.assertEqual(expect, result)
1297n/a
1298n/a # Safe parameter in sequence
1299n/a given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1300n/a expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1301n/a result = urllib.parse.urlencode(given, True, safe=":$")
1302n/a self.assertEqual(expect, result)
1303n/a
1304n/a # Test all above in latin-1 encoding
1305n/a
1306n/a given = ((b'\xa0\x24', b'\xc1\x24'),)
1307n/a result = urllib.parse.urlencode(given, safe=":$",
1308n/a encoding="latin-1")
1309n/a expect = '%A0$=%C1$'
1310n/a self.assertEqual(expect, result)
1311n/a
1312n/a given = ((b'\xa0\x24', b'\xc1\x24'),)
1313n/a expect = '%A0$=%C1$'
1314n/a result = urllib.parse.urlencode(given, doseq=True, safe=":$",
1315n/a encoding="latin-1")
1316n/a
1317n/a given = ((b'\xa0\x24', (b'\xc1\x24', 0xd, 42)),)
1318n/a expect = '%A0$=%C1$&%A0$=13&%A0$=42'
1319n/a result = urllib.parse.urlencode(given, True, safe=":$",
1320n/a encoding="latin-1")
1321n/a self.assertEqual(expect, result)
1322n/a
class Pathname_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass
        expected_path = os.path.join("parts", "of", "a", "path")
        expected_url = "parts/of/a/path"
        result = urllib.request.pathname2url(expected_path)
        # Messages list the expected value first, matching the argument
        # order of the assertions.
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (expected_url, result))
        result = urllib.request.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (expected_path, result))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = os.path.join("needs", "quot=ing", "here")
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Converting the URL back must give the original path.
        expect = given
        result = urllib.request.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        given = os.path.join("make sure", "using_quote")
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = urllib.request.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A '+' in the URL must be kept as-is by url2pathname().
        given = "make+sure/using_unquote"
        expect = os.path.join("make+sure", "using_unquote")
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    @unittest.skipUnless(sys.platform == 'win32',
                         'test specific to the Windows url2pathname() '
                         'behaviour.')
    def test_ntpath(self):
        given = ('/C:/', '///C:/', '/C|//')
        expect = 'C:\\'
        for url in given:
            result = urllib.request.url2pathname(url)
            self.assertEqual(expect, result,
                             'urllib.request.url2pathname() failed; %s != %s' %
                             (expect, result))
        given = '///C|/path'
        expect = 'C:\\path'
        result = urllib.request.url2pathname(given)
        self.assertEqual(expect, result,
                         'urllib.request.url2pathname() failed; %s != %s' %
                         (expect, result))
1382n/a
class Utility_Tests(unittest.TestCase):
    """Checks for the miscellaneous utility functions of urllib."""

    def test_thishost(self):
        """urllib.request.thishost() must hand back a tuple."""
        host_info = urllib.request.thishost()
        self.assertIsInstance(host_info, tuple)
1389n/a
1390n/a
class URLopener_Tests(unittest.TestCase):
    """Exercise URLopener.open() through a dummy 'spam' scheme handler."""

    def test_quoted_open(self):
        """Check which characters open() quotes before dispatching the URL."""
        class DummyURLopener(urllib.request.URLopener):
            # Handler for the made-up 'spam' scheme: echo what it receives.
            def open_spam(self, url):
                return url

        expected_warning = (
            'DummyURLopener style of invoking requests is deprecated.',
            DeprecationWarning)
        with support.check_warnings(expected_warning):
            # The space in the URL is expected to come back as %20.
            opened = DummyURLopener().open('spam://example/ /')
            self.assertEqual(opened, '//example/%20/')

            # test the safe characters are not quoted by urlopen
            opened = DummyURLopener().open(
                "spam://c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
            self.assertEqual(opened,
                             "//c:|windows%/:=&?~#+!$,;'@()*[]|/path/")
1408n/a
1409n/a# The tests below are just commented out.
1410n/a# I can't really tell why they keep failing on Windows and SPARC.
1411n/a# Everywhere else they work OK, but on those machines they sometimes
1412n/a# fail in one of the tests, sometimes in another. I have a Linux box, and
1413n/a# the tests pass there.
1414n/a# If anybody has one of the problematic environments, please help!
1415n/a# . Facundo
1416n/a#
1417n/a# def server(evt):
1418n/a# import socket, time
1419n/a# serv = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
1420n/a# serv.settimeout(3)
1421n/a# serv.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
1422n/a# serv.bind(("", 9093))
1423n/a# serv.listen()
1424n/a# try:
1425n/a# conn, addr = serv.accept()
1426n/a# conn.send("1 Hola mundo\n")
1427n/a# cantdata = 0
1428n/a# while cantdata < 13:
1429n/a# data = conn.recv(13-cantdata)
1430n/a# cantdata += len(data)
1431n/a# time.sleep(.3)
1432n/a# conn.send("2 No more lines\n")
1433n/a# conn.close()
1434n/a# except socket.timeout:
1435n/a# pass
1436n/a# finally:
1437n/a# serv.close()
1438n/a# evt.set()
1439n/a#
1440n/a# class FTPWrapperTests(unittest.TestCase):
1441n/a#
1442n/a# def setUp(self):
1443n/a# import ftplib, time, threading
1444n/a# ftplib.FTP.port = 9093
1445n/a# self.evt = threading.Event()
1446n/a# threading.Thread(target=server, args=(self.evt,)).start()
1447n/a# time.sleep(.1)
1448n/a#
1449n/a# def tearDown(self):
1450n/a# self.evt.wait()
1451n/a#
1452n/a# def testBasic(self):
1453n/a# # connects
1454n/a# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1455n/a# ftp.close()
1456n/a#
1457n/a# def testTimeoutNone(self):
1458n/a# # global default timeout is ignored
1459n/a# import socket
1460n/a# self.assertIsNone(socket.getdefaulttimeout())
1461n/a# socket.setdefaulttimeout(30)
1462n/a# try:
1463n/a# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1464n/a# finally:
1465n/a# socket.setdefaulttimeout(None)
1466n/a# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1467n/a# ftp.close()
1468n/a#
1469n/a# def testTimeoutDefault(self):
1470n/a# # global default timeout is used
1471n/a# import socket
1472n/a# self.assertIsNone(socket.getdefaulttimeout())
1473n/a# socket.setdefaulttimeout(30)
1474n/a# try:
1475n/a# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [])
1476n/a# finally:
1477n/a# socket.setdefaulttimeout(None)
1478n/a# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1479n/a# ftp.close()
1480n/a#
1481n/a# def testTimeoutValue(self):
1482n/a# ftp = urllib.ftpwrapper("myuser", "mypass", "localhost", 9093, [],
1483n/a# timeout=30)
1484n/a# self.assertEqual(ftp.ftp.sock.gettimeout(), 30)
1485n/a# ftp.close()
1486n/a
1487n/a
class RequestTests(unittest.TestCase):
    """Unit tests for urllib.request.Request."""

    def test_default_values(self):
        """get_method() is 'GET' without data and 'POST' when data is given."""
        make_request = urllib.request.Request
        without_data = make_request("http://www.python.org")
        self.assertEqual(without_data.get_method(), 'GET')
        with_data = make_request("http://www.python.org", {})
        self.assertEqual(with_data.get_method(), 'POST')

    def test_with_method_arg(self):
        """An explicit 'method' is reported, and can be reassigned later."""
        make_request = urllib.request.Request
        url = "http://www.python.org"
        # The explicit method is reported both without and with data.
        for positional in ((url,), (url, {})):
            req = make_request(*positional, method='HEAD')
            self.assertEqual(req.method, 'HEAD')
            self.assertEqual(req.get_method(), 'HEAD')
        # Assigning to the attribute afterwards changes get_method() too.
        req = make_request(url, method='GET')
        self.assertEqual(req.get_method(), 'GET')
        req.method = 'HEAD'
        self.assertEqual(req.get_method(), 'HEAD')
1510n/a
1511n/a
class URL2PathNameTests(unittest.TestCase):
    """Tests for the url2pathname() function imported from nturl2path."""

    def test_converting_drive_letter(self):
        cases = (
            ("///C|", 'C:'),
            ("///C:", 'C:'),
            ("///C|/", 'C:\\'),
        )
        for url, expected_path in cases:
            self.assertEqual(url2pathname(url), expected_path)

    def test_converting_when_no_drive_letter(self):
        # cannot end a raw string in \, hence the concatenated '\\'
        cases = (
            ("///C/test/", r'\\\C\test' '\\'),
            ("////C/test/", r'\\C\test' '\\'),
        )
        for url, expected_path in cases:
            self.assertEqual(url2pathname(url), expected_path)

    def test_simple_compare(self):
        converted = url2pathname("///C|/foo/bar/spam.foo")
        self.assertEqual(converted, r'C:\foo\bar\spam.foo')

    def test_non_ascii_drive_letter(self):
        with self.assertRaises(IOError):
            url2pathname("///\u00e8|/")

    def test_roundtrip_url2pathname(self):
        # Each path must survive pathname2url() followed by url2pathname().
        list_of_paths = (
            'C:',
            r'\\\C\test\\',
            r'C:\foo\bar\spam.foo',
        )
        for path in list_of_paths:
            self.assertEqual(url2pathname(pathname2url(path)), path)
1538n/a
class PathName2URLTests(unittest.TestCase):
    """Tests for the pathname2url() function imported from nturl2path."""

    def test_converting_drive_letter(self):
        for path in ("C:", "C:\\"):
            self.assertEqual(pathname2url(path), '///C:')

    def test_converting_when_no_drive_letter(self):
        # A raw string cannot end in a backslash, so the trailing one is
        # concatenated separately.
        cases = (
            (r"\\\folder\test" "\\", '/////folder/test/'),
            (r"\\folder\test" "\\", '////folder/test/'),
            (r"\folder\test" "\\", '/folder/test/'),
        )
        for path, expected_url in cases:
            self.assertEqual(pathname2url(path), expected_url)

    def test_simple_compare(self):
        converted = pathname2url(r'C:\foo\bar\spam.foo')
        self.assertEqual(converted, "///C:/foo/bar/spam.foo")

    def test_long_drive_letter(self):
        with self.assertRaises(IOError):
            pathname2url("XX:\\")

    def test_roundtrip_pathname2url(self):
        # Each URL must survive url2pathname() followed by pathname2url().
        list_of_paths = (
            '///C:',
            '/////folder/test/',
            '///C:/foo/bar/spam.foo',
        )
        for path in list_of_paths:
            self.assertEqual(pathname2url(url2pathname(path)), path)
1566n/a
# Run the tests defined in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()