» Core Development > Code coverage > Lib/packaging/tests/test_pypi_simple.py

Python code coverage for Lib/packaging/tests/test_pypi_simple.py

# | count | content
1n/a"""Tests for the packaging.pypi.simple module."""
2n/aimport re
3n/aimport os
4n/aimport sys
5n/aimport http.client
6n/aimport urllib.error
7n/aimport urllib.parse
8n/aimport urllib.request
9n/a
10n/afrom packaging.pypi.simple import Crawler
11n/a
12n/afrom packaging.tests import unittest
13n/afrom packaging.tests.support import (TempdirManager, LoggingCatcher,
14n/a fake_dec)
15n/a
# The server-based tests are skipped with 'needs threads' when _thread is
# None.  If either import below fails, install stand-ins so that this module
# still imports and the remaining tests can run.
try:
    import _thread
    from packaging.tests.pypi_server import (
        use_pypi_server, PyPIServer, PYPI_DEFAULT_STATIC_PATH)
except ImportError:
    _thread = None
    # fake_dec replaces the real decorator so decorated tests still define.
    use_pypi_server = fake_dec
    # Static fixtures live in the 'pypiserver' directory next to this file.
    PYPI_DEFAULT_STATIC_PATH = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), 'pypiserver')
25n/a
26n/a
27n/a
class SimpleCrawlerTestCase(TempdirManager,
                            LoggingCatcher,
                            unittest.TestCase):
    """Tests for the Crawler class of packaging.pypi.simple.

    Tests decorated with use_pypi_server receive the test server as their
    ``server`` argument; they are skipped when ``_thread`` is None.
    """

    def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
                            *args, **kwargs):
        """Build and return a Crawler pointing at the test server.

        The index URL is ``server.full_address + base_url``.  When *hosts*
        is None, the only allowed host is the test server itself (its
        address without the ``http://`` prefix).  Any extra positional and
        keyword arguments are passed through to Crawler.
        """
        if hosts is None:
            hosts = (server.full_address.replace("http://", ""),)
        kwargs['hosts'] = hosts
        return Crawler(server.full_address + base_url, *args, **kwargs)

    def _check_bad_url(self, crawler, url):
        """Check that opening *url* with *crawler* reports an error.

        Either ``_open_url`` raises an exception whose message contains
        *url*, or it returns an object that must be an HTTPError; in the
        latter case the object is closed before being checked.
        """
        try:
            response = crawler._open_url(url)
        except Exception as exc:
            self.assertIn(url, str(exc))
        else:
            response.close()
            self.assertIsInstance(response, urllib.error.HTTPError)

    @unittest.skipIf(_thread is None, 'needs threads')
    @use_pypi_server()
    def test_bad_urls(self, server):
        # An URL that cannot be opened must be named in the error
        self._check_bad_url(Crawler(),
                            'http://127.0.0.1:0/nonesuch/test_simple')

        # issue 16
        # easy_install inquant.contentmirror.plone breaks because of a typo
        # in its home URL
        crawler = Crawler(hosts=('example.org',))
        self._check_bad_url(
            crawler,
            'url:%20https://svn.plone.org/svn/collective/'
            'inquant.contentmirror.plone/trunk')

        # A BadStatusLine raised by urlopen must surface as an exception
        # whose message still mentions the offending line
        def _urlopen(*args):
            raise http.client.BadStatusLine('line')

        old_urlopen = urllib.request.urlopen
        urllib.request.urlopen = _urlopen
        try:
            with self.assertRaises(Exception) as caught:
                crawler._open_url('http://example.org')
        finally:
            # always restore the real urlopen for the other tests
            urllib.request.urlopen = old_urlopen
        self.assertIn('line', str(caught.exception))

        # issue 20
        url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
        try:
            response = crawler._open_url(url)
        except Exception as exc:
            self.assertIn('Download error', str(exc))
        else:
            # a successful open is tolerated here, but must not be leaked
            response.close()

        # issue #160: processing a page with a malformed href must not raise
        url = server.full_address
        page = ('<a href="http://www.famfamfam.com]('
                'http://www.famfamfam.com/">')
        crawler._process_url(url, page)

    @unittest.skipIf(_thread is None, 'needs threads')
    @use_pypi_server("test_found_links")
    def test_found_links(self, server):
        # Browse the index, asking for a specified release version
        # The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1
        crawler = self._get_simple_crawler(server)
        last_release = crawler.get_release("foobar")

        # we have scanned the index page
        self.assertIn(server.full_address + "/simple/foobar/",
                      crawler._processed_urls)

        # we have found 4 releases in this page
        self.assertEqual(len(crawler._projects["foobar"]), 4)

        # and returned the most recent one
        self.assertEqual("%s" % last_release.version, '2.0.1')

    def test_is_browsable(self):
        crawler = Crawler(follow_externals=False)
        self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))

        # Now, when following externals, we can have a list of hosts to
        # trust, and external links to any other host are not followed.
        crawler = Crawler(hosts=["pypi.python.org", "example.org"],
                          follow_externals=True)
        good_urls = (
            "http://pypi.python.org/foo/bar",
            "http://pypi.python.org/simple/foobar",
            "http://example.org",
            "http://example.org/",
            "http://example.org/simple/",
        )
        bad_urls = (
            "http://python.org",
            "http://example.tld",
        )

        # the URL is passed as the failure message to identify the culprit
        for url in good_urls:
            self.assertTrue(crawler._is_browsable(url), url)

        for url in bad_urls:
            self.assertFalse(crawler._is_browsable(url), url)

        # allow all hosts
        crawler = Crawler(follow_externals=True, hosts=("*",))
        self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
        self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))

        # specify a list of hosts we want to allow
        crawler = Crawler(follow_externals=True,
                          hosts=("*.example.org",))
        self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
        self.assertTrue(
            crawler._is_browsable("http://pypi.example.org/a/path"))

    @unittest.skipIf(_thread is None, 'needs threads')
    @use_pypi_server("with_externals")
    def test_follow_externals(self, server):
        # Include external pages
        # Try to request the package index, which contains links to
        # "externals" resources. They have to be scanned too.
        crawler = self._get_simple_crawler(server, follow_externals=True)
        crawler.get_release("foobar")
        self.assertIn(server.full_address + "/external/external.html",
                      crawler._processed_urls)

    @unittest.skipIf(_thread is None, 'needs threads')
    @use_pypi_server("with_real_externals")
    def test_restrict_hosts(self, server):
        # Restricting the crawler to a list of allowed hosts is possible
        # Test that telling the simple PyPI client to not retrieve
        # externals works
        crawler = self._get_simple_crawler(server, follow_externals=False)
        crawler.get_release("foobar")
        self.assertNotIn(server.full_address + "/external/external.html",
                         crawler._processed_urls)

    @unittest.skipIf(_thread is None, 'needs threads')
    @use_pypi_server(static_filesystem_paths=["with_externals"],
                     static_uri_paths=["simple", "external"])
    def test_links_priority(self, server):
        # Download links from the pypi simple index should be used before
        # external download links.
        # http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
        #
        # Usecase :
        # - someone uploads a package on pypi, a md5 is generated
        # - someone manually copies this link (with the md5 in the url) onto
        #   an external page accessible from the package page.
        # - someone reuploads the package (with a different md5)
        # - while easy_installing, an MD5 error occurs because the external
        #   link is used
        # -> The index should use the link from pypi, not the external one.

        # start an index server
        index_url = server.full_address + '/simple/'

        # scan a test index
        crawler = Crawler(index_url, follow_externals=True)
        releases = crawler.get_releases("foobar")
        server.stop()

        # we have only one link, because links are compared without md5
        self.assertEqual(1, len(releases))
        self.assertEqual(1, len(releases[0].dists))
        # the link should be from the index
        self.assertEqual(2, len(releases[0].dists['sdist'].urls))
        self.assertEqual('12345678901234567',
                         releases[0].dists['sdist'].url['hashval'])
        self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])

    @unittest.skipIf(_thread is None, 'needs threads')
    @use_pypi_server(static_filesystem_paths=["with_norel_links"],
                     static_uri_paths=["simple", "external"])
    def test_not_scan_all_links(self, server):
        # Do not follow all index page links.
        # The links not tagged with rel="download" and rel="homepage" have
        # to not be processed by the package index, while processing "pages".

        # process the pages
        crawler = self._get_simple_crawler(server, follow_externals=True)
        crawler.get_releases("foobar")
        # now it should have processed only pages with links rel="download"
        # and rel="homepage"
        self.assertIn("%s/simple/foobar/" % server.full_address,
                      crawler._processed_urls)  # it's the simple index page
        self.assertIn("%s/external/homepage.html" % server.full_address,
                      crawler._processed_urls)  # external, rel="homepage"
        self.assertNotIn("%s/external/nonrel.html" % server.full_address,
                         crawler._processed_urls)  # link contains no rel=*
        self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
                         crawler._processed_urls)  # simple index (no rel)
        self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
                      crawler._processed_urls)  # simple index (rel)
        self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
                      crawler._processed_urls)  # external homepage (rel)

    @unittest.skipIf(_thread is None, 'needs threads')
    def test_uses_mirrors(self):
        # When the main repository seems down, try using the given mirrors
        server = PyPIServer("foo_bar_baz")
        mirror = PyPIServer("foo_bar_baz")
        # only the mirror is started; the main server is left not started
        mirror.start()

        try:
            # create the index using both servers
            crawler = Crawler(server.full_address + "/simple/", hosts=('*',),
                              # set the timeout to 1s for the tests
                              timeout=1, mirrors=[mirror.full_address])

            # this should not raise a timeout
            self.assertEqual(4, len(crawler.get_releases("foo")))
        finally:
            mirror.stop()
            server.stop()

    def test_simple_link_matcher(self):
        # Test that the simple link matcher finds the right links
        crawler = Crawler(follow_externals=False)

        # Here, we define:
        #   1. one link that must be followed, cause it's a download one
        #   2. one link that must *not* be followed, cause the is_browsable
        #      returns false for it.
        #   3. one link that must be followed cause it's a homepage that is
        #      browsable
        #   4. one link that must be followed, because it contains a md5 hash
        self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
        self.assertFalse(crawler._is_browsable("http://dl-link2"))
        content = """
        <a href="http://dl-link1" rel="download">download_link1</a>
        <a href="http://dl-link2" rel="homepage">homepage_link1</a>
        <a href="%(index_url)stest" rel="homepage">homepage_link2</a>
        <a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef>download_link2</a>
        """ % {'index_url': crawler.index_url}

        # Test that the simple link matcher yields the good links.
        generator = crawler._simple_link_matcher(content, crawler.index_url)
        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
                          crawler.index_url, True), next(generator))
        self.assertEqual(('http://dl-link1', True), next(generator))
        self.assertEqual(('%stest' % crawler.index_url, False),
                         next(generator))
        self.assertRaises(StopIteration, next, generator)

        # Following the external links is possible (eg. homepages)
        crawler.follow_externals = True
        generator = crawler._simple_link_matcher(content, crawler.index_url)
        self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
                          crawler.index_url, True), next(generator))
        self.assertEqual(('http://dl-link1', True), next(generator))
        self.assertEqual(('http://dl-link2', False), next(generator))
        self.assertEqual(('%stest' % crawler.index_url, False),
                         next(generator))
        self.assertRaises(StopIteration, next, generator)

    def test_browse_local_files(self):
        # Test that we can browse local files
        index_url = "file://" + PYPI_DEFAULT_STATIC_PATH
        if sys.platform == 'win32':
            # under windows the correct syntax is:
            #   file:///C|\the\path\here
            # instead of
            #   file://C:\the\path\here
            fix = re.compile(r'^(file://)([A-Za-z])(:)')
            index_url = fix.sub('\\1/\\2|', index_url)

        index_path = os.sep.join([index_url, "test_found_links", "simple"])
        crawler = Crawler(index_path)
        dists = crawler.get_releases("foobar")
        self.assertEqual(4, len(dists))

    def test_get_link_matcher(self):
        # URLs under the index use the simple matcher, others the default one
        crawler = Crawler("http://example.org")
        self.assertEqual('_simple_link_matcher', crawler._get_link_matcher(
                         "http://example.org/some/file").__name__)
        self.assertEqual('_default_link_matcher', crawler._get_link_matcher(
                         "http://other-url").__name__)

    def test_default_link_matcher(self):
        crawler = Crawler("http://example.org", mirrors=[])
        crawler.follow_externals = True
        # consider every URL browsable so that only the matcher is tested
        crawler._is_browsable = lambda *args: True
        base_url = "http://example.org/some/file/"
        content = """
<a href="../homepage" rel="homepage">link</a>
<a href="../download" rel="download">link2</a>
<a href="../simpleurl">link2</a>
        """
        found_links = {uri for uri, _ in
                       crawler._default_link_matcher(content, base_url)}
        self.assertIn('http://example.org/some/homepage', found_links)
        self.assertIn('http://example.org/some/simpleurl', found_links)
        self.assertIn('http://example.org/some/download', found_links)

    @unittest.skipIf(_thread is None, 'needs threads')
    @use_pypi_server("project_list")
    def test_search_projects(self, server):
        # we can search the index for some projects, on their names
        # the case of the search pattern does not matter here
        crawler = self._get_simple_crawler(server)
        tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
                 ('foobar*', ['FooBar-bar', 'Foobar-baz']),
                 ('*foobar', ['Baz-FooBar']))

        for search, expected in tests:
            projects = [p.name for p in crawler.search_projects(search)]
            self.assertListEqual(expected, projects)
347n/a
348n/a
def test_suite():
    """Return a test suite holding every test of SimpleCrawlerTestCase."""
    # unittest.makeSuite() is deprecated and was removed in Python 3.13; it
    # was a thin wrapper around the loader call used here.
    return unittest.TestLoader().loadTestsFromTestCase(SimpleCrawlerTestCase)
351n/a
if __name__ == "__main__":
    # Run through test_suite() so that running this file directly executes
    # the suite built by that function.
    unittest.main(defaultTest="test_suite")