Python code coverage for Lib/packaging/tests/test_pypi_simple.py

#	count	content
1	n/a	"""Tests for the packaging.pypi.simple module."""
2	n/a	import re
3	n/a	import os
4	n/a	import sys
5	n/a	import http.client
6	n/a	import urllib.error
7	n/a	import urllib.parse
8	n/a	import urllib.request
9	n/a
10	n/a	from packaging.pypi.simple import Crawler
11	n/a
12	n/a	from packaging.tests import unittest
13	n/a	from packaging.tests.support import (TempdirManager, LoggingCatcher,
14	n/a	fake_dec)
15	n/a
16	n/a	try:
17	n/a	import _thread
18	n/a	from packaging.tests.pypi_server import (use_pypi_server, PyPIServer,
19	n/a	PYPI_DEFAULT_STATIC_PATH)
20	n/a	except ImportError:
21	n/a	_thread = None
22	n/a	use_pypi_server = fake_dec
23	n/a	PYPI_DEFAULT_STATIC_PATH = os.path.join(
24	n/a	os.path.dirname(os.path.abspath(__file__)), 'pypiserver')
25	n/a
26	n/a
27	n/a
28	n/a	class SimpleCrawlerTestCase(TempdirManager,
29	n/a	LoggingCatcher,
30	n/a	unittest.TestCase):
31	n/a
32	n/a	def _get_simple_crawler(self, server, base_url="/simple/", hosts=None,
33	n/a	*args, **kwargs):
34	n/a	"""Build and return a SimpleIndex with the test server urls"""
35	n/a	if hosts is None:
36	n/a	hosts = (server.full_address.replace("http://", ""),)
37	n/a	kwargs['hosts'] = hosts
38	n/a	return Crawler(server.full_address + base_url, *args,
39	n/a	**kwargs)
40	n/a
41	n/a	@unittest.skipIf(_thread is None, 'needs threads')
42	n/a	@use_pypi_server()
43	n/a	def test_bad_urls(self, server):
44	n/a	crawler = Crawler()
45	n/a	url = 'http://127.0.0.1:0/nonesuch/test_simple'
46	n/a	try:
47	n/a	v = crawler._open_url(url)
48	n/a	except Exception as v:
49	n/a	self.assertIn(url, str(v))
50	n/a	else:
51	n/a	v.close()
52	n/a	self.assertIsInstance(v, urllib.error.HTTPError)
53	n/a
54	n/a	# issue 16
55	n/a	# easy_install inquant.contentmirror.plone breaks because of a typo
56	n/a	# in its home URL
57	n/a	crawler = Crawler(hosts=('example.org',))
58	n/a	url = ('url:%20https://svn.plone.org/svn/collective/'
59	n/a	'inquant.contentmirror.plone/trunk')
60	n/a	try:
61	n/a	v = crawler._open_url(url)
62	n/a	except Exception as v:
63	n/a	self.assertIn(url, str(v))
64	n/a	else:
65	n/a	v.close()
66	n/a	self.assertIsInstance(v, urllib.error.HTTPError)
67	n/a
68	n/a	def _urlopen(*args):
69	n/a	raise http.client.BadStatusLine('line')
70	n/a
71	n/a	old_urlopen = urllib.request.urlopen
72	n/a	urllib.request.urlopen = _urlopen
73	n/a	url = 'http://example.org'
74	n/a	try:
75	n/a	v = crawler._open_url(url)
76	n/a	except Exception as v:
77	n/a	self.assertIn('line', str(v))
78	n/a	else:
79	n/a	v.close()
80	n/a	# TODO use self.assertRaises
81	n/a	raise AssertionError('Should have raise here!')
82	n/a	finally:
83	n/a	urllib.request.urlopen = old_urlopen
84	n/a
85	n/a	# issue 20
86	n/a	url = 'http://http://svn.pythonpaste.org/Paste/wphp/trunk'
87	n/a	try:
88	n/a	crawler._open_url(url)
89	n/a	except Exception as v:
90	n/a	self.assertIn('Download error', str(v))
91	n/a
92	n/a	# issue #160
93	n/a	url = server.full_address
94	n/a	page = ('<a href="http://www.famfamfam.com]('
95	n/a	'http://www.famfamfam.com/">')
96	n/a	crawler._process_url(url, page)
97	n/a
98	n/a	@unittest.skipIf(_thread is None, 'needs threads')
99	n/a	@use_pypi_server("test_found_links")
100	n/a	def test_found_links(self, server):
101	n/a	# Browse the index, asking for a specified release version
102	n/a	# The PyPI index contains links for version 1.0, 1.1, 2.0 and 2.0.1
103	n/a	crawler = self._get_simple_crawler(server)
104	n/a	last_release = crawler.get_release("foobar")
105	n/a
106	n/a	# we have scanned the index page
107	n/a	self.assertIn(server.full_address + "/simple/foobar/",
108	n/a	crawler._processed_urls)
109	n/a
110	n/a	# we have found 4 releases in this page
111	n/a	self.assertEqual(len(crawler._projects["foobar"]), 4)
112	n/a
113	n/a	# and returned the most recent one
114	n/a	self.assertEqual("%s" % last_release.version, '2.0.1')
115	n/a
116	n/a	def test_is_browsable(self):
117	n/a	crawler = Crawler(follow_externals=False)
118	n/a	self.assertTrue(crawler._is_browsable(crawler.index_url + "test"))
119	n/a
120	n/a	# Now, when following externals, we can have a list of hosts to trust.
121	n/a	# and don't follow other external links than the one described here.
122	n/a	crawler = Crawler(hosts=["pypi.python.org", "example.org"],
123	n/a	follow_externals=True)
124	n/a	good_urls = (
125	n/a	"http://pypi.python.org/foo/bar",
126	n/a	"http://pypi.python.org/simple/foobar",
127	n/a	"http://example.org",
128	n/a	"http://example.org/",
129	n/a	"http://example.org/simple/",
130	n/a	)
131	n/a	bad_urls = (
132	n/a	"http://python.org",
133	n/a	"http://example.tld",
134	n/a	)
135	n/a
136	n/a	for url in good_urls:
137	n/a	self.assertTrue(crawler._is_browsable(url))
138	n/a
139	n/a	for url in bad_urls:
140	n/a	self.assertFalse(crawler._is_browsable(url))
141	n/a
142	n/a	# allow all hosts
143	n/a	crawler = Crawler(follow_externals=True, hosts=("*",))
144	n/a	self.assertTrue(crawler._is_browsable("http://an-external.link/path"))
145	n/a	self.assertTrue(crawler._is_browsable("pypi.example.org/a/path"))
146	n/a
147	n/a	# specify a list of hosts we want to allow
148	n/a	crawler = Crawler(follow_externals=True,
149	n/a	hosts=("*.example.org",))
150	n/a	self.assertFalse(crawler._is_browsable("http://an-external.link/path"))
151	n/a	self.assertTrue(
152	n/a	crawler._is_browsable("http://pypi.example.org/a/path"))
153	n/a
154	n/a	@unittest.skipIf(_thread is None, 'needs threads')
155	n/a	@use_pypi_server("with_externals")
156	n/a	def test_follow_externals(self, server):
157	n/a	# Include external pages
158	n/a	# Try to request the package index, which contains links to "externals"
159	n/a	# resources. They have to be scanned too.
160	n/a	crawler = self._get_simple_crawler(server, follow_externals=True)
161	n/a	crawler.get_release("foobar")
162	n/a	self.assertIn(server.full_address + "/external/external.html",
163	n/a	crawler._processed_urls)
164	n/a
165	n/a	@unittest.skipIf(_thread is None, 'needs threads')
166	n/a	@use_pypi_server("with_real_externals")
167	n/a	def test_restrict_hosts(self, server):
168	n/a	# Restricting the crawl to a list of allowed hosts is possible.
169	n/a	# Test that telling the simple PyPI client not to retrieve externals
170	n/a	# works
171	n/a	crawler = self._get_simple_crawler(server, follow_externals=False)
172	n/a	crawler.get_release("foobar")
173	n/a	self.assertNotIn(server.full_address + "/external/external.html",
174	n/a	crawler._processed_urls)
175	n/a
176	n/a	@unittest.skipIf(_thread is None, 'needs threads')
177	n/a	@use_pypi_server(static_filesystem_paths=["with_externals"],
178	n/a	static_uri_paths=["simple", "external"])
179	n/a	def test_links_priority(self, server):
180	n/a	# Download links from the pypi simple index should be used before
181	n/a	# external download links.
182	n/a	# http://bitbucket.org/tarek/distribute/issue/163/md5-validation-error
183	n/a	#
184	n/a	# Usecase :
185	n/a	# - someone uploads a package on pypi, a md5 is generated
186	n/a	# - someone manually copies this link (with the md5 in the url) onto
187	n/a	# an external page accessible from the package page.
188	n/a	# - someone reuploads the package (with a different md5)
189	n/a	# - while easy_installing, an MD5 error occurs because the external
190	n/a	# link is used
191	n/a	# -> The index should use the link from pypi, not the external one.
192	n/a
193	n/a	# start an index server
194	n/a	index_url = server.full_address + '/simple/'
195	n/a
196	n/a	# scan a test index
197	n/a	crawler = Crawler(index_url, follow_externals=True)
198	n/a	releases = crawler.get_releases("foobar")
199	n/a	server.stop()
200	n/a
201	n/a	# we have only one link, because links are compared without md5
202	n/a	self.assertEqual(1, len(releases))
203	n/a	self.assertEqual(1, len(releases[0].dists))
204	n/a	# the link should be from the index
205	n/a	self.assertEqual(2, len(releases[0].dists['sdist'].urls))
206	n/a	self.assertEqual('12345678901234567',
207	n/a	releases[0].dists['sdist'].url['hashval'])
208	n/a	self.assertEqual('md5', releases[0].dists['sdist'].url['hashname'])
209	n/a
210	n/a	@unittest.skipIf(_thread is None, 'needs threads')
211	n/a	@use_pypi_server(static_filesystem_paths=["with_norel_links"],
212	n/a	static_uri_paths=["simple", "external"])
213	n/a	def test_not_scan_all_links(self, server):
214	n/a	# Do not follow all index page links.
215	n/a	# The links not tagged with rel="download" and rel="homepage" have
216	n/a	# to not be processed by the package index, while processing "pages".
217	n/a
218	n/a	# process the pages
219	n/a	crawler = self._get_simple_crawler(server, follow_externals=True)
220	n/a	crawler.get_releases("foobar")
221	n/a	# now it should have processed only pages with links rel="download"
222	n/a	# and rel="homepage"
223	n/a	self.assertIn("%s/simple/foobar/" % server.full_address,
224	n/a	crawler._processed_urls) # it's the simple index page
225	n/a	self.assertIn("%s/external/homepage.html" % server.full_address,
226	n/a	crawler._processed_urls) # the external homepage is rel="homepage"
227	n/a	self.assertNotIn("%s/external/nonrel.html" % server.full_address,
228	n/a	crawler._processed_urls) # this link contains no rel=*
229	n/a	self.assertNotIn("%s/unrelated-0.2.tar.gz" % server.full_address,
230	n/a	crawler._processed_urls) # linked from simple index (no rel)
231	n/a	self.assertIn("%s/foobar-0.1.tar.gz" % server.full_address,
232	n/a	crawler._processed_urls) # linked from simple index (rel)
233	n/a	self.assertIn("%s/foobar-2.0.tar.gz" % server.full_address,
234	n/a	crawler._processed_urls) # linked from external homepage (rel)
235	n/a
236	n/a	@unittest.skipIf(_thread is None, 'needs threads')
237	n/a	def test_uses_mirrors(self):
238	n/a	# When the main repository seems down, try using the given mirrors
239	n/a	server = PyPIServer("foo_bar_baz")
240	n/a	mirror = PyPIServer("foo_bar_baz")
241	n/a	mirror.start() # only the mirror is started; the main server stays down
242	n/a
243	n/a	try:
244	n/a	# create the index using both servers
245	n/a	crawler = Crawler(server.full_address + "/simple/", hosts=('*',),
246	n/a	# set the timeout to 1s for the tests
247	n/a	timeout=1, mirrors=[mirror.full_address])
248	n/a
249	n/a	# this should not raise a timeout
250	n/a	self.assertEqual(4, len(crawler.get_releases("foo")))
251	n/a	finally:
252	n/a	mirror.stop()
253	n/a	server.stop()
254	n/a
255	n/a	def test_simple_link_matcher(self):
256	n/a	# Test that the simple link matcher finds the right links
257	n/a	crawler = Crawler(follow_externals=False)
258	n/a
259	n/a	# Here, we define:
260	n/a	# 1. one link that must be followed, cause it's a download one
261	n/a	# 2. one link that must not be followed, cause the is_browsable
262	n/a	# returns false for it.
263	n/a	# 3. one link that must be followed cause it's a homepage that is
264	n/a	# browsable
265	n/a	# 4. one link that must be followed, because it contains an md5 hash
266	n/a	self.assertTrue(crawler._is_browsable("%stest" % crawler.index_url))
267	n/a	self.assertFalse(crawler._is_browsable("http://dl-link2"))
268	n/a	content = """
269	n/a	<a href="http://dl-link1" rel="download">download_link1</a>
270	n/a	<a href="http://dl-link2" rel="homepage">homepage_link1</a>
271	n/a	<a href="%(index_url)stest" rel="homepage">homepage_link2</a>
272	n/a	<a href="%(index_url)stest/foobar-1.tar.gz#md5=abcdef>download_link2</a>
273	n/a	""" % {'index_url': crawler.index_url}
274	n/a
275	n/a	# Test that the simple link matcher yields the right links.
276	n/a	generator = crawler._simple_link_matcher(content, crawler.index_url)
277	n/a	self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
278	n/a	crawler.index_url, True), next(generator))
279	n/a	self.assertEqual(('http://dl-link1', True), next(generator))
280	n/a	self.assertEqual(('%stest' % crawler.index_url, False),
281	n/a	next(generator))
282	n/a	self.assertRaises(StopIteration, generator.__next__)
283	n/a
284	n/a	# Following external links is possible (e.g. homepages)
285	n/a	crawler.follow_externals = True
286	n/a	generator = crawler._simple_link_matcher(content, crawler.index_url)
287	n/a	self.assertEqual(('%stest/foobar-1.tar.gz#md5=abcdef' %
288	n/a	crawler.index_url, True), next(generator))
289	n/a	self.assertEqual(('http://dl-link1', True), next(generator))
290	n/a	self.assertEqual(('http://dl-link2', False), next(generator))
291	n/a	self.assertEqual(('%stest' % crawler.index_url, False),
292	n/a	next(generator))
293	n/a	self.assertRaises(StopIteration, generator.__next__)
294	n/a
295	n/a	def test_browse_local_files(self):
296	n/a	# Test that we can browse local files
297	n/a	index_url = "file://" + PYPI_DEFAULT_STATIC_PATH
298	n/a	if sys.platform == 'win32':
299	n/a	# under windows the correct syntax is:
300	n/a	# file:///C\|\the\path\here
301	n/a	# instead of
302	n/a	# file://C:\the\path\here
303	n/a	fix = re.compile(r'^(file://)([A-Za-z])(:)')
304	n/a	index_url = fix.sub('\\1/\\2\|', index_url)
305	n/a
306	n/a	index_path = os.sep.join([index_url, "test_found_links", "simple"])
307	n/a	crawler = Crawler(index_path)
308	n/a	dists = crawler.get_releases("foobar")
309	n/a	self.assertEqual(4, len(dists))
310	n/a
311	n/a	def test_get_link_matcher(self):
312	n/a	crawler = Crawler("http://example.org")
313	n/a	self.assertEqual('_simple_link_matcher', crawler._get_link_matcher(
314	n/a	"http://example.org/some/file").__name__)
315	n/a	self.assertEqual('_default_link_matcher', crawler._get_link_matcher(
316	n/a	"http://other-url").__name__)
317	n/a
318	n/a	def test_default_link_matcher(self):
319	n/a	crawler = Crawler("http://example.org", mirrors=[])
320	n/a	crawler.follow_externals = True
321	n/a	crawler._is_browsable = lambda *args: True
322	n/a	base_url = "http://example.org/some/file/"
323	n/a	content = """
324	n/a	<a href="../homepage" rel="homepage">link</a>
325	n/a	<a href="../download" rel="download">link2</a>
326	n/a	<a href="../simpleurl">link2</a>
327	n/a	"""
328	n/a	found_links = set(uri for uri, _ in
329	n/a	crawler._default_link_matcher(content, base_url))
330	n/a	self.assertIn('http://example.org/some/homepage', found_links)
331	n/a	self.assertIn('http://example.org/some/simpleurl', found_links)
332	n/a	self.assertIn('http://example.org/some/download', found_links)
333	n/a
334	n/a	@unittest.skipIf(_thread is None, 'needs threads')
335	n/a	@use_pypi_server("project_list")
336	n/a	def test_search_projects(self, server):
337	n/a	# we can search the index for projects by name;
338	n/a	# the case used does not matter here
339	n/a	crawler = self._get_simple_crawler(server)
340	n/a	tests = (('Foobar', ['FooBar-bar', 'Foobar-baz', 'Baz-FooBar']),
341	n/a	('foobar*', ['FooBar-bar', 'Foobar-baz']),
342	n/a	('*foobar', ['Baz-FooBar']))
343	n/a
344	n/a	for search, expected in tests:
345	n/a	projects = [p.name for p in crawler.search_projects(search)]
346	n/a	self.assertListEqual(expected, projects)
347	n/a
348	n/a
349	n/a	def test_suite():
350	n/a	return unittest.makeSuite(SimpleCrawlerTestCase)
351	n/a
352	n/a	if __name__ == '__main__':
353	n/a	unittest.main(defaultTest="test_suite")