»Core Development>Code coverage>Lib/test/test_difflib.py

Python code coverage for Lib/test/test_difflib.py

1n/aimport difflib
2n/afrom test.support import run_unittest, findfile
3n/aimport unittest
4n/aimport doctest
5n/aimport sys
class TestWithAscii(unittest.TestCase):
    """SequenceMatcher ratio, opcode and junk checks on short ASCII strings."""

    def test_one_insert(self):
        # A single extra 'a' at the very front of the second sequence.
        sm = difflib.SequenceMatcher(None, 'b' * 100, 'a' + 'b' * 100)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(
            list(sm.get_opcodes()),
            [('insert', 0, 0, 0, 1),
             ('equal', 0, 100, 1, 101)])
        self.assertEqual(sm.bpopular, set())

        # The same extra 'a', this time in the middle of the second sequence.
        sm = difflib.SequenceMatcher(None, 'b' * 100,
                                     'b' * 50 + 'a' + 'b' * 50)
        self.assertAlmostEqual(sm.ratio(), 0.995, places=3)
        self.assertEqual(
            list(sm.get_opcodes()),
            [('equal', 0, 50, 0, 50),
             ('insert', 50, 50, 50, 51),
             ('equal', 50, 100, 51, 101)])
        self.assertEqual(sm.bpopular, set())

    def test_one_delete(self):
        # The first sequence has one 'c' that the second one lacks.
        sm = difflib.SequenceMatcher(None, 'a' * 40 + 'c' + 'b' * 40,
                                     'a' * 40 + 'b' * 40)
        self.assertAlmostEqual(sm.ratio(), 0.994, places=3)
        self.assertEqual(
            list(sm.get_opcodes()),
            [('equal', 0, 40, 0, 40),
             ('delete', 40, 41, 40, 40),
             ('equal', 41, 81, 40, 80)])

    def test_bjunk(self):
        # bjunk must hold exactly the elements of b that isjunk() accepts.
        # No spaces in b: nothing is junk.
        sm = difflib.SequenceMatcher(
            isjunk=lambda x: x == ' ',
            a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40)
        self.assertEqual(sm.bjunk, set())

        # b now ends with spaces, which the predicate flags.
        sm = difflib.SequenceMatcher(
            isjunk=lambda x: x == ' ',
            a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' '})

        # The predicate flags both ' ' and 'b'.
        sm = difflib.SequenceMatcher(
            isjunk=lambda x: x in [' ', 'b'],
            a='a' * 40 + 'b' * 40, b='a' * 44 + 'b' * 40 + ' ' * 20)
        self.assertEqual(sm.bjunk, {' ', 'b'})
class TestAutojunk(unittest.TestCase):
    """Tests for the autojunk parameter added in 2.7"""

    def test_one_insert_homogenous_sequence(self):
        # By default autojunk=True and the heuristic kicks in for a sequence
        # of length 200+
        seq1 = 'b' * 200
        seq2 = 'a' + 'b' * 200

        sm = difflib.SequenceMatcher(None, seq1, seq2)
        # With 'b' treated as popular nothing is left to match on.
        self.assertAlmostEqual(sm.ratio(), 0, places=3)
        self.assertEqual(sm.bpopular, {'b'})

        # Now turn the heuristic off
        sm = difflib.SequenceMatcher(None, seq1, seq2, autojunk=False)
        self.assertAlmostEqual(sm.ratio(), 0.9975, places=3)
        self.assertEqual(sm.bpopular, set())
class TestSFbugs(unittest.TestCase):
    """Regression tests for individually reported difflib bugs."""

    def test_ratio_for_null_seqn(self):
        # Check clearing of SF bug 763023
        s = difflib.SequenceMatcher(None, [], [])
        self.assertEqual(s.ratio(), 1)
        self.assertEqual(s.quick_ratio(), 1)
        self.assertEqual(s.real_quick_ratio(), 1)

    def test_comparing_empty_lists(self):
        # Check fix for bug #979794
        # Both generators must simply be exhausted on empty input.
        group_gen = difflib.SequenceMatcher(None, [], []).get_grouped_opcodes()
        self.assertRaises(StopIteration, next, group_gen)
        diff_gen = difflib.unified_diff([], [])
        self.assertRaises(StopIteration, next, diff_gen)

    def test_matching_blocks_cache(self):
        # Issue #21635
        s = difflib.SequenceMatcher(None, "abxcd", "abcd")
        # Call twice on the same matcher; only the second result is checked.
        s.get_matching_blocks()
        second = s.get_matching_blocks()
        self.assertEqual(second[0].size, 2)
        self.assertEqual(second[1].size, 2)
        self.assertEqual(second[2].size, 0)

    def test_added_tab_hint(self):
        # Check fix for bug #1488943
        diff = list(difflib.Differ().compare(["\tI am a buggy"],
                                             ["\t\tI am a bug"]))
        self.assertEqual("- \tI am a buggy", diff[0])
        # The exact padding inside the '?' hint line is whitespace-only and
        # differs between difflib versions (spaces vs. a preserved tab), so
        # check the marker itself and that it lines up under the trailing
        # "gy" of the '-' line instead of comparing the padding verbatim.
        hint = diff[1]
        self.assertEqual(["?", "--"], hint.split())
        self.assertTrue(hint.endswith("--\n"))
        self.assertEqual(len(diff[0]), len(hint.rstrip("\n")))
        self.assertEqual("+ \t\tI am a bug", diff[2])
        self.assertEqual("? +\n", diff[3])
# Fixture texts for TestSFpatches.  patch914575_from1/_to1 and their
# _nonascii_ variants are the inputs of the HtmlDiff.make_file() tests;
# _from2/_to2 feed the tabsize tables and _from3/_to3 the wrapcolumn=14
# tables built in test_html_diff.
# NOTE(review): this listing looks lossy -- the embedded source line numbers
# skip (e.g. 100 -> 103, 140 -> 142, 150 -> 152), no closing triple quote is
# visible for the first six strings, and leading/inner whitespace appears
# collapsed.  Restore these literals from the upstream file before relying on
# them; test_html_diff compares against a byte-exact expected HTML file.
96n/apatch914575_from1 = """
97n/a 1. Beautiful is beTTer than ugly.
98n/a 2. Explicit is better than implicit.
99n/a 3. Simple is better than complex.
100n/a 4. Complex is better than complicated.
103n/apatch914575_to1 = """
104n/a 1. Beautiful is better than ugly.
105n/a 3. Simple is better than complex.
106n/a 4. Complicated is better than complex.
107n/a 5. Flat is better than nested.
110n/apatch914575_nonascii_from1 = """
111n/a 1. Beautiful is beTTer than ugly.
112n/a 2. Explicit is better than ımplıcıt.
113n/a 3. Simple is better than complex.
114n/a 4. Complex is better than complicated.
117n/apatch914575_nonascii_to1 = """
118n/a 1. Beautiful is better than ügly.
119n/a 3. Sımple is better than complex.
120n/a 4. Complicated is better than cömplex.
121n/a 5. Flat is better than nested.
124n/apatch914575_from2 = """
125n/a\t\tLine 1: preceded by from:[tt] to:[ssss]
126n/a \t\tLine 2: preceded by from:[sstt] to:[sssst]
127n/a \t \tLine 3: preceded by from:[sstst] to:[ssssss]
128n/aLine 4: \thas from:[sst] to:[sss] after :
129n/aLine 5: has from:[t] to:[ss] at end\t
132n/apatch914575_to2 = """
133n/a Line 1: preceded by from:[tt] to:[ssss]
134n/a \tLine 2: preceded by from:[sstt] to:[sssst]
135n/a Line 3: preceded by from:[sstst] to:[ssssss]
136n/aLine 4: has from:[sst] to:[sss] after :
137n/aLine 5: has from:[t] to:[ss] at end
140n/apatch914575_from3 = """line 0
142n/aline 1
143n/aline 2
144n/aline 3
145n/aline 4 changed
146n/aline 5 changed
147n/aline 6 changed
148n/aline 7
149n/aline 8 subtracted
150n/aline 9
152n/ashort line
153n/ajust fits in!!
154n/ajust fits in two lines yup!!
155n/athe end"""
157n/apatch914575_to3 = """line 0
159n/aline 1
160n/aline 2 added
161n/aline 3
162n/aline 4 chanGEd
163n/aline 5a chanGed
164n/aline 6a changEd
165n/aline 7
166n/aline 8
167n/aline 9
169n/aanother long line that needs to be wrapped
170n/ajust fitS in!!
171n/ajust fits in two lineS yup!!
172n/athe end"""
class TestSFpatches(unittest.TestCase):
    """Regression tests for contributed patches (HtmlDiff output, recursion)."""

    def test_html_diff(self):
        # Check SF patch 914575 for generating HTML differences
        f1a = (patch914575_from1 + '123\n' * 10) * 3
        t1a = (patch914575_to1 + '123\n' * 10) * 3
        # Same texts pushed down by ten extra leading lines.
        f1b = '456\n' * 10 + f1a
        t1b = '456\n' * 10 + t1a
        f1a = f1a.splitlines()
        t1a = t1a.splitlines()
        f1b = f1b.splitlines()
        t1b = t1b.splitlines()
        f2 = patch914575_from2.splitlines()
        t2 = patch914575_to2.splitlines()
        f3 = patch914575_from3
        t3 = patch914575_to3
        i = difflib.HtmlDiff()
        j = difflib.HtmlDiff(tabsize=2)
        k = difflib.HtmlDiff(wrapcolumn=14)

        full = i.make_file(f1a, t1a, 'from', 'to', context=False, numlines=5)
        # Each heading is followed by the table it describes; the order is
        # part of the expected output.
        tables = '\n'.join(
            [
                '<h2>Context (first diff within numlines=5(default))</h2>',
                i.make_table(f1a, t1a, 'from', 'to', context=True),
                '<h2>Context (first diff after numlines=5(default))</h2>',
                i.make_table(f1b, t1b, 'from', 'to', context=True),
                '<h2>Context (numlines=6)</h2>',
                i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=6),
                '<h2>Context (numlines=0)</h2>',
                i.make_table(f1a, t1a, 'from', 'to', context=True, numlines=0),
                '<h2>Same Context</h2>',
                i.make_table(f1a, f1a, 'from', 'to', context=True),
                '<h2>Same Full</h2>',
                i.make_table(f1a, f1a, 'from', 'to', context=False),
                '<h2>Empty Context</h2>',
                i.make_table([], [], 'from', 'to', context=True),
                '<h2>Empty Full</h2>',
                i.make_table([], [], 'from', 'to', context=False),
                '<h2>tabsize=2</h2>',
                j.make_table(f2, t2),
                '<h2>tabsize=default</h2>',
                i.make_table(f2, t2),
                '<h2>Context (wrapcolumn=14,numlines=0)</h2>',
                k.make_table(f3.splitlines(), t3.splitlines(),
                             context=True, numlines=0),
                '<h2>wrapcolumn=14,splitlines()</h2>',
                k.make_table(f3.splitlines(), t3.splitlines()),
                '<h2>wrapcolumn=14,splitlines(True)</h2>',
                k.make_table(f3.splitlines(True), t3.splitlines(True)),
            ])
        # Splice the extra tables in just before the closing body tag.
        actual = full.replace('</body>', '\n%s\n</body>' % tables)

        # temporarily uncomment next two lines to baseline this test
        #with open('test_difflib_expect.html', 'w', encoding='utf-8') as fp:
        #    fp.write(actual)

        # Read with an explicit encoding so the comparison does not depend on
        # the locale's default encoding.
        with open(findfile('test_difflib_expect.html'),
                  encoding='utf-8') as fp:
            self.assertEqual(actual, fp.read())

    def test_recursion_limit(self):
        # Check if the problem described in patch #1413711 exists.
        limit = sys.getrecursionlimit()
        # Twice as many items as the recursion limit; odd items are equal in
        # both lists, even items differ.
        old = [("K:%d" if i % 2 else "V:A:%d") % i for i in range(limit * 2)]
        new = [("K:%d" if i % 2 else "V:B:%d") % i for i in range(limit * 2)]
        difflib.SequenceMatcher(None, old, new).get_opcodes()

    def test_make_file_default_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines())
        self.assertIn('content="text/html; charset=utf-8"', output)

    def test_make_file_iso88591_charset(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_from1.splitlines(),
                                     patch914575_to1.splitlines(),
                                     charset='iso-8859-1')
        self.assertIn('content="text/html; charset=iso-8859-1"', output)

    def test_make_file_usascii_charset_with_nonascii_input(self):
        html_diff = difflib.HtmlDiff()
        output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
                                     patch914575_nonascii_to1.splitlines(),
                                     charset='us-ascii')
        self.assertIn('content="text/html; charset=us-ascii"', output)
        # Characters outside the charset must show up as numeric references.
        self.assertIn('&#305;mpl&#305;c&#305;t', output)
class TestOutputFormat(unittest.TestCase):
    """Header and range formatting of unified_diff() and context_diff()."""

    def test_tab_delimiter(self):
        # File name and date in each header line are separated by a tab.
        args = ['one', 'two', 'Original', 'Current',
                '2005-01-26 23:30:50', '2010-04-02 10:20:52']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], [
            "--- Original\t2005-01-26 23:30:50",
            "+++ Current\t2010-04-02 10:20:52"])
        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], [
            "*** Original\t2005-01-26 23:30:50",
            "--- Current\t2010-04-02 10:20:52"])

    def test_no_trailing_tab_on_empty_filedate(self):
        # Without dates the header lines carry the file names only.
        args = ['one', 'two', 'Original', 'Current']
        ud = difflib.unified_diff(*args, lineterm='')
        self.assertEqual(list(ud)[0:2], ["--- Original", "+++ Current"])

        cd = difflib.context_diff(*args, lineterm='')
        self.assertEqual(list(cd)[0:2], ["*** Original", "--- Current"])

    def test_range_format_unified(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   Each <range> field shall be of the form:
        #     "%1d", <beginning line number>  if the range contains exactly
        #     one line,
        #   and:
        #     "%1d,%1d", <beginning line number>, <number of lines> otherwise.
        #   If a range is empty, its beginning line number shall be the
        #   number of the line just before the range, or 0 if the empty range
        #   starts the file.
        fmt = difflib._format_range_unified
        self.assertEqual(fmt(3, 3), '3,0')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,2')
        self.assertEqual(fmt(3, 6), '4,3')
        self.assertEqual(fmt(0, 0), '0,0')

    def test_range_format_context(self):
        # Per the diff spec at http://www.unix.org/single_unix_specification/
        #
        #   The range of lines in file1 shall be written in the following
        #   format if the range contains two or more lines:
        #     "*** %d,%d ****\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "*** %d ****\n", <ending line number>
        #   The ending line number of an empty range shall be the number of
        #   the preceding line, or 0 if the range is at the start of the file.
        #
        #   Next, the range of lines in file2 shall be written in the
        #   following format if the range contains two or more lines:
        #     "--- %d,%d ----\n", <beginning line number>, <ending line number>
        #   and the following format otherwise:
        #     "--- %d ----\n", <ending line number>
        fmt = difflib._format_range_context
        self.assertEqual(fmt(3, 3), '3')
        self.assertEqual(fmt(3, 4), '4')
        self.assertEqual(fmt(3, 5), '4,5')
        self.assertEqual(fmt(3, 6), '4,6')
        self.assertEqual(fmt(0, 0), '0')
class TestBytes(unittest.TestCase):
    """diff_bytes() on all-bytes input, and type errors on str/bytes mixes."""

    # don't really care about the content of the output, just the fact
    # that it's bytes and we don't crash
    def check(self, diff):
        diff = list(diff)   # trigger exceptions first
        for line in diff:
            self.assertIsInstance(
                line, bytes,
                "all lines of diff should be bytes, but got: %r" % line)

    def test_byte_content(self):
        # if we receive byte strings, we return byte strings
        a = [b'hello', b'andr\xe9']              # iso-8859-1 bytes
        b = [b'hello', b'andr\xc3\xa9']          # utf-8 bytes

        unified = difflib.unified_diff
        context = difflib.context_diff

        check = self.check
        check(difflib.diff_bytes(unified, a, a))
        check(difflib.diff_bytes(unified, a, b))

        # now with filenames (content and filenames are all bytes!)
        check(difflib.diff_bytes(unified, a, a, b'a', b'a'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b'))

        # and with filenames and dates
        check(difflib.diff_bytes(unified, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(unified, a, b, b'a', b'b', b'2005', b'2013'))

        # same all over again, with context diff
        check(difflib.diff_bytes(context, a, a))
        check(difflib.diff_bytes(context, a, b))
        check(difflib.diff_bytes(context, a, a, b'a', b'a'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b'))
        check(difflib.diff_bytes(context, a, a, b'a', b'a', b'2005', b'2013'))
        check(difflib.diff_bytes(context, a, b, b'a', b'b', b'2005', b'2013'))

    def test_byte_filenames(self):
        # somebody renamed a file from ISO-8859-2 to UTF-8
        fna = b'\xb3odz.txt'    # "łodz.txt"
        fnb = b'\xc5\x82odz.txt'

        # they transcoded the content at the same time
        a = [b'\xa3odz is a city in Poland.']
        b = [b'\xc5\x81odz is a city in Poland.']

        check = self.check
        unified = difflib.unified_diff
        context = difflib.context_diff
        check(difflib.diff_bytes(unified, a, b, fna, fnb))
        check(difflib.diff_bytes(context, a, b, fna, fnb))

        def assertDiff(expect, actual):
            # do not compare expect and actual as lists, because unittest
            # uses difflib to report difference between lists
            actual = list(actual)
            self.assertEqual(len(expect), len(actual))
            for expected_line, actual_line in zip(expect, actual):
                self.assertEqual(expected_line, actual_line)

        expect = [
            b'--- \xb3odz.txt',
            b'+++ \xc5\x82odz.txt',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, lineterm=b'')
        assertDiff(expect, actual)

        # with dates (plain ASCII)
        datea = b'2005-03-18'
        dateb = b'2005-03-19'
        check(difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb))
        check(difflib.diff_bytes(context, a, b, fna, fnb, datea, dateb))

        expect = [
            # note the mixed encodings here: this is deeply wrong by every
            # tenet of Unicode, but it doesn't crash, it's parseable by
            # patch, and it's how UNIX(tm) diff behaves
            b'--- \xb3odz.txt\t2005-03-18',
            b'+++ \xc5\x82odz.txt\t2005-03-19',
            b'@@ -1 +1 @@',
            b'-\xa3odz is a city in Poland.',
            b'+\xc5\x81odz is a city in Poland.',
        ]
        actual = difflib.diff_bytes(unified, a, b, fna, fnb, datea, dateb,
                                    lineterm=b'')
        assertDiff(expect, actual)

    def test_mixed_types_content(self):
        # type of input content must be consistent: all str or all bytes
        a = [b'hello']
        b = ['hello']

        unified = difflib.unified_diff
        context = difflib.context_diff

        # The plain diff functions reject the bytes line ...
        expect = "lines to compare must be str, not bytes (b'hello')"
        self._assert_type_error(expect, unified, a, b)
        self._assert_type_error(expect, unified, b, a)
        self._assert_type_error(expect, context, a, b)
        self._assert_type_error(expect, context, b, a)

        # ... while diff_bytes() rejects the str line.
        expect = "all arguments must be bytes, not str ('hello')"
        self._assert_type_error(expect, difflib.diff_bytes, unified, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, unified, b, a)
        self._assert_type_error(expect, difflib.diff_bytes, context, a, b)
        self._assert_type_error(expect, difflib.diff_bytes, context, b, a)

    def test_mixed_types_filenames(self):
        # cannot pass filenames as bytes if content is str (this may not be
        # the right behaviour, but at least the test demonstrates how
        # things work)
        a = ['hello\n']
        b = ['ohell\n']
        fna = b'ol\xe9.txt'         # filename transcoded from ISO-8859-1
        fnb = b'ol\xc3\xa9.txt'     # to UTF-8
        self._assert_type_error(
            "all arguments must be str, not: b'ol\\xe9.txt'",
            difflib.unified_diff, a, b, fna, fnb)

    def test_mixed_types_dates(self):
        # type of dates must be consistent with type of contents
        a = [b'foo\n']
        b = [b'bar\n']
        datea = '1 fév'
        dateb = '3 fév'
        self._assert_type_error(
            "all arguments must be bytes, not str ('1 fév')",
            difflib.diff_bytes, difflib.unified_diff,
            a, b, b'a', b'b', datea, dateb)

        # if input is str, non-ASCII dates are fine
        a = ['foo\n']
        b = ['bar\n']
        list(difflib.unified_diff(a, b, 'a', 'b', datea, dateb))

    def _assert_type_error(self, msg, generator, *args):
        # Exhaust generator(*args) and require a TypeError whose text is
        # exactly msg.
        with self.assertRaises(TypeError) as ctx:
            list(generator(*args))
        self.assertEqual(msg, str(ctx.exception))
def test_main():
    """Run every test class of this module together with difflib's doctests."""
    # NOTE(review): presumably resets HtmlDiff's id-prefix counter so the
    # generated HTML is reproducible -- confirm against difflib.HtmlDiff.
    difflib.HtmlDiff._default_prefix = 0
    doctests = doctest.DocTestSuite(difflib)
    run_unittest(
        TestWithAscii, TestAutojunk, TestSFpatches, TestSFbugs,
        TestOutputFormat, TestBytes, doctests)
# Allow running this test module directly as a script.
if __name__ == '__main__':
    test_main()