
Python code coverage for Lib/test/test_tokenize.py

#    count    content
1n/afrom test import support
2n/afrom tokenize import (tokenize, _tokenize, untokenize, NUMBER, NAME, OP,
3n/a STRING, ENDMARKER, ENCODING, tok_name, detect_encoding,
4n/a open as tokenize_open, Untokenizer)
5n/afrom io import BytesIO
6n/afrom unittest import TestCase, mock
7n/afrom test.test_grammar import (VALID_UNDERSCORE_LITERALS,
8n/a INVALID_UNDERSCORE_LITERALS)
9n/aimport os
10n/aimport token
11n/a
12n/a
13n/aclass TokenizeTest(TestCase):
14n/a # Tests for the tokenize module.
15n/a
16n/a # The tests can be really simple. Given a small fragment of source
17n/a # code, print out a table with tokens. The ENDMARKER is omitted for
18n/a # brevity.
19n/a
20n/a def check_tokenize(self, s, expected):
21n/a # Format the tokens in s in a table format.
22n/a # The ENDMARKER is omitted.
23n/a result = []
24n/a f = BytesIO(s.encode('utf-8'))
25n/a for type, token, start, end, line in tokenize(f.readline):
26n/a if type == ENDMARKER:
27n/a break
28n/a type = tok_name[type]
29n/a result.append(f" {type:10} {token!r:13} {start} {end}")
30n/a self.assertEqual(result,
31n/a [" ENCODING 'utf-8' (0, 0) (0, 0)"] +
32n/a expected.rstrip().splitlines())
33n/a
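(Editorial aside, not part of the covered file: the table format these tests assert can be reproduced with the public tokenize API alone. The helper name dump_tokens below is chosen for the example only; it mirrors check_tokenize() above.)

    from io import BytesIO
    from tokenize import tokenize, tok_name, ENDMARKER

    def dump_tokens(source):
        # Print one row per token, in the same layout as check_tokenize() above.
        for tok in tokenize(BytesIO(source.encode('utf-8')).readline):
            if tok.type == ENDMARKER:
                break
            print(f"{tok_name[tok.type]:10} {tok.string!r:13} {tok.start} {tok.end}")

    dump_tokens("1 + 1")
    # ENCODING   'utf-8'       (0, 0) (0, 0)
    # NUMBER     '1'           (1, 0) (1, 1)
    # OP         '+'           (1, 2) (1, 3)
    # NUMBER     '1'           (1, 4) (1, 5)
    # (newer Python versions may also emit a trailing NEWLINE '' token here)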
34n/a def test_basic(self):
35n/a self.check_tokenize("1 + 1", """\
36n/a NUMBER '1' (1, 0) (1, 1)
37n/a OP '+' (1, 2) (1, 3)
38n/a NUMBER '1' (1, 4) (1, 5)
39n/a """)
40n/a self.check_tokenize("if False:\n"
41n/a " # NL\n"
42n/a " True = False # NEWLINE\n", """\
43n/a NAME 'if' (1, 0) (1, 2)
44n/a NAME 'False' (1, 3) (1, 8)
45n/a OP ':' (1, 8) (1, 9)
46n/a NEWLINE '\\n' (1, 9) (1, 10)
47n/a COMMENT '# NL' (2, 4) (2, 8)
48n/a NL '\\n' (2, 8) (2, 9)
49n/a INDENT ' ' (3, 0) (3, 4)
50n/a NAME 'True' (3, 4) (3, 8)
51n/a OP '=' (3, 9) (3, 10)
52n/a NAME 'False' (3, 11) (3, 16)
53n/a COMMENT '# NEWLINE' (3, 17) (3, 26)
54n/a NEWLINE '\\n' (3, 26) (3, 27)
55n/a DEDENT '' (4, 0) (4, 0)
56n/a """)
57n/a indent_error_file = b"""\
58n/adef k(x):
59n/a x += 2
60n/a x += 5
61n/a"""
62n/a readline = BytesIO(indent_error_file).readline
63n/a with self.assertRaisesRegex(IndentationError,
64n/a "unindent does not match any "
65n/a "outer indentation level"):
66n/a for tok in tokenize(readline):
67n/a pass
68n/a
69n/a def test_int(self):
70n/a # Ordinary integers and binary operators
71n/a self.check_tokenize("0xff <= 255", """\
72n/a NUMBER '0xff' (1, 0) (1, 4)
73n/a OP '<=' (1, 5) (1, 7)
74n/a NUMBER '255' (1, 8) (1, 11)
75n/a """)
76n/a self.check_tokenize("0b10 <= 255", """\
77n/a NUMBER '0b10' (1, 0) (1, 4)
78n/a OP '<=' (1, 5) (1, 7)
79n/a NUMBER '255' (1, 8) (1, 11)
80n/a """)
81n/a self.check_tokenize("0o123 <= 0O123", """\
82n/a NUMBER '0o123' (1, 0) (1, 5)
83n/a OP '<=' (1, 6) (1, 8)
84n/a NUMBER '0O123' (1, 9) (1, 14)
85n/a """)
86n/a self.check_tokenize("1234567 > ~0x15", """\
87n/a NUMBER '1234567' (1, 0) (1, 7)
88n/a OP '>' (1, 8) (1, 9)
89n/a OP '~' (1, 10) (1, 11)
90n/a NUMBER '0x15' (1, 11) (1, 15)
91n/a """)
92n/a self.check_tokenize("2134568 != 1231515", """\
93n/a NUMBER '2134568' (1, 0) (1, 7)
94n/a OP '!=' (1, 8) (1, 10)
95n/a NUMBER '1231515' (1, 11) (1, 18)
96n/a """)
97n/a self.check_tokenize("(-124561-1) & 200000000", """\
98n/a OP '(' (1, 0) (1, 1)
99n/a OP '-' (1, 1) (1, 2)
100n/a NUMBER '124561' (1, 2) (1, 8)
101n/a OP '-' (1, 8) (1, 9)
102n/a NUMBER '1' (1, 9) (1, 10)
103n/a OP ')' (1, 10) (1, 11)
104n/a OP '&' (1, 12) (1, 13)
105n/a NUMBER '200000000' (1, 14) (1, 23)
106n/a """)
107n/a self.check_tokenize("0xdeadbeef != -1", """\
108n/a NUMBER '0xdeadbeef' (1, 0) (1, 10)
109n/a OP '!=' (1, 11) (1, 13)
110n/a OP '-' (1, 14) (1, 15)
111n/a NUMBER '1' (1, 15) (1, 16)
112n/a """)
113n/a self.check_tokenize("0xdeadc0de & 12345", """\
114n/a NUMBER '0xdeadc0de' (1, 0) (1, 10)
115n/a OP '&' (1, 11) (1, 12)
116n/a NUMBER '12345' (1, 13) (1, 18)
117n/a """)
118n/a self.check_tokenize("0xFF & 0x15 | 1234", """\
119n/a NUMBER '0xFF' (1, 0) (1, 4)
120n/a OP '&' (1, 5) (1, 6)
121n/a NUMBER '0x15' (1, 7) (1, 11)
122n/a OP '|' (1, 12) (1, 13)
123n/a NUMBER '1234' (1, 14) (1, 18)
124n/a """)
125n/a
126n/a def test_long(self):
127n/a # Long integers
128n/a self.check_tokenize("x = 0", """\
129n/a NAME 'x' (1, 0) (1, 1)
130n/a OP '=' (1, 2) (1, 3)
131n/a NUMBER '0' (1, 4) (1, 5)
132n/a """)
133n/a self.check_tokenize("x = 0xfffffffffff", """\
134n/a NAME 'x' (1, 0) (1, 1)
135n/a OP '=' (1, 2) (1, 3)
136n/a NUMBER '0xfffffffffff' (1, 4) (1, 17)
137n/a """)
138n/a self.check_tokenize("x = 123141242151251616110", """\
139n/a NAME 'x' (1, 0) (1, 1)
140n/a OP '=' (1, 2) (1, 3)
141n/a NUMBER '123141242151251616110' (1, 4) (1, 25)
142n/a """)
143n/a self.check_tokenize("x = -15921590215012591", """\
144n/a NAME 'x' (1, 0) (1, 1)
145n/a OP '=' (1, 2) (1, 3)
146n/a OP '-' (1, 4) (1, 5)
147n/a NUMBER '15921590215012591' (1, 5) (1, 22)
148n/a """)
149n/a
150n/a def test_float(self):
151n/a # Floating point numbers
152n/a self.check_tokenize("x = 3.14159", """\
153n/a NAME 'x' (1, 0) (1, 1)
154n/a OP '=' (1, 2) (1, 3)
155n/a NUMBER '3.14159' (1, 4) (1, 11)
156n/a """)
157n/a self.check_tokenize("x = 314159.", """\
158n/a NAME 'x' (1, 0) (1, 1)
159n/a OP '=' (1, 2) (1, 3)
160n/a NUMBER '314159.' (1, 4) (1, 11)
161n/a """)
162n/a self.check_tokenize("x = .314159", """\
163n/a NAME 'x' (1, 0) (1, 1)
164n/a OP '=' (1, 2) (1, 3)
165n/a NUMBER '.314159' (1, 4) (1, 11)
166n/a """)
167n/a self.check_tokenize("x = 3e14159", """\
168n/a NAME 'x' (1, 0) (1, 1)
169n/a OP '=' (1, 2) (1, 3)
170n/a NUMBER '3e14159' (1, 4) (1, 11)
171n/a """)
172n/a self.check_tokenize("x = 3E123", """\
173n/a NAME 'x' (1, 0) (1, 1)
174n/a OP '=' (1, 2) (1, 3)
175n/a NUMBER '3E123' (1, 4) (1, 9)
176n/a """)
177n/a self.check_tokenize("x+y = 3e-1230", """\
178n/a NAME 'x' (1, 0) (1, 1)
179n/a OP '+' (1, 1) (1, 2)
180n/a NAME 'y' (1, 2) (1, 3)
181n/a OP '=' (1, 4) (1, 5)
182n/a NUMBER '3e-1230' (1, 6) (1, 13)
183n/a """)
184n/a self.check_tokenize("x = 3.14e159", """\
185n/a NAME 'x' (1, 0) (1, 1)
186n/a OP '=' (1, 2) (1, 3)
187n/a NUMBER '3.14e159' (1, 4) (1, 12)
188n/a """)
189n/a
190n/a def test_underscore_literals(self):
191n/a def number_token(s):
192n/a f = BytesIO(s.encode('utf-8'))
193n/a for toktype, token, start, end, line in tokenize(f.readline):
194n/a if toktype == NUMBER:
195n/a return token
196n/a return 'invalid token'
197n/a for lit in VALID_UNDERSCORE_LITERALS:
198n/a if '(' in lit:
199n/a # this won't work with compound complex inputs
200n/a continue
201n/a self.assertEqual(number_token(lit), lit)
202n/a for lit in INVALID_UNDERSCORE_LITERALS:
203n/a self.assertNotEqual(number_token(lit), lit)
204n/a
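(Illustrative snippet, not part of the file: an underscore-separated literal such as 1_000_000 comes back as a single NUMBER token, which is what the loop above relies on.)

    from io import BytesIO
    from tokenize import tokenize, NUMBER

    toks = [t for t in tokenize(BytesIO(b"1_000_000").readline) if t.type == NUMBER]
    print(toks[0].string)   # '1_000_000'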
205n/a def test_string(self):
206n/a # String literals
207n/a self.check_tokenize("x = ''; y = \"\"", """\
208n/a NAME 'x' (1, 0) (1, 1)
209n/a OP '=' (1, 2) (1, 3)
210n/a STRING "''" (1, 4) (1, 6)
211n/a OP ';' (1, 6) (1, 7)
212n/a NAME 'y' (1, 8) (1, 9)
213n/a OP '=' (1, 10) (1, 11)
214n/a STRING '""' (1, 12) (1, 14)
215n/a """)
216n/a self.check_tokenize("x = '\"'; y = \"'\"", """\
217n/a NAME 'x' (1, 0) (1, 1)
218n/a OP '=' (1, 2) (1, 3)
219n/a STRING '\\'"\\'' (1, 4) (1, 7)
220n/a OP ';' (1, 7) (1, 8)
221n/a NAME 'y' (1, 9) (1, 10)
222n/a OP '=' (1, 11) (1, 12)
223n/a STRING '"\\'"' (1, 13) (1, 16)
224n/a """)
225n/a self.check_tokenize("x = \"doesn't \"shrink\", does it\"", """\
226n/a NAME 'x' (1, 0) (1, 1)
227n/a OP '=' (1, 2) (1, 3)
228n/a STRING '"doesn\\'t "' (1, 4) (1, 14)
229n/a NAME 'shrink' (1, 14) (1, 20)
230n/a STRING '", does it"' (1, 20) (1, 31)
231n/a """)
232n/a self.check_tokenize("x = 'abc' + 'ABC'", """\
233n/a NAME 'x' (1, 0) (1, 1)
234n/a OP '=' (1, 2) (1, 3)
235n/a STRING "'abc'" (1, 4) (1, 9)
236n/a OP '+' (1, 10) (1, 11)
237n/a STRING "'ABC'" (1, 12) (1, 17)
238n/a """)
239n/a self.check_tokenize('y = "ABC" + "ABC"', """\
240n/a NAME 'y' (1, 0) (1, 1)
241n/a OP '=' (1, 2) (1, 3)
242n/a STRING '"ABC"' (1, 4) (1, 9)
243n/a OP '+' (1, 10) (1, 11)
244n/a STRING '"ABC"' (1, 12) (1, 17)
245n/a """)
246n/a self.check_tokenize("x = r'abc' + r'ABC' + R'ABC' + R'ABC'", """\
247n/a NAME 'x' (1, 0) (1, 1)
248n/a OP '=' (1, 2) (1, 3)
249n/a STRING "r'abc'" (1, 4) (1, 10)
250n/a OP '+' (1, 11) (1, 12)
251n/a STRING "r'ABC'" (1, 13) (1, 19)
252n/a OP '+' (1, 20) (1, 21)
253n/a STRING "R'ABC'" (1, 22) (1, 28)
254n/a OP '+' (1, 29) (1, 30)
255n/a STRING "R'ABC'" (1, 31) (1, 37)
256n/a """)
257n/a self.check_tokenize('y = r"abc" + r"ABC" + R"ABC" + R"ABC"', """\
258n/a NAME 'y' (1, 0) (1, 1)
259n/a OP '=' (1, 2) (1, 3)
260n/a STRING 'r"abc"' (1, 4) (1, 10)
261n/a OP '+' (1, 11) (1, 12)
262n/a STRING 'r"ABC"' (1, 13) (1, 19)
263n/a OP '+' (1, 20) (1, 21)
264n/a STRING 'R"ABC"' (1, 22) (1, 28)
265n/a OP '+' (1, 29) (1, 30)
266n/a STRING 'R"ABC"' (1, 31) (1, 37)
267n/a """)
268n/a
269n/a self.check_tokenize("u'abc' + U'abc'", """\
270n/a STRING "u'abc'" (1, 0) (1, 6)
271n/a OP '+' (1, 7) (1, 8)
272n/a STRING "U'abc'" (1, 9) (1, 15)
273n/a """)
274n/a self.check_tokenize('u"abc" + U"abc"', """\
275n/a STRING 'u"abc"' (1, 0) (1, 6)
276n/a OP '+' (1, 7) (1, 8)
277n/a STRING 'U"abc"' (1, 9) (1, 15)
278n/a """)
279n/a
280n/a self.check_tokenize("b'abc' + B'abc'", """\
281n/a STRING "b'abc'" (1, 0) (1, 6)
282n/a OP '+' (1, 7) (1, 8)
283n/a STRING "B'abc'" (1, 9) (1, 15)
284n/a """)
285n/a self.check_tokenize('b"abc" + B"abc"', """\
286n/a STRING 'b"abc"' (1, 0) (1, 6)
287n/a OP '+' (1, 7) (1, 8)
288n/a STRING 'B"abc"' (1, 9) (1, 15)
289n/a """)
290n/a self.check_tokenize("br'abc' + bR'abc' + Br'abc' + BR'abc'", """\
291n/a STRING "br'abc'" (1, 0) (1, 7)
292n/a OP '+' (1, 8) (1, 9)
293n/a STRING "bR'abc'" (1, 10) (1, 17)
294n/a OP '+' (1, 18) (1, 19)
295n/a STRING "Br'abc'" (1, 20) (1, 27)
296n/a OP '+' (1, 28) (1, 29)
297n/a STRING "BR'abc'" (1, 30) (1, 37)
298n/a """)
299n/a self.check_tokenize('br"abc" + bR"abc" + Br"abc" + BR"abc"', """\
300n/a STRING 'br"abc"' (1, 0) (1, 7)
301n/a OP '+' (1, 8) (1, 9)
302n/a STRING 'bR"abc"' (1, 10) (1, 17)
303n/a OP '+' (1, 18) (1, 19)
304n/a STRING 'Br"abc"' (1, 20) (1, 27)
305n/a OP '+' (1, 28) (1, 29)
306n/a STRING 'BR"abc"' (1, 30) (1, 37)
307n/a """)
308n/a self.check_tokenize("rb'abc' + rB'abc' + Rb'abc' + RB'abc'", """\
309n/a STRING "rb'abc'" (1, 0) (1, 7)
310n/a OP '+' (1, 8) (1, 9)
311n/a STRING "rB'abc'" (1, 10) (1, 17)
312n/a OP '+' (1, 18) (1, 19)
313n/a STRING "Rb'abc'" (1, 20) (1, 27)
314n/a OP '+' (1, 28) (1, 29)
315n/a STRING "RB'abc'" (1, 30) (1, 37)
316n/a """)
317n/a self.check_tokenize('rb"abc" + rB"abc" + Rb"abc" + RB"abc"', """\
318n/a STRING 'rb"abc"' (1, 0) (1, 7)
319n/a OP '+' (1, 8) (1, 9)
320n/a STRING 'rB"abc"' (1, 10) (1, 17)
321n/a OP '+' (1, 18) (1, 19)
322n/a STRING 'Rb"abc"' (1, 20) (1, 27)
323n/a OP '+' (1, 28) (1, 29)
324n/a STRING 'RB"abc"' (1, 30) (1, 37)
325n/a """)
326n/a # Check 0, 1, and 2 character string prefixes.
327n/a self.check_tokenize(r'"a\
328n/ade\
329n/afg"', """\
330n/a STRING '"a\\\\\\nde\\\\\\nfg"\' (1, 0) (3, 3)
331n/a """)
332n/a self.check_tokenize(r'u"a\
333n/ade"', """\
334n/a STRING 'u"a\\\\\\nde"\' (1, 0) (2, 3)
335n/a """)
336n/a self.check_tokenize(r'rb"a\
337n/ad"', """\
338n/a STRING 'rb"a\\\\\\nd"\' (1, 0) (2, 2)
339n/a """)
340n/a self.check_tokenize(r'"""a\
341n/ab"""', """\
342n/a STRING '\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
343n/a """)
344n/a self.check_tokenize(r'u"""a\
345n/ab"""', """\
346n/a STRING 'u\"\""a\\\\\\nb\"\""' (1, 0) (2, 4)
347n/a """)
348n/a self.check_tokenize(r'rb"""a\
349n/ab\
350n/ac"""', """\
351n/a STRING 'rb"\""a\\\\\\nb\\\\\\nc"\""' (1, 0) (3, 4)
352n/a """)
353n/a self.check_tokenize('f"abc"', """\
354n/a STRING 'f"abc"' (1, 0) (1, 6)
355n/a """)
356n/a self.check_tokenize('fR"a{b}c"', """\
357n/a STRING 'fR"a{b}c"' (1, 0) (1, 9)
358n/a """)
359n/a self.check_tokenize('f"""abc"""', """\
360n/a STRING 'f\"\"\"abc\"\"\"' (1, 0) (1, 10)
361n/a """)
362n/a self.check_tokenize(r'f"abc\
363n/adef"', """\
364n/a STRING 'f"abc\\\\\\ndef"' (1, 0) (2, 4)
365n/a """)
366n/a self.check_tokenize(r'Rf"abc\
367n/adef"', """\
368n/a STRING 'Rf"abc\\\\\\ndef"' (1, 0) (2, 4)
369n/a """)
370n/a
371n/a def test_function(self):
372n/a self.check_tokenize("def d22(a, b, c=2, d=2, *k): pass", """\
373n/a NAME 'def' (1, 0) (1, 3)
374n/a NAME 'd22' (1, 4) (1, 7)
375n/a OP '(' (1, 7) (1, 8)
376n/a NAME 'a' (1, 8) (1, 9)
377n/a OP ',' (1, 9) (1, 10)
378n/a NAME 'b' (1, 11) (1, 12)
379n/a OP ',' (1, 12) (1, 13)
380n/a NAME 'c' (1, 14) (1, 15)
381n/a OP '=' (1, 15) (1, 16)
382n/a NUMBER '2' (1, 16) (1, 17)
383n/a OP ',' (1, 17) (1, 18)
384n/a NAME 'd' (1, 19) (1, 20)
385n/a OP '=' (1, 20) (1, 21)
386n/a NUMBER '2' (1, 21) (1, 22)
387n/a OP ',' (1, 22) (1, 23)
388n/a OP '*' (1, 24) (1, 25)
389n/a NAME 'k' (1, 25) (1, 26)
390n/a OP ')' (1, 26) (1, 27)
391n/a OP ':' (1, 27) (1, 28)
392n/a NAME 'pass' (1, 29) (1, 33)
393n/a """)
394n/a self.check_tokenize("def d01v_(a=1, *k, **w): pass", """\
395n/a NAME 'def' (1, 0) (1, 3)
396n/a NAME 'd01v_' (1, 4) (1, 9)
397n/a OP '(' (1, 9) (1, 10)
398n/a NAME 'a' (1, 10) (1, 11)
399n/a OP '=' (1, 11) (1, 12)
400n/a NUMBER '1' (1, 12) (1, 13)
401n/a OP ',' (1, 13) (1, 14)
402n/a OP '*' (1, 15) (1, 16)
403n/a NAME 'k' (1, 16) (1, 17)
404n/a OP ',' (1, 17) (1, 18)
405n/a OP '**' (1, 19) (1, 21)
406n/a NAME 'w' (1, 21) (1, 22)
407n/a OP ')' (1, 22) (1, 23)
408n/a OP ':' (1, 23) (1, 24)
409n/a NAME 'pass' (1, 25) (1, 29)
410n/a """)
411n/a
412n/a def test_comparison(self):
413n/a # Comparison
414n/a self.check_tokenize("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != "
415n/a "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass", """\
416n/a NAME 'if' (1, 0) (1, 2)
417n/a NUMBER '1' (1, 3) (1, 4)
418n/a OP '<' (1, 5) (1, 6)
419n/a NUMBER '1' (1, 7) (1, 8)
420n/a OP '>' (1, 9) (1, 10)
421n/a NUMBER '1' (1, 11) (1, 12)
422n/a OP '==' (1, 13) (1, 15)
423n/a NUMBER '1' (1, 16) (1, 17)
424n/a OP '>=' (1, 18) (1, 20)
425n/a NUMBER '5' (1, 21) (1, 22)
426n/a OP '<=' (1, 23) (1, 25)
427n/a NUMBER '0x15' (1, 26) (1, 30)
428n/a OP '<=' (1, 31) (1, 33)
429n/a NUMBER '0x12' (1, 34) (1, 38)
430n/a OP '!=' (1, 39) (1, 41)
431n/a NUMBER '1' (1, 42) (1, 43)
432n/a NAME 'and' (1, 44) (1, 47)
433n/a NUMBER '5' (1, 48) (1, 49)
434n/a NAME 'in' (1, 50) (1, 52)
435n/a NUMBER '1' (1, 53) (1, 54)
436n/a NAME 'not' (1, 55) (1, 58)
437n/a NAME 'in' (1, 59) (1, 61)
438n/a NUMBER '1' (1, 62) (1, 63)
439n/a NAME 'is' (1, 64) (1, 66)
440n/a NUMBER '1' (1, 67) (1, 68)
441n/a NAME 'or' (1, 69) (1, 71)
442n/a NUMBER '5' (1, 72) (1, 73)
443n/a NAME 'is' (1, 74) (1, 76)
444n/a NAME 'not' (1, 77) (1, 80)
445n/a NUMBER '1' (1, 81) (1, 82)
446n/a OP ':' (1, 82) (1, 83)
447n/a NAME 'pass' (1, 84) (1, 88)
448n/a """)
449n/a
450n/a def test_shift(self):
451n/a # Shift
452n/a self.check_tokenize("x = 1 << 1 >> 5", """\
453n/a NAME 'x' (1, 0) (1, 1)
454n/a OP '=' (1, 2) (1, 3)
455n/a NUMBER '1' (1, 4) (1, 5)
456n/a OP '<<' (1, 6) (1, 8)
457n/a NUMBER '1' (1, 9) (1, 10)
458n/a OP '>>' (1, 11) (1, 13)
459n/a NUMBER '5' (1, 14) (1, 15)
460n/a """)
461n/a
462n/a def test_additive(self):
463n/a # Additive
464n/a self.check_tokenize("x = 1 - y + 15 - 1 + 0x124 + z + a[5]", """\
465n/a NAME 'x' (1, 0) (1, 1)
466n/a OP '=' (1, 2) (1, 3)
467n/a NUMBER '1' (1, 4) (1, 5)
468n/a OP '-' (1, 6) (1, 7)
469n/a NAME 'y' (1, 8) (1, 9)
470n/a OP '+' (1, 10) (1, 11)
471n/a NUMBER '15' (1, 12) (1, 14)
472n/a OP '-' (1, 15) (1, 16)
473n/a NUMBER '1' (1, 17) (1, 18)
474n/a OP '+' (1, 19) (1, 20)
475n/a NUMBER '0x124' (1, 21) (1, 26)
476n/a OP '+' (1, 27) (1, 28)
477n/a NAME 'z' (1, 29) (1, 30)
478n/a OP '+' (1, 31) (1, 32)
479n/a NAME 'a' (1, 33) (1, 34)
480n/a OP '[' (1, 34) (1, 35)
481n/a NUMBER '5' (1, 35) (1, 36)
482n/a OP ']' (1, 36) (1, 37)
483n/a """)
484n/a
485n/a def test_multiplicative(self):
486n/a # Multiplicative
487n/a self.check_tokenize("x = 1//1*1/5*12%0x12@42", """\
488n/a NAME 'x' (1, 0) (1, 1)
489n/a OP '=' (1, 2) (1, 3)
490n/a NUMBER '1' (1, 4) (1, 5)
491n/a OP '//' (1, 5) (1, 7)
492n/a NUMBER '1' (1, 7) (1, 8)
493n/a OP '*' (1, 8) (1, 9)
494n/a NUMBER '1' (1, 9) (1, 10)
495n/a OP '/' (1, 10) (1, 11)
496n/a NUMBER '5' (1, 11) (1, 12)
497n/a OP '*' (1, 12) (1, 13)
498n/a NUMBER '12' (1, 13) (1, 15)
499n/a OP '%' (1, 15) (1, 16)
500n/a NUMBER '0x12' (1, 16) (1, 20)
501n/a OP '@' (1, 20) (1, 21)
502n/a NUMBER '42' (1, 21) (1, 23)
503n/a """)
504n/a
505n/a def test_unary(self):
506n/a # Unary
507n/a self.check_tokenize("~1 ^ 1 & 1 |1 ^ -1", """\
508n/a OP '~' (1, 0) (1, 1)
509n/a NUMBER '1' (1, 1) (1, 2)
510n/a OP '^' (1, 3) (1, 4)
511n/a NUMBER '1' (1, 5) (1, 6)
512n/a OP '&' (1, 7) (1, 8)
513n/a NUMBER '1' (1, 9) (1, 10)
514n/a OP '|' (1, 11) (1, 12)
515n/a NUMBER '1' (1, 12) (1, 13)
516n/a OP '^' (1, 14) (1, 15)
517n/a OP '-' (1, 16) (1, 17)
518n/a NUMBER '1' (1, 17) (1, 18)
519n/a """)
520n/a self.check_tokenize("-1*1/1+1*1//1 - ---1**1", """\
521n/a OP '-' (1, 0) (1, 1)
522n/a NUMBER '1' (1, 1) (1, 2)
523n/a OP '*' (1, 2) (1, 3)
524n/a NUMBER '1' (1, 3) (1, 4)
525n/a OP '/' (1, 4) (1, 5)
526n/a NUMBER '1' (1, 5) (1, 6)
527n/a OP '+' (1, 6) (1, 7)
528n/a NUMBER '1' (1, 7) (1, 8)
529n/a OP '*' (1, 8) (1, 9)
530n/a NUMBER '1' (1, 9) (1, 10)
531n/a OP '//' (1, 10) (1, 12)
532n/a NUMBER '1' (1, 12) (1, 13)
533n/a OP '-' (1, 14) (1, 15)
534n/a OP '-' (1, 16) (1, 17)
535n/a OP '-' (1, 17) (1, 18)
536n/a OP '-' (1, 18) (1, 19)
537n/a NUMBER '1' (1, 19) (1, 20)
538n/a OP '**' (1, 20) (1, 22)
539n/a NUMBER '1' (1, 22) (1, 23)
540n/a """)
541n/a
542n/a def test_selector(self):
543n/a # Selector
544n/a self.check_tokenize("import sys, time\nx = sys.modules['time'].time()", """\
545n/a NAME 'import' (1, 0) (1, 6)
546n/a NAME 'sys' (1, 7) (1, 10)
547n/a OP ',' (1, 10) (1, 11)
548n/a NAME 'time' (1, 12) (1, 16)
549n/a NEWLINE '\\n' (1, 16) (1, 17)
550n/a NAME 'x' (2, 0) (2, 1)
551n/a OP '=' (2, 2) (2, 3)
552n/a NAME 'sys' (2, 4) (2, 7)
553n/a OP '.' (2, 7) (2, 8)
554n/a NAME 'modules' (2, 8) (2, 15)
555n/a OP '[' (2, 15) (2, 16)
556n/a STRING "'time'" (2, 16) (2, 22)
557n/a OP ']' (2, 22) (2, 23)
558n/a OP '.' (2, 23) (2, 24)
559n/a NAME 'time' (2, 24) (2, 28)
560n/a OP '(' (2, 28) (2, 29)
561n/a OP ')' (2, 29) (2, 30)
562n/a """)
563n/a
564n/a def test_method(self):
565n/a # Methods
566n/a self.check_tokenize("@staticmethod\ndef foo(x,y): pass", """\
567n/a OP '@' (1, 0) (1, 1)
568n/a NAME 'staticmethod' (1, 1) (1, 13)
569n/a NEWLINE '\\n' (1, 13) (1, 14)
570n/a NAME 'def' (2, 0) (2, 3)
571n/a NAME 'foo' (2, 4) (2, 7)
572n/a OP '(' (2, 7) (2, 8)
573n/a NAME 'x' (2, 8) (2, 9)
574n/a OP ',' (2, 9) (2, 10)
575n/a NAME 'y' (2, 10) (2, 11)
576n/a OP ')' (2, 11) (2, 12)
577n/a OP ':' (2, 12) (2, 13)
578n/a NAME 'pass' (2, 14) (2, 18)
579n/a """)
580n/a
581n/a def test_tabs(self):
582n/a # Evil tabs
583n/a self.check_tokenize("def f():\n"
584n/a "\tif x\n"
585n/a " \tpass", """\
586n/a NAME 'def' (1, 0) (1, 3)
587n/a NAME 'f' (1, 4) (1, 5)
588n/a OP '(' (1, 5) (1, 6)
589n/a OP ')' (1, 6) (1, 7)
590n/a OP ':' (1, 7) (1, 8)
591n/a NEWLINE '\\n' (1, 8) (1, 9)
592n/a INDENT '\\t' (2, 0) (2, 1)
593n/a NAME 'if' (2, 1) (2, 3)
594n/a NAME 'x' (2, 4) (2, 5)
595n/a NEWLINE '\\n' (2, 5) (2, 6)
596n/a INDENT ' \\t' (3, 0) (3, 9)
597n/a NAME 'pass' (3, 9) (3, 13)
598n/a DEDENT '' (4, 0) (4, 0)
599n/a DEDENT '' (4, 0) (4, 0)
600n/a """)
601n/a
602n/a def test_non_ascii_identifiers(self):
603n/a # Non-ascii identifiers
604n/a        self.check_tokenize("Örter = 'places'\ngrün = 'green'", """\
605n/a    NAME       'Örter'       (1, 0) (1, 5)
606n/a OP '=' (1, 6) (1, 7)
607n/a STRING "'places'" (1, 8) (1, 16)
608n/a NEWLINE '\\n' (1, 16) (1, 17)
609n/a    NAME       'grün'        (2, 0) (2, 4)
610n/a OP '=' (2, 5) (2, 6)
611n/a STRING "'green'" (2, 7) (2, 14)
612n/a """)
613n/a
614n/a def test_unicode(self):
615n/a # Legacy unicode literals:
616n/a        self.check_tokenize("Örter = u'places'\ngrün = U'green'", """\
617n/a    NAME       'Örter'       (1, 0) (1, 5)
618n/a OP '=' (1, 6) (1, 7)
619n/a STRING "u'places'" (1, 8) (1, 17)
620n/a NEWLINE '\\n' (1, 17) (1, 18)
621n/a    NAME       'grün'        (2, 0) (2, 4)
622n/a OP '=' (2, 5) (2, 6)
623n/a STRING "U'green'" (2, 7) (2, 15)
624n/a """)
625n/a
626n/a def test_async(self):
627n/a # Async/await extension:
628n/a self.check_tokenize("async = 1", """\
629n/a NAME 'async' (1, 0) (1, 5)
630n/a OP '=' (1, 6) (1, 7)
631n/a NUMBER '1' (1, 8) (1, 9)
632n/a """)
633n/a
634n/a self.check_tokenize("a = (async = 1)", """\
635n/a NAME 'a' (1, 0) (1, 1)
636n/a OP '=' (1, 2) (1, 3)
637n/a OP '(' (1, 4) (1, 5)
638n/a NAME 'async' (1, 5) (1, 10)
639n/a OP '=' (1, 11) (1, 12)
640n/a NUMBER '1' (1, 13) (1, 14)
641n/a OP ')' (1, 14) (1, 15)
642n/a """)
643n/a
644n/a self.check_tokenize("async()", """\
645n/a NAME 'async' (1, 0) (1, 5)
646n/a OP '(' (1, 5) (1, 6)
647n/a OP ')' (1, 6) (1, 7)
648n/a """)
649n/a
650n/a self.check_tokenize("class async(Bar):pass", """\
651n/a NAME 'class' (1, 0) (1, 5)
652n/a NAME 'async' (1, 6) (1, 11)
653n/a OP '(' (1, 11) (1, 12)
654n/a NAME 'Bar' (1, 12) (1, 15)
655n/a OP ')' (1, 15) (1, 16)
656n/a OP ':' (1, 16) (1, 17)
657n/a NAME 'pass' (1, 17) (1, 21)
658n/a """)
659n/a
660n/a self.check_tokenize("class async:pass", """\
661n/a NAME 'class' (1, 0) (1, 5)
662n/a NAME 'async' (1, 6) (1, 11)
663n/a OP ':' (1, 11) (1, 12)
664n/a NAME 'pass' (1, 12) (1, 16)
665n/a """)
666n/a
667n/a self.check_tokenize("await = 1", """\
668n/a NAME 'await' (1, 0) (1, 5)
669n/a OP '=' (1, 6) (1, 7)
670n/a NUMBER '1' (1, 8) (1, 9)
671n/a """)
672n/a
673n/a self.check_tokenize("foo.async", """\
674n/a NAME 'foo' (1, 0) (1, 3)
675n/a OP '.' (1, 3) (1, 4)
676n/a NAME 'async' (1, 4) (1, 9)
677n/a """)
678n/a
679n/a self.check_tokenize("async for a in b: pass", """\
680n/a NAME 'async' (1, 0) (1, 5)
681n/a NAME 'for' (1, 6) (1, 9)
682n/a NAME 'a' (1, 10) (1, 11)
683n/a NAME 'in' (1, 12) (1, 14)
684n/a NAME 'b' (1, 15) (1, 16)
685n/a OP ':' (1, 16) (1, 17)
686n/a NAME 'pass' (1, 18) (1, 22)
687n/a """)
688n/a
689n/a self.check_tokenize("async with a as b: pass", """\
690n/a NAME 'async' (1, 0) (1, 5)
691n/a NAME 'with' (1, 6) (1, 10)
692n/a NAME 'a' (1, 11) (1, 12)
693n/a NAME 'as' (1, 13) (1, 15)
694n/a NAME 'b' (1, 16) (1, 17)
695n/a OP ':' (1, 17) (1, 18)
696n/a NAME 'pass' (1, 19) (1, 23)
697n/a """)
698n/a
699n/a self.check_tokenize("async.foo", """\
700n/a NAME 'async' (1, 0) (1, 5)
701n/a OP '.' (1, 5) (1, 6)
702n/a NAME 'foo' (1, 6) (1, 9)
703n/a """)
704n/a
705n/a self.check_tokenize("async", """\
706n/a NAME 'async' (1, 0) (1, 5)
707n/a """)
708n/a
709n/a self.check_tokenize("async\n#comment\nawait", """\
710n/a NAME 'async' (1, 0) (1, 5)
711n/a NEWLINE '\\n' (1, 5) (1, 6)
712n/a COMMENT '#comment' (2, 0) (2, 8)
713n/a NL '\\n' (2, 8) (2, 9)
714n/a NAME 'await' (3, 0) (3, 5)
715n/a """)
716n/a
717n/a self.check_tokenize("async\n...\nawait", """\
718n/a NAME 'async' (1, 0) (1, 5)
719n/a NEWLINE '\\n' (1, 5) (1, 6)
720n/a OP '...' (2, 0) (2, 3)
721n/a NEWLINE '\\n' (2, 3) (2, 4)
722n/a NAME 'await' (3, 0) (3, 5)
723n/a """)
724n/a
725n/a self.check_tokenize("async\nawait", """\
726n/a NAME 'async' (1, 0) (1, 5)
727n/a NEWLINE '\\n' (1, 5) (1, 6)
728n/a NAME 'await' (2, 0) (2, 5)
729n/a """)
730n/a
731n/a self.check_tokenize("foo.async + 1", """\
732n/a NAME 'foo' (1, 0) (1, 3)
733n/a OP '.' (1, 3) (1, 4)
734n/a NAME 'async' (1, 4) (1, 9)
735n/a OP '+' (1, 10) (1, 11)
736n/a NUMBER '1' (1, 12) (1, 13)
737n/a """)
738n/a
739n/a self.check_tokenize("async def foo(): pass", """\
740n/a ASYNC 'async' (1, 0) (1, 5)
741n/a NAME 'def' (1, 6) (1, 9)
742n/a NAME 'foo' (1, 10) (1, 13)
743n/a OP '(' (1, 13) (1, 14)
744n/a OP ')' (1, 14) (1, 15)
745n/a OP ':' (1, 15) (1, 16)
746n/a NAME 'pass' (1, 17) (1, 21)
747n/a """)
748n/a
749n/a self.check_tokenize('''\
750n/aasync def foo():
751n/a def foo(await):
752n/a await = 1
753n/a if 1:
754n/a await
755n/aasync += 1
756n/a''', """\
757n/a ASYNC 'async' (1, 0) (1, 5)
758n/a NAME 'def' (1, 6) (1, 9)
759n/a NAME 'foo' (1, 10) (1, 13)
760n/a OP '(' (1, 13) (1, 14)
761n/a OP ')' (1, 14) (1, 15)
762n/a OP ':' (1, 15) (1, 16)
763n/a NEWLINE '\\n' (1, 16) (1, 17)
764n/a INDENT ' ' (2, 0) (2, 2)
765n/a NAME 'def' (2, 2) (2, 5)
766n/a NAME 'foo' (2, 6) (2, 9)
767n/a OP '(' (2, 9) (2, 10)
768n/a AWAIT 'await' (2, 10) (2, 15)
769n/a OP ')' (2, 15) (2, 16)
770n/a OP ':' (2, 16) (2, 17)
771n/a NEWLINE '\\n' (2, 17) (2, 18)
772n/a INDENT ' ' (3, 0) (3, 4)
773n/a AWAIT 'await' (3, 4) (3, 9)
774n/a OP '=' (3, 10) (3, 11)
775n/a NUMBER '1' (3, 12) (3, 13)
776n/a NEWLINE '\\n' (3, 13) (3, 14)
777n/a DEDENT '' (4, 2) (4, 2)
778n/a NAME 'if' (4, 2) (4, 4)
779n/a NUMBER '1' (4, 5) (4, 6)
780n/a OP ':' (4, 6) (4, 7)
781n/a NEWLINE '\\n' (4, 7) (4, 8)
782n/a INDENT ' ' (5, 0) (5, 4)
783n/a AWAIT 'await' (5, 4) (5, 9)
784n/a NEWLINE '\\n' (5, 9) (5, 10)
785n/a DEDENT '' (6, 0) (6, 0)
786n/a DEDENT '' (6, 0) (6, 0)
787n/a NAME 'async' (6, 0) (6, 5)
788n/a OP '+=' (6, 6) (6, 8)
789n/a NUMBER '1' (6, 9) (6, 10)
790n/a NEWLINE '\\n' (6, 10) (6, 11)
791n/a """)
792n/a
793n/a self.check_tokenize('''\
794n/aasync def foo():
795n/a async for i in 1: pass''', """\
796n/a ASYNC 'async' (1, 0) (1, 5)
797n/a NAME 'def' (1, 6) (1, 9)
798n/a NAME 'foo' (1, 10) (1, 13)
799n/a OP '(' (1, 13) (1, 14)
800n/a OP ')' (1, 14) (1, 15)
801n/a OP ':' (1, 15) (1, 16)
802n/a NEWLINE '\\n' (1, 16) (1, 17)
803n/a INDENT ' ' (2, 0) (2, 2)
804n/a ASYNC 'async' (2, 2) (2, 7)
805n/a NAME 'for' (2, 8) (2, 11)
806n/a NAME 'i' (2, 12) (2, 13)
807n/a NAME 'in' (2, 14) (2, 16)
808n/a NUMBER '1' (2, 17) (2, 18)
809n/a OP ':' (2, 18) (2, 19)
810n/a NAME 'pass' (2, 20) (2, 24)
811n/a DEDENT '' (3, 0) (3, 0)
812n/a """)
813n/a
814n/a self.check_tokenize('''async def foo(async): await''', """\
815n/a ASYNC 'async' (1, 0) (1, 5)
816n/a NAME 'def' (1, 6) (1, 9)
817n/a NAME 'foo' (1, 10) (1, 13)
818n/a OP '(' (1, 13) (1, 14)
819n/a ASYNC 'async' (1, 14) (1, 19)
820n/a OP ')' (1, 19) (1, 20)
821n/a OP ':' (1, 20) (1, 21)
822n/a AWAIT 'await' (1, 22) (1, 27)
823n/a """)
824n/a
825n/a self.check_tokenize('''\
826n/adef f():
827n/a
828n/a def baz(): pass
829n/a async def bar(): pass
830n/a
831n/a await = 2''', """\
832n/a NAME 'def' (1, 0) (1, 3)
833n/a NAME 'f' (1, 4) (1, 5)
834n/a OP '(' (1, 5) (1, 6)
835n/a OP ')' (1, 6) (1, 7)
836n/a OP ':' (1, 7) (1, 8)
837n/a NEWLINE '\\n' (1, 8) (1, 9)
838n/a NL '\\n' (2, 0) (2, 1)
839n/a INDENT ' ' (3, 0) (3, 2)
840n/a NAME 'def' (3, 2) (3, 5)
841n/a NAME 'baz' (3, 6) (3, 9)
842n/a OP '(' (3, 9) (3, 10)
843n/a OP ')' (3, 10) (3, 11)
844n/a OP ':' (3, 11) (3, 12)
845n/a NAME 'pass' (3, 13) (3, 17)
846n/a NEWLINE '\\n' (3, 17) (3, 18)
847n/a ASYNC 'async' (4, 2) (4, 7)
848n/a NAME 'def' (4, 8) (4, 11)
849n/a NAME 'bar' (4, 12) (4, 15)
850n/a OP '(' (4, 15) (4, 16)
851n/a OP ')' (4, 16) (4, 17)
852n/a OP ':' (4, 17) (4, 18)
853n/a NAME 'pass' (4, 19) (4, 23)
854n/a NEWLINE '\\n' (4, 23) (4, 24)
855n/a NL '\\n' (5, 0) (5, 1)
856n/a NAME 'await' (6, 2) (6, 7)
857n/a OP '=' (6, 8) (6, 9)
858n/a NUMBER '2' (6, 10) (6, 11)
859n/a DEDENT '' (7, 0) (7, 0)
860n/a """)
861n/a
862n/a self.check_tokenize('''\
863n/aasync def f():
864n/a
865n/a def baz(): pass
866n/a async def bar(): pass
867n/a
868n/a await = 2''', """\
869n/a ASYNC 'async' (1, 0) (1, 5)
870n/a NAME 'def' (1, 6) (1, 9)
871n/a NAME 'f' (1, 10) (1, 11)
872n/a OP '(' (1, 11) (1, 12)
873n/a OP ')' (1, 12) (1, 13)
874n/a OP ':' (1, 13) (1, 14)
875n/a NEWLINE '\\n' (1, 14) (1, 15)
876n/a NL '\\n' (2, 0) (2, 1)
877n/a INDENT ' ' (3, 0) (3, 2)
878n/a NAME 'def' (3, 2) (3, 5)
879n/a NAME 'baz' (3, 6) (3, 9)
880n/a OP '(' (3, 9) (3, 10)
881n/a OP ')' (3, 10) (3, 11)
882n/a OP ':' (3, 11) (3, 12)
883n/a NAME 'pass' (3, 13) (3, 17)
884n/a NEWLINE '\\n' (3, 17) (3, 18)
885n/a ASYNC 'async' (4, 2) (4, 7)
886n/a NAME 'def' (4, 8) (4, 11)
887n/a NAME 'bar' (4, 12) (4, 15)
888n/a OP '(' (4, 15) (4, 16)
889n/a OP ')' (4, 16) (4, 17)
890n/a OP ':' (4, 17) (4, 18)
891n/a NAME 'pass' (4, 19) (4, 23)
892n/a NEWLINE '\\n' (4, 23) (4, 24)
893n/a NL '\\n' (5, 0) (5, 1)
894n/a AWAIT 'await' (6, 2) (6, 7)
895n/a OP '=' (6, 8) (6, 9)
896n/a NUMBER '2' (6, 10) (6, 11)
897n/a DEDENT '' (7, 0) (7, 0)
898n/a """)
899n/a
900n/a
901n/adef decistmt(s):
902n/a result = []
903n/a g = tokenize(BytesIO(s.encode('utf-8')).readline) # tokenize the string
904n/a for toknum, tokval, _, _, _ in g:
905n/a if toknum == NUMBER and '.' in tokval: # replace NUMBER tokens
906n/a result.extend([
907n/a (NAME, 'Decimal'),
908n/a (OP, '('),
909n/a (STRING, repr(tokval)),
910n/a (OP, ')')
911n/a ])
912n/a else:
913n/a result.append((toknum, tokval))
914n/a return untokenize(result).decode('utf-8')
915n/a
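(decistmt() above is the token-rewriting example from the tokenize docs. Run directly — an illustrative call whose result is exactly what test_decistmt below asserts — it also shows the extra space untokenize inserts after each name:)

    print(decistmt('+21.3e-5*-.1234/81.7'))
    # +Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')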
916n/aclass TestMisc(TestCase):
917n/a
918n/a def test_decistmt(self):
919n/a # Substitute Decimals for floats in a string of statements.
920n/a # This is an example from the docs.
921n/a
922n/a from decimal import Decimal
923n/a s = '+21.3e-5*-.1234/81.7'
924n/a self.assertEqual(decistmt(s),
925n/a "+Decimal ('21.3e-5')*-Decimal ('.1234')/Decimal ('81.7')")
926n/a
927n/a # The format of the exponent is inherited from the platform C library.
928n/a # Known cases are "e-007" (Windows) and "e-07" (not Windows). Since
929n/a # we're only showing 11 digits, and the 12th isn't close to 5, the
930n/a # rest of the output should be platform-independent.
931n/a self.assertRegex(repr(eval(s)), '-3.2171603427[0-9]*e-0+7')
932n/a
933n/a # Output from calculations with Decimal should be identical across all
934n/a # platforms.
935n/a self.assertEqual(eval(decistmt(s)),
936n/a Decimal('-3.217160342717258261933904529E-7'))
937n/a
938n/a
939n/aclass TestTokenizerAdheresToPep0263(TestCase):
940n/a """
941n/a Test that tokenizer adheres to the coding behaviour stipulated in PEP 0263.
942n/a """
943n/a
944n/a def _testFile(self, filename):
945n/a path = os.path.join(os.path.dirname(__file__), filename)
946n/a TestRoundtrip.check_roundtrip(self, open(path, 'rb'))
947n/a
948n/a def test_utf8_coding_cookie_and_no_utf8_bom(self):
949n/a f = 'tokenize_tests-utf8-coding-cookie-and-no-utf8-bom-sig.txt'
950n/a self._testFile(f)
951n/a
952n/a def test_latin1_coding_cookie_and_utf8_bom(self):
953n/a """
954n/a As per PEP 0263, if a file starts with a utf-8 BOM signature, the only
955n/a allowed encoding for the comment is 'utf-8'. The text file used in
956n/a this test starts with a BOM signature, but specifies latin1 as the
957n/a coding, so verify that a SyntaxError is raised, which matches the
958n/a behaviour of the interpreter when it encounters a similar condition.
959n/a """
960n/a f = 'tokenize_tests-latin1-coding-cookie-and-utf8-bom-sig.txt'
961n/a self.assertRaises(SyntaxError, self._testFile, f)
962n/a
963n/a def test_no_coding_cookie_and_utf8_bom(self):
964n/a f = 'tokenize_tests-no-coding-cookie-and-utf8-bom-sig-only.txt'
965n/a self._testFile(f)
966n/a
967n/a def test_utf8_coding_cookie_and_utf8_bom(self):
968n/a f = 'tokenize_tests-utf8-coding-cookie-and-utf8-bom-sig.txt'
969n/a self._testFile(f)
970n/a
971n/a def test_bad_coding_cookie(self):
972n/a self.assertRaises(SyntaxError, self._testFile, 'bad_coding.py')
973n/a self.assertRaises(SyntaxError, self._testFile, 'bad_coding2.py')
974n/a
975n/a
976n/aclass Test_Tokenize(TestCase):
977n/a
978n/a def test__tokenize_decodes_with_specified_encoding(self):
979n/a        literal = '"ЉЊЈЁЂ"'
980n/a line = literal.encode('utf-8')
981n/a first = False
982n/a def readline():
983n/a nonlocal first
984n/a if not first:
985n/a first = True
986n/a return line
987n/a else:
988n/a return b''
989n/a
990n/a # skip the initial encoding token and the end token
991n/a tokens = list(_tokenize(readline, encoding='utf-8'))[1:-1]
992n/a        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
993n/a self.assertEqual(tokens, expected_tokens,
994n/a "bytes not decoded with encoding")
995n/a
996n/a def test__tokenize_does_not_decode_with_encoding_none(self):
997n/a        literal = '"ЉЊЈЁЂ"'
998n/a first = False
999n/a def readline():
1000n/a nonlocal first
1001n/a if not first:
1002n/a first = True
1003n/a return literal
1004n/a else:
1005n/a return b''
1006n/a
1007n/a # skip the end token
1008n/a tokens = list(_tokenize(readline, encoding=None))[:-1]
1009n/a        expected_tokens = [(3, '"ЉЊЈЁЂ"', (1, 0), (1, 7), '"ЉЊЈЁЂ"')]
1010n/a self.assertEqual(tokens, expected_tokens,
1011n/a "string not tokenized when encoding is None")
1012n/a
1013n/a
1014n/aclass TestDetectEncoding(TestCase):
1015n/a
1016n/a def get_readline(self, lines):
1017n/a index = 0
1018n/a def readline():
1019n/a nonlocal index
1020n/a if index == len(lines):
1021n/a raise StopIteration
1022n/a line = lines[index]
1023n/a index += 1
1024n/a return line
1025n/a return readline
1026n/a
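(The helper above just feeds detect_encoding() one byte line at a time. A quick sketch of the call it exercises — illustrative only; the values shown follow from the tests below:)

    from io import BytesIO
    from tokenize import detect_encoding

    src = b'# -*- coding: latin-1 -*-\nprint(something)\n'
    encoding, consumed = detect_encoding(BytesIO(src).readline)
    print(encoding)   # 'iso-8859-1'  (latin-1 is normalized; see test_latin1_normalization)
    print(consumed)   # [b'# -*- coding: latin-1 -*-\n']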
1027n/a def test_no_bom_no_encoding_cookie(self):
1028n/a lines = (
1029n/a b'# something\n',
1030n/a b'print(something)\n',
1031n/a b'do_something(else)\n'
1032n/a )
1033n/a encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1034n/a self.assertEqual(encoding, 'utf-8')
1035n/a self.assertEqual(consumed_lines, list(lines[:2]))
1036n/a
1037n/a def test_bom_no_cookie(self):
1038n/a lines = (
1039n/a b'\xef\xbb\xbf# something\n',
1040n/a b'print(something)\n',
1041n/a b'do_something(else)\n'
1042n/a )
1043n/a encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1044n/a self.assertEqual(encoding, 'utf-8-sig')
1045n/a self.assertEqual(consumed_lines,
1046n/a [b'# something\n', b'print(something)\n'])
1047n/a
1048n/a def test_cookie_first_line_no_bom(self):
1049n/a lines = (
1050n/a b'# -*- coding: latin-1 -*-\n',
1051n/a b'print(something)\n',
1052n/a b'do_something(else)\n'
1053n/a )
1054n/a encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1055n/a self.assertEqual(encoding, 'iso-8859-1')
1056n/a self.assertEqual(consumed_lines, [b'# -*- coding: latin-1 -*-\n'])
1057n/a
1058n/a def test_matched_bom_and_cookie_first_line(self):
1059n/a lines = (
1060n/a b'\xef\xbb\xbf# coding=utf-8\n',
1061n/a b'print(something)\n',
1062n/a b'do_something(else)\n'
1063n/a )
1064n/a encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1065n/a self.assertEqual(encoding, 'utf-8-sig')
1066n/a self.assertEqual(consumed_lines, [b'# coding=utf-8\n'])
1067n/a
1068n/a def test_mismatched_bom_and_cookie_first_line_raises_syntaxerror(self):
1069n/a lines = (
1070n/a b'\xef\xbb\xbf# vim: set fileencoding=ascii :\n',
1071n/a b'print(something)\n',
1072n/a b'do_something(else)\n'
1073n/a )
1074n/a readline = self.get_readline(lines)
1075n/a self.assertRaises(SyntaxError, detect_encoding, readline)
1076n/a
1077n/a def test_cookie_second_line_no_bom(self):
1078n/a lines = (
1079n/a b'#! something\n',
1080n/a b'# vim: set fileencoding=ascii :\n',
1081n/a b'print(something)\n',
1082n/a b'do_something(else)\n'
1083n/a )
1084n/a encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1085n/a self.assertEqual(encoding, 'ascii')
1086n/a expected = [b'#! something\n', b'# vim: set fileencoding=ascii :\n']
1087n/a self.assertEqual(consumed_lines, expected)
1088n/a
1089n/a def test_matched_bom_and_cookie_second_line(self):
1090n/a lines = (
1091n/a b'\xef\xbb\xbf#! something\n',
1092n/a b'f# coding=utf-8\n',
1093n/a b'print(something)\n',
1094n/a b'do_something(else)\n'
1095n/a )
1096n/a encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1097n/a self.assertEqual(encoding, 'utf-8-sig')
1098n/a self.assertEqual(consumed_lines,
1099n/a [b'#! something\n', b'f# coding=utf-8\n'])
1100n/a
1101n/a def test_mismatched_bom_and_cookie_second_line_raises_syntaxerror(self):
1102n/a lines = (
1103n/a b'\xef\xbb\xbf#! something\n',
1104n/a b'# vim: set fileencoding=ascii :\n',
1105n/a b'print(something)\n',
1106n/a b'do_something(else)\n'
1107n/a )
1108n/a readline = self.get_readline(lines)
1109n/a self.assertRaises(SyntaxError, detect_encoding, readline)
1110n/a
1111n/a def test_cookie_second_line_noncommented_first_line(self):
1112n/a lines = (
1113n/a b"print('\xc2\xa3')\n",
1114n/a b'# vim: set fileencoding=iso8859-15 :\n',
1115n/a b"print('\xe2\x82\xac')\n"
1116n/a )
1117n/a encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1118n/a self.assertEqual(encoding, 'utf-8')
1119n/a expected = [b"print('\xc2\xa3')\n"]
1120n/a self.assertEqual(consumed_lines, expected)
1121n/a
1122n/a def test_cookie_second_line_commented_first_line(self):
1123n/a lines = (
1124n/a b"#print('\xc2\xa3')\n",
1125n/a b'# vim: set fileencoding=iso8859-15 :\n',
1126n/a b"print('\xe2\x82\xac')\n"
1127n/a )
1128n/a encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1129n/a self.assertEqual(encoding, 'iso8859-15')
1130n/a expected = [b"#print('\xc2\xa3')\n", b'# vim: set fileencoding=iso8859-15 :\n']
1131n/a self.assertEqual(consumed_lines, expected)
1132n/a
1133n/a def test_cookie_second_line_empty_first_line(self):
1134n/a lines = (
1135n/a b'\n',
1136n/a b'# vim: set fileencoding=iso8859-15 :\n',
1137n/a b"print('\xe2\x82\xac')\n"
1138n/a )
1139n/a encoding, consumed_lines = detect_encoding(self.get_readline(lines))
1140n/a self.assertEqual(encoding, 'iso8859-15')
1141n/a expected = [b'\n', b'# vim: set fileencoding=iso8859-15 :\n']
1142n/a self.assertEqual(consumed_lines, expected)
1143n/a
1144n/a def test_latin1_normalization(self):
1145n/a # See get_normal_name() in tokenizer.c.
1146n/a encodings = ("latin-1", "iso-8859-1", "iso-latin-1", "latin-1-unix",
1147n/a "iso-8859-1-unix", "iso-latin-1-mac")
1148n/a for encoding in encodings:
1149n/a for rep in ("-", "_"):
1150n/a enc = encoding.replace("-", rep)
1151n/a lines = (b"#!/usr/bin/python\n",
1152n/a b"# coding: " + enc.encode("ascii") + b"\n",
1153n/a b"print(things)\n",
1154n/a b"do_something += 4\n")
1155n/a rl = self.get_readline(lines)
1156n/a found, consumed_lines = detect_encoding(rl)
1157n/a self.assertEqual(found, "iso-8859-1")
1158n/a
1159n/a def test_syntaxerror_latin1(self):
1160n/a # Issue 14629: need to raise SyntaxError if the first
1161n/a # line(s) have non-UTF-8 characters
1162n/a lines = (
1163n/a b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
1164n/a )
1165n/a readline = self.get_readline(lines)
1166n/a self.assertRaises(SyntaxError, detect_encoding, readline)
1167n/a
1168n/a
1169n/a def test_utf8_normalization(self):
1170n/a # See get_normal_name() in tokenizer.c.
1171n/a encodings = ("utf-8", "utf-8-mac", "utf-8-unix")
1172n/a for encoding in encodings:
1173n/a for rep in ("-", "_"):
1174n/a enc = encoding.replace("-", rep)
1175n/a lines = (b"#!/usr/bin/python\n",
1176n/a b"# coding: " + enc.encode("ascii") + b"\n",
1177n/a b"1 + 3\n")
1178n/a rl = self.get_readline(lines)
1179n/a found, consumed_lines = detect_encoding(rl)
1180n/a self.assertEqual(found, "utf-8")
1181n/a
1182n/a def test_short_files(self):
1183n/a readline = self.get_readline((b'print(something)\n',))
1184n/a encoding, consumed_lines = detect_encoding(readline)
1185n/a self.assertEqual(encoding, 'utf-8')
1186n/a self.assertEqual(consumed_lines, [b'print(something)\n'])
1187n/a
1188n/a encoding, consumed_lines = detect_encoding(self.get_readline(()))
1189n/a self.assertEqual(encoding, 'utf-8')
1190n/a self.assertEqual(consumed_lines, [])
1191n/a
1192n/a readline = self.get_readline((b'\xef\xbb\xbfprint(something)\n',))
1193n/a encoding, consumed_lines = detect_encoding(readline)
1194n/a self.assertEqual(encoding, 'utf-8-sig')
1195n/a self.assertEqual(consumed_lines, [b'print(something)\n'])
1196n/a
1197n/a readline = self.get_readline((b'\xef\xbb\xbf',))
1198n/a encoding, consumed_lines = detect_encoding(readline)
1199n/a self.assertEqual(encoding, 'utf-8-sig')
1200n/a self.assertEqual(consumed_lines, [])
1201n/a
1202n/a readline = self.get_readline((b'# coding: bad\n',))
1203n/a self.assertRaises(SyntaxError, detect_encoding, readline)
1204n/a
1205n/a def test_false_encoding(self):
1206n/a # Issue 18873: "Encoding" detected in non-comment lines
1207n/a readline = self.get_readline((b'print("#coding=fake")',))
1208n/a encoding, consumed_lines = detect_encoding(readline)
1209n/a self.assertEqual(encoding, 'utf-8')
1210n/a self.assertEqual(consumed_lines, [b'print("#coding=fake")'])
1211n/a
1212n/a def test_open(self):
1213n/a filename = support.TESTFN + '.py'
1214n/a self.addCleanup(support.unlink, filename)
1215n/a
1216n/a # test coding cookie
1217n/a for encoding in ('iso-8859-15', 'utf-8'):
1218n/a with open(filename, 'w', encoding=encoding) as fp:
1219n/a print("# coding: %s" % encoding, file=fp)
1220n/a print("print('euro:\u20ac')", file=fp)
1221n/a with tokenize_open(filename) as fp:
1222n/a self.assertEqual(fp.encoding, encoding)
1223n/a self.assertEqual(fp.mode, 'r')
1224n/a
1225n/a # test BOM (no coding cookie)
1226n/a with open(filename, 'w', encoding='utf-8-sig') as fp:
1227n/a print("print('euro:\u20ac')", file=fp)
1228n/a with tokenize_open(filename) as fp:
1229n/a self.assertEqual(fp.encoding, 'utf-8-sig')
1230n/a self.assertEqual(fp.mode, 'r')
1231n/a
1232n/a def test_filename_in_exception(self):
1233n/a # When possible, include the file name in the exception.
1234n/a path = 'some_file_path'
1235n/a lines = (
1236n/a b'print("\xdf")', # Latin-1: LATIN SMALL LETTER SHARP S
1237n/a )
1238n/a class Bunk:
1239n/a def __init__(self, lines, path):
1240n/a self.name = path
1241n/a self._lines = lines
1242n/a self._index = 0
1243n/a
1244n/a def readline(self):
1245n/a if self._index == len(lines):
1246n/a raise StopIteration
1247n/a line = lines[self._index]
1248n/a self._index += 1
1249n/a return line
1250n/a
1251n/a with self.assertRaises(SyntaxError):
1252n/a ins = Bunk(lines, path)
1253n/a # Make sure lacking a name isn't an issue.
1254n/a del ins.name
1255n/a detect_encoding(ins.readline)
1256n/a with self.assertRaisesRegex(SyntaxError, '.*{}'.format(path)):
1257n/a ins = Bunk(lines, path)
1258n/a detect_encoding(ins.readline)
1259n/a
1260n/a def test_open_error(self):
1261n/a # Issue #23840: open() must close the binary file on error
1262n/a m = BytesIO(b'#coding:xxx')
1263n/a with mock.patch('tokenize._builtin_open', return_value=m):
1264n/a self.assertRaises(SyntaxError, tokenize_open, 'foobar')
1265n/a self.assertTrue(m.closed)
1266n/a
1267n/a
1268n/aclass TestTokenize(TestCase):
1269n/a
1270n/a def test_tokenize(self):
1271n/a import tokenize as tokenize_module
1272n/a encoding = object()
1273n/a encoding_used = None
1274n/a def mock_detect_encoding(readline):
1275n/a return encoding, [b'first', b'second']
1276n/a
1277n/a def mock__tokenize(readline, encoding):
1278n/a nonlocal encoding_used
1279n/a encoding_used = encoding
1280n/a out = []
1281n/a while True:
1282n/a next_line = readline()
1283n/a if next_line:
1284n/a out.append(next_line)
1285n/a continue
1286n/a return out
1287n/a
1288n/a counter = 0
1289n/a def mock_readline():
1290n/a nonlocal counter
1291n/a counter += 1
1292n/a if counter == 5:
1293n/a return b''
1294n/a return str(counter).encode()
1295n/a
1296n/a orig_detect_encoding = tokenize_module.detect_encoding
1297n/a orig__tokenize = tokenize_module._tokenize
1298n/a tokenize_module.detect_encoding = mock_detect_encoding
1299n/a tokenize_module._tokenize = mock__tokenize
1300n/a try:
1301n/a results = tokenize(mock_readline)
1302n/a self.assertEqual(list(results),
1303n/a [b'first', b'second', b'1', b'2', b'3', b'4'])
1304n/a finally:
1305n/a tokenize_module.detect_encoding = orig_detect_encoding
1306n/a tokenize_module._tokenize = orig__tokenize
1307n/a
1308n/a self.assertTrue(encoding_used, encoding)
1309n/a
1310n/a def test_oneline_defs(self):
1311n/a buf = []
1312n/a for i in range(500):
1313n/a buf.append('def i{i}(): return {i}'.format(i=i))
1314n/a buf.append('OK')
1315n/a buf = '\n'.join(buf)
1316n/a
1317n/a        # Test that 500 consecutive, one-line defs tokenize OK
1318n/a toks = list(tokenize(BytesIO(buf.encode('utf-8')).readline))
1319n/a self.assertEqual(toks[-2].string, 'OK') # [-1] is always ENDMARKER
1320n/a
1321n/a def assertExactTypeEqual(self, opstr, *optypes):
1322n/a tokens = list(tokenize(BytesIO(opstr.encode('utf-8')).readline))
1323n/a num_optypes = len(optypes)
1324n/a self.assertEqual(len(tokens), 2 + num_optypes)
1325n/a self.assertEqual(token.tok_name[tokens[0].exact_type],
1326n/a token.tok_name[ENCODING])
1327n/a for i in range(num_optypes):
1328n/a self.assertEqual(token.tok_name[tokens[i + 1].exact_type],
1329n/a token.tok_name[optypes[i]])
1330n/a self.assertEqual(token.tok_name[tokens[1 + num_optypes].exact_type],
1331n/a token.tok_name[token.ENDMARKER])
1332n/a
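(A small sketch, not from the file, of the type vs. exact_type distinction this helper checks: operators all tokenize with the generic OP type, while exact_type recovers the specific operator constant.)

    from io import BytesIO
    from tokenize import tokenize
    import token

    toks = list(tokenize(BytesIO(b'x += 1').readline))
    op = toks[2]                          # ENCODING, NAME 'x', then the operator
    print(token.tok_name[op.type])        # 'OP'
    print(token.tok_name[op.exact_type])  # 'PLUSEQUAL'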
1333n/a def test_exact_type(self):
1334n/a self.assertExactTypeEqual('()', token.LPAR, token.RPAR)
1335n/a self.assertExactTypeEqual('[]', token.LSQB, token.RSQB)
1336n/a self.assertExactTypeEqual(':', token.COLON)
1337n/a self.assertExactTypeEqual(',', token.COMMA)
1338n/a self.assertExactTypeEqual(';', token.SEMI)
1339n/a self.assertExactTypeEqual('+', token.PLUS)
1340n/a self.assertExactTypeEqual('-', token.MINUS)
1341n/a self.assertExactTypeEqual('*', token.STAR)
1342n/a self.assertExactTypeEqual('/', token.SLASH)
1343n/a self.assertExactTypeEqual('|', token.VBAR)
1344n/a self.assertExactTypeEqual('&', token.AMPER)
1345n/a self.assertExactTypeEqual('<', token.LESS)
1346n/a self.assertExactTypeEqual('>', token.GREATER)
1347n/a self.assertExactTypeEqual('=', token.EQUAL)
1348n/a self.assertExactTypeEqual('.', token.DOT)
1349n/a self.assertExactTypeEqual('%', token.PERCENT)
1350n/a self.assertExactTypeEqual('{}', token.LBRACE, token.RBRACE)
1351n/a self.assertExactTypeEqual('==', token.EQEQUAL)
1352n/a self.assertExactTypeEqual('!=', token.NOTEQUAL)
1353n/a self.assertExactTypeEqual('<=', token.LESSEQUAL)
1354n/a self.assertExactTypeEqual('>=', token.GREATEREQUAL)
1355n/a self.assertExactTypeEqual('~', token.TILDE)
1356n/a self.assertExactTypeEqual('^', token.CIRCUMFLEX)
1357n/a self.assertExactTypeEqual('<<', token.LEFTSHIFT)
1358n/a self.assertExactTypeEqual('>>', token.RIGHTSHIFT)
1359n/a self.assertExactTypeEqual('**', token.DOUBLESTAR)
1360n/a self.assertExactTypeEqual('+=', token.PLUSEQUAL)
1361n/a self.assertExactTypeEqual('-=', token.MINEQUAL)
1362n/a self.assertExactTypeEqual('*=', token.STAREQUAL)
1363n/a self.assertExactTypeEqual('/=', token.SLASHEQUAL)
1364n/a self.assertExactTypeEqual('%=', token.PERCENTEQUAL)
1365n/a self.assertExactTypeEqual('&=', token.AMPEREQUAL)
1366n/a self.assertExactTypeEqual('|=', token.VBAREQUAL)
1367n/a self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1368n/a self.assertExactTypeEqual('^=', token.CIRCUMFLEXEQUAL)
1369n/a self.assertExactTypeEqual('<<=', token.LEFTSHIFTEQUAL)
1370n/a self.assertExactTypeEqual('>>=', token.RIGHTSHIFTEQUAL)
1371n/a self.assertExactTypeEqual('**=', token.DOUBLESTAREQUAL)
1372n/a self.assertExactTypeEqual('//', token.DOUBLESLASH)
1373n/a self.assertExactTypeEqual('//=', token.DOUBLESLASHEQUAL)
1374n/a self.assertExactTypeEqual('@', token.AT)
1375n/a self.assertExactTypeEqual('@=', token.ATEQUAL)
1376n/a
1377n/a self.assertExactTypeEqual('a**2+b**2==c**2',
1378n/a NAME, token.DOUBLESTAR, NUMBER,
1379n/a token.PLUS,
1380n/a NAME, token.DOUBLESTAR, NUMBER,
1381n/a token.EQEQUAL,
1382n/a NAME, token.DOUBLESTAR, NUMBER)
1383n/a self.assertExactTypeEqual('{1, 2, 3}',
1384n/a token.LBRACE,
1385n/a token.NUMBER, token.COMMA,
1386n/a token.NUMBER, token.COMMA,
1387n/a token.NUMBER,
1388n/a token.RBRACE)
1389n/a self.assertExactTypeEqual('^(x & 0x1)',
1390n/a token.CIRCUMFLEX,
1391n/a token.LPAR,
1392n/a token.NAME, token.AMPER, token.NUMBER,
1393n/a token.RPAR)
1394n/a
1395n/a def test_pathological_trailing_whitespace(self):
1396n/a # See http://bugs.python.org/issue16152
1397n/a self.assertExactTypeEqual('@ ', token.AT)
1398n/a
1399n/a
1400n/aclass UntokenizeTest(TestCase):
1401n/a
1402n/a def test_bad_input_order(self):
1403n/a # raise if previous row
1404n/a u = Untokenizer()
1405n/a u.prev_row = 2
1406n/a u.prev_col = 2
1407n/a with self.assertRaises(ValueError) as cm:
1408n/a u.add_whitespace((1,3))
1409n/a self.assertEqual(cm.exception.args[0],
1410n/a 'start (1,3) precedes previous end (2,2)')
1411n/a # raise if previous column in row
1412n/a self.assertRaises(ValueError, u.add_whitespace, (2,1))
1413n/a
1414n/a def test_backslash_continuation(self):
1415n/a # The problem is that <whitespace>\<newline> leaves no token
1416n/a u = Untokenizer()
1417n/a u.prev_row = 1
1418n/a u.prev_col = 1
1419n/a u.tokens = []
1420n/a u.add_whitespace((2, 0))
1421n/a self.assertEqual(u.tokens, ['\\\n'])
1422n/a u.prev_row = 2
1423n/a u.add_whitespace((4, 4))
1424n/a self.assertEqual(u.tokens, ['\\\n', '\\\n\\\n', ' '])
1425n/a TestRoundtrip.check_roundtrip(self, 'a\n b\n c\n \\\n c\n')
1426n/a
1427n/a def test_iter_compat(self):
1428n/a u = Untokenizer()
1429n/a token = (NAME, 'Hello')
1430n/a tokens = [(ENCODING, 'utf-8'), token]
1431n/a u.compat(token, iter([]))
1432n/a self.assertEqual(u.tokens, ["Hello "])
1433n/a u = Untokenizer()
1434n/a self.assertEqual(u.untokenize(iter([token])), 'Hello ')
1435n/a u = Untokenizer()
1436n/a self.assertEqual(u.untokenize(iter(tokens)), 'Hello ')
1437n/a self.assertEqual(u.encoding, 'utf-8')
1438n/a self.assertEqual(untokenize(iter(tokens)), b'Hello ')
1439n/a
1440n/a
1441n/aclass TestRoundtrip(TestCase):
1442n/a
1443n/a def check_roundtrip(self, f):
1444n/a """
1445n/a Test roundtrip for `untokenize`. `f` is an open file or a string.
1446n/a The source code in f is tokenized to both 5- and 2-tuples.
1447n/a Both sequences are converted back to source code via
1448n/a tokenize.untokenize(), and the latter tokenized again to 2-tuples.
1449n/a The test fails if the 3 pair tokenizations do not match.
1450n/a
1451n/a When untokenize bugs are fixed, untokenize with 5-tuples should
1452n/a reproduce code that does not contain a backslash continuation
1453n/a following spaces. A proper test should test this.
1454n/a """
1455n/a # Get source code and original tokenizations
1456n/a if isinstance(f, str):
1457n/a code = f.encode('utf-8')
1458n/a else:
1459n/a code = f.read()
1460n/a f.close()
1461n/a readline = iter(code.splitlines(keepends=True)).__next__
1462n/a tokens5 = list(tokenize(readline))
1463n/a tokens2 = [tok[:2] for tok in tokens5]
1464n/a # Reproduce tokens2 from pairs
1465n/a bytes_from2 = untokenize(tokens2)
1466n/a readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__
1467n/a tokens2_from2 = [tok[:2] for tok in tokenize(readline2)]
1468n/a self.assertEqual(tokens2_from2, tokens2)
1469n/a # Reproduce tokens2 from 5-tuples
1470n/a bytes_from5 = untokenize(tokens5)
1471n/a readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__
1472n/a tokens2_from5 = [tok[:2] for tok in tokenize(readline5)]
1473n/a self.assertEqual(tokens2_from5, tokens2)
1474n/a
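(A condensed sketch of the two untokenize modes compared above, with an assumed sample input: 2-tuples make untokenize regenerate whitespace, while full 5-tuples let it follow the recorded positions, so only the token sequence — not the exact spacing — is guaranteed to survive a roundtrip.)

    from io import BytesIO
    from tokenize import tokenize, untokenize

    code = b"if x == 1 :\n    print(x)\n"
    toks = list(tokenize(BytesIO(code).readline))

    # Compatibility (2-tuple) mode: whitespace is regenerated.
    print(untokenize(t[:2] for t in toks).decode('utf-8'))
    # Full 5-tuple mode: positions are honoured, so this simple source comes back unchanged.
    print(untokenize(toks).decode('utf-8'))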
1475n/a def test_roundtrip(self):
1476n/a # There are some standard formatting practices that are easy to get right.
1477n/a
1478n/a self.check_roundtrip("if x == 1:\n"
1479n/a " print(x)\n")
1480n/a self.check_roundtrip("# This is a comment\n"
1481n/a "# This also")
1482n/a
1483n/a # Some people use different formatting conventions, which makes
1484n/a # untokenize a little trickier. Note that this test involves trailing
1485n/a # whitespace after the colon. Note that we use hex escapes to make the
1486n/a # two trailing blanks apparent in the expected output.
1487n/a
1488n/a self.check_roundtrip("if x == 1 : \n"
1489n/a " print(x)\n")
1490n/a fn = support.findfile("tokenize_tests.txt")
1491n/a with open(fn, 'rb') as f:
1492n/a self.check_roundtrip(f)
1493n/a self.check_roundtrip("if x == 1:\n"
1494n/a " # A comment by itself.\n"
1495n/a " print(x) # Comment here, too.\n"
1496n/a " # Another comment.\n"
1497n/a "after_if = True\n")
1498n/a self.check_roundtrip("if (x # The comments need to go in the right place\n"
1499n/a " == 1):\n"
1500n/a " print('x==1')\n")
1501n/a self.check_roundtrip("class Test: # A comment here\n"
1502n/a " # A comment with weird indent\n"
1503n/a " after_com = 5\n"
1504n/a " def x(m): return m*5 # a one liner\n"
1505n/a " def y(m): # A whitespace after the colon\n"
1506n/a " return y*4 # 3-space indent\n")
1507n/a
1508n/a # Some error-handling code
1509n/a self.check_roundtrip("try: import somemodule\n"
1510n/a "except ImportError: # comment\n"
1511n/a " print('Can not import' # comment2\n)"
1512n/a "else: print('Loaded')\n")
1513n/a
1514n/a def test_continuation(self):
1515n/a # Balancing continuation
1516n/a self.check_roundtrip("a = (3,4, \n"
1517n/a "5,6)\n"
1518n/a "y = [3, 4,\n"
1519n/a "5]\n"
1520n/a "z = {'a': 5,\n"
1521n/a "'b':15, 'c':True}\n"
1522n/a "x = len(y) + 5 - a[\n"
1523n/a "3] - a[2]\n"
1524n/a "+ len(z) - z[\n"
1525n/a "'b']\n")
1526n/a
1527n/a def test_backslash_continuation(self):
1528n/a # Backslash means line continuation, except for comments
1529n/a self.check_roundtrip("x=1+\\\n"
1530n/a "1\n"
1531n/a "# This is a comment\\\n"
1532n/a "# This also\n")
1533n/a self.check_roundtrip("# Comment \\\n"
1534n/a "x = 0")
1535n/a
1536n/a def test_string_concatenation(self):
1537n/a # Two string literals on the same line
1538n/a self.check_roundtrip("'' ''")
1539n/a
1540n/a def test_random_files(self):
1541n/a # Test roundtrip on random python modules.
1542n/a # pass the '-ucpu' option to process the full directory.
1543n/a
1544n/a import glob, random
1545n/a fn = support.findfile("tokenize_tests.txt")
1546n/a tempdir = os.path.dirname(fn) or os.curdir
1547n/a testfiles = glob.glob(os.path.join(tempdir, "test*.py"))
1548n/a
1549n/a # Tokenize is broken on test_pep3131.py because regular expressions are
1550n/a # broken on the obscure unicode identifiers in it. *sigh*
1551n/a # With roundtrip extended to test the 5-tuple mode of untokenize,
1552n/a # 7 more testfiles fail. Remove them also until the failure is diagnosed.
1553n/a
1554n/a testfiles.remove(os.path.join(tempdir, "test_unicode_identifiers.py"))
1555n/a for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'):
1556n/a testfiles.remove(os.path.join(tempdir, "test_%s.py") % f)
1557n/a
1558n/a if not support.is_resource_enabled("cpu"):
1559n/a testfiles = random.sample(testfiles, 10)
1560n/a
1561n/a for testfile in testfiles:
1562n/a with open(testfile, 'rb') as f:
1563n/a with self.subTest(file=testfile):
1564n/a self.check_roundtrip(f)
1565n/a
1566n/a
1567n/a def roundtrip(self, code):
1568n/a if isinstance(code, str):
1569n/a code = code.encode('utf-8')
1570n/a return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
1571n/a
1572n/a def test_indentation_semantics_retained(self):
1573n/a """
1574n/a Ensure that although whitespace might be mutated in a roundtrip,
1575n/a the semantic meaning of the indentation remains consistent.
1576n/a """
1577n/a code = "if False:\n\tx=3\n\tx=3\n"
1578n/a codelines = self.roundtrip(code).split('\n')
1579n/a self.assertEqual(codelines[1], codelines[2])
1580n/a self.check_roundtrip(code)
1581n/a
1582n/a
1583n/aif __name__ == "__main__":
1584n/a unittest.main()