»Core Development>Code coverage>Lib/test/test_strlit.py

Python code coverage for Lib/test/test_strlit.py

# | count | content
1n/ar"""Test correct treatment of various string literals by the parser.
2n/a
3n/aThere are four types of string literals:
4n/a
5n/a 'abc' -- normal str
6n/a r'abc' -- raw str
7n/a b'xyz' -- normal bytes
8n/a br'xyz' | rb'xyz' -- raw bytes
9n/a
10n/aThe difference between normal and raw strings is of course that in a
11n/araw string, \ escapes (while still used to determine the end of the
12n/aliteral) are not interpreted, so that r'\x00' contains four
13n/acharacters: a backslash, an x, and two zeros; while '\x00' contains a
14n/asingle character (code point zero).
15n/a
16n/aThe tricky thing is what should happen when non-ASCII bytes are used
17n/ainside literals. For bytes literals, this is considered illegal. But
18n/afor str literals, those bytes are supposed to be decoded using the
19n/aencoding declared for the file (UTF-8 by default).
20n/a
21n/aWe have to test this with various file encodings. We also test it with
22n/aexec()/eval(), which uses a different code path.
23n/a
24n/aThis file is really about correct treatment of encodings and
25n/abackslashes. It doesn't concern itself with issues like single
26n/avs. double quotes or singly- vs. triply-quoted strings: that's dealt
27n/awith elsewhere (I assume).
28n/a"""
29n/a
30n/aimport os
31n/aimport sys
32n/aimport shutil
33n/aimport tempfile
34n/aimport unittest
35n/a
36n/a
# Source text of a throwaway module that check_encoding() writes to disk
# and imports.  "%s" is filled with the encoding name to form the coding
# cookie.  The template is a raw string so that every backslash below
# reaches the generated file literally; the body itself is pure printable
# ASCII (verified by test_template), so it encodes identically in every
# encoding exercised here.
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""
57n/a
58n/a
def byte(i):
    """Return a bytes object of length one whose only element is *i*."""
    return bytes((i,))
61n/a
62n/a
63n/aclass TestLiterals(unittest.TestCase):
64n/a
65n/a def setUp(self):
66n/a self.save_path = sys.path[:]
67n/a self.tmpdir = tempfile.mkdtemp()
68n/a sys.path.insert(0, self.tmpdir)
69n/a
70n/a def tearDown(self):
71n/a sys.path[:] = self.save_path
72n/a shutil.rmtree(self.tmpdir, ignore_errors=True)
73n/a
74n/a def test_template(self):
75n/a # Check that the template doesn't contain any non-printables
76n/a # except for \n.
77n/a for c in TEMPLATE:
78n/a assert c == '\n' or ' ' <= c <= '~', repr(c)
79n/a
80n/a def test_eval_str_normal(self):
81n/a self.assertEqual(eval(""" 'x' """), 'x')
82n/a self.assertEqual(eval(r""" '\x01' """), chr(1))
83n/a self.assertEqual(eval(""" '\x01' """), chr(1))
84n/a self.assertEqual(eval(r""" '\x81' """), chr(0x81))
85n/a self.assertEqual(eval(""" '\x81' """), chr(0x81))
86n/a self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
87n/a self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
88n/a self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
89n/a self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))
90n/a
91n/a def test_eval_str_incomplete(self):
92n/a self.assertRaises(SyntaxError, eval, r""" '\x' """)
93n/a self.assertRaises(SyntaxError, eval, r""" '\x0' """)
94n/a self.assertRaises(SyntaxError, eval, r""" '\u' """)
95n/a self.assertRaises(SyntaxError, eval, r""" '\u0' """)
96n/a self.assertRaises(SyntaxError, eval, r""" '\u00' """)
97n/a self.assertRaises(SyntaxError, eval, r""" '\u000' """)
98n/a self.assertRaises(SyntaxError, eval, r""" '\U' """)
99n/a self.assertRaises(SyntaxError, eval, r""" '\U0' """)
100n/a self.assertRaises(SyntaxError, eval, r""" '\U00' """)
101n/a self.assertRaises(SyntaxError, eval, r""" '\U000' """)
102n/a self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
103n/a self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
104n/a self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
105n/a self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)
106n/a
107n/a def test_eval_str_raw(self):
108n/a self.assertEqual(eval(""" r'x' """), 'x')
109n/a self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
110n/a self.assertEqual(eval(""" r'\x01' """), chr(1))
111n/a self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
112n/a self.assertEqual(eval(""" r'\x81' """), chr(0x81))
113n/a self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
114n/a self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
115n/a self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
116n/a self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))
117n/a
118n/a def test_eval_bytes_normal(self):
119n/a self.assertEqual(eval(""" b'x' """), b'x')
120n/a self.assertEqual(eval(r""" b'\x01' """), byte(1))
121n/a self.assertEqual(eval(""" b'\x01' """), byte(1))
122n/a self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
123n/a self.assertRaises(SyntaxError, eval, """ b'\x81' """)
124n/a self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
125n/a self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
126n/a self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
127n/a self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)
128n/a
129n/a def test_eval_bytes_incomplete(self):
130n/a self.assertRaises(SyntaxError, eval, r""" b'\x' """)
131n/a self.assertRaises(SyntaxError, eval, r""" b'\x0' """)
132n/a
133n/a def test_eval_bytes_raw(self):
134n/a self.assertEqual(eval(""" br'x' """), b'x')
135n/a self.assertEqual(eval(""" rb'x' """), b'x')
136n/a self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
137n/a self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
138n/a self.assertEqual(eval(""" br'\x01' """), byte(1))
139n/a self.assertEqual(eval(""" rb'\x01' """), byte(1))
140n/a self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
141n/a self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
142n/a self.assertRaises(SyntaxError, eval, """ br'\x81' """)
143n/a self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
144n/a self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
145n/a self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
146n/a self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
147n/a self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
148n/a self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
149n/a self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
150n/a self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
151n/a self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
152n/a self.assertRaises(SyntaxError, eval, """ bb'' """)
153n/a self.assertRaises(SyntaxError, eval, """ rr'' """)
154n/a self.assertRaises(SyntaxError, eval, """ brr'' """)
155n/a self.assertRaises(SyntaxError, eval, """ bbr'' """)
156n/a self.assertRaises(SyntaxError, eval, """ rrb'' """)
157n/a self.assertRaises(SyntaxError, eval, """ rbb'' """)
158n/a
159n/a def test_eval_str_u(self):
160n/a self.assertEqual(eval(""" u'x' """), 'x')
161n/a self.assertEqual(eval(""" U'\u00e4' """), 'ä')
162n/a self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
163n/a self.assertRaises(SyntaxError, eval, """ ur'' """)
164n/a self.assertRaises(SyntaxError, eval, """ ru'' """)
165n/a self.assertRaises(SyntaxError, eval, """ bu'' """)
166n/a self.assertRaises(SyntaxError, eval, """ ub'' """)
167n/a
168n/a def check_encoding(self, encoding, extra=""):
169n/a modname = "xx_" + encoding.replace("-", "_")
170n/a fn = os.path.join(self.tmpdir, modname + ".py")
171n/a f = open(fn, "w", encoding=encoding)
172n/a try:
173n/a f.write(TEMPLATE % encoding)
174n/a f.write(extra)
175n/a finally:
176n/a f.close()
177n/a __import__(modname)
178n/a del sys.modules[modname]
179n/a
180n/a def test_file_utf_8(self):
181n/a extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
182n/a self.check_encoding("utf-8", extra)
183n/a
184n/a def test_file_utf_8_error(self):
185n/a extra = "b'\x80'\n"
186n/a self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)
187n/a
188n/a def test_file_utf8(self):
189n/a self.check_encoding("utf-8")
190n/a
191n/a def test_file_iso_8859_1(self):
192n/a self.check_encoding("iso-8859-1")
193n/a
194n/a def test_file_latin_1(self):
195n/a self.check_encoding("latin-1")
196n/a
197n/a def test_file_latin9(self):
198n/a self.check_encoding("latin9")
199n/a
200n/a
# Run the tests when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()