r"""Test correct treatment of various string literals by the parser.

There are four types of string literals:

    'abc'             -- normal str
    r'abc'            -- raw str
    b'xyz'            -- normal bytes
    br'xyz' | rb'xyz' -- raw bytes

The difference between normal and raw strings is of course that in a
raw string, \ escapes (while still used to determine the end of the
literal) are not interpreted, so that r'\x00' contains four
characters: a backslash, an x, and two zeros; while '\x00' contains a
single character (code point zero).

The tricky thing is what should happen when non-ASCII bytes are used
inside literals. For bytes literals, this is considered illegal. But
for str literals, those bytes are supposed to be decoded using the
encoding declared for the file (UTF-8 by default).

We have to test this with various file encodings. We also test it with
exec()/eval(), which uses a different code path.

This file is really about correct treatment of encodings and
backslashes. It doesn't concern itself with issues like single
vs. double quotes or singly- vs. triply-quoted strings: that's dealt
with elsewhere (I assume).
"""

import importlib
import os
import shutil
import sys
import tempfile
import unittest
import warnings


# Source text of the module that TestLiterals.check_encoding() writes to
# disk and imports.  The "%s" on the first line is replaced with the
# encoding name.  The literal is raw, so the backslash escapes below are
# stored verbatim and only interpreted when the generated file is compiled.
# It must stay printable ASCII plus newlines (see test_template).
TEMPLATE = r"""# coding: %s
a = 'x'
assert ord(a) == 120
b = '\x01'
assert ord(b) == 1
c = r'\x01'
assert list(map(ord, c)) == [92, 120, 48, 49]
d = '\x81'
assert ord(d) == 0x81
e = r'\x81'
assert list(map(ord, e)) == [92, 120, 56, 49]
f = '\u1881'
assert ord(f) == 0x1881
g = r'\u1881'
assert list(map(ord, g)) == [92, 117, 49, 56, 56, 49]
h = '\U0001d120'
assert ord(h) == 0x1d120
i = r'\U0001d120'
assert list(map(ord, i)) == [92, 85, 48, 48, 48, 49, 100, 49, 50, 48]
"""


def byte(i):
    """Return a bytes object of length one whose single byte has value *i*.

    The value is passed to bytes() inside a one-element list, so *i* must
    be an integer accepted by bytes() (0 through 255).
    """
    return bytes([i])


class TestLiterals(unittest.TestCase):
    """Check str and bytes literal handling via eval() and via import.

    The test_eval_* methods feed small literal expressions to eval().
    The test_file_* methods write TEMPLATE to a temporary module using a
    given encoding and import it, so that the asserts inside the template
    run against a file decoded with that encoding.
    """

    def setUp(self):
        # Put a scratch directory first on sys.path so the modules written
        # by check_encoding() can be imported by name.
        self.save_path = sys.path[:]
        self.tmpdir = tempfile.mkdtemp()
        sys.path.insert(0, self.tmpdir)

    def tearDown(self):
        # Restore sys.path in place and discard the scratch directory.
        sys.path[:] = self.save_path
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_template(self):
        # Check that the template doesn't contain any non-printables
        # except for \n.
        for c in TEMPLATE:
            # assertTrue instead of a bare assert so that the check is
            # not stripped when the tests run under "python -O".
            self.assertTrue(c == '\n' or ' ' <= c <= '~', repr(c))

    def test_eval_str_normal(self):
        # Each escape is tried twice: once raw (eval sees the backslash
        # sequence) and once cooked (eval sees the actual character).
        self.assertEqual(eval(""" 'x' """), 'x')
        self.assertEqual(eval(r""" '\x01' """), chr(1))
        self.assertEqual(eval(""" '\x01' """), chr(1))
        self.assertEqual(eval(r""" '\x81' """), chr(0x81))
        self.assertEqual(eval(""" '\x81' """), chr(0x81))
        self.assertEqual(eval(r""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(""" '\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" '\U0001d120' """), chr(0x1d120))
        self.assertEqual(eval(""" '\U0001d120' """), chr(0x1d120))

    def test_eval_str_incomplete(self):
        # Truncated \x, \u and \U escapes must be rejected.
        self.assertRaises(SyntaxError, eval, r""" '\x' """)
        self.assertRaises(SyntaxError, eval, r""" '\x0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u' """)
        self.assertRaises(SyntaxError, eval, r""" '\u0' """)
        self.assertRaises(SyntaxError, eval, r""" '\u00' """)
        self.assertRaises(SyntaxError, eval, r""" '\u000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U00000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U000000' """)
        self.assertRaises(SyntaxError, eval, r""" '\U0000000' """)

    def test_eval_str_invalid_escape(self):
        # NOTE(review): these checks expect DeprecationWarning for an
        # unrecognized escape; newer interpreters may report a different
        # warning category -- confirm against the target Python version.
        for b in range(1, 128):
            # Skip the characters that are meaningful after a backslash
            # in a str literal.
            if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))

        # The warning must point at the line holding the bad escape.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 2)

        # When the warning is turned into an error it surfaces as a
        # SyntaxError carrying the same location.
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 2)

    def test_eval_str_raw(self):
        # In a raw literal the backslash sequence is kept verbatim; an
        # already-decoded character passes through unchanged.
        self.assertEqual(eval(""" r'x' """), 'x')
        self.assertEqual(eval(r""" r'\x01' """), '\\' + 'x01')
        self.assertEqual(eval(""" r'\x01' """), chr(1))
        self.assertEqual(eval(r""" r'\x81' """), '\\' + 'x81')
        self.assertEqual(eval(""" r'\x81' """), chr(0x81))
        self.assertEqual(eval(r""" r'\u1881' """), '\\' + 'u1881')
        self.assertEqual(eval(""" r'\u1881' """), chr(0x1881))
        self.assertEqual(eval(r""" r'\U0001d120' """), '\\' + 'U0001d120')
        self.assertEqual(eval(""" r'\U0001d120' """), chr(0x1d120))

    def test_eval_bytes_normal(self):
        # Non-ASCII characters placed directly inside a bytes literal
        # are a SyntaxError; escapes that spell them out are fine.
        self.assertEqual(eval(""" b'x' """), b'x')
        self.assertEqual(eval(r""" b'\x01' """), byte(1))
        self.assertEqual(eval(""" b'\x01' """), byte(1))
        self.assertEqual(eval(r""" b'\x81' """), byte(0x81))
        self.assertRaises(SyntaxError, eval, """ b'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b'\\' + b'u1881')
        self.assertRaises(SyntaxError, eval, """ b'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b'\\' + b'U0001d120')
        self.assertRaises(SyntaxError, eval, """ b'\U0001d120' """)

    def test_eval_bytes_incomplete(self):
        self.assertRaises(SyntaxError, eval, r""" b'\x' """)
        self.assertRaises(SyntaxError, eval, r""" b'\x0' """)

    def test_eval_bytes_invalid_escape(self):
        # NOTE(review): same warning-category caveat as in
        # test_eval_str_invalid_escape -- confirm for the target version.
        for b in range(1, 128):
            # Skip the characters that are meaningful after a backslash
            # in a bytes literal.
            if b in b"""\n\r"'01234567\\abfnrtvx""":
                continue
            with self.assertWarns(DeprecationWarning):
                self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('always', category=DeprecationWarning)
            eval("b'''\n\\z'''")
        self.assertEqual(len(w), 1)
        self.assertEqual(w[0].filename, '<string>')
        self.assertEqual(w[0].lineno, 2)

        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter('error', category=DeprecationWarning)
            with self.assertRaises(SyntaxError) as cm:
                eval("b'''\n\\z'''")
            exc = cm.exception
        self.assertEqual(w, [])
        self.assertEqual(exc.filename, '<string>')
        self.assertEqual(exc.lineno, 2)

    def test_eval_bytes_raw(self):
        # Both prefix spellings, br and rb, are exercised side by side.
        self.assertEqual(eval(""" br'x' """), b'x')
        self.assertEqual(eval(""" rb'x' """), b'x')
        self.assertEqual(eval(r""" br'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(r""" rb'\x01' """), b'\\' + b'x01')
        self.assertEqual(eval(""" br'\x01' """), byte(1))
        self.assertEqual(eval(""" rb'\x01' """), byte(1))
        self.assertEqual(eval(r""" br'\x81' """), b"\\" + b"x81")
        self.assertEqual(eval(r""" rb'\x81' """), b"\\" + b"x81")
        self.assertRaises(SyntaxError, eval, """ br'\x81' """)
        self.assertRaises(SyntaxError, eval, """ rb'\x81' """)
        self.assertEqual(eval(r""" br'\u1881' """), b"\\" + b"u1881")
        self.assertEqual(eval(r""" rb'\u1881' """), b"\\" + b"u1881")
        self.assertRaises(SyntaxError, eval, """ br'\u1881' """)
        self.assertRaises(SyntaxError, eval, """ rb'\u1881' """)
        self.assertEqual(eval(r""" br'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertEqual(eval(r""" rb'\U0001d120' """), b"\\" + b"U0001d120")
        self.assertRaises(SyntaxError, eval, """ br'\U0001d120' """)
        self.assertRaises(SyntaxError, eval, """ rb'\U0001d120' """)
        # Repeated prefix letters are not valid prefixes.
        self.assertRaises(SyntaxError, eval, """ bb'' """)
        self.assertRaises(SyntaxError, eval, """ rr'' """)
        self.assertRaises(SyntaxError, eval, """ brr'' """)
        self.assertRaises(SyntaxError, eval, """ bbr'' """)
        self.assertRaises(SyntaxError, eval, """ rrb'' """)
        self.assertRaises(SyntaxError, eval, """ rbb'' """)

    def test_eval_str_u(self):
        # The u/U prefix is accepted on its own but cannot be combined
        # with r or b.
        self.assertEqual(eval(""" u'x' """), 'x')
        self.assertEqual(eval(""" U'\u00e4' """), 'ä')
        self.assertEqual(eval(""" u'\N{LATIN SMALL LETTER A WITH DIAERESIS}' """), 'ä')
        self.assertRaises(SyntaxError, eval, """ ur'' """)
        self.assertRaises(SyntaxError, eval, """ ru'' """)
        self.assertRaises(SyntaxError, eval, """ bu'' """)
        self.assertRaises(SyntaxError, eval, """ ub'' """)

    def check_encoding(self, encoding, extra=""):
        """Write TEMPLATE (plus *extra*) in *encoding* and import it.

        The module is named "xx_" followed by *encoding* with dashes
        replaced by underscores, and is created inside self.tmpdir.
        Importing it runs the asserts embedded in TEMPLATE; any exception
        raised by the import (for example SyntaxError) propagates to the
        caller.  The module is removed from sys.modules afterwards.
        """
        modname = "xx_" + encoding.replace("-", "_")
        fn = os.path.join(self.tmpdir, modname + ".py")
        with open(fn, "w", encoding=encoding) as f:
            f.write(TEMPLATE % encoding)
            f.write(extra)
        # The source file was created after the interpreter started, so
        # make sure the import system's finders notice it.
        importlib.invalidate_caches()
        try:
            __import__(modname)
        finally:
            # Drop the module whether or not the import succeeded so no
            # entry leaks into later tests.
            sys.modules.pop(modname, None)

    def test_file_utf_8(self):
        extra = "z = '\u1234'; assert ord(z) == 0x1234\n"
        self.check_encoding("utf-8", extra)

    def test_file_utf_8_error(self):
        # A non-ASCII character inside a bytes literal must make the
        # generated module fail to compile.
        extra = "b'\x80'\n"
        self.assertRaises(SyntaxError, self.check_encoding, "utf-8", extra)

    def test_file_utf8(self):
        self.check_encoding("utf-8")

    def test_file_iso_8859_1(self):
        self.check_encoding("iso-8859-1")

    def test_file_latin_1(self):
        self.check_encoding("latin-1")

    def test_file_latin9(self):
        self.check_encoding("latin9")


# Run the test suite when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()