» Core Development > Code coverage > Lib/test/test_multibytecodec.py

Python code coverage for Lib/test/test_multibytecodec.py

# | count | content
1n/a#
2n/a# test_multibytecodec.py
3n/a# Unit test for multibytecodec itself
4n/a#
5n/a
6n/afrom test import support
7n/afrom test.support import TESTFN
8n/aimport unittest, io, codecs, sys
9n/aimport _multibytecodec
10n/a
# Names of all CJK codecs exercised by the tests in this module, grouped by
# the extension module that provides them.
ALL_CJKENCODINGS = (
    # _codecs_cn
    ['gb2312', 'gbk', 'gb18030', 'hz']
    # _codecs_hk
    + ['big5hkscs']
    # _codecs_jp
    + ['cp932', 'shift_jis', 'euc_jp', 'euc_jisx0213', 'shift_jisx0213']
    + ['euc_jis_2004', 'shift_jis_2004']
    # _codecs_kr
    + ['cp949', 'euc_kr', 'johab']
    # _codecs_tw
    + ['big5', 'cp950']
    # _codecs_iso2022
    + ['iso2022_jp', 'iso2022_jp_1', 'iso2022_jp_2', 'iso2022_jp_2004']
    + ['iso2022_jp_3', 'iso2022_jp_ext', 'iso2022_kr']
)
27n/a
class Test_MultibyteCodec(unittest.TestCase):
    """Checks applied to every codec in ALL_CJKENCODINGS, plus regression
    tests for error callbacks and the multibyte stream objects.

    Loops over ALL_CJKENCODINGS run each codec inside ``subTest`` so that a
    failure names the codec involved and the remaining codecs still run.
    """

    def test_nullcoding(self):
        # Empty input must convert to empty output in both directions.
        for enc in ALL_CJKENCODINGS:
            with self.subTest(encoding=enc):
                self.assertEqual(b''.decode(enc), '')
                self.assertEqual(str(b'', enc), '')
                self.assertEqual(''.encode(enc), b'')

    def test_str_decode(self):
        # The ASCII text 'abcd' must encode to the same four bytes.
        for enc in ALL_CJKENCODINGS:
            with self.subTest(encoding=enc):
                self.assertEqual('abcd'.encode(enc), b'abcd')

    def test_errorcallback_longindex(self):
        # An error handler that asks to resume at sys.maxsize + 1 must make
        # the decoder raise IndexError.
        def myreplace(exc):
            return ('', sys.maxsize + 1)

        dec = codecs.getdecoder('euc-kr')
        codecs.register_error('test.cjktest', myreplace)
        self.assertRaises(IndexError, dec,
                          b'apple\x92ham\x93spam', 'test.cjktest')

    def test_errorcallback_custom_ignore(self):
        # Issue #23215: MemoryError with custom error handlers and multibyte
        # codecs
        data = 100 * "\udc00"
        codecs.register_error("test.ignore", codecs.ignore_errors)
        for enc in ALL_CJKENCODINGS:
            with self.subTest(encoding=enc):
                self.assertEqual(data.encode(enc, "test.ignore"), b'')

    def test_codingspec(self):
        # Executing source that only declares the coding must not fail.
        try:
            for enc in ALL_CJKENCODINGS:
                with self.subTest(encoding=enc):
                    code = '# coding: {}\n'.format(enc)
                    exec(code)
        finally:
            # NOTE(review): nothing visible in this test creates TESTFN; the
            # unlink is kept unchanged -- confirm whether it is still needed.
            support.unlink(TESTFN)

    def test_init_segfault(self):
        # bug #3305: this used to segfault
        self.assertRaises(AttributeError,
                          _multibytecodec.MultibyteStreamReader, None)
        self.assertRaises(AttributeError,
                          _multibytecodec.MultibyteStreamWriter, None)

    def test_decode_unicode(self):
        # Trying to decode a unicode string should raise a TypeError
        for enc in ALL_CJKENCODINGS:
            with self.subTest(encoding=enc):
                self.assertRaises(TypeError, codecs.getdecoder(enc), "")
73n/a
class Test_IncrementalEncoder(unittest.TestCase):
    """Incremental encoder behaviour for stateless and stateful codecs."""

    def test_stateless(self):
        # cp949 encoder isn't stateful at all.
        enc = codecs.getincrementalencoder('cp949')()
        check = self.assertEqual

        hangul = '\ud30c\uc774\uc36c \ub9c8\uc744'
        check(enc.encode(hangul), b'\xc6\xc4\xc0\xcc\xbd\xe3 \xb8\xb6\xc0\xbb')
        check(enc.reset(), None)

        symbols = '\u2606\u223c\u2606'
        check(enc.encode(symbols, True), b'\xa1\xd9\xa1\xad\xa1\xd9')
        check(enc.reset(), None)

        check(enc.encode('', True), b'')
        check(enc.encode('', False), b'')
        check(enc.reset(), None)

    def test_stateful(self):
        # jisx0213 encoder is stateful for a few code points, e.g.
        #   U+00E6        => A9DC
        #   U+00E6 U+0300 => ABC4
        #   U+0300        => ABDC
        enc = codecs.getincrementalencoder('jisx0213')()
        check = self.assertEqual
        ae, grave = '\u00e6', '\u0300'

        check(enc.encode(ae + grave), b'\xab\xc4')
        # U+00E6 alone yields nothing until the next call decides its form.
        check(enc.encode(ae), b'')
        check(enc.encode(grave), b'\xab\xc4')
        check(enc.encode(ae, True), b'\xa9\xdc')

        check(enc.reset(), None)
        check(enc.encode(grave), b'\xab\xdc')

        check(enc.encode(ae), b'')
        check(enc.encode('', True), b'\xa9\xdc')
        check(enc.encode('', True), b'')

    def test_stateful_keep_buffer(self):
        # A failing encode() call must not discard the pending U+00E6.
        enc = codecs.getincrementalencoder('jisx0213')()
        check = self.assertEqual
        unencodable = '\u0123'

        def rejects(text):
            self.assertRaises(UnicodeEncodeError, enc.encode, text)

        check(enc.encode('\u00e6'), b'')
        rejects(unencodable)
        check(enc.encode('\u0300\u00e6'), b'\xab\xc4')
        rejects(unencodable)
        check(enc.reset(), None)
        check(enc.encode('\u0300'), b'\xab\xdc')
        check(enc.encode('\u00e6'), b'')
        rejects(unencodable)
        check(enc.encode('', True), b'\xa9\xdc')

    def test_issue5640(self):
        # With 'backslashreplace', U+00FF becomes an escape and the newline
        # that follows is encoded as-is.
        enc = codecs.getincrementalencoder('shift-jis')('backslashreplace')
        for text, wanted in (('\xff', b'\\xff'), ('\n', b'\n')):
            self.assertEqual(enc.encode(text), wanted)
124n/a
class Test_IncrementalDecoder(unittest.TestCase):
    """Incremental decoder behaviour for DBCS and ISO-2022 codecs."""

    def test_dbcs(self):
        # cp949 decoder is simple with only 1 or 2 bytes sequences.
        dec = codecs.getincrementaldecoder('cp949')()
        chunks = (
            (b'\xc6\xc4\xc0\xcc\xbd', '\ud30c\uc774'),
            (b'\xe3 \xb8\xb6\xc0\xbb', '\uc36c \ub9c8\uc744'),
            (b'', ''),
        )
        for raw, text in chunks:
            self.assertEqual(dec.decode(raw), text)

    def test_dbcs_keep_buffer(self):
        # A failing final decode() must leave the pending byte buffered, so
        # the next chunk still completes the character.
        dec = codecs.getincrementaldecoder('cp949')()
        for bad_tail in (b'', b'\xcc\xbd'):
            self.assertEqual(dec.decode(b'\xc6\xc4\xc0'), '\ud30c')
            self.assertRaises(UnicodeDecodeError, dec.decode, bad_tail, True)
            self.assertEqual(dec.decode(b'\xcc'), '\uc774')

    def test_iso2022(self):
        # Escape sequences and two-byte characters may be split across
        # decode() calls.
        dec = codecs.getincrementaldecoder('iso2022-jp')()
        check = self.assertEqual
        ESC = b'\x1b'
        kanji = '\u4e16'

        check(dec.decode(ESC + b'('), '')
        check(dec.decode(b'B', True), '')
        check(dec.decode(ESC + b'$'), '')
        check(dec.decode(b'B@$'), kanji)
        check(dec.decode(b'@$@'), kanji)
        check(dec.decode(b'$', True), kanji)
        check(dec.reset(), None)
        check(dec.decode(b'@$'), '@$')
        check(dec.decode(ESC + b'$'), '')
        self.assertRaises(UnicodeDecodeError, dec.decode, b'', True)
        check(dec.decode(b'B@$'), kanji)

    def test_decode_unicode(self):
        # Trying to decode a unicode string should raise a TypeError
        for enc in ALL_CJKENCODINGS:
            dec = codecs.getincrementaldecoder(enc)()
            with self.assertRaises(TypeError):
                dec.decode("")
167n/a
class Test_StreamReader(unittest.TestCase):
    """Regression tests for stream readers backed by multibyte codecs."""

    def test_bug1728403(self):
        # A file holding only the byte 0xA1 must raise UnicodeDecodeError
        # when read through the cp949 stream reader.
        # Registered first so TESTFN is removed whatever happens below.
        self.addCleanup(support.unlink, TESTFN)

        with open(TESTFN, 'wb') as f:
            f.write(b'\xa1')

        with codecs.open(TESTFN, encoding='cp949') as f:
            self.assertRaises(UnicodeDecodeError, f.read, 2)
183n/a
class Test_StreamWriter(unittest.TestCase):
    """Stream writers must append correctly encoded bytes on each write()."""

    def test_gb18030(self):
        sink = io.BytesIO()
        writer = codecs.getwriter('gb18030')(sink)
        # (text written, complete buffer contents expected afterwards)
        steps = (
            ('123', b'123'),
            ('\U00012345', b'123\x907\x959'),
            ('\uac00\u00ac', b'123\x907\x959\x827\xcf5\x810\x851'),
        )
        for text, so_far in steps:
            writer.write(text)
            self.assertEqual(sink.getvalue(), so_far)

    def test_utf_8(self):
        sink = io.BytesIO()
        writer = codecs.getwriter('utf-8')(sink)
        # (text written, complete buffer contents expected afterwards)
        steps = (
            ('123', b'123'),
            ('\U00012345', b'123\xf0\x92\x8d\x85'),
            ('\uac00\u00ac', b'123\xf0\x92\x8d\x85' b'\xea\xb0\x80\xc2\xac'),
        )
        for text, so_far in steps:
            writer.write(text)
            self.assertEqual(sink.getvalue(), so_far)

    def test_streamwriter_strwrite(self):
        sink = io.BytesIO()
        writer = codecs.getwriter('gb18030')(sink)
        writer.write('abcd')
        written = sink.getvalue()
        self.assertEqual(written, b'abcd')
213n/a
class Test_ISO2022(unittest.TestCase):
    """Regression tests specific to the ISO-2022 family of codecs."""

    def test_g2(self):
        # The bytes contain ESC . A followed by ESC N; together with the next
        # byte they must decode to U+00E9.
        iso2022jp2 = b'\x1b(B:hu4:unit\x1b.A\x1bNi de famille'
        uni = ':hu4:unit\xe9 de famille'
        self.assertEqual(iso2022jp2.decode('iso2022-jp-2'), uni)

    def test_iso2022_jp_g0(self):
        # The encoded form must not contain the 0x0E control byte ...
        self.assertNotIn(b'\x0e', '\N{SOFT HYPHEN}'.encode('iso-2022-jp-2'))
        # ... and must stay 7-bit.  The comparison is >= 0x80 so that the
        # byte 0x80 itself is rejected as well.
        for encoding in ('iso-2022-jp-2004', 'iso-2022-jp-3'):
            with self.subTest(encoding=encoding):
                e = '\u3406'.encode(encoding)
                self.assertFalse(any(x >= 0x80 for x in e))

    def test_bug1572832(self):
        # Encoding any non-BMP code point with error handling must not crash.
        for x in range(0x10000, 0x110000):
            # Any ISO 2022 codec will cause the segfault
            chr(x).encode('iso_2022_jp', 'ignore')
230n/a
class TestStateful(unittest.TestCase):
    """Encoding checks for a stateful codec (iso-2022-jp here).

    The class attributes describe the codec under test; a subclass can
    override them to run the same checks against another codec.
    """
    # Sample text and the codec used to encode it.
    text = '\u4E16\u4E16'
    encoding = 'iso-2022-jp'
    # Bytes expected for `text` from non-final incremental encode() calls.
    expected = b'\x1b$B@$@$'
    # Bytes expected from the first final encode() call after `text`.
    reset = b'\x1b(B'
    # Bytes expected from a one-shot str.encode() of `text`.
    expected_reset = expected + reset

    def test_encode(self):
        encoded = self.text.encode(self.encoding)
        self.assertEqual(encoded, self.expected_reset)

    def test_incrementalencoder(self):
        # Feed one character at a time, then finalise with an empty string.
        enc = codecs.getincrementalencoder(self.encoding)()
        pieces = [enc.encode(ch) for ch in self.text]
        self.assertEqual(b''.join(pieces), self.expected)
        self.assertEqual(enc.encode('', final=True), self.reset)
        self.assertEqual(enc.encode('', final=True), b'')

    def test_incrementalencoder_final(self):
        # Same as above, but the last character itself is marked final.
        enc = codecs.getincrementalencoder(self.encoding)()
        final_index = len(self.text) - 1
        pieces = []
        for position, ch in enumerate(self.text):
            pieces.append(enc.encode(ch, position == final_index))
        self.assertEqual(b''.join(pieces), self.expected_reset)
        self.assertEqual(enc.encode('', final=True), b'')
258n/a
class TestHZStateful(TestStateful):
    """Run the inherited TestStateful checks against the 'hz' codec."""
    # Codec under test and the sample text fed to it.
    encoding = 'hz'
    text = '\u804a\u804a'
    # Expected final bytes and expected non-final output for `text`.
    reset = b'~}'
    expected = b'~{ADAD'
    # Expected one-shot encoding of `text`.
    expected_reset = expected + reset
265n/a
def test_main():
    """Hand this module's name to test.support.run_unittest()."""
    support.run_unittest(__name__)


# Allow the module to be executed directly as a script.
if __name__ == "__main__":
    test_main()