» Core Development > Code coverage > Lib/test/test_multibytecodec_support.py

Python code coverage for Lib/test/test_multibytecodec_support.py

# | count | content
1n/a#!/usr/bin/env python3
2n/a#
3n/a# test_multibytecodec_support.py
4n/a# Common Unittest Routines for CJK codecs
5n/a#
6n/a
7n/aimport codecs
8n/aimport os
9n/aimport re
10n/aimport sys
11n/aimport unittest
12n/afrom http.client import HTTPException
13n/afrom test import support
14n/afrom io import BytesIO
15n/a
16n/aclass TestBase:
17n/a encoding = '' # codec name
18n/a codec = None # codec tuple (with 4 elements)
19n/a tstring = None # must set. 2 strings to test StreamReader
20n/a
21n/a codectests = None # must set. codec test tuple
22n/a roundtriptest = 1 # set if roundtrip is possible with unicode
23n/a has_iso10646 = 0 # set if this encoding contains whole iso10646 map
24n/a xmlcharnametest = None # string to test xmlcharrefreplace
25n/a unmappedunicode = '\udeee' # a unicode codepoint that is not mapped.
26n/a
27n/a def setUp(self):
28n/a if self.codec is None:
29n/a self.codec = codecs.lookup(self.encoding)
30n/a self.encode = self.codec.encode
31n/a self.decode = self.codec.decode
32n/a self.reader = self.codec.streamreader
33n/a self.writer = self.codec.streamwriter
34n/a self.incrementalencoder = self.codec.incrementalencoder
35n/a self.incrementaldecoder = self.codec.incrementaldecoder
36n/a
37n/a def test_chunkcoding(self):
38n/a tstring_lines = []
39n/a for b in self.tstring:
40n/a lines = b.split(b"\n")
41n/a last = lines.pop()
42n/a assert last == b""
43n/a lines = [line + b"\n" for line in lines]
44n/a tstring_lines.append(lines)
45n/a for native, utf8 in zip(*tstring_lines):
46n/a u = self.decode(native)[0]
47n/a self.assertEqual(u, utf8.decode('utf-8'))
48n/a if self.roundtriptest:
49n/a self.assertEqual(native, self.encode(u)[0])
50n/a
51n/a def test_errorhandle(self):
52n/a for source, scheme, expected in self.codectests:
53n/a if isinstance(source, bytes):
54n/a func = self.decode
55n/a else:
56n/a func = self.encode
57n/a if expected:
58n/a result = func(source, scheme)[0]
59n/a if func is self.decode:
60n/a self.assertTrue(type(result) is str, type(result))
61n/a self.assertEqual(result, expected,
62n/a '%a.decode(%r, %r)=%a != %a'
63n/a % (source, self.encoding, scheme, result,
64n/a expected))
65n/a else:
66n/a self.assertTrue(type(result) is bytes, type(result))
67n/a self.assertEqual(result, expected,
68n/a '%a.encode(%r, %r)=%a != %a'
69n/a % (source, self.encoding, scheme, result,
70n/a expected))
71n/a else:
72n/a self.assertRaises(UnicodeError, func, source, scheme)
73n/a
74n/a def test_xmlcharrefreplace(self):
75n/a if self.has_iso10646:
76n/a return
77n/a
78n/a s = "\u0b13\u0b23\u0b60 nd eggs"
79n/a self.assertEqual(
80n/a self.encode(s, "xmlcharrefreplace")[0],
81n/a b"ଓଣୠ nd eggs"
82n/a )
83n/a
84n/a def test_customreplace_encode(self):
85n/a if self.has_iso10646:
86n/a return
87n/a
88n/a from html.entities import codepoint2name
89n/a
90n/a def xmlcharnamereplace(exc):
91n/a if not isinstance(exc, UnicodeEncodeError):
92n/a raise TypeError("don't know how to handle %r" % exc)
93n/a l = []
94n/a for c in exc.object[exc.start:exc.end]:
95n/a if ord(c) in codepoint2name:
96n/a l.append("&%s;" % codepoint2name[ord(c)])
97n/a else:
98n/a l.append("&#%d;" % ord(c))
99n/a return ("".join(l), exc.end)
100n/a
101n/a codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)
102n/a
103n/a if self.xmlcharnametest:
104n/a sin, sout = self.xmlcharnametest
105n/a else:
106n/a sin = "\xab\u211c\xbb = \u2329\u1234\u232a"
107n/a sout = b"«ℜ» = ⟨ሴ⟩"
108n/a self.assertEqual(self.encode(sin,
109n/a "test.xmlcharnamereplace")[0], sout)
110n/a
111n/a def test_callback_wrong_objects(self):
112n/a def myreplace(exc):
113n/a return (ret, exc.end)
114n/a codecs.register_error("test.cjktest", myreplace)
115n/a
116n/a for ret in ([1, 2, 3], [], None, object(), b'string', b''):
117n/a self.assertRaises(TypeError, self.encode, self.unmappedunicode,
118n/a 'test.cjktest')
119n/a
120n/a def test_callback_long_index(self):
121n/a def myreplace(exc):
122n/a return ('x', int(exc.end))
123n/a codecs.register_error("test.cjktest", myreplace)
124n/a self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
125n/a 'test.cjktest'), (b'abcdxefgh', 9))
126n/a
127n/a def myreplace(exc):
128n/a return ('x', sys.maxsize + 1)
129n/a codecs.register_error("test.cjktest", myreplace)
130n/a self.assertRaises(IndexError, self.encode, self.unmappedunicode,
131n/a 'test.cjktest')
132n/a
133n/a def test_callback_None_index(self):
134n/a def myreplace(exc):
135n/a return ('x', None)
136n/a codecs.register_error("test.cjktest", myreplace)
137n/a self.assertRaises(TypeError, self.encode, self.unmappedunicode,
138n/a 'test.cjktest')
139n/a
140n/a def test_callback_backward_index(self):
141n/a def myreplace(exc):
142n/a if myreplace.limit > 0:
143n/a myreplace.limit -= 1
144n/a return ('REPLACED', 0)
145n/a else:
146n/a return ('TERMINAL', exc.end)
147n/a myreplace.limit = 3
148n/a codecs.register_error("test.cjktest", myreplace)
149n/a self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
150n/a 'test.cjktest'),
151n/a (b'abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
152n/a
153n/a def test_callback_forward_index(self):
154n/a def myreplace(exc):
155n/a return ('REPLACED', exc.end + 2)
156n/a codecs.register_error("test.cjktest", myreplace)
157n/a self.assertEqual(self.encode('abcd' + self.unmappedunicode + 'efgh',
158n/a 'test.cjktest'), (b'abcdREPLACEDgh', 9))
159n/a
160n/a def test_callback_index_outofbound(self):
161n/a def myreplace(exc):
162n/a return ('TERM', 100)
163n/a codecs.register_error("test.cjktest", myreplace)
164n/a self.assertRaises(IndexError, self.encode, self.unmappedunicode,
165n/a 'test.cjktest')
166n/a
167n/a def test_incrementalencoder(self):
168n/a UTF8Reader = codecs.getreader('utf-8')
169n/a for sizehint in [None] + list(range(1, 33)) + \
170n/a [64, 128, 256, 512, 1024]:
171n/a istream = UTF8Reader(BytesIO(self.tstring[1]))
172n/a ostream = BytesIO()
173n/a encoder = self.incrementalencoder()
174n/a while 1:
175n/a if sizehint is not None:
176n/a data = istream.read(sizehint)
177n/a else:
178n/a data = istream.read()
179n/a
180n/a if not data:
181n/a break
182n/a e = encoder.encode(data)
183n/a ostream.write(e)
184n/a
185n/a self.assertEqual(ostream.getvalue(), self.tstring[0])
186n/a
187n/a def test_incrementaldecoder(self):
188n/a UTF8Writer = codecs.getwriter('utf-8')
189n/a for sizehint in [None, -1] + list(range(1, 33)) + \
190n/a [64, 128, 256, 512, 1024]:
191n/a istream = BytesIO(self.tstring[0])
192n/a ostream = UTF8Writer(BytesIO())
193n/a decoder = self.incrementaldecoder()
194n/a while 1:
195n/a data = istream.read(sizehint)
196n/a if not data:
197n/a break
198n/a else:
199n/a u = decoder.decode(data)
200n/a ostream.write(u)
201n/a
202n/a self.assertEqual(ostream.getvalue(), self.tstring[1])
203n/a
204n/a def test_incrementalencoder_error_callback(self):
205n/a inv = self.unmappedunicode
206n/a
207n/a e = self.incrementalencoder()
208n/a self.assertRaises(UnicodeEncodeError, e.encode, inv, True)
209n/a
210n/a e.errors = 'ignore'
211n/a self.assertEqual(e.encode(inv, True), b'')
212n/a
213n/a e.reset()
214n/a def tempreplace(exc):
215n/a return ('called', exc.end)
216n/a codecs.register_error('test.incremental_error_callback', tempreplace)
217n/a e.errors = 'test.incremental_error_callback'
218n/a self.assertEqual(e.encode(inv, True), b'called')
219n/a
220n/a # again
221n/a e.errors = 'ignore'
222n/a self.assertEqual(e.encode(inv, True), b'')
223n/a
224n/a def test_streamreader(self):
225n/a UTF8Writer = codecs.getwriter('utf-8')
226n/a for name in ["read", "readline", "readlines"]:
227n/a for sizehint in [None, -1] + list(range(1, 33)) + \
228n/a [64, 128, 256, 512, 1024]:
229n/a istream = self.reader(BytesIO(self.tstring[0]))
230n/a ostream = UTF8Writer(BytesIO())
231n/a func = getattr(istream, name)
232n/a while 1:
233n/a data = func(sizehint)
234n/a if not data:
235n/a break
236n/a if name == "readlines":
237n/a ostream.writelines(data)
238n/a else:
239n/a ostream.write(data)
240n/a
241n/a self.assertEqual(ostream.getvalue(), self.tstring[1])
242n/a
243n/a def test_streamwriter(self):
244n/a readfuncs = ('read', 'readline', 'readlines')
245n/a UTF8Reader = codecs.getreader('utf-8')
246n/a for name in readfuncs:
247n/a for sizehint in [None] + list(range(1, 33)) + \
248n/a [64, 128, 256, 512, 1024]:
249n/a istream = UTF8Reader(BytesIO(self.tstring[1]))
250n/a ostream = self.writer(BytesIO())
251n/a func = getattr(istream, name)
252n/a while 1:
253n/a if sizehint is not None:
254n/a data = func(sizehint)
255n/a else:
256n/a data = func()
257n/a
258n/a if not data:
259n/a break
260n/a if name == "readlines":
261n/a ostream.writelines(data)
262n/a else:
263n/a ostream.write(data)
264n/a
265n/a self.assertEqual(ostream.getvalue(), self.tstring[0])
266n/a
267n/a
268n/aclass TestBase_Mapping(unittest.TestCase):
269n/a pass_enctest = []
270n/a pass_dectest = []
271n/a supmaps = []
272n/a codectests = []
273n/a
274n/a def __init__(self, *args, **kw):
275n/a unittest.TestCase.__init__(self, *args, **kw)
276n/a try:
277n/a self.open_mapping_file().close() # test it to report the error early
278n/a except (IOError, HTTPException):
279n/a self.skipTest("Could not retrieve "+self.mapfileurl)
280n/a
281n/a def open_mapping_file(self):
282n/a return support.open_urlresource(self.mapfileurl)
283n/a
284n/a def test_mapping_file(self):
285n/a if self.mapfileurl.endswith('.xml'):
286n/a self._test_mapping_file_ucm()
287n/a else:
288n/a self._test_mapping_file_plain()
289n/a
290n/a def _test_mapping_file_plain(self):
291n/a unichrs = lambda s: ''.join(map(chr, map(eval, s.split('+'))))
292n/a urt_wa = {}
293n/a
294n/a with self.open_mapping_file() as f:
295n/a for line in f:
296n/a if not line:
297n/a break
298n/a data = line.split('#')[0].strip().split()
299n/a if len(data) != 2:
300n/a continue
301n/a
302n/a csetval = eval(data[0])
303n/a if csetval <= 0x7F:
304n/a csetch = bytes([csetval & 0xff])
305n/a elif csetval >= 0x1000000:
306n/a csetch = bytes([(csetval >> 24), ((csetval >> 16) & 0xff),
307n/a ((csetval >> 8) & 0xff), (csetval & 0xff)])
308n/a elif csetval >= 0x10000:
309n/a csetch = bytes([(csetval >> 16), ((csetval >> 8) & 0xff),
310n/a (csetval & 0xff)])
311n/a elif csetval >= 0x100:
312n/a csetch = bytes([(csetval >> 8), (csetval & 0xff)])
313n/a else:
314n/a continue
315n/a
316n/a unich = unichrs(data[1])
317n/a if ord(unich) == 0xfffd or unich in urt_wa:
318n/a continue
319n/a urt_wa[unich] = csetch
320n/a
321n/a self._testpoint(csetch, unich)
322n/a
323n/a def _test_mapping_file_ucm(self):
324n/a with self.open_mapping_file() as f:
325n/a ucmdata = f.read()
326n/a uc = re.findall('<a u="([A-F0-9]{4})" b="([0-9A-F ]+)"/>', ucmdata)
327n/a for uni, coded in uc:
328n/a unich = chr(int(uni, 16))
329n/a codech = bytes(int(c, 16) for c in coded.split())
330n/a self._testpoint(codech, unich)
331n/a
332n/a def test_mapping_supplemental(self):
333n/a for mapping in self.supmaps:
334n/a self._testpoint(*mapping)
335n/a
336n/a def _testpoint(self, csetch, unich):
337n/a if (csetch, unich) not in self.pass_enctest:
338n/a self.assertEqual(unich.encode(self.encoding), csetch)
339n/a if (csetch, unich) not in self.pass_dectest:
340n/a self.assertEqual(str(csetch, self.encoding), unich)
341n/a
342n/a def test_errorhandle(self):
343n/a for source, scheme, expected in self.codectests:
344n/a if isinstance(source, bytes):
345n/a func = source.decode
346n/a else:
347n/a func = source.encode
348n/a if expected:
349n/a if isinstance(source, bytes):
350n/a result = func(self.encoding, scheme)
351n/a self.assertTrue(type(result) is str, type(result))
352n/a self.assertEqual(result, expected,
353n/a '%a.decode(%r, %r)=%a != %a'
354n/a % (source, self.encoding, scheme, result,
355n/a expected))
356n/a else:
357n/a result = func(self.encoding, scheme)
358n/a self.assertTrue(type(result) is bytes, type(result))
359n/a self.assertEqual(result, expected,
360n/a '%a.encode(%r, %r)=%a != %a'
361n/a % (source, self.encoding, scheme, result,
362n/a expected))
363n/a else:
364n/a self.assertRaises(UnicodeError, func, self.encoding, scheme)
365n/a
def load_teststring(name):
    """Return ``(encoded, utf8)`` sample bytes for the test string *name*.

    Both files are read in binary mode from the ``cjkencodings`` directory
    next to this module: ``<name>.txt`` and ``<name>-utf8.txt``.
    """
    sample_dir = os.path.join(os.path.dirname(__file__), 'cjkencodings')
    encoded_path = os.path.join(sample_dir, name + '.txt')
    utf8_path = os.path.join(sample_dir, name + '-utf8.txt')

    with open(encoded_path, 'rb') as encoded_file:
        encoded = encoded_file.read()
    with open(utf8_path, 'rb') as utf8_file:
        utf8 = utf8_file.read()
    return encoded, utf8