#
# test_codecencodings_jp.py
#   Codec encoding tests for Japanese encodings.
#
| 5 | n/a | |
|---|
| 6 | n/a | from test import multibytecodec_support |
|---|
| 7 | n/a | import unittest |
|---|
| 8 | n/a | |
|---|
| 9 | n/a | class Test_CP932(multibytecodec_support.TestBase, unittest.TestCase): |
|---|
| 10 | n/a | encoding = 'cp932' |
|---|
| 11 | n/a | tstring = multibytecodec_support.load_teststring('shift_jis') |
|---|
| 12 | n/a | codectests = ( |
|---|
| 13 | n/a | # invalid bytes |
|---|
| 14 | n/a | (b"abc\x81\x00\x81\x00\x82\x84", "strict", None), |
|---|
| 15 | n/a | (b"abc\xf8", "strict", None), |
|---|
| 16 | n/a | (b"abc\x81\x00\x82\x84", "replace", "abc\ufffd\x00\uff44"), |
|---|
| 17 | n/a | (b"abc\x81\x00\x82\x84\x88", "replace", "abc\ufffd\x00\uff44\ufffd"), |
|---|
| 18 | n/a | (b"abc\x81\x00\x82\x84", "ignore", "abc\x00\uff44"), |
|---|
| 19 | n/a | (b"ab\xEBxy", "replace", "ab\uFFFDxy"), |
|---|
| 20 | n/a | (b"ab\xF0\x39xy", "replace", "ab\uFFFD9xy"), |
|---|
| 21 | n/a | (b"ab\xEA\xF0xy", "replace", 'ab\ufffd\ue038y'), |
|---|
| 22 | n/a | # sjis vs cp932 |
|---|
| 23 | n/a | (b"\\\x7e", "replace", "\\\x7e"), |
|---|
| 24 | n/a | (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\uff3c\u2225\uff0d"), |
|---|
| 25 | n/a | ) |
|---|
| 26 | n/a | |
|---|
# Conversion cases shared by the EUC-family test classes in this module.
# Each row is (input bytes, error-handler name, expected text).
# NOTE(review): a ``None`` expectation presumably means the conversion must
# fail; confirm against multibytecodec_support.TestBase.
euc_commontests = (
    # invalid bytes
    (b"abc\x80\x80\xc1\xc4", "strict", None),
    (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u7956"),
    (b"abc\x80\x80\xc1\xc4\xc8", "replace", "abc\ufffd\ufffd\u7956\ufffd"),
    (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u7956"),
    (b"abc\xc8", "strict", None),
    (b"abc\x8f\x83\x83", "replace", "abc\ufffd\ufffd\ufffd"),
    (b"\x82\xFCxy", "replace", "\ufffd\ufffdxy"),
    (b"\xc1\x64", "strict", None),
    (b"\xa1\xc0", "strict", "\uff3c"),
    (b"\xa1\xc0\\", "strict", "\uff3c\\"),
    (b"\x8eXY", "replace", "\ufffdXY"),
)
| 41 | n/a | |
|---|
class Test_EUC_JIS_2004(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec checks for the 'euc_jis_2004' encoding.

    The test logic is inherited from ``multibytecodec_support.TestBase``;
    this class supplies the codec name, the sample text, the shared EUC
    conversion cases and one character-reference case.
    """

    encoding = 'euc_jis_2004'
    # Sample text is loaded under the 'euc_jisx0213' name.
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests

    # (text, expected bytes) pair.  The expected value is a bytes literal,
    # so it may contain only ASCII characters and ``\x`` escapes: U+211C,
    # U+2329, U+1234 and U+232A are spelled as character references
    # (&real; &lang; &#4660; &rang;) rather than as the raw characters,
    # which would be a SyntaxError inside b"...".
    # NOTE(review): presumably TestBase emits a named entity where one
    # exists and a decimal reference otherwise; confirm against
    # multibytecodec_support.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
| 51 | n/a | |
|---|
class Test_EUC_JISX0213(multibytecodec_support.TestBase,
                        unittest.TestCase):
    """Codec checks for the 'euc_jisx0213' encoding.

    The test logic is inherited from ``multibytecodec_support.TestBase``;
    this class supplies the codec name, the sample text, the shared EUC
    conversion cases and one character-reference case.
    """

    encoding = 'euc_jisx0213'
    tstring = multibytecodec_support.load_teststring('euc_jisx0213')
    codectests = euc_commontests

    # (text, expected bytes) pair.  The expected value is a bytes literal,
    # so it may contain only ASCII characters and ``\x`` escapes: U+211C,
    # U+2329, U+1234 and U+232A are spelled as character references
    # (&real; &lang; &#4660; &rang;) rather than as the raw characters,
    # which would be a SyntaxError inside b"...".
    # NOTE(review): presumably TestBase emits a named entity where one
    # exists and a decimal reference otherwise; confirm against
    # multibytecodec_support.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\xa9\xa8&real;\xa9\xb2 = &lang;&#4660;&rang;"
    )
| 61 | n/a | |
|---|
class Test_EUC_JP_COMPAT(multibytecodec_support.TestBase,
                         unittest.TestCase):
    """Codec checks for the 'euc_jp' encoding.

    Reuses the shared EUC conversion cases and appends two rows whose
    input is text (``str``) and whose expected result is bytes.
    """

    encoding = 'euc_jp'
    tstring = multibytecodec_support.load_teststring('euc_jp')

    # Rows specific to this codec: U+00A5 and U+203E are expected to come
    # out as the single bytes 0x5C and 0x7E under the "strict" handler.
    codectests = euc_commontests + (
        ("\xa5", "strict", b"\x5c"),
        ("\u203e", "strict", b"\x7e"),
    )
| 70 | n/a | |
|---|
# Conversion cases shared by the Shift_JIS-family test classes below.
# Each row is (input bytes, error-handler name, expected text).
# NOTE(review): a ``None`` expectation presumably means the conversion must
# fail; confirm against multibytecodec_support.TestBase.
shiftjis_commonenctests = (
    (b"abc\x80\x80\x82\x84", "strict", None),
    (b"abc\xf8", "strict", None),
    (b"abc\x80\x80\x82\x84def", "ignore", "abc\uff44def"),
)
| 76 | n/a | |
|---|
class Test_SJIS_COMPAT(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec checks for the 'shift_jis' encoding.

    Extends the shared Shift_JIS conversion cases with rows specific to
    this codec.
    """

    encoding = 'shift_jis'
    tstring = multibytecodec_support.load_teststring('shift_jis')

    codectests = shiftjis_commonenctests + (
        # Same undecodable input as the shared rows, but with "replace".
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # Backslash/tilde and three two-byte symbols under "strict".
        (b"\\\x7e", "strict", "\\\x7e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\uff3c\u2016\u2212"),
        # Two-byte sequences that yield U+FFFD under "replace".
        (b"abc\x81\x39", "replace", "abc\ufffd9"),
        (b"abc\xEA\xFC", "replace", "abc\ufffd\ufffd"),
        (b"abc\xFF\x58", "replace", "abc\ufffdX"),
    )
| 90 | n/a | |
|---|
class Test_SJIS_2004(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec checks for the 'shift_jis_2004' encoding.

    The test logic is inherited from ``multibytecodec_support.TestBase``;
    this class supplies the codec name, the sample text, the conversion
    cases and one character-reference case.
    """

    encoding = 'shift_jis_2004'
    # Sample text is loaded under the 'shift_jis' name.
    tstring = multibytecodec_support.load_teststring('shift_jis')

    # Shared Shift_JIS rows plus rows specific to this codec.
    codectests = shiftjis_commonenctests + (
        (b"\\\x7e", "strict", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "strict", "\\\u2016\u2212"),
        (b"abc\xEA\xFC", "strict", "abc\u64bf"),
        (b"\x81\x39xy", "replace", "\ufffd9xy"),
        (b"\xFF\x58xy", "replace", "\ufffdXxy"),
        (b"\x80\x80\x82\x84xy", "replace", "\ufffd\ufffd\uff44xy"),
        (b"\x80\x80\x82\x84\x88xy", "replace", "\ufffd\ufffd\uff44\u5864y"),
        (b"\xFC\xFBxy", "replace", '\ufffd\u95b4y'),
    )

    # (text, expected bytes) pair.  The expected value is a bytes literal,
    # so it may contain only ASCII characters and ``\x`` escapes: U+211C,
    # U+2329, U+1234 and U+232A are spelled as character references
    # (&real; &lang; &#4660; &rang;) rather than as the raw characters,
    # which would be a SyntaxError inside b"...".
    # NOTE(review): presumably TestBase emits a named entity where one
    # exists and a decimal reference otherwise; confirm against
    # multibytecodec_support.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
| 108 | n/a | |
|---|
class Test_SJISX0213(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec checks for the 'shift_jisx0213' encoding.

    The test logic is inherited from ``multibytecodec_support.TestBase``;
    this class supplies the codec name, the sample text, the conversion
    cases and one character-reference case.
    """

    encoding = 'shift_jisx0213'
    tstring = multibytecodec_support.load_teststring('shift_jisx0213')

    # Shared Shift_JIS rows plus rows specific to this codec.
    codectests = shiftjis_commonenctests + (
        (b"abc\x80\x80\x82\x84", "replace", "abc\ufffd\ufffd\uff44"),
        (b"abc\x80\x80\x82\x84\x88", "replace", "abc\ufffd\ufffd\uff44\ufffd"),

        # sjis vs cp932
        (b"\\\x7e", "replace", "\xa5\u203e"),
        (b"\x81\x5f\x81\x61\x81\x7c", "replace", "\x5c\u2016\u2212"),
    )

    # (text, expected bytes) pair.  The expected value is a bytes literal,
    # so it may contain only ASCII characters and ``\x`` escapes: U+211C,
    # U+2329, U+1234 and U+232A are spelled as character references
    # (&real; &lang; &#4660; &rang;) rather than as the raw characters,
    # which would be a SyntaxError inside b"...".
    # NOTE(review): presumably TestBase emits a named entity where one
    # exists and a decimal reference otherwise; confirm against
    # multibytecodec_support.
    xmlcharnametest = (
        "\xab\u211c\xbb = \u2329\u1234\u232a",
        b"\x85G&real;\x85Q = &lang;&#4660;&rang;"
    )
| 124 | n/a | |
|---|
if __name__ == "__main__":
    # Run as a script rather than imported: start unittest's runner.
    unittest.main()