| 1 | n/a | from test.support import open_urlresource |
|---|
| 2 | n/a | import unittest |
|---|
| 3 | n/a | |
|---|
| 4 | n/a | from http.client import HTTPException |
|---|
| 5 | n/a | import sys |
|---|
| 6 | n/a | from unicodedata import normalize, unidata_version |
|---|
| 7 | n/a | |
|---|
# File name of the normalization test data used by this module.
TESTDATAFILE = "NormalizationTest.txt"

# Download location of that file; the path component is the version of the
# Unicode database that the running unicodedata module reports.
TESTDATAURL = ("http://www.pythontest.net/unicode/"
               + unidata_version + "/" + TESTDATAFILE)
|---|
| 10 | n/a | |
|---|
def check_version(testfile):
    """Return True if the first line of *testfile* contains unidata_version.

    Exactly one line is consumed from *testfile*; the rest of the stream is
    left unread.
    """
    return unidata_version in testfile.readline()
|---|
| 14 | n/a | |
|---|
class RangeError(Exception):
    """Signals a code point larger than sys.maxunicode (see unistr())."""
|---|
| 17 | n/a | |
|---|
def NFC(str):
    """Return *str* converted to Unicode normalization form NFC."""
    # The parameter name shadows the builtin; it is kept as-is because it is
    # part of the function's signature.
    return normalize("NFC", str)
|---|
| 20 | n/a | |
|---|
def NFKC(str):
    """Return *str* converted to Unicode normalization form NFKC."""
    # The parameter name shadows the builtin; it is kept as-is because it is
    # part of the function's signature.
    return normalize("NFKC", str)
|---|
| 23 | n/a | |
|---|
def NFD(str):
    """Return *str* converted to Unicode normalization form NFD."""
    # The parameter name shadows the builtin; it is kept as-is because it is
    # part of the function's signature.
    return normalize("NFD", str)
|---|
| 26 | n/a | |
|---|
def NFKD(str):
    """Return *str* converted to Unicode normalization form NFKD."""
    # The parameter name shadows the builtin; it is kept as-is because it is
    # part of the function's signature.
    return normalize("NFKD", str)
|---|
| 29 | n/a | |
|---|
def unistr(data):
    """Build a string from space-separated hexadecimal code points.

    *data* is split on single spaces and every piece is parsed as a base-16
    integer.  RangeError is raised if any resulting value is greater than
    sys.maxunicode; otherwise the matching characters are concatenated and
    returned.
    """
    # Parse everything first so that a malformed field is reported by int()
    # before any range check takes place.
    codepoints = [int(token, 16) for token in data.split(" ")]
    if any(cp > sys.maxunicode for cp in codepoints):
        raise RangeError
    return "".join(map(chr, codepoints))
|---|
| 36 | n/a | |
|---|
class NormalizationTest(unittest.TestCase):
    """Checks unicodedata.normalize() against the NormalizationTest.txt data."""

    def test_main(self):
        # Fetch the data before doing anything else, so that a download
        # problem skips the test right away.
        try:
            testdata = open_urlresource(TESTDATAURL, encoding="utf-8",
                                        check=check_version)
        except (OSError, HTTPException):
            self.skipTest("Could not retrieve " + TESTDATAURL)
        self.addCleanup(testdata.close)

        current_part = None    # most recent "@Part..." marker that was read
        part1_chars = set()    # every c1 recorded while inside @Part1

        for raw_line in testdata:
            # Cut off a trailing '#' comment, then surrounding whitespace.
            line = raw_line.partition('#')[0].strip()
            if not line:
                continue
            if line.startswith("@Part"):
                current_part = line.split()[0]
                continue

            # Fields are ';'-separated; the piece after the last ';' is
            # discarded and exactly five columns must remain.
            fields = line.split(';')
            try:
                c1, c2, c3, c4, c5 = [unistr(field) for field in fields[:-1]]
            except RangeError:
                # The record holds an unsupported character and is skipped,
                # but inside part 1 its first column is still recorded when
                # that column alone can be converted.
                if current_part == "@Part1":
                    try:
                        part1_chars.add(unistr(fields[0]))
                    except RangeError:
                        pass
                continue

            # Each row is (normalizer, expected result, inputs); every input
            # must normalize to the expected result.
            expectations = (
                (NFC, c2, (c1, c2, c3)),
                (NFC, c4, (c4, c5)),
                (NFD, c3, (c1, c2, c3)),
                (NFD, c5, (c4, c5)),
                (NFKC, c4, (c1, c2, c3, c4, c5)),
                (NFKD, c5, (c1, c2, c3, c4, c5)),
            )
            for to_form, expected, sources in expectations:
                self.assertTrue(
                    all(to_form(src) == expected for src in sources), line)

            # Remember which characters part 1 has already covered.
            if current_part == "@Part1":
                part1_chars.add(c1)

        # Any character that was not recorded from part 1 has to be left
        # unchanged by all four normalization forms.
        for codepoint in range(sys.maxunicode + 1):
            char = chr(codepoint)
            if char not in part1_chars:
                self.assertTrue(
                    char == NFC(char) == NFD(char) == NFKC(char) == NFKD(char),
                    codepoint)

    def test_bug_834676(self):
        # Regression check for bug 834676: the call only has to complete
        # without raising.
        normalize('NFC', '\ud55c\uae00')
|---|
| 97 | n/a | |
|---|
| 98 | n/a | |
|---|
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|---|