1 | n/a | """This script generates a Python codec module from a Windows Code Page. |
---|
2 | n/a | |
---|
3 | n/a | It uses the function MultiByteToWideChar to generate a decoding table. |
---|
4 | n/a | """ |
---|
5 | n/a | |
---|
6 | n/a | import ctypes |
---|
7 | n/a | from ctypes import wintypes |
---|
8 | n/a | from gencodec import codegen |
---|
9 | n/a | import unicodedata |
---|
10 | n/a | |
---|
def genwinmap(codepage):
    """Build a byte -> Unicode decoding table for a Windows code page.

    Each of the 256 byte values is decoded on its own with the Win32
    function MultiByteToWideChar (flags set to 0).

    Args:
        codepage: Windows code page identifier, passed unchanged as the
            first argument of MultiByteToWideChar.

    Returns:
        A dict with one entry per byte value 0..255.  Each value is a
        tuple ``(code_point, name)`` where ``code_point`` is the ordinal
        of the decoded character and ``name`` is its Unicode name.  When
        unicodedata has no name for the character, ``name`` is
        'CONTROL CHARACTER' for bytes 0..31 and 127, and '' otherwise.

    Raises:
        ValueError: if MultiByteToWideChar does not report exactly one
            output character for some byte.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    # Bytes that get a generic label when unicodedata has no name for the
    # character they decode to.
    control_bytes = frozenset(range(32)) | {127}

    enc2uni = {}
    for byte in range(256):
        # A fresh two-character buffer per byte: slot 0 receives the decoded
        # character, slot 1 must stay NUL.
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(codepage, 0, bytes([byte]), 1, buf, 2)
        # Explicit check rather than ``assert`` so it survives ``python -O``.
        if ret != 1:
            raise ValueError(
                "invalid code page %r (byte 0x%02X: MultiByteToWideChar "
                "returned %r)" % (codepage, byte, ret))
        # Internal invariant: only one character was written.
        assert buf[1] == '\x00'

        char = buf[0]
        try:
            name = unicodedata.name(char)
        except ValueError:
            name = 'CONTROL CHARACTER' if byte in control_bytes else ''

        enc2uni[byte] = (ord(char), name)

    return enc2uni
---|
42 | n/a | |
---|
def genwincodec(codepage):
    """Print the source of a Python codec module for a Windows code page.

    The decoding table is obtained from genwinmap(codepage) and turned into
    module source by gencodec's codegen() under the encoding name
    'cp<codepage>'.  The part of the generated text that precedes the first
    docstring-end marker is replaced by a header recording the Windows
    version and the command used, and the result is written to stdout.
    """
    import platform

    decoding_map = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_map, encodingname)

    # Our own module docstring; it ends with the same '"""#"' marker that
    # the generated text is split on below.
    windows_version = ' '.join(platform.win32_ver())
    header = (
        '"""Python Character Mapping Codec %s generated on Windows:\n'
        '%s with the command:\n'
        '  python Tools/unicode/genwincodec.py %s\n'
        '"""#"\n'
    ) % (encodingname, windows_version, codepage)

    # Keep only what follows the first marker in the generated source.
    remainder = generated.split('"""#"', 1)[1]

    print(header + remainder)
---|
58 | n/a | |
---|
if __name__ == '__main__':
    import sys
    # Without this check a missing argument surfaces as a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit("usage: %s CODEPAGE" % sys.argv[0])
    # The first command-line argument is the numeric code page.
    genwincodec(int(sys.argv[1]))
---|