| 1 | n/a | """This script generates a Python codec module from a Windows Code Page. |
|---|
| 2 | n/a | |
|---|
| 3 | n/a | It uses the function MultiByteToWideChar to generate a decoding table. |
|---|
| 4 | n/a | """ |
|---|
| 5 | n/a | |
|---|
| 6 | n/a | import ctypes |
|---|
| 7 | n/a | from ctypes import wintypes |
|---|
| 8 | n/a | from gencodec import codegen |
|---|
| 9 | n/a | import unicodedata |
|---|
| 10 | n/a | |
|---|
def genwinmap(codepage):
    """Build the decoding map of a Windows code page, one byte at a time.

    Each byte value 0..255 is decoded on its own with the Win32 function
    MultiByteToWideChar, reached through ctypes.windll (so this only runs
    on Windows).

    codepage -- code page identifier handed to MultiByteToWideChar.

    Returns a dict mapping every byte value to a tuple
    (code point, character name).  The name comes from unicodedata.name();
    when the decoded character has no name, bytes 0-31 and 127 get
    'CONTROL CHARACTER' and every other byte gets ''.

    Raises ValueError if MultiByteToWideChar does not produce exactly one
    wide character for a byte.
    """
    MultiByteToWideChar = ctypes.windll.kernel32.MultiByteToWideChar
    MultiByteToWideChar.argtypes = [wintypes.UINT, wintypes.DWORD,
                                    wintypes.LPCSTR, ctypes.c_int,
                                    wintypes.LPWSTR, ctypes.c_int]
    MultiByteToWideChar.restype = ctypes.c_int

    # Names used when unicodedata has no name for the decoded character.
    fallback_names = dict.fromkeys(list(range(32)) + [127],
                                   'CONTROL CHARACTER')

    enc2uni = {}
    for i in range(256):
        # Room for two wide characters so a second written character
        # would be noticed by the check on buf[1] below.
        buf = ctypes.create_unicode_buffer(2)
        ret = MultiByteToWideChar(codepage, 0, bytes([i]), 1, buf, 2)
        # An explicit raise rather than an assert: asserts are stripped
        # under "python -O", which would let a failed call silently map
        # the byte to U+0000.
        if ret != 1:
            raise ValueError("invalid code page")
        assert buf[1] == '\x00'

        try:
            name = unicodedata.name(buf[0])
        except ValueError:
            name = fallback_names.get(i, '')

        enc2uni[i] = (ord(buf[0]), name)

    return enc2uni
|---|
| 42 | n/a | |
|---|
def genwincodec(codepage):
    """Print the source of a codec module for Windows code page *codepage*.

    The decoding table comes from genwinmap() and the module text from
    codegen(); the encoding is named 'cp<codepage>'.  The start of the
    generated text is swapped for a docstring that records the Windows
    version reported by platform.win32_ver() and the command line used.
    """
    import platform

    decoding_table = genwinmap(codepage)
    encodingname = 'cp%d' % codepage
    generated = codegen("", decoding_table, encodingname)

    # Our own docstring, ending with the same '"""#"' marker that is used
    # below to cut the generated text.
    header = '''\
"""Python Character Mapping Codec %s generated on Windows:
%s with the command:
python Tools/unicode/genwincodec.py %s
"""#"
''' % (encodingname, ' '.join(platform.win32_ver()), codepage)

    # Keep only what follows the first '"""#"' marker in the generated code.
    remainder = generated.split('"""#"', 1)[1]

    print(header + remainder)
|---|
| 58 | n/a | |
|---|
if __name__ == '__main__':
    # Script entry point: the first command-line argument is the numeric
    # Windows code page to generate a codec for.
    import sys

    requested_codepage = int(sys.argv[1])
    genwincodec(requested_codepage)
|---|