1 | n/a | /* |
---|
2 | n/a | * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings. |
---|
3 | n/a | * |
---|
4 | n/a | * Written by Hye-Shik Chang <perky@FreeBSD.org> |
---|
5 | n/a | */ |
---|
6 | n/a | |
---|
7 | n/a | #define USING_IMPORTED_MAPS |
---|
8 | n/a | #define USING_BINARY_PAIR_SEARCH |
---|
9 | n/a | #define EXTERN_JISX0213_PAIR |
---|
10 | n/a | #define EMULATE_JISX0213_2000_ENCODE_INVALID MAP_UNMAPPABLE |
---|
11 | n/a | #define EMULATE_JISX0213_2000_DECODE_INVALID MAP_UNMAPPABLE |
---|
12 | n/a | |
---|
13 | n/a | #include "cjkcodecs.h" |
---|
14 | n/a | #include "alg_jisx0201.h" |
---|
15 | n/a | #include "emu_jisx0213_2000.h" |
---|
16 | n/a | #include "mappings_jisx0213_pair.h" |
---|
17 | n/a | |
---|
18 | n/a | /* STATE |
---|
19 | n/a | |
---|
20 | n/a | state->c[0-3] |
---|
21 | n/a | |
---|
22 | n/a | 00000000 |
---|
23 | n/a | ||^^^^^| |
---|
24 | n/a | |+-----+---- G0-3 Character Set |
---|
25 | n/a | +----------- Is G0-3 double byte? |
---|
26 | n/a | |
---|
27 | n/a | state->c[4] |
---|
28 | n/a | |
---|
29 | n/a | 00000000 |
---|
30 | n/a | || |
---|
31 | n/a | |+---- Locked-Shift? |
---|
32 | n/a | +----- ESC Throughout |
---|
33 | n/a | */ |
---|
34 | n/a | |
---|
/* Control bytes that drive the ISO-2022 state machine: ESC introduces a
 * designation sequence, SO/SI set and clear the F_SHIFTED flag (G1 vs. G0),
 * and LF also clears F_SHIFTED in the decoder. */
#define ESC 0x1B
#define SO 0x0E
#define SI 0x0F
#define LF 0x0A

/* Exclusive upper bound on the byte index iso2022processesc() inspects
 * while searching for the final byte of an escape sequence. */
#define MAX_ESCSEQLEN 16

/* Single-byte character set marks.  The value is the final byte of the
 * designating escape sequence. */
#define CHARSET_ISO8859_1 'A'
#define CHARSET_ASCII 'B'
#define CHARSET_ISO8859_7 'F'
#define CHARSET_JISX0201_K 'I'
#define CHARSET_JISX0201_R 'J'

/* Double-byte character set marks: the final byte with CHARSET_DBCS ORed
 * in, so that e.g. ASCII ('B') and JIS X 0208 ('B'|CHARSET_DBCS) stay
 * distinct.  CHARSET_DBCS is defined further down; that is fine because
 * macros are expanded at the point of use. */
#define CHARSET_GB2312 ('A'|CHARSET_DBCS)
#define CHARSET_JISX0208 ('B'|CHARSET_DBCS)
#define CHARSET_KSX1001 ('C'|CHARSET_DBCS)
#define CHARSET_JISX0212 ('D'|CHARSET_DBCS)
#define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS)
#define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS)
#define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS)
#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
#define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS)
#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
#define CHARSET_JISX0208_O ('@'|CHARSET_DBCS)

/* Bit that tags a mark as double-byte, and the inverse operation that
 * recovers the final byte actually written in an escape sequence. */
#define CHARSET_DBCS 0x80
#define ESCMARK(mark) ((mark) & 0x7f)
62 | n/a | |
---|
/* True when `c` can terminate an escape sequence ('A'..'Z' or '@'). */
#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
/* True when `c2`, the byte following ESC, starts a sequence that the
 * decoder hands to iso2022processesc(). */
#define IS_ISO2022ESC(c2) \
((c2) == '(' || (c2) == ')' || (c2) == '$' || \
(c2) == '.' || (c2) == '&')
/* This is not a complete list of ISO-2022 escape sequence headers,
 * but it is enough to implement the CJK instances of ISO-2022. */

/* Sentinel results of the per-charset encoder/decoder callbacks. */
#define MAP_UNMAPPABLE 0xFFFF
/* The encoder needs to see the following character too (JIS X 0213
 * pairs); ENCODER(iso2022) then calls it a second time. */
#define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */

/* Bits stored in state->c[4]. */
#define F_SHIFTED 0x01
#define F_ESCTHROUGHOUT 0x02
75 | n/a | |
---|
/* Accessors for the designated character set of G0..G3.  They expand to
 * state->c[dn] and therefore require a variable named `state` in the
 * scope where they are used. */
#define STATE_SETG(dn, v) do { ((state)->c[dn]) = (v); } while (0)
#define STATE_GETG(dn) ((state)->c[dn])

#define STATE_G0 STATE_GETG(0)
#define STATE_G1 STATE_GETG(1)
#define STATE_G2 STATE_GETG(2)
#define STATE_G3 STATE_GETG(3)
#define STATE_SETG0(v) STATE_SETG(0, v)
#define STATE_SETG1(v) STATE_SETG(1, v)
#define STATE_SETG2(v) STATE_SETG(2, v)
#define STATE_SETG3(v) STATE_SETG(3, v)

/* Flag byte (state->c[4]) helpers; `f` is one of the F_* bits. */
#define STATE_SETFLAG(f) do { ((state)->c[4]) |= (f); } while (0)
#define STATE_GETFLAG(f) ((state)->c[4] & (f))
#define STATE_CLEARFLAG(f) do { ((state)->c[4]) &= ~(f); } while (0)
#define STATE_CLEARFLAGS() do { ((state)->c[4]) = 0; } while (0)

/* View of the opaque `config` pointer (which must be in scope) as the
 * per-encoding configuration. */
#define ISO2022_CONFIG ((const struct iso2022_config *)config)
#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag))
#define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations)

/* iso2022_config.flags */
/* NO_SHIFT: the decoder passes SI/SO through as ordinary bytes. */
#define NO_SHIFT 0x01
/* USE_G2: the decoder accepts the `ESC .` designation and `ESC N` (SS2). */
#define USE_G2 0x02
/* USE_JISX0208_EXT: the decoder accepts the prefixed designation
 * `ESC & @ ESC $ B`. */
#define USE_JISX0208_EXT 0x04
101 | n/a | |
---|
102 | n/a | /*-*- internal data structures -*-*/ |
---|
103 | n/a | |
---|
/* Callback types stored in a designation entry.
 *  - init:   returns 0 on success, non-zero on failure (see CODEC_INIT).
 *  - decode: receives a pointer to the current input bytes and returns a
 *            code point, or MAP_UNMAPPABLE.  DECODER(iso2022) treats a
 *            result >= 0x30000 as two code points packed as
 *            (first << 16) | second.
 *  - encode: receives the character(s) and an in/out length; returns the
 *            encoded value, MAP_UNMAPPABLE or MAP_MULTIPLE_AVAIL. */
typedef int (*iso2022_init_func)(void);
typedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data);
typedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length);

/* One character set an encoding may designate.  Tables of these are
 * terminated by an entry whose `mark` is 0. */
struct iso2022_designation {
    unsigned char mark;             /* CHARSET_* mark */
    unsigned char plane;            /* 0 = G0, 1 = G1, ... as used by the encoder */
    unsigned char width;            /* bytes per character: 1 or 2 */
    iso2022_init_func initializer;  /* may be NULL */
    iso2022_decode_func decoder;
    iso2022_encode_func encoder;
};

/* Per-encoding configuration reached through the `config` pointer. */
struct iso2022_config {
    int flags;                      /* NO_SHIFT / USE_G2 / USE_JISX0208_EXT bits */
    const struct iso2022_designation *designations; /* non-ascii desigs */
};
121 | n/a | |
---|
122 | n/a | /*-*- iso-2022 codec implementation -*-*/ |
---|
123 | n/a | |
---|
124 | n/a | CODEC_INIT(iso2022) |
---|
125 | n/a | { |
---|
126 | n/a | const struct iso2022_designation *desig; |
---|
127 | n/a | for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++) |
---|
128 | n/a | if (desig->initializer != NULL && desig->initializer() != 0) |
---|
129 | n/a | return -1; |
---|
130 | n/a | return 0; |
---|
131 | n/a | } |
---|
132 | n/a | |
---|
133 | n/a | ENCODER_INIT(iso2022) |
---|
134 | n/a | { |
---|
135 | n/a | STATE_CLEARFLAGS(); |
---|
136 | n/a | STATE_SETG0(CHARSET_ASCII); |
---|
137 | n/a | STATE_SETG1(CHARSET_ASCII); |
---|
138 | n/a | return 0; |
---|
139 | n/a | } |
---|
140 | n/a | |
---|
141 | n/a | ENCODER_RESET(iso2022) |
---|
142 | n/a | { |
---|
143 | n/a | if (STATE_GETFLAG(F_SHIFTED)) { |
---|
144 | n/a | WRITEBYTE1(SI); |
---|
145 | n/a | NEXT_OUT(1); |
---|
146 | n/a | STATE_CLEARFLAG(F_SHIFTED); |
---|
147 | n/a | } |
---|
148 | n/a | if (STATE_G0 != CHARSET_ASCII) { |
---|
149 | n/a | WRITEBYTE3(ESC, '(', 'B'); |
---|
150 | n/a | NEXT_OUT(3); |
---|
151 | n/a | STATE_SETG0(CHARSET_ASCII); |
---|
152 | n/a | } |
---|
153 | n/a | return 0; |
---|
154 | n/a | } |
---|
155 | n/a | |
---|
156 | n/a | ENCODER(iso2022) |
---|
157 | n/a | { |
---|
158 | n/a | while (*inpos < inlen) { |
---|
159 | n/a | const struct iso2022_designation *dsg; |
---|
160 | n/a | DBCHAR encoded; |
---|
161 | n/a | Py_UCS4 c = INCHAR1; |
---|
162 | n/a | Py_ssize_t insize; |
---|
163 | n/a | |
---|
164 | n/a | if (c < 0x80) { |
---|
165 | n/a | if (STATE_G0 != CHARSET_ASCII) { |
---|
166 | n/a | WRITEBYTE3(ESC, '(', 'B'); |
---|
167 | n/a | STATE_SETG0(CHARSET_ASCII); |
---|
168 | n/a | NEXT_OUT(3); |
---|
169 | n/a | } |
---|
170 | n/a | if (STATE_GETFLAG(F_SHIFTED)) { |
---|
171 | n/a | WRITEBYTE1(SI); |
---|
172 | n/a | STATE_CLEARFLAG(F_SHIFTED); |
---|
173 | n/a | NEXT_OUT(1); |
---|
174 | n/a | } |
---|
175 | n/a | WRITEBYTE1((unsigned char)c); |
---|
176 | n/a | NEXT(1, 1); |
---|
177 | n/a | continue; |
---|
178 | n/a | } |
---|
179 | n/a | |
---|
180 | n/a | insize = 1; |
---|
181 | n/a | |
---|
182 | n/a | encoded = MAP_UNMAPPABLE; |
---|
183 | n/a | for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { |
---|
184 | n/a | Py_ssize_t length = 1; |
---|
185 | n/a | encoded = dsg->encoder(&c, &length); |
---|
186 | n/a | if (encoded == MAP_MULTIPLE_AVAIL) { |
---|
187 | n/a | /* this implementation won't work for pair |
---|
188 | n/a | * of non-bmp characters. */ |
---|
189 | n/a | if (inlen - *inpos < 2) { |
---|
190 | n/a | if (!(flags & MBENC_FLUSH)) |
---|
191 | n/a | return MBERR_TOOFEW; |
---|
192 | n/a | length = -1; |
---|
193 | n/a | } |
---|
194 | n/a | else |
---|
195 | n/a | length = 2; |
---|
196 | n/a | encoded = dsg->encoder(&c, &length); |
---|
197 | n/a | if (encoded != MAP_UNMAPPABLE) { |
---|
198 | n/a | insize = length; |
---|
199 | n/a | break; |
---|
200 | n/a | } |
---|
201 | n/a | } |
---|
202 | n/a | else if (encoded != MAP_UNMAPPABLE) |
---|
203 | n/a | break; |
---|
204 | n/a | } |
---|
205 | n/a | |
---|
206 | n/a | if (!dsg->mark) |
---|
207 | n/a | return 1; |
---|
208 | n/a | assert(dsg->width == 1 || dsg->width == 2); |
---|
209 | n/a | |
---|
210 | n/a | switch (dsg->plane) { |
---|
211 | n/a | case 0: /* G0 */ |
---|
212 | n/a | if (STATE_GETFLAG(F_SHIFTED)) { |
---|
213 | n/a | WRITEBYTE1(SI); |
---|
214 | n/a | STATE_CLEARFLAG(F_SHIFTED); |
---|
215 | n/a | NEXT_OUT(1); |
---|
216 | n/a | } |
---|
217 | n/a | if (STATE_G0 != dsg->mark) { |
---|
218 | n/a | if (dsg->width == 1) { |
---|
219 | n/a | WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark)); |
---|
220 | n/a | STATE_SETG0(dsg->mark); |
---|
221 | n/a | NEXT_OUT(3); |
---|
222 | n/a | } |
---|
223 | n/a | else if (dsg->mark == CHARSET_JISX0208) { |
---|
224 | n/a | WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark)); |
---|
225 | n/a | STATE_SETG0(dsg->mark); |
---|
226 | n/a | NEXT_OUT(3); |
---|
227 | n/a | } |
---|
228 | n/a | else { |
---|
229 | n/a | WRITEBYTE4(ESC, '$', '(', |
---|
230 | n/a | ESCMARK(dsg->mark)); |
---|
231 | n/a | STATE_SETG0(dsg->mark); |
---|
232 | n/a | NEXT_OUT(4); |
---|
233 | n/a | } |
---|
234 | n/a | } |
---|
235 | n/a | break; |
---|
236 | n/a | case 1: /* G1 */ |
---|
237 | n/a | if (STATE_G1 != dsg->mark) { |
---|
238 | n/a | if (dsg->width == 1) { |
---|
239 | n/a | WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark)); |
---|
240 | n/a | STATE_SETG1(dsg->mark); |
---|
241 | n/a | NEXT_OUT(3); |
---|
242 | n/a | } |
---|
243 | n/a | else { |
---|
244 | n/a | WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark)); |
---|
245 | n/a | STATE_SETG1(dsg->mark); |
---|
246 | n/a | NEXT_OUT(4); |
---|
247 | n/a | } |
---|
248 | n/a | } |
---|
249 | n/a | if (!STATE_GETFLAG(F_SHIFTED)) { |
---|
250 | n/a | WRITEBYTE1(SO); |
---|
251 | n/a | STATE_SETFLAG(F_SHIFTED); |
---|
252 | n/a | NEXT_OUT(1); |
---|
253 | n/a | } |
---|
254 | n/a | break; |
---|
255 | n/a | default: /* G2 and G3 is not supported: no encoding in |
---|
256 | n/a | * CJKCodecs are using them yet */ |
---|
257 | n/a | return MBERR_INTERNAL; |
---|
258 | n/a | } |
---|
259 | n/a | |
---|
260 | n/a | if (dsg->width == 1) { |
---|
261 | n/a | WRITEBYTE1((unsigned char)encoded); |
---|
262 | n/a | NEXT_OUT(1); |
---|
263 | n/a | } |
---|
264 | n/a | else { |
---|
265 | n/a | WRITEBYTE2(encoded >> 8, encoded & 0xff); |
---|
266 | n/a | NEXT_OUT(2); |
---|
267 | n/a | } |
---|
268 | n/a | NEXT_INCHAR(insize); |
---|
269 | n/a | } |
---|
270 | n/a | |
---|
271 | n/a | return 0; |
---|
272 | n/a | } |
---|
273 | n/a | |
---|
274 | n/a | DECODER_INIT(iso2022) |
---|
275 | n/a | { |
---|
276 | n/a | STATE_CLEARFLAGS(); |
---|
277 | n/a | STATE_SETG0(CHARSET_ASCII); |
---|
278 | n/a | STATE_SETG1(CHARSET_ASCII); |
---|
279 | n/a | STATE_SETG2(CHARSET_ASCII); |
---|
280 | n/a | return 0; |
---|
281 | n/a | } |
---|
282 | n/a | |
---|
283 | n/a | DECODER_RESET(iso2022) |
---|
284 | n/a | { |
---|
285 | n/a | STATE_SETG0(CHARSET_ASCII); |
---|
286 | n/a | STATE_CLEARFLAG(F_SHIFTED); |
---|
287 | n/a | return 0; |
---|
288 | n/a | } |
---|
289 | n/a | |
---|
290 | n/a | static Py_ssize_t |
---|
291 | n/a | iso2022processesc(const void *config, MultibyteCodec_State *state, |
---|
292 | n/a | const unsigned char **inbuf, Py_ssize_t *inleft) |
---|
293 | n/a | { |
---|
294 | n/a | unsigned char charset, designation; |
---|
295 | n/a | Py_ssize_t i, esclen = 0; |
---|
296 | n/a | |
---|
297 | n/a | for (i = 1;i < MAX_ESCSEQLEN;i++) { |
---|
298 | n/a | if (i >= *inleft) |
---|
299 | n/a | return MBERR_TOOFEW; |
---|
300 | n/a | if (IS_ESCEND((*inbuf)[i])) { |
---|
301 | n/a | esclen = i + 1; |
---|
302 | n/a | break; |
---|
303 | n/a | } |
---|
304 | n/a | else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft && |
---|
305 | n/a | (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') { |
---|
306 | n/a | i += 2; |
---|
307 | n/a | } |
---|
308 | n/a | } |
---|
309 | n/a | |
---|
310 | n/a | switch (esclen) { |
---|
311 | n/a | case 0: |
---|
312 | n/a | return 1; /* unterminated escape sequence */ |
---|
313 | n/a | case 3: |
---|
314 | n/a | if (INBYTE2 == '$') { |
---|
315 | n/a | charset = INBYTE3 | CHARSET_DBCS; |
---|
316 | n/a | designation = 0; |
---|
317 | n/a | } |
---|
318 | n/a | else { |
---|
319 | n/a | charset = INBYTE3; |
---|
320 | n/a | if (INBYTE2 == '(') |
---|
321 | n/a | designation = 0; |
---|
322 | n/a | else if (INBYTE2 == ')') |
---|
323 | n/a | designation = 1; |
---|
324 | n/a | else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.') |
---|
325 | n/a | designation = 2; |
---|
326 | n/a | else |
---|
327 | n/a | return 3; |
---|
328 | n/a | } |
---|
329 | n/a | break; |
---|
330 | n/a | case 4: |
---|
331 | n/a | if (INBYTE2 != '$') |
---|
332 | n/a | return 4; |
---|
333 | n/a | |
---|
334 | n/a | charset = INBYTE4 | CHARSET_DBCS; |
---|
335 | n/a | if (INBYTE3 == '(') |
---|
336 | n/a | designation = 0; |
---|
337 | n/a | else if (INBYTE3 == ')') |
---|
338 | n/a | designation = 1; |
---|
339 | n/a | else |
---|
340 | n/a | return 4; |
---|
341 | n/a | break; |
---|
342 | n/a | case 6: /* designation with prefix */ |
---|
343 | n/a | if (CONFIG_ISSET(USE_JISX0208_EXT) && |
---|
344 | n/a | (*inbuf)[3] == ESC && (*inbuf)[4] == '$' && |
---|
345 | n/a | (*inbuf)[5] == 'B') { |
---|
346 | n/a | charset = 'B' | CHARSET_DBCS; |
---|
347 | n/a | designation = 0; |
---|
348 | n/a | } |
---|
349 | n/a | else |
---|
350 | n/a | return 6; |
---|
351 | n/a | break; |
---|
352 | n/a | default: |
---|
353 | n/a | return esclen; |
---|
354 | n/a | } |
---|
355 | n/a | |
---|
356 | n/a | /* raise error when the charset is not designated for this encoding */ |
---|
357 | n/a | if (charset != CHARSET_ASCII) { |
---|
358 | n/a | const struct iso2022_designation *dsg; |
---|
359 | n/a | |
---|
360 | n/a | for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { |
---|
361 | n/a | if (dsg->mark == charset) |
---|
362 | n/a | break; |
---|
363 | n/a | } |
---|
364 | n/a | if (!dsg->mark) |
---|
365 | n/a | return esclen; |
---|
366 | n/a | } |
---|
367 | n/a | |
---|
368 | n/a | STATE_SETG(designation, charset); |
---|
369 | n/a | *inleft -= esclen; |
---|
370 | n/a | (*inbuf) += esclen; |
---|
371 | n/a | return 0; |
---|
372 | n/a | } |
---|
373 | n/a | |
---|
/* Decode one ISO-8859-7 byte value `c` and emit the result with
 * OUTCHAR(), which the caller's scope must provide:
 *   - values below 0xA0, and those in 0xA0..0xBF selected by the first
 *     bitmap, map to the same code point;
 *   - values 0xB4..0xFE selected by the second bitmap (or >= 0xD4) map to
 *     0x02D0 + c;
 *   - 0xA1, 0xA2 and 0xAF map to U+2018, U+2019 and U+2015.
 * The expansion is an if / else-if chain with no final else, so the user
 * appends `else ...` to handle unassigned values.  `c` is evaluated
 * several times and must be free of side effects.
 *
 * The bitmap tests use unsigned long operands: the shift count reaches
 * 31, and `1L << 31` is undefined where long is 32 bits wide.
 */
#define ISO8859_7_DECODE(c, writer)                                     \
    if ((c) < 0xa0) {                                                   \
        OUTCHAR(c);                                                     \
    } else if ((c) < 0xc0 && (0x288f3bc9UL & (1UL << ((c)-0xa0)))) {    \
        OUTCHAR(c);                                                     \
    } else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||            \
               (0xbffffd77UL & (1UL << ((c)-0xb4))))) {                 \
        OUTCHAR(0x02d0 + (c));                                          \
    } else if ((c) == 0xa1) {                                           \
        OUTCHAR(0x2018);                                                \
    } else if ((c) == 0xa2) {                                           \
        OUTCHAR(0x2019);                                                \
    } else if ((c) == 0xaf) {                                           \
        OUTCHAR(0x2015);                                                \
    }
389 | n/a | |
---|
390 | n/a | static Py_ssize_t |
---|
391 | n/a | iso2022processg2(const void *config, MultibyteCodec_State *state, |
---|
392 | n/a | const unsigned char **inbuf, Py_ssize_t *inleft, |
---|
393 | n/a | _PyUnicodeWriter *writer) |
---|
394 | n/a | { |
---|
395 | n/a | /* not written to use encoder, decoder functions because only few |
---|
396 | n/a | * encodings use G2 designations in CJKCodecs */ |
---|
397 | n/a | if (STATE_G2 == CHARSET_ISO8859_1) { |
---|
398 | n/a | if (INBYTE3 < 0x80) |
---|
399 | n/a | OUTCHAR(INBYTE3 + 0x80); |
---|
400 | n/a | else |
---|
401 | n/a | return 3; |
---|
402 | n/a | } |
---|
403 | n/a | else if (STATE_G2 == CHARSET_ISO8859_7) { |
---|
404 | n/a | ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer) |
---|
405 | n/a | else |
---|
406 | n/a | return 3; |
---|
407 | n/a | } |
---|
408 | n/a | else if (STATE_G2 == CHARSET_ASCII) { |
---|
409 | n/a | if (INBYTE3 & 0x80) |
---|
410 | n/a | return 3; |
---|
411 | n/a | else |
---|
412 | n/a | OUTCHAR(INBYTE3); |
---|
413 | n/a | } |
---|
414 | n/a | else |
---|
415 | n/a | return MBERR_INTERNAL; |
---|
416 | n/a | |
---|
417 | n/a | (*inbuf) += 3; |
---|
418 | n/a | *inleft -= 3; |
---|
419 | n/a | return 0; |
---|
420 | n/a | } |
---|
421 | n/a | |
---|
/* Decode an ISO-2022 byte stream.
 *
 * Each iteration looks at the current input byte:
 *   - while F_ESCTHROUGHOUT is set, bytes are copied through unchanged
 *     until one satisfies IS_ESCEND();
 *   - ESC starts a designation (iso2022processesc), an SS2 single shift
 *     (iso2022processg2, only with USE_G2), or an unrecognised sequence
 *     that switches on F_ESCTHROUGHOUT;
 *   - SI / SO clear / set F_SHIFTED unless NO_SHIFT is configured;
 *   - LF clears F_SHIFTED and is emitted;
 *   - other bytes below 0x20 are emitted unchanged, bytes >= 0x80 are an
 *     error, and the rest are decoded with the character set currently
 *     in G1 (shifted) or G0.
 *
 * Returns 0 when the input is exhausted, 1 for a byte >= 0x80, the
 * designation's width when its decoder reports MAP_UNMAPPABLE, or the
 * non-zero result of the helper functions.  REQUIRE_INBUF() and OUTCHAR()
 * may also return on their own.
 */
DECODER(iso2022)
{
    /* last designation looked up, to skip the table scan when the same
     * character set is used repeatedly */
    const struct iso2022_designation *dsgcache = NULL;

    while (inleft > 0) {
        unsigned char c = INBYTE1;
        Py_ssize_t err;

        if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
            /* ESC throughout mode:
             * for non-iso2022 escape sequences */
            OUTCHAR(c); /* assume as ISO-8859-1 */
            NEXT_IN(1);
            if (IS_ESCEND(c)) {
                STATE_CLEARFLAG(F_ESCTHROUGHOUT);
            }
            continue;
        }

        switch (c) {
        case ESC:
            REQUIRE_INBUF(2);
            if (IS_ISO2022ESC(INBYTE2)) {
                /* the helper consumes the sequence itself on success */
                err = iso2022processesc(config, state,
                                        inbuf, &inleft);
                if (err != 0)
                    return err;
            }
            else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */
                REQUIRE_INBUF(3);
                err = iso2022processg2(config, state,
                                       inbuf, &inleft, writer);
                if (err != 0)
                    return err;
            }
            else {
                /* not an ISO-2022 sequence: pass it through verbatim */
                OUTCHAR(ESC);
                STATE_SETFLAG(F_ESCTHROUGHOUT);
                NEXT_IN(1);
            }
            break;
        case SI:
            if (CONFIG_ISSET(NO_SHIFT))
                goto bypass;
            STATE_CLEARFLAG(F_SHIFTED);
            NEXT_IN(1);
            break;
        case SO:
            if (CONFIG_ISSET(NO_SHIFT))
                goto bypass;
            STATE_SETFLAG(F_SHIFTED);
            NEXT_IN(1);
            break;
        case LF:
            STATE_CLEARFLAG(F_SHIFTED);
            OUTCHAR(LF);
            NEXT_IN(1);
            break;
        default:
            if (c < 0x20) /* C0 */
                goto bypass;
            else if (c >= 0x80)
                return 1;
            else {
                const struct iso2022_designation *dsg;
                unsigned char charset;
                Py_UCS4 decoded;

                if (STATE_GETFLAG(F_SHIFTED))
                    charset = STATE_G1;
                else
                    charset = STATE_G0;

                if (charset == CHARSET_ASCII) {
                    /* also the jump target for bytes that are emitted
                     * unchanged (C0 controls, SI/SO with NO_SHIFT) */
bypass:
                    OUTCHAR(c);
                    NEXT_IN(1);
                    break;
                }

                if (dsgcache != NULL &&
                    dsgcache->mark == charset)
                        dsg = dsgcache;
                else {
                    /* Outside Py_DEBUG builds this scan has no
                     * end-of-table test; it relies on `charset` being
                     * present in the table. */
                    for (dsg = CONFIG_DESIGNATIONS;
                         dsg->mark != charset
#ifdef Py_DEBUG
                            && dsg->mark != '\0'
#endif
                         ; dsg++)
                    {
                        /* noop */
                    }
                    assert(dsg->mark != '\0');
                    dsgcache = dsg;
                }

                REQUIRE_INBUF(dsg->width);
                decoded = dsg->decoder(*inbuf);
                if (decoded == MAP_UNMAPPABLE)
                    return dsg->width;

                if (decoded < 0x10000) {
                    OUTCHAR(decoded);
                }
                else if (decoded < 0x30000) {
                    OUTCHAR(decoded);
                }
                else { /* JIS X 0213 pairs */
                    /* two code points packed as (first << 16) | second */
                    OUTCHAR2(decoded >> 16, decoded & 0xffff);
                }
                NEXT_IN(dsg->width);
            }
            break;
        }
    }
    return 0;
}
540 | n/a | |
---|
541 | n/a | /*-*- mapping table holders -*-*/ |
---|
542 | n/a | |
---|
/* Each macro defines a file-local pointer to an encode / decode mapping
 * table, named <enc>_encmap / <enc>_decmap and initially NULL.  The
 * *_init() functions in this file fill them in through IMPORT_MAP(). */
#define ENCMAP(enc) static const encode_map *enc##_encmap = NULL;
#define DECMAP(enc) static const decode_map *enc##_decmap = NULL;

/* kr */
ENCMAP(cp949)
DECMAP(ksx1001)

/* jp */
ENCMAP(jisxcommon)
DECMAP(jisx0208)
DECMAP(jisx0212)
ENCMAP(jisx0213_bmp)
DECMAP(jisx0213_1_bmp)
DECMAP(jisx0213_2_bmp)
ENCMAP(jisx0213_emp)
DECMAP(jisx0213_1_emp)
DECMAP(jisx0213_2_emp)

/* cn */
ENCMAP(gbcommon)
DECMAP(gb2312)
564 | n/a | |
---|
565 | n/a | /* tw */ |
---|
566 | n/a | |
---|
567 | n/a | /*-*- mapping access functions -*-*/ |
---|
568 | n/a | |
---|
569 | n/a | static int |
---|
570 | n/a | ksx1001_init(void) |
---|
571 | n/a | { |
---|
572 | n/a | static int initialized = 0; |
---|
573 | n/a | |
---|
574 | n/a | if (!initialized && ( |
---|
575 | n/a | IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) || |
---|
576 | n/a | IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap))) |
---|
577 | n/a | return -1; |
---|
578 | n/a | initialized = 1; |
---|
579 | n/a | return 0; |
---|
580 | n/a | } |
---|
581 | n/a | |
---|
582 | n/a | static Py_UCS4 |
---|
583 | n/a | ksx1001_decoder(const unsigned char *data) |
---|
584 | n/a | { |
---|
585 | n/a | Py_UCS4 u; |
---|
586 | n/a | if (TRYMAP_DEC(ksx1001, u, data[0], data[1])) |
---|
587 | n/a | return u; |
---|
588 | n/a | else |
---|
589 | n/a | return MAP_UNMAPPABLE; |
---|
590 | n/a | } |
---|
591 | n/a | |
---|
592 | n/a | static DBCHAR |
---|
593 | n/a | ksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
594 | n/a | { |
---|
595 | n/a | DBCHAR coded; |
---|
596 | n/a | assert(*length == 1); |
---|
597 | n/a | if (*data < 0x10000) { |
---|
598 | n/a | if (TRYMAP_ENC(cp949, coded, *data)) { |
---|
599 | n/a | if (!(coded & 0x8000)) |
---|
600 | n/a | return coded; |
---|
601 | n/a | } |
---|
602 | n/a | } |
---|
603 | n/a | return MAP_UNMAPPABLE; |
---|
604 | n/a | } |
---|
605 | n/a | |
---|
606 | n/a | static int |
---|
607 | n/a | jisx0208_init(void) |
---|
608 | n/a | { |
---|
609 | n/a | static int initialized = 0; |
---|
610 | n/a | |
---|
611 | n/a | if (!initialized && ( |
---|
612 | n/a | IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || |
---|
613 | n/a | IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap))) |
---|
614 | n/a | return -1; |
---|
615 | n/a | initialized = 1; |
---|
616 | n/a | return 0; |
---|
617 | n/a | } |
---|
618 | n/a | |
---|
619 | n/a | static Py_UCS4 |
---|
620 | n/a | jisx0208_decoder(const unsigned char *data) |
---|
621 | n/a | { |
---|
622 | n/a | Py_UCS4 u; |
---|
623 | n/a | if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ |
---|
624 | n/a | return 0xff3c; |
---|
625 | n/a | else if (TRYMAP_DEC(jisx0208, u, data[0], data[1])) |
---|
626 | n/a | return u; |
---|
627 | n/a | else |
---|
628 | n/a | return MAP_UNMAPPABLE; |
---|
629 | n/a | } |
---|
630 | n/a | |
---|
631 | n/a | static DBCHAR |
---|
632 | n/a | jisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
633 | n/a | { |
---|
634 | n/a | DBCHAR coded; |
---|
635 | n/a | assert(*length == 1); |
---|
636 | n/a | if (*data < 0x10000) { |
---|
637 | n/a | if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */ |
---|
638 | n/a | return 0x2140; |
---|
639 | n/a | else if (TRYMAP_ENC(jisxcommon, coded, *data)) { |
---|
640 | n/a | if (!(coded & 0x8000)) |
---|
641 | n/a | return coded; |
---|
642 | n/a | } |
---|
643 | n/a | } |
---|
644 | n/a | return MAP_UNMAPPABLE; |
---|
645 | n/a | } |
---|
646 | n/a | |
---|
647 | n/a | static int |
---|
648 | n/a | jisx0212_init(void) |
---|
649 | n/a | { |
---|
650 | n/a | static int initialized = 0; |
---|
651 | n/a | |
---|
652 | n/a | if (!initialized && ( |
---|
653 | n/a | IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) || |
---|
654 | n/a | IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap))) |
---|
655 | n/a | return -1; |
---|
656 | n/a | initialized = 1; |
---|
657 | n/a | return 0; |
---|
658 | n/a | } |
---|
659 | n/a | |
---|
660 | n/a | static Py_UCS4 |
---|
661 | n/a | jisx0212_decoder(const unsigned char *data) |
---|
662 | n/a | { |
---|
663 | n/a | Py_UCS4 u; |
---|
664 | n/a | if (TRYMAP_DEC(jisx0212, u, data[0], data[1])) |
---|
665 | n/a | return u; |
---|
666 | n/a | else |
---|
667 | n/a | return MAP_UNMAPPABLE; |
---|
668 | n/a | } |
---|
669 | n/a | |
---|
670 | n/a | static DBCHAR |
---|
671 | n/a | jisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
672 | n/a | { |
---|
673 | n/a | DBCHAR coded; |
---|
674 | n/a | assert(*length == 1); |
---|
675 | n/a | if (*data < 0x10000) { |
---|
676 | n/a | if (TRYMAP_ENC(jisxcommon, coded, *data)) { |
---|
677 | n/a | if (coded & 0x8000) |
---|
678 | n/a | return coded & 0x7fff; |
---|
679 | n/a | } |
---|
680 | n/a | } |
---|
681 | n/a | return MAP_UNMAPPABLE; |
---|
682 | n/a | } |
---|
683 | n/a | |
---|
684 | n/a | static int |
---|
685 | n/a | jisx0213_init(void) |
---|
686 | n/a | { |
---|
687 | n/a | static int initialized = 0; |
---|
688 | n/a | |
---|
689 | n/a | if (!initialized && ( |
---|
690 | n/a | jisx0208_init() || |
---|
691 | n/a | IMPORT_MAP(jp, jisx0213_bmp, |
---|
692 | n/a | &jisx0213_bmp_encmap, NULL) || |
---|
693 | n/a | IMPORT_MAP(jp, jisx0213_1_bmp, |
---|
694 | n/a | NULL, &jisx0213_1_bmp_decmap) || |
---|
695 | n/a | IMPORT_MAP(jp, jisx0213_2_bmp, |
---|
696 | n/a | NULL, &jisx0213_2_bmp_decmap) || |
---|
697 | n/a | IMPORT_MAP(jp, jisx0213_emp, |
---|
698 | n/a | &jisx0213_emp_encmap, NULL) || |
---|
699 | n/a | IMPORT_MAP(jp, jisx0213_1_emp, |
---|
700 | n/a | NULL, &jisx0213_1_emp_decmap) || |
---|
701 | n/a | IMPORT_MAP(jp, jisx0213_2_emp, |
---|
702 | n/a | NULL, &jisx0213_2_emp_decmap) || |
---|
703 | n/a | IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap, |
---|
704 | n/a | &jisx0213_pair_decmap))) |
---|
705 | n/a | return -1; |
---|
706 | n/a | initialized = 1; |
---|
707 | n/a | return 0; |
---|
708 | n/a | } |
---|
709 | n/a | |
---|
710 | n/a | #define config ((void *)2000) |
---|
711 | n/a | static Py_UCS4 |
---|
712 | n/a | jisx0213_2000_1_decoder(const unsigned char *data) |
---|
713 | n/a | { |
---|
714 | n/a | Py_UCS4 u; |
---|
715 | n/a | EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1]) |
---|
716 | n/a | else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ |
---|
717 | n/a | return 0xff3c; |
---|
718 | n/a | else if (TRYMAP_DEC(jisx0208, u, data[0], data[1])) |
---|
719 | n/a | ; |
---|
720 | n/a | else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1])) |
---|
721 | n/a | ; |
---|
722 | n/a | else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])) |
---|
723 | n/a | u |= 0x20000; |
---|
724 | n/a | else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1])) |
---|
725 | n/a | ; |
---|
726 | n/a | else |
---|
727 | n/a | return MAP_UNMAPPABLE; |
---|
728 | n/a | return u; |
---|
729 | n/a | } |
---|
730 | n/a | |
---|
731 | n/a | static Py_UCS4 |
---|
732 | n/a | jisx0213_2000_2_decoder(const unsigned char *data) |
---|
733 | n/a | { |
---|
734 | n/a | Py_UCS4 u; |
---|
735 | n/a | EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1]) |
---|
736 | n/a | if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1])) |
---|
737 | n/a | ; |
---|
738 | n/a | else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])) |
---|
739 | n/a | u |= 0x20000; |
---|
740 | n/a | else |
---|
741 | n/a | return MAP_UNMAPPABLE; |
---|
742 | n/a | return u; |
---|
743 | n/a | } |
---|
744 | n/a | #undef config |
---|
745 | n/a | |
---|
746 | n/a | static Py_UCS4 |
---|
747 | n/a | jisx0213_2004_1_decoder(const unsigned char *data) |
---|
748 | n/a | { |
---|
749 | n/a | Py_UCS4 u; |
---|
750 | n/a | if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */ |
---|
751 | n/a | return 0xff3c; |
---|
752 | n/a | else if (TRYMAP_DEC(jisx0208, u, data[0], data[1])) |
---|
753 | n/a | ; |
---|
754 | n/a | else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1])) |
---|
755 | n/a | ; |
---|
756 | n/a | else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1])) |
---|
757 | n/a | u |= 0x20000; |
---|
758 | n/a | else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1])) |
---|
759 | n/a | ; |
---|
760 | n/a | else |
---|
761 | n/a | return MAP_UNMAPPABLE; |
---|
762 | n/a | return u; |
---|
763 | n/a | } |
---|
764 | n/a | |
---|
765 | n/a | static Py_UCS4 |
---|
766 | n/a | jisx0213_2004_2_decoder(const unsigned char *data) |
---|
767 | n/a | { |
---|
768 | n/a | Py_UCS4 u; |
---|
769 | n/a | if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1])) |
---|
770 | n/a | ; |
---|
771 | n/a | else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1])) |
---|
772 | n/a | u |= 0x20000; |
---|
773 | n/a | else |
---|
774 | n/a | return MAP_UNMAPPABLE; |
---|
775 | n/a | return u; |
---|
776 | n/a | } |
---|
777 | n/a | |
---|
778 | n/a | static DBCHAR |
---|
779 | n/a | jisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config) |
---|
780 | n/a | { |
---|
781 | n/a | DBCHAR coded; |
---|
782 | n/a | |
---|
783 | n/a | switch (*length) { |
---|
784 | n/a | case 1: /* first character */ |
---|
785 | n/a | if (*data >= 0x10000) { |
---|
786 | n/a | if ((*data) >> 16 == 0x20000 >> 16) { |
---|
787 | n/a | EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data) |
---|
788 | n/a | else if (TRYMAP_ENC(jisx0213_emp, coded, (*data) & 0xffff)) |
---|
789 | n/a | return coded; |
---|
790 | n/a | } |
---|
791 | n/a | return MAP_UNMAPPABLE; |
---|
792 | n/a | } |
---|
793 | n/a | |
---|
794 | n/a | EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data) |
---|
795 | n/a | else if (TRYMAP_ENC(jisx0213_bmp, coded, *data)) { |
---|
796 | n/a | if (coded == MULTIC) |
---|
797 | n/a | return MAP_MULTIPLE_AVAIL; |
---|
798 | n/a | } |
---|
799 | n/a | else if (TRYMAP_ENC(jisxcommon, coded, *data)) { |
---|
800 | n/a | if (coded & 0x8000) |
---|
801 | n/a | return MAP_UNMAPPABLE; |
---|
802 | n/a | } |
---|
803 | n/a | else |
---|
804 | n/a | return MAP_UNMAPPABLE; |
---|
805 | n/a | return coded; |
---|
806 | n/a | |
---|
807 | n/a | case 2: /* second character of unicode pair */ |
---|
808 | n/a | coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1], |
---|
809 | n/a | jisx0213_pair_encmap, JISX0213_ENCPAIRS); |
---|
810 | n/a | if (coded == DBCINV) { |
---|
811 | n/a | *length = 1; |
---|
812 | n/a | coded = find_pairencmap((ucs2_t)data[0], 0, |
---|
813 | n/a | jisx0213_pair_encmap, JISX0213_ENCPAIRS); |
---|
814 | n/a | if (coded == DBCINV) |
---|
815 | n/a | return MAP_UNMAPPABLE; |
---|
816 | n/a | } |
---|
817 | n/a | else |
---|
818 | n/a | return coded; |
---|
819 | n/a | |
---|
820 | n/a | case -1: /* flush unterminated */ |
---|
821 | n/a | *length = 1; |
---|
822 | n/a | coded = find_pairencmap((ucs2_t)data[0], 0, |
---|
823 | n/a | jisx0213_pair_encmap, JISX0213_ENCPAIRS); |
---|
824 | n/a | if (coded == DBCINV) |
---|
825 | n/a | return MAP_UNMAPPABLE; |
---|
826 | n/a | else |
---|
827 | n/a | return coded; |
---|
828 | n/a | break; |
---|
829 | n/a | |
---|
830 | n/a | default: |
---|
831 | n/a | return MAP_UNMAPPABLE; |
---|
832 | n/a | } |
---|
833 | n/a | } |
---|
834 | n/a | |
---|
835 | n/a | static DBCHAR |
---|
836 | n/a | jisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
837 | n/a | { |
---|
838 | n/a | DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); |
---|
839 | n/a | if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) |
---|
840 | n/a | return coded; |
---|
841 | n/a | else if (coded & 0x8000) |
---|
842 | n/a | return MAP_UNMAPPABLE; |
---|
843 | n/a | else |
---|
844 | n/a | return coded; |
---|
845 | n/a | } |
---|
846 | n/a | |
---|
847 | n/a | static DBCHAR |
---|
848 | n/a | jisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) |
---|
849 | n/a | { |
---|
850 | n/a | DBCHAR coded; |
---|
851 | n/a | Py_ssize_t ilength = *length; |
---|
852 | n/a | |
---|
853 | n/a | coded = jisx0213_encoder(data, length, (void *)2000); |
---|
854 | n/a | switch (ilength) { |
---|
855 | n/a | case 1: |
---|
856 | n/a | if (coded == MAP_MULTIPLE_AVAIL) |
---|
857 | n/a | return MAP_MULTIPLE_AVAIL; |
---|
858 | n/a | else |
---|
859 | n/a | return MAP_UNMAPPABLE; |
---|
860 | n/a | case 2: |
---|
861 | n/a | if (*length != 2) |
---|
862 | n/a | return MAP_UNMAPPABLE; |
---|
863 | n/a | else |
---|
864 | n/a | return coded; |
---|
865 | n/a | default: |
---|
866 | n/a | return MAP_UNMAPPABLE; |
---|
867 | n/a | } |
---|
868 | n/a | } |
---|
869 | n/a | |
---|
870 | n/a | static DBCHAR |
---|
871 | n/a | jisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
872 | n/a | { |
---|
873 | n/a | DBCHAR coded = jisx0213_encoder(data, length, (void *)2000); |
---|
874 | n/a | if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) |
---|
875 | n/a | return coded; |
---|
876 | n/a | else if (coded & 0x8000) |
---|
877 | n/a | return coded & 0x7fff; |
---|
878 | n/a | else |
---|
879 | n/a | return MAP_UNMAPPABLE; |
---|
880 | n/a | } |
---|
881 | n/a | |
---|
882 | n/a | static DBCHAR |
---|
883 | n/a | jisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
884 | n/a | { |
---|
885 | n/a | DBCHAR coded = jisx0213_encoder(data, length, NULL); |
---|
886 | n/a | if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) |
---|
887 | n/a | return coded; |
---|
888 | n/a | else if (coded & 0x8000) |
---|
889 | n/a | return MAP_UNMAPPABLE; |
---|
890 | n/a | else |
---|
891 | n/a | return coded; |
---|
892 | n/a | } |
---|
893 | n/a | |
---|
894 | n/a | static DBCHAR |
---|
895 | n/a | jisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length) |
---|
896 | n/a | { |
---|
897 | n/a | DBCHAR coded; |
---|
898 | n/a | Py_ssize_t ilength = *length; |
---|
899 | n/a | |
---|
900 | n/a | coded = jisx0213_encoder(data, length, NULL); |
---|
901 | n/a | switch (ilength) { |
---|
902 | n/a | case 1: |
---|
903 | n/a | if (coded == MAP_MULTIPLE_AVAIL) |
---|
904 | n/a | return MAP_MULTIPLE_AVAIL; |
---|
905 | n/a | else |
---|
906 | n/a | return MAP_UNMAPPABLE; |
---|
907 | n/a | case 2: |
---|
908 | n/a | if (*length != 2) |
---|
909 | n/a | return MAP_UNMAPPABLE; |
---|
910 | n/a | else |
---|
911 | n/a | return coded; |
---|
912 | n/a | default: |
---|
913 | n/a | return MAP_UNMAPPABLE; |
---|
914 | n/a | } |
---|
915 | n/a | } |
---|
916 | n/a | |
---|
917 | n/a | static DBCHAR |
---|
918 | n/a | jisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
919 | n/a | { |
---|
920 | n/a | DBCHAR coded = jisx0213_encoder(data, length, NULL); |
---|
921 | n/a | if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL) |
---|
922 | n/a | return coded; |
---|
923 | n/a | else if (coded & 0x8000) |
---|
924 | n/a | return coded & 0x7fff; |
---|
925 | n/a | else |
---|
926 | n/a | return MAP_UNMAPPABLE; |
---|
927 | n/a | } |
---|
928 | n/a | |
---|
929 | n/a | static Py_UCS4 |
---|
930 | n/a | jisx0201_r_decoder(const unsigned char *data) |
---|
931 | n/a | { |
---|
932 | n/a | Py_UCS4 u; |
---|
933 | n/a | JISX0201_R_DECODE_CHAR(*data, u) |
---|
934 | n/a | else |
---|
935 | n/a | return MAP_UNMAPPABLE; |
---|
936 | n/a | return u; |
---|
937 | n/a | } |
---|
938 | n/a | |
---|
939 | n/a | static DBCHAR |
---|
940 | n/a | jisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
941 | n/a | { |
---|
942 | n/a | DBCHAR coded; |
---|
943 | n/a | JISX0201_R_ENCODE(*data, coded) |
---|
944 | n/a | else |
---|
945 | n/a | return MAP_UNMAPPABLE; |
---|
946 | n/a | return coded; |
---|
947 | n/a | } |
---|
948 | n/a | |
---|
949 | n/a | static Py_UCS4 |
---|
950 | n/a | jisx0201_k_decoder(const unsigned char *data) |
---|
951 | n/a | { |
---|
952 | n/a | Py_UCS4 u; |
---|
953 | n/a | JISX0201_K_DECODE_CHAR(*data ^ 0x80, u) |
---|
954 | n/a | else |
---|
955 | n/a | return MAP_UNMAPPABLE; |
---|
956 | n/a | return u; |
---|
957 | n/a | } |
---|
958 | n/a | |
---|
959 | n/a | static DBCHAR |
---|
960 | n/a | jisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
961 | n/a | { |
---|
962 | n/a | DBCHAR coded; |
---|
963 | n/a | JISX0201_K_ENCODE(*data, coded) |
---|
964 | n/a | else |
---|
965 | n/a | return MAP_UNMAPPABLE; |
---|
966 | n/a | return coded - 0x80; |
---|
967 | n/a | } |
---|
968 | n/a | |
---|
969 | n/a | static int |
---|
970 | n/a | gb2312_init(void) |
---|
971 | n/a | { |
---|
972 | n/a | static int initialized = 0; |
---|
973 | n/a | |
---|
974 | n/a | if (!initialized && ( |
---|
975 | n/a | IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) || |
---|
976 | n/a | IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap))) |
---|
977 | n/a | return -1; |
---|
978 | n/a | initialized = 1; |
---|
979 | n/a | return 0; |
---|
980 | n/a | } |
---|
981 | n/a | |
---|
982 | n/a | static Py_UCS4 |
---|
983 | n/a | gb2312_decoder(const unsigned char *data) |
---|
984 | n/a | { |
---|
985 | n/a | Py_UCS4 u; |
---|
986 | n/a | if (TRYMAP_DEC(gb2312, u, data[0], data[1])) |
---|
987 | n/a | return u; |
---|
988 | n/a | else |
---|
989 | n/a | return MAP_UNMAPPABLE; |
---|
990 | n/a | } |
---|
991 | n/a | |
---|
992 | n/a | static DBCHAR |
---|
993 | n/a | gb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
994 | n/a | { |
---|
995 | n/a | DBCHAR coded; |
---|
996 | n/a | assert(*length == 1); |
---|
997 | n/a | if (*data < 0x10000) { |
---|
998 | n/a | if (TRYMAP_ENC(gbcommon, coded, *data)) { |
---|
999 | n/a | if (!(coded & 0x8000)) |
---|
1000 | n/a | return coded; |
---|
1001 | n/a | } |
---|
1002 | n/a | } |
---|
1003 | n/a | return MAP_UNMAPPABLE; |
---|
1004 | n/a | } |
---|
1005 | n/a | |
---|
1006 | n/a | |
---|
1007 | n/a | static Py_UCS4 |
---|
1008 | n/a | dummy_decoder(const unsigned char *data) |
---|
1009 | n/a | { |
---|
1010 | n/a | return MAP_UNMAPPABLE; |
---|
1011 | n/a | } |
---|
1012 | n/a | |
---|
1013 | n/a | static DBCHAR |
---|
1014 | n/a | dummy_encoder(const Py_UCS4 *data, Py_ssize_t *length) |
---|
1015 | n/a | { |
---|
1016 | n/a | return MAP_UNMAPPABLE; |
---|
1017 | n/a | } |
---|
1018 | n/a | |
---|
/*-*- registry tables -*-*/

/*
 * Each REGISTRY_* macro expands to one brace initializer of the form
 *
 *     { charset id, integer, integer, init function or NULL,
 *       decoder function, encoder function }
 *
 * NOTE(review): the first integer looks like the G-set index the charset
 * is designated to (REGISTRY_KSX1001_G0 uses 0, REGISTRY_KSX1001_G1 uses 1)
 * and the second like the number of bytes per character (1 for the
 * JIS X 0201 and ISO 8859 entries, 2 elsewhere) -- confirm against the
 * definition of struct iso2022_designation.
 *
 * The two ISO 8859 entries use dummy_decoder/dummy_encoder, which always
 * report MAP_UNMAPPABLE.  REGISTRY_SENTINEL is the zero entry that ends
 * each designation array.
 */

#define REGISTRY_KSX1001_G0     { CHARSET_KSX1001, 0, 2,                \
                                  ksx1001_init,                         \
                                  ksx1001_decoder, ksx1001_encoder }
#define REGISTRY_KSX1001_G1     { CHARSET_KSX1001, 1, 2,                \
                                  ksx1001_init,                         \
                                  ksx1001_decoder, ksx1001_encoder }
#define REGISTRY_JISX0201_R     { CHARSET_JISX0201_R, 0, 1,             \
                                  NULL,                                 \
                                  jisx0201_r_decoder, jisx0201_r_encoder }
#define REGISTRY_JISX0201_K     { CHARSET_JISX0201_K, 0, 1,             \
                                  NULL,                                 \
                                  jisx0201_k_decoder, jisx0201_k_encoder }
#define REGISTRY_JISX0208       { CHARSET_JISX0208, 0, 2,               \
                                  jisx0208_init,                        \
                                  jisx0208_decoder, jisx0208_encoder }
#define REGISTRY_JISX0208_O     { CHARSET_JISX0208_O, 0, 2,             \
                                  jisx0208_init,                        \
                                  jisx0208_decoder, jisx0208_encoder }
#define REGISTRY_JISX0212       { CHARSET_JISX0212, 0, 2,               \
                                  jisx0212_init,                        \
                                  jisx0212_decoder, jisx0212_encoder }
#define REGISTRY_JISX0213_2000_1 { CHARSET_JISX0213_2000_1, 0, 2,       \
                                  jisx0213_init,                        \
                                  jisx0213_2000_1_decoder,              \
                                  jisx0213_2000_1_encoder }
/* Same charset id and decoder as above, but with the pair-only encoder. */
#define REGISTRY_JISX0213_2000_1_PAIRONLY { CHARSET_JISX0213_2000_1, 0, 2, \
                                  jisx0213_init,                        \
                                  jisx0213_2000_1_decoder,              \
                                  jisx0213_2000_1_encoder_paironly }
#define REGISTRY_JISX0213_2000_2 { CHARSET_JISX0213_2, 0, 2,            \
                                  jisx0213_init,                        \
                                  jisx0213_2000_2_decoder,              \
                                  jisx0213_2000_2_encoder }
#define REGISTRY_JISX0213_2004_1 { CHARSET_JISX0213_2004_1, 0, 2,       \
                                  jisx0213_init,                        \
                                  jisx0213_2004_1_decoder,              \
                                  jisx0213_2004_1_encoder }
/* Same charset id and decoder as above, but with the pair-only encoder. */
#define REGISTRY_JISX0213_2004_1_PAIRONLY { CHARSET_JISX0213_2004_1, 0, 2, \
                                  jisx0213_init,                        \
                                  jisx0213_2004_1_decoder,              \
                                  jisx0213_2004_1_encoder_paironly }
#define REGISTRY_JISX0213_2004_2 { CHARSET_JISX0213_2, 0, 2,            \
                                  jisx0213_init,                        \
                                  jisx0213_2004_2_decoder,              \
                                  jisx0213_2004_2_encoder }
#define REGISTRY_GB2312         { CHARSET_GB2312, 0, 2,                 \
                                  gb2312_init,                          \
                                  gb2312_decoder, gb2312_encoder }
#define REGISTRY_CNS11643_1     { CHARSET_CNS11643_1, 1, 2,             \
                                  cns11643_init,                        \
                                  cns11643_1_decoder, cns11643_1_encoder }
#define REGISTRY_CNS11643_2     { CHARSET_CNS11643_2, 2, 2,             \
                                  cns11643_init,                        \
                                  cns11643_2_decoder, cns11643_2_encoder }
#define REGISTRY_ISO8859_1      { CHARSET_ISO8859_1, 2, 1,              \
                                  NULL, dummy_decoder, dummy_encoder }
#define REGISTRY_ISO8859_7      { CHARSET_ISO8859_7, 2, 1,              \
                                  NULL, dummy_decoder, dummy_encoder }
#define REGISTRY_SENTINEL       { 0, }
/*
 * CONFIGDEF(var, attrs) defines the static const object
 * iso2022_<var>_config of type struct iso2022_config, initialised with
 * attrs followed by the array iso2022_<var>_designations, which therefore
 * has to be declared before the macro is used.
 */
#define CONFIGDEF(var, attrs)                                           \
    static const struct iso2022_config iso2022_##var##_config = {       \
        attrs, iso2022_##var##_designations                             \
    };
1084 | n/a | |
---|
/*
 * Per-variant designation tables.  Each array lists the REGISTRY_* entries
 * available to one ISO-2022 variant and ends with REGISTRY_SENTINEL; the
 * CONFIGDEF line after it pairs the array with that variant's attribute
 * flags in iso2022_<variant>_config.
 * NOTE(review): entry order is presumably significant (jp_2004 and jp_3
 * deliberately list the *_PAIRONLY entry first) -- confirm against the code
 * that walks these arrays before reordering anything.
 */

/* ISO-2022-KR: KS X 1001 via the G1 registry entry, no attribute flags. */
static const struct iso2022_designation iso2022_kr_designations[] = {
    REGISTRY_KSX1001_G1, REGISTRY_SENTINEL
};
CONFIGDEF(kr, 0)

/* ISO-2022-JP */
static const struct iso2022_designation iso2022_jp_designations[] = {
    REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
    REGISTRY_SENTINEL
};
CONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT)

/* ISO-2022-JP-1: the jp entries plus JIS X 0212. */
static const struct iso2022_designation iso2022_jp_1_designations[] = {
    REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
    REGISTRY_JISX0208_O, REGISTRY_SENTINEL
};
CONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT)

/* ISO-2022-JP-2: adds KS X 1001, GB 2312 and the two ISO 8859 entries;
 * the only variant here that sets USE_G2. */
static const struct iso2022_designation iso2022_jp_2_designations[] = {
    REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,
    REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
    REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL
};
CONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT)

/* ISO-2022-JP-2004: JIS X 0213:2004 entries, pair-only entry first. */
static const struct iso2022_designation iso2022_jp_2004_designations[] = {
    REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208,
    REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL
};
CONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT)

/* ISO-2022-JP-3: JIS X 0213:2000 entries, pair-only entry first. */
static const struct iso2022_designation iso2022_jp_3_designations[] = {
    REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208,
    REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL
};
CONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT)

/* ISO-2022-JP-EXT: the jp_1 entries plus REGISTRY_JISX0201_K. */
static const struct iso2022_designation iso2022_jp_ext_designations[] = {
    REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
    REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL
};
CONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)
1126 | n/a | |
---|
1127 | n/a | |
---|
/* This module defines no mapping tables of its own, so the list is empty. */
BEGIN_MAPPINGS_LIST
/* no mapping table here */
END_MAPPINGS_LIST

/*
 * ISO2022_CODEC(variation) expands to one codec-list entry: the name string
 * "iso2022_<variation>", a pointer to iso2022_<variation>_config,
 * iso2022_codec_init, and whatever _STATEFUL_METHODS(iso2022) expands to.
 * The trailing comma is part of the macro, so uses below take no separator.
 */
#define ISO2022_CODEC(variation) {              \
    "iso2022_" #variation,                      \
    &iso2022_##variation##_config,              \
    iso2022_codec_init,                         \
    _STATEFUL_METHODS(iso2022)                  \
},

/* One entry per variant that has a CONFIGDEF in this file. */
BEGIN_CODECS_LIST
  ISO2022_CODEC(kr)
  ISO2022_CODEC(jp)
  ISO2022_CODEC(jp_1)
  ISO2022_CODEC(jp_2)
  ISO2022_CODEC(jp_2004)
  ISO2022_CODEC(jp_3)
  ISO2022_CODEC(jp_ext)
END_CODECS_LIST

/* NOTE(review): presumably emits the module definition/init boilerplate for
 * the "iso2022" codec module -- confirm in cjkcodecs.h. */
I_AM_A_MODULE_FOR(iso2022)