» Core Development > Code coverage > Modules/cjkcodecs/_codecs_iso2022.c

Python code coverage for Modules/cjkcodecs/_codecs_iso2022.c

# | count | content
1n/a/*
2n/a * _codecs_iso2022.c: Codecs collection for ISO-2022 encodings.
3n/a *
4n/a * Written by Hye-Shik Chang <perky@FreeBSD.org>
5n/a */
6n/a
7n/a#define USING_IMPORTED_MAPS
8n/a#define USING_BINARY_PAIR_SEARCH
9n/a#define EXTERN_JISX0213_PAIR
10n/a#define EMULATE_JISX0213_2000_ENCODE_INVALID MAP_UNMAPPABLE
11n/a#define EMULATE_JISX0213_2000_DECODE_INVALID MAP_UNMAPPABLE
12n/a
13n/a#include "cjkcodecs.h"
14n/a#include "alg_jisx0201.h"
15n/a#include "emu_jisx0213_2000.h"
16n/a#include "mappings_jisx0213_pair.h"
17n/a
/* STATE

   state->c[0-3]

    00000000
    ||^^^^^|
    |+-----+----  G0-3 Character Set
    +-----------  Is G0-3 double byte?

   state->c[4]

    00000000
          ||
          |+----  Locked-Shift?
          +-----  ESC Throughout
*/
34n/a
35n/a#define ESC 0x1B
36n/a#define SO 0x0E
37n/a#define SI 0x0F
38n/a#define LF 0x0A
39n/a
40n/a#define MAX_ESCSEQLEN 16
41n/a
42n/a#define CHARSET_ISO8859_1 'A'
43n/a#define CHARSET_ASCII 'B'
44n/a#define CHARSET_ISO8859_7 'F'
45n/a#define CHARSET_JISX0201_K 'I'
46n/a#define CHARSET_JISX0201_R 'J'
47n/a
48n/a#define CHARSET_GB2312 ('A'|CHARSET_DBCS)
49n/a#define CHARSET_JISX0208 ('B'|CHARSET_DBCS)
50n/a#define CHARSET_KSX1001 ('C'|CHARSET_DBCS)
51n/a#define CHARSET_JISX0212 ('D'|CHARSET_DBCS)
52n/a#define CHARSET_GB2312_8565 ('E'|CHARSET_DBCS)
53n/a#define CHARSET_CNS11643_1 ('G'|CHARSET_DBCS)
54n/a#define CHARSET_CNS11643_2 ('H'|CHARSET_DBCS)
55n/a#define CHARSET_JISX0213_2000_1 ('O'|CHARSET_DBCS)
56n/a#define CHARSET_JISX0213_2 ('P'|CHARSET_DBCS)
57n/a#define CHARSET_JISX0213_2004_1 ('Q'|CHARSET_DBCS)
58n/a#define CHARSET_JISX0208_O ('@'|CHARSET_DBCS)
59n/a
60n/a#define CHARSET_DBCS 0x80
61n/a#define ESCMARK(mark) ((mark) & 0x7f)
62n/a
63n/a#define IS_ESCEND(c) (((c) >= 'A' && (c) <= 'Z') || (c) == '@')
64n/a#define IS_ISO2022ESC(c2) \
65n/a ((c2) == '(' || (c2) == ')' || (c2) == '$' || \
66n/a (c2) == '.' || (c2) == '&')
67n/a /* this is not a complete list of ISO-2022 escape sequence headers.
68n/a * but, it's enough to implement CJK instances of iso-2022. */
69n/a
70n/a#define MAP_UNMAPPABLE 0xFFFF
71n/a#define MAP_MULTIPLE_AVAIL 0xFFFE /* for JIS X 0213 */
72n/a
73n/a#define F_SHIFTED 0x01
74n/a#define F_ESCTHROUGHOUT 0x02
75n/a
76n/a#define STATE_SETG(dn, v) do { ((state)->c[dn]) = (v); } while (0)
77n/a#define STATE_GETG(dn) ((state)->c[dn])
78n/a
79n/a#define STATE_G0 STATE_GETG(0)
80n/a#define STATE_G1 STATE_GETG(1)
81n/a#define STATE_G2 STATE_GETG(2)
82n/a#define STATE_G3 STATE_GETG(3)
83n/a#define STATE_SETG0(v) STATE_SETG(0, v)
84n/a#define STATE_SETG1(v) STATE_SETG(1, v)
85n/a#define STATE_SETG2(v) STATE_SETG(2, v)
86n/a#define STATE_SETG3(v) STATE_SETG(3, v)
87n/a
88n/a#define STATE_SETFLAG(f) do { ((state)->c[4]) |= (f); } while (0)
89n/a#define STATE_GETFLAG(f) ((state)->c[4] & (f))
90n/a#define STATE_CLEARFLAG(f) do { ((state)->c[4]) &= ~(f); } while (0)
91n/a#define STATE_CLEARFLAGS() do { ((state)->c[4]) = 0; } while (0)
92n/a
93n/a#define ISO2022_CONFIG ((const struct iso2022_config *)config)
94n/a#define CONFIG_ISSET(flag) (ISO2022_CONFIG->flags & (flag))
95n/a#define CONFIG_DESIGNATIONS (ISO2022_CONFIG->designations)
96n/a
97n/a/* iso2022_config.flags */
98n/a#define NO_SHIFT 0x01
99n/a#define USE_G2 0x02
100n/a#define USE_JISX0208_EXT 0x04
101n/a
102n/a/*-*- internal data structures -*-*/
103n/a
104n/atypedef int (*iso2022_init_func)(void);
105n/atypedef Py_UCS4 (*iso2022_decode_func)(const unsigned char *data);
106n/atypedef DBCHAR (*iso2022_encode_func)(const Py_UCS4 *data, Py_ssize_t *length);
107n/a
108n/astruct iso2022_designation {
109n/a unsigned char mark;
110n/a unsigned char plane;
111n/a unsigned char width;
112n/a iso2022_init_func initializer;
113n/a iso2022_decode_func decoder;
114n/a iso2022_encode_func encoder;
115n/a};
116n/a
117n/astruct iso2022_config {
118n/a int flags;
119n/a const struct iso2022_designation *designations; /* non-ascii desigs */
120n/a};
121n/a
122n/a/*-*- iso-2022 codec implementation -*-*/
123n/a
124n/aCODEC_INIT(iso2022)
125n/a{
126n/a const struct iso2022_designation *desig;
127n/a for (desig = CONFIG_DESIGNATIONS; desig->mark; desig++)
128n/a if (desig->initializer != NULL && desig->initializer() != 0)
129n/a return -1;
130n/a return 0;
131n/a}
132n/a
133n/aENCODER_INIT(iso2022)
134n/a{
135n/a STATE_CLEARFLAGS();
136n/a STATE_SETG0(CHARSET_ASCII);
137n/a STATE_SETG1(CHARSET_ASCII);
138n/a return 0;
139n/a}
140n/a
141n/aENCODER_RESET(iso2022)
142n/a{
143n/a if (STATE_GETFLAG(F_SHIFTED)) {
144n/a WRITEBYTE1(SI);
145n/a NEXT_OUT(1);
146n/a STATE_CLEARFLAG(F_SHIFTED);
147n/a }
148n/a if (STATE_G0 != CHARSET_ASCII) {
149n/a WRITEBYTE3(ESC, '(', 'B');
150n/a NEXT_OUT(3);
151n/a STATE_SETG0(CHARSET_ASCII);
152n/a }
153n/a return 0;
154n/a}
155n/a
156n/aENCODER(iso2022)
157n/a{
158n/a while (*inpos < inlen) {
159n/a const struct iso2022_designation *dsg;
160n/a DBCHAR encoded;
161n/a Py_UCS4 c = INCHAR1;
162n/a Py_ssize_t insize;
163n/a
164n/a if (c < 0x80) {
165n/a if (STATE_G0 != CHARSET_ASCII) {
166n/a WRITEBYTE3(ESC, '(', 'B');
167n/a STATE_SETG0(CHARSET_ASCII);
168n/a NEXT_OUT(3);
169n/a }
170n/a if (STATE_GETFLAG(F_SHIFTED)) {
171n/a WRITEBYTE1(SI);
172n/a STATE_CLEARFLAG(F_SHIFTED);
173n/a NEXT_OUT(1);
174n/a }
175n/a WRITEBYTE1((unsigned char)c);
176n/a NEXT(1, 1);
177n/a continue;
178n/a }
179n/a
180n/a insize = 1;
181n/a
182n/a encoded = MAP_UNMAPPABLE;
183n/a for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
184n/a Py_ssize_t length = 1;
185n/a encoded = dsg->encoder(&c, &length);
186n/a if (encoded == MAP_MULTIPLE_AVAIL) {
187n/a /* this implementation won't work for pair
188n/a * of non-bmp characters. */
189n/a if (inlen - *inpos < 2) {
190n/a if (!(flags & MBENC_FLUSH))
191n/a return MBERR_TOOFEW;
192n/a length = -1;
193n/a }
194n/a else
195n/a length = 2;
196n/a encoded = dsg->encoder(&c, &length);
197n/a if (encoded != MAP_UNMAPPABLE) {
198n/a insize = length;
199n/a break;
200n/a }
201n/a }
202n/a else if (encoded != MAP_UNMAPPABLE)
203n/a break;
204n/a }
205n/a
206n/a if (!dsg->mark)
207n/a return 1;
208n/a assert(dsg->width == 1 || dsg->width == 2);
209n/a
210n/a switch (dsg->plane) {
211n/a case 0: /* G0 */
212n/a if (STATE_GETFLAG(F_SHIFTED)) {
213n/a WRITEBYTE1(SI);
214n/a STATE_CLEARFLAG(F_SHIFTED);
215n/a NEXT_OUT(1);
216n/a }
217n/a if (STATE_G0 != dsg->mark) {
218n/a if (dsg->width == 1) {
219n/a WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark));
220n/a STATE_SETG0(dsg->mark);
221n/a NEXT_OUT(3);
222n/a }
223n/a else if (dsg->mark == CHARSET_JISX0208) {
224n/a WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark));
225n/a STATE_SETG0(dsg->mark);
226n/a NEXT_OUT(3);
227n/a }
228n/a else {
229n/a WRITEBYTE4(ESC, '$', '(',
230n/a ESCMARK(dsg->mark));
231n/a STATE_SETG0(dsg->mark);
232n/a NEXT_OUT(4);
233n/a }
234n/a }
235n/a break;
236n/a case 1: /* G1 */
237n/a if (STATE_G1 != dsg->mark) {
238n/a if (dsg->width == 1) {
239n/a WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark));
240n/a STATE_SETG1(dsg->mark);
241n/a NEXT_OUT(3);
242n/a }
243n/a else {
244n/a WRITEBYTE4(ESC, '$', ')', ESCMARK(dsg->mark));
245n/a STATE_SETG1(dsg->mark);
246n/a NEXT_OUT(4);
247n/a }
248n/a }
249n/a if (!STATE_GETFLAG(F_SHIFTED)) {
250n/a WRITEBYTE1(SO);
251n/a STATE_SETFLAG(F_SHIFTED);
252n/a NEXT_OUT(1);
253n/a }
254n/a break;
255n/a default: /* G2 and G3 is not supported: no encoding in
256n/a * CJKCodecs are using them yet */
257n/a return MBERR_INTERNAL;
258n/a }
259n/a
260n/a if (dsg->width == 1) {
261n/a WRITEBYTE1((unsigned char)encoded);
262n/a NEXT_OUT(1);
263n/a }
264n/a else {
265n/a WRITEBYTE2(encoded >> 8, encoded & 0xff);
266n/a NEXT_OUT(2);
267n/a }
268n/a NEXT_INCHAR(insize);
269n/a }
270n/a
271n/a return 0;
272n/a}
273n/a
274n/aDECODER_INIT(iso2022)
275n/a{
276n/a STATE_CLEARFLAGS();
277n/a STATE_SETG0(CHARSET_ASCII);
278n/a STATE_SETG1(CHARSET_ASCII);
279n/a STATE_SETG2(CHARSET_ASCII);
280n/a return 0;
281n/a}
282n/a
283n/aDECODER_RESET(iso2022)
284n/a{
285n/a STATE_SETG0(CHARSET_ASCII);
286n/a STATE_CLEARFLAG(F_SHIFTED);
287n/a return 0;
288n/a}
289n/a
290n/astatic Py_ssize_t
291n/aiso2022processesc(const void *config, MultibyteCodec_State *state,
292n/a const unsigned char **inbuf, Py_ssize_t *inleft)
293n/a{
294n/a unsigned char charset, designation;
295n/a Py_ssize_t i, esclen = 0;
296n/a
297n/a for (i = 1;i < MAX_ESCSEQLEN;i++) {
298n/a if (i >= *inleft)
299n/a return MBERR_TOOFEW;
300n/a if (IS_ESCEND((*inbuf)[i])) {
301n/a esclen = i + 1;
302n/a break;
303n/a }
304n/a else if (CONFIG_ISSET(USE_JISX0208_EXT) && i+1 < *inleft &&
305n/a (*inbuf)[i] == '&' && (*inbuf)[i+1] == '@') {
306n/a i += 2;
307n/a }
308n/a }
309n/a
310n/a switch (esclen) {
311n/a case 0:
312n/a return 1; /* unterminated escape sequence */
313n/a case 3:
314n/a if (INBYTE2 == '$') {
315n/a charset = INBYTE3 | CHARSET_DBCS;
316n/a designation = 0;
317n/a }
318n/a else {
319n/a charset = INBYTE3;
320n/a if (INBYTE2 == '(')
321n/a designation = 0;
322n/a else if (INBYTE2 == ')')
323n/a designation = 1;
324n/a else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.')
325n/a designation = 2;
326n/a else
327n/a return 3;
328n/a }
329n/a break;
330n/a case 4:
331n/a if (INBYTE2 != '$')
332n/a return 4;
333n/a
334n/a charset = INBYTE4 | CHARSET_DBCS;
335n/a if (INBYTE3 == '(')
336n/a designation = 0;
337n/a else if (INBYTE3 == ')')
338n/a designation = 1;
339n/a else
340n/a return 4;
341n/a break;
342n/a case 6: /* designation with prefix */
343n/a if (CONFIG_ISSET(USE_JISX0208_EXT) &&
344n/a (*inbuf)[3] == ESC && (*inbuf)[4] == '$' &&
345n/a (*inbuf)[5] == 'B') {
346n/a charset = 'B' | CHARSET_DBCS;
347n/a designation = 0;
348n/a }
349n/a else
350n/a return 6;
351n/a break;
352n/a default:
353n/a return esclen;
354n/a }
355n/a
356n/a /* raise error when the charset is not designated for this encoding */
357n/a if (charset != CHARSET_ASCII) {
358n/a const struct iso2022_designation *dsg;
359n/a
360n/a for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
361n/a if (dsg->mark == charset)
362n/a break;
363n/a }
364n/a if (!dsg->mark)
365n/a return esclen;
366n/a }
367n/a
368n/a STATE_SETG(designation, charset);
369n/a *inleft -= esclen;
370n/a (*inbuf) += esclen;
371n/a return 0;
372n/a}
373n/a
/* Emit the Unicode code point for ISO-8859-7 byte value `c` via OUTCHAR.
 *
 * Expands to an if / else-if chain with no final `else`, so the caller can
 * append its own `else` to handle bytes that have no mapping.  The two
 * bitmaps select which positions map to themselves (0xa0..0xbf) and which
 * map to 0x02d0 + c (0xb4..0xd3).  The masks and shifts are unsigned so
 * that testing bit 31 is well defined even where `long` is 32 bits wide.
 * `writer` is not referenced by the expansion itself.
 */
#define ISO8859_7_DECODE(c, writer)                                     \
    if ((c) < 0xa0) {                                                   \
        OUTCHAR(c);                                                     \
    } else if ((c) < 0xc0 &&                                            \
               (0x288f3bc9UL & (1UL << ((c)-0xa0)))) {                  \
        OUTCHAR(c);                                                     \
    } else if ((c) >= 0xb4 && (c) <= 0xfe && ((c) >= 0xd4 ||            \
               (0xbffffd77UL & (1UL << ((c)-0xb4))))) {                 \
        OUTCHAR(0x02d0 + (c));                                          \
    } else if ((c) == 0xa1) {                                           \
        OUTCHAR(0x2018);                                                \
    } else if ((c) == 0xa2) {                                           \
        OUTCHAR(0x2019);                                                \
    } else if ((c) == 0xaf) {                                           \
        OUTCHAR(0x2015);                                                \
    }
389n/a
390n/astatic Py_ssize_t
391n/aiso2022processg2(const void *config, MultibyteCodec_State *state,
392n/a const unsigned char **inbuf, Py_ssize_t *inleft,
393n/a _PyUnicodeWriter *writer)
394n/a{
395n/a /* not written to use encoder, decoder functions because only few
396n/a * encodings use G2 designations in CJKCodecs */
397n/a if (STATE_G2 == CHARSET_ISO8859_1) {
398n/a if (INBYTE3 < 0x80)
399n/a OUTCHAR(INBYTE3 + 0x80);
400n/a else
401n/a return 3;
402n/a }
403n/a else if (STATE_G2 == CHARSET_ISO8859_7) {
404n/a ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer)
405n/a else
406n/a return 3;
407n/a }
408n/a else if (STATE_G2 == CHARSET_ASCII) {
409n/a if (INBYTE3 & 0x80)
410n/a return 3;
411n/a else
412n/a OUTCHAR(INBYTE3);
413n/a }
414n/a else
415n/a return MBERR_INTERNAL;
416n/a
417n/a (*inbuf) += 3;
418n/a *inleft -= 3;
419n/a return 0;
420n/a}
421n/a
422n/aDECODER(iso2022)
423n/a{
424n/a const struct iso2022_designation *dsgcache = NULL;
425n/a
426n/a while (inleft > 0) {
427n/a unsigned char c = INBYTE1;
428n/a Py_ssize_t err;
429n/a
430n/a if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
431n/a /* ESC throughout mode:
432n/a * for non-iso2022 escape sequences */
433n/a OUTCHAR(c); /* assume as ISO-8859-1 */
434n/a NEXT_IN(1);
435n/a if (IS_ESCEND(c)) {
436n/a STATE_CLEARFLAG(F_ESCTHROUGHOUT);
437n/a }
438n/a continue;
439n/a }
440n/a
441n/a switch (c) {
442n/a case ESC:
443n/a REQUIRE_INBUF(2);
444n/a if (IS_ISO2022ESC(INBYTE2)) {
445n/a err = iso2022processesc(config, state,
446n/a inbuf, &inleft);
447n/a if (err != 0)
448n/a return err;
449n/a }
450n/a else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */
451n/a REQUIRE_INBUF(3);
452n/a err = iso2022processg2(config, state,
453n/a inbuf, &inleft, writer);
454n/a if (err != 0)
455n/a return err;
456n/a }
457n/a else {
458n/a OUTCHAR(ESC);
459n/a STATE_SETFLAG(F_ESCTHROUGHOUT);
460n/a NEXT_IN(1);
461n/a }
462n/a break;
463n/a case SI:
464n/a if (CONFIG_ISSET(NO_SHIFT))
465n/a goto bypass;
466n/a STATE_CLEARFLAG(F_SHIFTED);
467n/a NEXT_IN(1);
468n/a break;
469n/a case SO:
470n/a if (CONFIG_ISSET(NO_SHIFT))
471n/a goto bypass;
472n/a STATE_SETFLAG(F_SHIFTED);
473n/a NEXT_IN(1);
474n/a break;
475n/a case LF:
476n/a STATE_CLEARFLAG(F_SHIFTED);
477n/a OUTCHAR(LF);
478n/a NEXT_IN(1);
479n/a break;
480n/a default:
481n/a if (c < 0x20) /* C0 */
482n/a goto bypass;
483n/a else if (c >= 0x80)
484n/a return 1;
485n/a else {
486n/a const struct iso2022_designation *dsg;
487n/a unsigned char charset;
488n/a Py_UCS4 decoded;
489n/a
490n/a if (STATE_GETFLAG(F_SHIFTED))
491n/a charset = STATE_G1;
492n/a else
493n/a charset = STATE_G0;
494n/a
495n/a if (charset == CHARSET_ASCII) {
496n/abypass:
497n/a OUTCHAR(c);
498n/a NEXT_IN(1);
499n/a break;
500n/a }
501n/a
502n/a if (dsgcache != NULL &&
503n/a dsgcache->mark == charset)
504n/a dsg = dsgcache;
505n/a else {
506n/a for (dsg = CONFIG_DESIGNATIONS;
507n/a dsg->mark != charset
508n/a#ifdef Py_DEBUG
509n/a && dsg->mark != '\0'
510n/a#endif
511n/a ; dsg++)
512n/a {
513n/a /* noop */
514n/a }
515n/a assert(dsg->mark != '\0');
516n/a dsgcache = dsg;
517n/a }
518n/a
519n/a REQUIRE_INBUF(dsg->width);
520n/a decoded = dsg->decoder(*inbuf);
521n/a if (decoded == MAP_UNMAPPABLE)
522n/a return dsg->width;
523n/a
524n/a if (decoded < 0x10000) {
525n/a OUTCHAR(decoded);
526n/a }
527n/a else if (decoded < 0x30000) {
528n/a OUTCHAR(decoded);
529n/a }
530n/a else { /* JIS X 0213 pairs */
531n/a OUTCHAR2(decoded >> 16, decoded & 0xffff);
532n/a }
533n/a NEXT_IN(dsg->width);
534n/a }
535n/a break;
536n/a }
537n/a }
538n/a return 0;
539n/a}
540n/a
541n/a/*-*- mapping table holders -*-*/
542n/a
543n/a#define ENCMAP(enc) static const encode_map *enc##_encmap = NULL;
544n/a#define DECMAP(enc) static const decode_map *enc##_decmap = NULL;
545n/a
546n/a/* kr */
547n/aENCMAP(cp949)
548n/aDECMAP(ksx1001)
549n/a
550n/a/* jp */
551n/aENCMAP(jisxcommon)
552n/aDECMAP(jisx0208)
553n/aDECMAP(jisx0212)
554n/aENCMAP(jisx0213_bmp)
555n/aDECMAP(jisx0213_1_bmp)
556n/aDECMAP(jisx0213_2_bmp)
557n/aENCMAP(jisx0213_emp)
558n/aDECMAP(jisx0213_1_emp)
559n/aDECMAP(jisx0213_2_emp)
560n/a
561n/a/* cn */
562n/aENCMAP(gbcommon)
563n/aDECMAP(gb2312)
564n/a
565n/a/* tw */
566n/a
567n/a/*-*- mapping access functions -*-*/
568n/a
569n/astatic int
570n/aksx1001_init(void)
571n/a{
572n/a static int initialized = 0;
573n/a
574n/a if (!initialized && (
575n/a IMPORT_MAP(kr, cp949, &cp949_encmap, NULL) ||
576n/a IMPORT_MAP(kr, ksx1001, NULL, &ksx1001_decmap)))
577n/a return -1;
578n/a initialized = 1;
579n/a return 0;
580n/a}
581n/a
582n/astatic Py_UCS4
583n/aksx1001_decoder(const unsigned char *data)
584n/a{
585n/a Py_UCS4 u;
586n/a if (TRYMAP_DEC(ksx1001, u, data[0], data[1]))
587n/a return u;
588n/a else
589n/a return MAP_UNMAPPABLE;
590n/a}
591n/a
592n/astatic DBCHAR
593n/aksx1001_encoder(const Py_UCS4 *data, Py_ssize_t *length)
594n/a{
595n/a DBCHAR coded;
596n/a assert(*length == 1);
597n/a if (*data < 0x10000) {
598n/a if (TRYMAP_ENC(cp949, coded, *data)) {
599n/a if (!(coded & 0x8000))
600n/a return coded;
601n/a }
602n/a }
603n/a return MAP_UNMAPPABLE;
604n/a}
605n/a
606n/astatic int
607n/ajisx0208_init(void)
608n/a{
609n/a static int initialized = 0;
610n/a
611n/a if (!initialized && (
612n/a IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
613n/a IMPORT_MAP(jp, jisx0208, NULL, &jisx0208_decmap)))
614n/a return -1;
615n/a initialized = 1;
616n/a return 0;
617n/a}
618n/a
619n/astatic Py_UCS4
620n/ajisx0208_decoder(const unsigned char *data)
621n/a{
622n/a Py_UCS4 u;
623n/a if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
624n/a return 0xff3c;
625n/a else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]))
626n/a return u;
627n/a else
628n/a return MAP_UNMAPPABLE;
629n/a}
630n/a
631n/astatic DBCHAR
632n/ajisx0208_encoder(const Py_UCS4 *data, Py_ssize_t *length)
633n/a{
634n/a DBCHAR coded;
635n/a assert(*length == 1);
636n/a if (*data < 0x10000) {
637n/a if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
638n/a return 0x2140;
639n/a else if (TRYMAP_ENC(jisxcommon, coded, *data)) {
640n/a if (!(coded & 0x8000))
641n/a return coded;
642n/a }
643n/a }
644n/a return MAP_UNMAPPABLE;
645n/a}
646n/a
647n/astatic int
648n/ajisx0212_init(void)
649n/a{
650n/a static int initialized = 0;
651n/a
652n/a if (!initialized && (
653n/a IMPORT_MAP(jp, jisxcommon, &jisxcommon_encmap, NULL) ||
654n/a IMPORT_MAP(jp, jisx0212, NULL, &jisx0212_decmap)))
655n/a return -1;
656n/a initialized = 1;
657n/a return 0;
658n/a}
659n/a
660n/astatic Py_UCS4
661n/ajisx0212_decoder(const unsigned char *data)
662n/a{
663n/a Py_UCS4 u;
664n/a if (TRYMAP_DEC(jisx0212, u, data[0], data[1]))
665n/a return u;
666n/a else
667n/a return MAP_UNMAPPABLE;
668n/a}
669n/a
670n/astatic DBCHAR
671n/ajisx0212_encoder(const Py_UCS4 *data, Py_ssize_t *length)
672n/a{
673n/a DBCHAR coded;
674n/a assert(*length == 1);
675n/a if (*data < 0x10000) {
676n/a if (TRYMAP_ENC(jisxcommon, coded, *data)) {
677n/a if (coded & 0x8000)
678n/a return coded & 0x7fff;
679n/a }
680n/a }
681n/a return MAP_UNMAPPABLE;
682n/a}
683n/a
684n/astatic int
685n/ajisx0213_init(void)
686n/a{
687n/a static int initialized = 0;
688n/a
689n/a if (!initialized && (
690n/a jisx0208_init() ||
691n/a IMPORT_MAP(jp, jisx0213_bmp,
692n/a &jisx0213_bmp_encmap, NULL) ||
693n/a IMPORT_MAP(jp, jisx0213_1_bmp,
694n/a NULL, &jisx0213_1_bmp_decmap) ||
695n/a IMPORT_MAP(jp, jisx0213_2_bmp,
696n/a NULL, &jisx0213_2_bmp_decmap) ||
697n/a IMPORT_MAP(jp, jisx0213_emp,
698n/a &jisx0213_emp_encmap, NULL) ||
699n/a IMPORT_MAP(jp, jisx0213_1_emp,
700n/a NULL, &jisx0213_1_emp_decmap) ||
701n/a IMPORT_MAP(jp, jisx0213_2_emp,
702n/a NULL, &jisx0213_2_emp_decmap) ||
703n/a IMPORT_MAP(jp, jisx0213_pair, &jisx0213_pair_encmap,
704n/a &jisx0213_pair_decmap)))
705n/a return -1;
706n/a initialized = 1;
707n/a return 0;
708n/a}
709n/a
710n/a#define config ((void *)2000)
711n/astatic Py_UCS4
712n/ajisx0213_2000_1_decoder(const unsigned char *data)
713n/a{
714n/a Py_UCS4 u;
715n/a EMULATE_JISX0213_2000_DECODE_PLANE1(u, data[0], data[1])
716n/a else if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
717n/a return 0xff3c;
718n/a else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]))
719n/a ;
720n/a else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]))
721n/a ;
722n/a else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]))
723n/a u |= 0x20000;
724n/a else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]))
725n/a ;
726n/a else
727n/a return MAP_UNMAPPABLE;
728n/a return u;
729n/a}
730n/a
731n/astatic Py_UCS4
732n/ajisx0213_2000_2_decoder(const unsigned char *data)
733n/a{
734n/a Py_UCS4 u;
735n/a EMULATE_JISX0213_2000_DECODE_PLANE2_CHAR(u, data[0], data[1])
736n/a if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]))
737n/a ;
738n/a else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]))
739n/a u |= 0x20000;
740n/a else
741n/a return MAP_UNMAPPABLE;
742n/a return u;
743n/a}
744n/a#undef config
745n/a
746n/astatic Py_UCS4
747n/ajisx0213_2004_1_decoder(const unsigned char *data)
748n/a{
749n/a Py_UCS4 u;
750n/a if (data[0] == 0x21 && data[1] == 0x40) /* F/W REVERSE SOLIDUS */
751n/a return 0xff3c;
752n/a else if (TRYMAP_DEC(jisx0208, u, data[0], data[1]))
753n/a ;
754n/a else if (TRYMAP_DEC(jisx0213_1_bmp, u, data[0], data[1]))
755n/a ;
756n/a else if (TRYMAP_DEC(jisx0213_1_emp, u, data[0], data[1]))
757n/a u |= 0x20000;
758n/a else if (TRYMAP_DEC(jisx0213_pair, u, data[0], data[1]))
759n/a ;
760n/a else
761n/a return MAP_UNMAPPABLE;
762n/a return u;
763n/a}
764n/a
765n/astatic Py_UCS4
766n/ajisx0213_2004_2_decoder(const unsigned char *data)
767n/a{
768n/a Py_UCS4 u;
769n/a if (TRYMAP_DEC(jisx0213_2_bmp, u, data[0], data[1]))
770n/a ;
771n/a else if (TRYMAP_DEC(jisx0213_2_emp, u, data[0], data[1]))
772n/a u |= 0x20000;
773n/a else
774n/a return MAP_UNMAPPABLE;
775n/a return u;
776n/a}
777n/a
778n/astatic DBCHAR
779n/ajisx0213_encoder(const Py_UCS4 *data, Py_ssize_t *length, void *config)
780n/a{
781n/a DBCHAR coded;
782n/a
783n/a switch (*length) {
784n/a case 1: /* first character */
785n/a if (*data >= 0x10000) {
786n/a if ((*data) >> 16 == 0x20000 >> 16) {
787n/a EMULATE_JISX0213_2000_ENCODE_EMP(coded, *data)
788n/a else if (TRYMAP_ENC(jisx0213_emp, coded, (*data) & 0xffff))
789n/a return coded;
790n/a }
791n/a return MAP_UNMAPPABLE;
792n/a }
793n/a
794n/a EMULATE_JISX0213_2000_ENCODE_BMP(coded, *data)
795n/a else if (TRYMAP_ENC(jisx0213_bmp, coded, *data)) {
796n/a if (coded == MULTIC)
797n/a return MAP_MULTIPLE_AVAIL;
798n/a }
799n/a else if (TRYMAP_ENC(jisxcommon, coded, *data)) {
800n/a if (coded & 0x8000)
801n/a return MAP_UNMAPPABLE;
802n/a }
803n/a else
804n/a return MAP_UNMAPPABLE;
805n/a return coded;
806n/a
807n/a case 2: /* second character of unicode pair */
808n/a coded = find_pairencmap((ucs2_t)data[0], (ucs2_t)data[1],
809n/a jisx0213_pair_encmap, JISX0213_ENCPAIRS);
810n/a if (coded == DBCINV) {
811n/a *length = 1;
812n/a coded = find_pairencmap((ucs2_t)data[0], 0,
813n/a jisx0213_pair_encmap, JISX0213_ENCPAIRS);
814n/a if (coded == DBCINV)
815n/a return MAP_UNMAPPABLE;
816n/a }
817n/a else
818n/a return coded;
819n/a
820n/a case -1: /* flush unterminated */
821n/a *length = 1;
822n/a coded = find_pairencmap((ucs2_t)data[0], 0,
823n/a jisx0213_pair_encmap, JISX0213_ENCPAIRS);
824n/a if (coded == DBCINV)
825n/a return MAP_UNMAPPABLE;
826n/a else
827n/a return coded;
828n/a break;
829n/a
830n/a default:
831n/a return MAP_UNMAPPABLE;
832n/a }
833n/a}
834n/a
835n/astatic DBCHAR
836n/ajisx0213_2000_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
837n/a{
838n/a DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
839n/a if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
840n/a return coded;
841n/a else if (coded & 0x8000)
842n/a return MAP_UNMAPPABLE;
843n/a else
844n/a return coded;
845n/a}
846n/a
847n/astatic DBCHAR
848n/ajisx0213_2000_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
849n/a{
850n/a DBCHAR coded;
851n/a Py_ssize_t ilength = *length;
852n/a
853n/a coded = jisx0213_encoder(data, length, (void *)2000);
854n/a switch (ilength) {
855n/a case 1:
856n/a if (coded == MAP_MULTIPLE_AVAIL)
857n/a return MAP_MULTIPLE_AVAIL;
858n/a else
859n/a return MAP_UNMAPPABLE;
860n/a case 2:
861n/a if (*length != 2)
862n/a return MAP_UNMAPPABLE;
863n/a else
864n/a return coded;
865n/a default:
866n/a return MAP_UNMAPPABLE;
867n/a }
868n/a}
869n/a
870n/astatic DBCHAR
871n/ajisx0213_2000_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
872n/a{
873n/a DBCHAR coded = jisx0213_encoder(data, length, (void *)2000);
874n/a if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
875n/a return coded;
876n/a else if (coded & 0x8000)
877n/a return coded & 0x7fff;
878n/a else
879n/a return MAP_UNMAPPABLE;
880n/a}
881n/a
882n/astatic DBCHAR
883n/ajisx0213_2004_1_encoder(const Py_UCS4 *data, Py_ssize_t *length)
884n/a{
885n/a DBCHAR coded = jisx0213_encoder(data, length, NULL);
886n/a if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
887n/a return coded;
888n/a else if (coded & 0x8000)
889n/a return MAP_UNMAPPABLE;
890n/a else
891n/a return coded;
892n/a}
893n/a
894n/astatic DBCHAR
895n/ajisx0213_2004_1_encoder_paironly(const Py_UCS4 *data, Py_ssize_t *length)
896n/a{
897n/a DBCHAR coded;
898n/a Py_ssize_t ilength = *length;
899n/a
900n/a coded = jisx0213_encoder(data, length, NULL);
901n/a switch (ilength) {
902n/a case 1:
903n/a if (coded == MAP_MULTIPLE_AVAIL)
904n/a return MAP_MULTIPLE_AVAIL;
905n/a else
906n/a return MAP_UNMAPPABLE;
907n/a case 2:
908n/a if (*length != 2)
909n/a return MAP_UNMAPPABLE;
910n/a else
911n/a return coded;
912n/a default:
913n/a return MAP_UNMAPPABLE;
914n/a }
915n/a}
916n/a
917n/astatic DBCHAR
918n/ajisx0213_2004_2_encoder(const Py_UCS4 *data, Py_ssize_t *length)
919n/a{
920n/a DBCHAR coded = jisx0213_encoder(data, length, NULL);
921n/a if (coded == MAP_UNMAPPABLE || coded == MAP_MULTIPLE_AVAIL)
922n/a return coded;
923n/a else if (coded & 0x8000)
924n/a return coded & 0x7fff;
925n/a else
926n/a return MAP_UNMAPPABLE;
927n/a}
928n/a
929n/astatic Py_UCS4
930n/ajisx0201_r_decoder(const unsigned char *data)
931n/a{
932n/a Py_UCS4 u;
933n/a JISX0201_R_DECODE_CHAR(*data, u)
934n/a else
935n/a return MAP_UNMAPPABLE;
936n/a return u;
937n/a}
938n/a
939n/astatic DBCHAR
940n/ajisx0201_r_encoder(const Py_UCS4 *data, Py_ssize_t *length)
941n/a{
942n/a DBCHAR coded;
943n/a JISX0201_R_ENCODE(*data, coded)
944n/a else
945n/a return MAP_UNMAPPABLE;
946n/a return coded;
947n/a}
948n/a
949n/astatic Py_UCS4
950n/ajisx0201_k_decoder(const unsigned char *data)
951n/a{
952n/a Py_UCS4 u;
953n/a JISX0201_K_DECODE_CHAR(*data ^ 0x80, u)
954n/a else
955n/a return MAP_UNMAPPABLE;
956n/a return u;
957n/a}
958n/a
959n/astatic DBCHAR
960n/ajisx0201_k_encoder(const Py_UCS4 *data, Py_ssize_t *length)
961n/a{
962n/a DBCHAR coded;
963n/a JISX0201_K_ENCODE(*data, coded)
964n/a else
965n/a return MAP_UNMAPPABLE;
966n/a return coded - 0x80;
967n/a}
968n/a
969n/astatic int
970n/agb2312_init(void)
971n/a{
972n/a static int initialized = 0;
973n/a
974n/a if (!initialized && (
975n/a IMPORT_MAP(cn, gbcommon, &gbcommon_encmap, NULL) ||
976n/a IMPORT_MAP(cn, gb2312, NULL, &gb2312_decmap)))
977n/a return -1;
978n/a initialized = 1;
979n/a return 0;
980n/a}
981n/a
982n/astatic Py_UCS4
983n/agb2312_decoder(const unsigned char *data)
984n/a{
985n/a Py_UCS4 u;
986n/a if (TRYMAP_DEC(gb2312, u, data[0], data[1]))
987n/a return u;
988n/a else
989n/a return MAP_UNMAPPABLE;
990n/a}
991n/a
992n/astatic DBCHAR
993n/agb2312_encoder(const Py_UCS4 *data, Py_ssize_t *length)
994n/a{
995n/a DBCHAR coded;
996n/a assert(*length == 1);
997n/a if (*data < 0x10000) {
998n/a if (TRYMAP_ENC(gbcommon, coded, *data)) {
999n/a if (!(coded & 0x8000))
1000n/a return coded;
1001n/a }
1002n/a }
1003n/a return MAP_UNMAPPABLE;
1004n/a}
1005n/a
1006n/a
1007n/astatic Py_UCS4
1008n/adummy_decoder(const unsigned char *data)
1009n/a{
1010n/a return MAP_UNMAPPABLE;
1011n/a}
1012n/a
1013n/astatic DBCHAR
1014n/adummy_encoder(const Py_UCS4 *data, Py_ssize_t *length)
1015n/a{
1016n/a return MAP_UNMAPPABLE;
1017n/a}
1018n/a
1019n/a/*-*- registry tables -*-*/
1020n/a
/* Each REGISTRY_* macro expands to one struct iso2022_designation
 * initializer, in field order:
 *     { mark, plane, width, initializer, decoder, encoder }
 * NOTE(review): the two CNS 11643 entries name cns11643_* functions that
 * are not defined in the visible part of this file; no table here uses
 * them. */
#define REGISTRY_KSX1001_G0 \
    { CHARSET_KSX1001, 0, 2, \
      ksx1001_init, ksx1001_decoder, ksx1001_encoder }
#define REGISTRY_KSX1001_G1 \
    { CHARSET_KSX1001, 1, 2, \
      ksx1001_init, ksx1001_decoder, ksx1001_encoder }
#define REGISTRY_JISX0201_R \
    { CHARSET_JISX0201_R, 0, 1, \
      NULL, jisx0201_r_decoder, jisx0201_r_encoder }
#define REGISTRY_JISX0201_K \
    { CHARSET_JISX0201_K, 0, 1, \
      NULL, jisx0201_k_decoder, jisx0201_k_encoder }
#define REGISTRY_JISX0208 \
    { CHARSET_JISX0208, 0, 2, \
      jisx0208_init, jisx0208_decoder, jisx0208_encoder }
#define REGISTRY_JISX0208_O \
    { CHARSET_JISX0208_O, 0, 2, \
      jisx0208_init, jisx0208_decoder, jisx0208_encoder }
#define REGISTRY_JISX0212 \
    { CHARSET_JISX0212, 0, 2, \
      jisx0212_init, jisx0212_decoder, jisx0212_encoder }
#define REGISTRY_JISX0213_2000_1 \
    { CHARSET_JISX0213_2000_1, 0, 2, \
      jisx0213_init, jisx0213_2000_1_decoder, jisx0213_2000_1_encoder }
#define REGISTRY_JISX0213_2000_1_PAIRONLY \
    { CHARSET_JISX0213_2000_1, 0, 2, \
      jisx0213_init, jisx0213_2000_1_decoder, \
      jisx0213_2000_1_encoder_paironly }
#define REGISTRY_JISX0213_2000_2 \
    { CHARSET_JISX0213_2, 0, 2, \
      jisx0213_init, jisx0213_2000_2_decoder, jisx0213_2000_2_encoder }
#define REGISTRY_JISX0213_2004_1 \
    { CHARSET_JISX0213_2004_1, 0, 2, \
      jisx0213_init, jisx0213_2004_1_decoder, jisx0213_2004_1_encoder }
#define REGISTRY_JISX0213_2004_1_PAIRONLY \
    { CHARSET_JISX0213_2004_1, 0, 2, \
      jisx0213_init, jisx0213_2004_1_decoder, \
      jisx0213_2004_1_encoder_paironly }
#define REGISTRY_JISX0213_2004_2 \
    { CHARSET_JISX0213_2, 0, 2, \
      jisx0213_init, jisx0213_2004_2_decoder, jisx0213_2004_2_encoder }
#define REGISTRY_GB2312 \
    { CHARSET_GB2312, 0, 2, \
      gb2312_init, gb2312_decoder, gb2312_encoder }
#define REGISTRY_CNS11643_1 \
    { CHARSET_CNS11643_1, 1, 2, \
      cns11643_init, cns11643_1_decoder, cns11643_1_encoder }
#define REGISTRY_CNS11643_2 \
    { CHARSET_CNS11643_2, 2, 2, \
      cns11643_init, cns11643_2_decoder, cns11643_2_encoder }
#define REGISTRY_ISO8859_1 \
    { CHARSET_ISO8859_1, 2, 1, \
      NULL, dummy_decoder, dummy_encoder }
#define REGISTRY_ISO8859_7 \
    { CHARSET_ISO8859_7, 2, 1, \
      NULL, dummy_decoder, dummy_encoder }
/* Table terminator: an entry whose mark is 0. */
#define REGISTRY_SENTINEL { 0, }
/* Define iso2022_<var>_config from flag set `attrs` and the matching
 * iso2022_<var>_designations table. */
#define CONFIGDEF(var, attrs) \
    static const struct iso2022_config iso2022_##var##_config = { \
        attrs, iso2022_##var##_designations \
    };
1084n/a
1085n/astatic const struct iso2022_designation iso2022_kr_designations[] = {
1086n/a REGISTRY_KSX1001_G1, REGISTRY_SENTINEL
1087n/a};
1088n/aCONFIGDEF(kr, 0)
1089n/a
1090n/astatic const struct iso2022_designation iso2022_jp_designations[] = {
1091n/a REGISTRY_JISX0208, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
1092n/a REGISTRY_SENTINEL
1093n/a};
1094n/aCONFIGDEF(jp, NO_SHIFT | USE_JISX0208_EXT)
1095n/a
1096n/astatic const struct iso2022_designation iso2022_jp_1_designations[] = {
1097n/a REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
1098n/a REGISTRY_JISX0208_O, REGISTRY_SENTINEL
1099n/a};
1100n/aCONFIGDEF(jp_1, NO_SHIFT | USE_JISX0208_EXT)
1101n/a
1102n/astatic const struct iso2022_designation iso2022_jp_2_designations[] = {
1103n/a REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_KSX1001_G0,
1104n/a REGISTRY_GB2312, REGISTRY_JISX0201_R, REGISTRY_JISX0208_O,
1105n/a REGISTRY_ISO8859_1, REGISTRY_ISO8859_7, REGISTRY_SENTINEL
1106n/a};
1107n/aCONFIGDEF(jp_2, NO_SHIFT | USE_G2 | USE_JISX0208_EXT)
1108n/a
1109n/astatic const struct iso2022_designation iso2022_jp_2004_designations[] = {
1110n/a REGISTRY_JISX0213_2004_1_PAIRONLY, REGISTRY_JISX0208,
1111n/a REGISTRY_JISX0213_2004_1, REGISTRY_JISX0213_2004_2, REGISTRY_SENTINEL
1112n/a};
1113n/aCONFIGDEF(jp_2004, NO_SHIFT | USE_JISX0208_EXT)
1114n/a
1115n/astatic const struct iso2022_designation iso2022_jp_3_designations[] = {
1116n/a REGISTRY_JISX0213_2000_1_PAIRONLY, REGISTRY_JISX0208,
1117n/a REGISTRY_JISX0213_2000_1, REGISTRY_JISX0213_2000_2, REGISTRY_SENTINEL
1118n/a};
1119n/aCONFIGDEF(jp_3, NO_SHIFT | USE_JISX0208_EXT)
1120n/a
1121n/astatic const struct iso2022_designation iso2022_jp_ext_designations[] = {
1122n/a REGISTRY_JISX0208, REGISTRY_JISX0212, REGISTRY_JISX0201_R,
1123n/a REGISTRY_JISX0201_K, REGISTRY_JISX0208_O, REGISTRY_SENTINEL
1124n/a};
1125n/aCONFIGDEF(jp_ext, NO_SHIFT | USE_JISX0208_EXT)
1126n/a
1127n/a
1128n/aBEGIN_MAPPINGS_LIST
1129n/a /* no mapping table here */
1130n/aEND_MAPPINGS_LIST
1131n/a
1132n/a#define ISO2022_CODEC(variation) { \
1133n/a "iso2022_" #variation, \
1134n/a &iso2022_##variation##_config, \
1135n/a iso2022_codec_init, \
1136n/a _STATEFUL_METHODS(iso2022) \
1137n/a},
1138n/a
1139n/aBEGIN_CODECS_LIST
1140n/a ISO2022_CODEC(kr)
1141n/a ISO2022_CODEC(jp)
1142n/a ISO2022_CODEC(jp_1)
1143n/a ISO2022_CODEC(jp_2)
1144n/a ISO2022_CODEC(jp_2004)
1145n/a ISO2022_CODEC(jp_3)
1146n/a ISO2022_CODEC(jp_ext)
1147n/aEND_CODECS_LIST
1148n/a
1149n/aI_AM_A_MODULE_FOR(iso2022)