/*
 * Whitespace normalization.
 * [python/dscho.git] / Modules / cjkcodecs / _iso2022_jp_ext.c
 * blob 8225911f31214f4e9c9a20803bb41c3e02f855d7
 */
/*
 * _iso2022_jp_ext.c: the ISO-2022-JP-EXT codec (RFC2237 + alpha)
 *
 * Written by Hye-Shik Chang <perky@FreeBSD.org>
 * $CJKCodecs: _iso2022_jp_ext.c,v 1.3 2003/12/31 05:46:55 perky Exp $
 */
8 #define ISO2022_DESIGNATIONS \
9 CHARSET_ASCII, CHARSET_JISX0201_R, CHARSET_JISX0201_K, \
10 CHARSET_JISX0208, CHARSET_JISX0208_O, CHARSET_JISX0212
11 #define ISO2022_NO_SHIFT
12 #define ISO2022_USE_JISX0208EXT
14 #include "codeccommon.h"
15 #include "iso2022common.h"
16 #include "alg_jisx0201.h"
18 ENCMAP(jisxcommon)
19 DECMAP(jisx0208)
20 DECMAP(jisx0212)
22 #define HAVE_ENCODER_INIT
23 ENCODER_INIT(iso2022_jp_ext)
25 STATE_CLEARFLAGS(state)
26 STATE_SETG0(state, CHARSET_ASCII)
27 STATE_SETG1(state, CHARSET_ASCII)
28 return 0;
31 #define HAVE_ENCODER_RESET
32 ENCODER_RESET(iso2022_jp_ext)
34 if (STATE_GETG0(state) != CHARSET_ASCII) {
35 RESERVE_OUTBUF(3)
36 WRITE3(ESC, '(', 'B')
37 STATE_SETG0(state, CHARSET_ASCII)
38 NEXT_OUT(3)
40 return 0;
43 ENCODER(iso2022_jp_ext)
45 while (inleft > 0) {
46 Py_UNICODE c = **inbuf;
47 DBCHAR code;
49 if (c < 0x80) {
50 switch (STATE_GETG0(state)) {
51 case CHARSET_ASCII:
52 WRITE1((unsigned char)c)
53 NEXT(1, 1)
54 break;
55 case CHARSET_JISX0201_R:
56 JISX0201_R_ENCODE(c, code)
57 else { /* FALLTHROUGH (yay!) */
58 default:
59 WRITE3(ESC, '(', 'B')
60 NEXT_OUT(3)
61 STATE_SETG0(state, CHARSET_ASCII)
62 code = c;
64 WRITE1((unsigned char)code)
65 NEXT(1, 1)
66 break;
68 if (c == '\n')
69 STATE_CLEARFLAG(state, F_SHIFTED)
71 else UCS4INVALID(c)
72 else {
73 unsigned char charset;
75 charset = STATE_GETG0(state);
76 if (charset == CHARSET_JISX0201_R) {
77 code = DBCINV;
78 JISX0201_R_ENCODE(c, code)
79 if (code != DBCINV) {
80 WRITE1((unsigned char)code)
81 NEXT(1, 1)
82 continue;
86 TRYMAP_ENC(jisxcommon, code, c) {
87 if (code & 0x8000) { /* MSB set: JIS X 0212 */
88 if (charset != CHARSET_JISX0212) {
89 WRITE4(ESC, '$', '(', 'D')
90 STATE_SETG0(state, CHARSET_JISX0212)
91 NEXT_OUT(4)
93 WRITE2((code >> 8) & 0x7f, code & 0x7f)
94 } else { /* MSB unset: JIS X 0208 */
95 jisx0208encode: if (charset != CHARSET_JISX0208) {
96 WRITE3(ESC, '$', 'B')
97 STATE_SETG0(state, CHARSET_JISX0208)
98 NEXT_OUT(3)
100 WRITE2(code >> 8, code & 0xff)
102 NEXT(1, 2)
103 } else if (c == 0xff3c) { /* FULL-WIDTH REVERSE SOLIDUS */
104 code = 0x2140;
105 goto jisx0208encode;
106 } else {
107 JISX0201_ENCODE(c, code)
108 else
109 return 1;
111 if (code < 0x80) { /* JIS X 0201 Roman */
112 /* if (charset == CHARSET_JISX0201_R) : already checked */
113 WRITE4(ESC, '(', 'J', (unsigned char)code)
114 STATE_SETG0(state, CHARSET_JISX0201_R)
115 NEXT(1, 4)
116 } else { /* JIS X 0201 Katakana */
117 if (charset != CHARSET_JISX0201_K) {
118 WRITE3(ESC, '(', 'I')
119 STATE_SETG0(state, CHARSET_JISX0201_K)
120 NEXT_OUT(3)
122 WRITE1(code - 0x80)
123 NEXT(1, 1)
129 return 0;
132 #define HAVE_DECODER_INIT
133 DECODER_INIT(iso2022_jp_ext)
135 STATE_CLEARFLAGS(state)
136 STATE_SETG0(state, CHARSET_ASCII)
137 STATE_SETG1(state, CHARSET_ASCII)
138 return 0;
141 #define HAVE_DECODER_RESET
142 DECODER_RESET(iso2022_jp_ext)
144 STATE_CLEARFLAG(state, F_SHIFTED)
145 return 0;
148 DECODER(iso2022_jp_ext)
150 ISO2022_LOOP_BEGIN
151 unsigned char charset, c2;
153 ISO2022_GETCHARSET(charset, c)
155 if (charset & CHARSET_DOUBLEBYTE) {
156 RESERVE_INBUF(2)
157 RESERVE_OUTBUF(1)
158 c2 = IN2;
159 if (charset == CHARSET_JISX0208 || charset == CHARSET_JISX0208_O) {
160 if (c == 0x21 && c2 == 0x40) /* FULL-WIDTH REVERSE SOLIDUS */
161 **outbuf = 0xff3c;
162 else TRYMAP_DEC(jisx0208, **outbuf, c, c2);
163 else return 2;
164 } else if (charset == CHARSET_JISX0212) {
165 TRYMAP_DEC(jisx0212, **outbuf, c, c2);
166 else return 2;
167 } else
168 return MBERR_INTERNAL;
169 NEXT(2, 1)
170 } else if (charset == CHARSET_ASCII) {
171 RESERVE_OUTBUF(1)
172 OUT1(c)
173 NEXT(1, 1)
174 } else if (charset == CHARSET_JISX0201_R) {
175 RESERVE_OUTBUF(1)
176 JISX0201_R_DECODE(c, **outbuf)
177 else
178 return 1;
179 NEXT(1, 1)
180 } else if (charset == CHARSET_JISX0201_K) {
181 RESERVE_OUTBUF(1)
182 JISX0201_K_DECODE(c ^ 0x80, **outbuf)
183 else
184 return 1;
185 NEXT(1, 1)
186 } else
187 return MBERR_INTERNAL;
188 ISO2022_LOOP_END
190 return 0;
193 #include "codecentry.h"
194 BEGIN_CODEC_REGISTRY(iso2022_jp_ext)
195 MAPOPEN(ja_JP)
196 IMPORTMAP_DEC(jisx0208)
197 IMPORTMAP_DEC(jisx0212)
198 IMPORTMAP_ENC(jisxcommon)
199 MAPCLOSE()
200 END_CODEC_REGISTRY(iso2022_jp_ext)