Switch to autoconf 2.72.
[libiconv.git] / lib / shift_jisx0213.h
blobf67dbfc1dd0739263d56af130b1cf4d650162799
/*
 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, see <https://www.gnu.org/licenses/>.
 */
/*
 * SHIFT_JISX0213
 */
/* The structure of Shift_JISX0213 is as follows:

   0x00..0x7F: ISO646-JP, an ASCII variant

   0x{A1..DF}: JISX0201 Katakana.

   0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1.

   0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping.

   Note that some JISX0213 characters are not contained in Unicode 3.2
   and are therefore best represented as sequences of Unicode characters.
*/
38 #include "jisx0213.h"
39 #include "flushwc.h"
41 static int
42 shift_jisx0213_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)
44 ucs4_t last_wc = conv->istate;
45 if (last_wc) {
46 /* Output the buffered character. */
47 conv->istate = 0;
48 *pwc = last_wc;
49 return 0; /* Don't advance the input pointer. */
50 } else {
51 unsigned char c = *s;
52 if (c < 0x80) {
53 /* Plain ISO646-JP character. */
54 if (c == 0x5c)
55 *pwc = (ucs4_t) 0x00a5;
56 else if (c == 0x7e)
57 *pwc = (ucs4_t) 0x203e;
58 else
59 *pwc = (ucs4_t) c;
60 return 1;
61 } else if (c >= 0xa1 && c <= 0xdf) {
62 *pwc = c + 0xfec0;
63 return 1;
64 } else {
65 if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) {
66 /* Two byte character. */
67 if (n >= 2) {
68 unsigned char c2 = s[1];
69 if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc)) {
70 unsigned int c1;
71 ucs4_t wc;
72 /* Convert to row and column. */
73 if (c < 0xe0)
74 c -= 0x81;
75 else
76 c -= 0xc1;
77 if (c2 < 0x80)
78 c2 -= 0x40;
79 else
80 c2 -= 0x41;
81 /* Now 0 <= c <= 0x3b, 0 <= c2 <= 0xbb. */
82 c1 = 2 * c;
83 if (c2 >= 0x5e)
84 c2 -= 0x5e, c1++;
85 c2 += 0x21;
86 if (c1 >= 0x5e) {
87 /* Handling of JISX 0213 plane 2 rows. */
88 if (c1 >= 0x67)
89 c1 += 230;
90 else if (c1 >= 0x63 || c1 == 0x5f)
91 c1 += 168;
92 else
93 c1 += 162;
95 wc = jisx0213_to_ucs4(0x121+c1,c2);
96 if (wc) {
97 if (wc < 0x80) {
98 /* It's a combining character. */
99 ucs4_t wc1 = jisx0213_to_ucs_combining[wc - 1][0];
100 ucs4_t wc2 = jisx0213_to_ucs_combining[wc - 1][1];
101 /* We cannot output two Unicode characters at once. So,
102 output the first character and buffer the second one. */
103 *pwc = wc1;
104 conv->istate = wc2;
105 } else
106 *pwc = wc;
107 return 2;
110 } else
111 return RET_TOOFEW(0);
113 return RET_ILSEQ;
/* Flushing the decoder is delegated to normal_flushwc (defined outside this
   file; presumably it delivers the character that shift_jisx0213_mbtowc
   left in conv->istate -- confirm against flushwc.h).  */
#define shift_jisx0213_flushwc normal_flushwc
/* Composition tables for each of the relevant combining characters.

   All sub-tables live back to back in one array.  For a combining character
   U+XXXX, the macros shift_jisx0213_comp_tableXXXX_idx and
   shift_jisx0213_comp_tableXXXX_len give the start index and entry count of
   its sub-table.  Each entry maps the Shift_JISX0213 code of a base
   character (`base') to the Shift_JISX0213 code of the precomposed
   character (`composed') obtained when that base is followed by the
   combining character.  The trailing comments give the same relation in
   JISX0213 plane/row/column notation.  */
static const struct { unsigned short base; unsigned short composed; } shift_jisx0213_comp_table_data[] = {
#define shift_jisx0213_comp_table02e5_idx 0
#define shift_jisx0213_comp_table02e5_len 1
  { 0x8684, 0x8685 }, /* 0x12B65 = 0x12B64 U+02E5 */
#define shift_jisx0213_comp_table02e9_idx (shift_jisx0213_comp_table02e5_idx+shift_jisx0213_comp_table02e5_len)
#define shift_jisx0213_comp_table02e9_len 1
  { 0x8680, 0x8686 }, /* 0x12B66 = 0x12B60 U+02E9 */
#define shift_jisx0213_comp_table0300_idx (shift_jisx0213_comp_table02e9_idx+shift_jisx0213_comp_table02e9_len)
#define shift_jisx0213_comp_table0300_len 5
  { 0x857b, 0x8663 }, /* 0x12B44 = 0x1295C U+0300 */
  { 0x8657, 0x8667 }, /* 0x12B48 = 0x12B38 U+0300 */
  { 0x8656, 0x8669 }, /* 0x12B4A = 0x12B37 U+0300 */
  { 0x864f, 0x866b }, /* 0x12B4C = 0x12B30 U+0300 */
  { 0x8662, 0x866d }, /* 0x12B4E = 0x12B43 U+0300 */
#define shift_jisx0213_comp_table0301_idx (shift_jisx0213_comp_table0300_idx+shift_jisx0213_comp_table0300_len)
#define shift_jisx0213_comp_table0301_len 4
  { 0x8657, 0x8668 }, /* 0x12B49 = 0x12B38 U+0301 */
  { 0x8656, 0x866a }, /* 0x12B4B = 0x12B37 U+0301 */
  { 0x864f, 0x866c }, /* 0x12B4D = 0x12B30 U+0301 */
  { 0x8662, 0x866e }, /* 0x12B4F = 0x12B43 U+0301 */
#define shift_jisx0213_comp_table309a_idx (shift_jisx0213_comp_table0301_idx+shift_jisx0213_comp_table0301_len)
#define shift_jisx0213_comp_table309a_len 14
  { 0x82a9, 0x82f5 }, /* 0x12477 = 0x1242B U+309A */
  { 0x82ab, 0x82f6 }, /* 0x12478 = 0x1242D U+309A */
  { 0x82ad, 0x82f7 }, /* 0x12479 = 0x1242F U+309A */
  { 0x82af, 0x82f8 }, /* 0x1247A = 0x12431 U+309A */
  { 0x82b1, 0x82f9 }, /* 0x1247B = 0x12433 U+309A */
  { 0x834a, 0x8397 }, /* 0x12577 = 0x1252B U+309A */
  { 0x834c, 0x8398 }, /* 0x12578 = 0x1252D U+309A */
  { 0x834e, 0x8399 }, /* 0x12579 = 0x1252F U+309A */
  { 0x8350, 0x839a }, /* 0x1257A = 0x12531 U+309A */
  { 0x8352, 0x839b }, /* 0x1257B = 0x12533 U+309A */
  { 0x835a, 0x839c }, /* 0x1257C = 0x1253B U+309A */
  { 0x8363, 0x839d }, /* 0x1257D = 0x12544 U+309A */
  { 0x8367, 0x839e }, /* 0x1257E = 0x12548 U+309A */
  { 0x83f3, 0x83f6 }, /* 0x12678 = 0x12675 U+309A */
};
159 static int
160 shift_jisx0213_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)
162 int count = 0;
163 unsigned short lasttwo = conv->ostate;
165 if (lasttwo) {
166 /* Attempt to combine the last character with this one. */
167 unsigned int idx;
168 unsigned int len;
170 if (wc == 0x02e5)
171 idx = shift_jisx0213_comp_table02e5_idx,
172 len = shift_jisx0213_comp_table02e5_len;
173 else if (wc == 0x02e9)
174 idx = shift_jisx0213_comp_table02e9_idx,
175 len = shift_jisx0213_comp_table02e9_len;
176 else if (wc == 0x0300)
177 idx = shift_jisx0213_comp_table0300_idx,
178 len = shift_jisx0213_comp_table0300_len;
179 else if (wc == 0x0301)
180 idx = shift_jisx0213_comp_table0301_idx,
181 len = shift_jisx0213_comp_table0301_len;
182 else if (wc == 0x309a)
183 idx = shift_jisx0213_comp_table309a_idx,
184 len = shift_jisx0213_comp_table309a_len;
185 else
186 goto not_combining;
189 if (shift_jisx0213_comp_table_data[idx].base == lasttwo)
190 break;
191 while (++idx, --len > 0);
193 if (len > 0) {
194 /* Output the combined character. */
195 if (n >= 2) {
196 lasttwo = shift_jisx0213_comp_table_data[idx].composed;
197 r[0] = (lasttwo >> 8) & 0xff;
198 r[1] = lasttwo & 0xff;
199 conv->ostate = 0;
200 return 2;
201 } else
202 return RET_TOOSMALL;
205 not_combining:
206 /* Output the buffered character. */
207 if (n < 2)
208 return RET_TOOSMALL;
209 r[0] = (lasttwo >> 8) & 0xff;
210 r[1] = lasttwo & 0xff;
211 r += 2;
212 count = 2;
215 if (wc < 0x80 && wc != 0x5c && wc != 0x7e) {
216 /* Plain ISO646-JP character. */
217 if (n > count) {
218 r[0] = (unsigned char) wc;
219 conv->ostate = 0;
220 return count+1;
221 } else
222 return RET_TOOSMALL;
223 } else if (wc == 0x00a5) {
224 if (n > count) {
225 r[0] = 0x5c;
226 conv->ostate = 0;
227 return count+1;
228 } else
229 return RET_TOOSMALL;
230 } else if (wc == 0x203e) {
231 if (n > count) {
232 r[0] = 0x7e;
233 conv->ostate = 0;
234 return count+1;
235 } else
236 return RET_TOOSMALL;
237 } else if (wc >= 0xff61 && wc <= 0xff9f) {
238 /* Half-width katakana. */
239 if (n > count) {
240 r[0] = wc - 0xfec0;
241 conv->ostate = 0;
242 return count+1;
243 } else
244 return RET_TOOSMALL;
245 } else {
246 unsigned int s1, s2;
247 unsigned short jch = ucs4_to_jisx0213(wc);
248 if (jch != 0) {
249 /* Convert it to shifted representation. */
250 s1 = jch >> 8;
251 s2 = jch & 0x7f;
252 s1 -= 0x21;
253 s2 -= 0x21;
254 if (s1 >= 0x5e) {
255 /* Handling of JISX 0213 plane 2 rows. */
256 if (s1 >= 0xcd) /* rows 0x26E..0x27E */
257 s1 -= 102;
258 else if (s1 >= 0x8b || s1 == 0x87) /* rows 0x228, 0x22C..0x22F */
259 s1 -= 40;
260 else /* rows 0x221, 0x223..0x225 */
261 s1 -= 34;
262 /* Now 0x5e <= s1 <= 0x77. */
264 if (s1 & 1)
265 s2 += 0x5e;
266 s1 = s1 >> 1;
267 if (s1 < 0x1f)
268 s1 += 0x81;
269 else
270 s1 += 0xc1;
271 if (s2 < 0x3f)
272 s2 += 0x40;
273 else
274 s2 += 0x41;
275 if (jch & 0x0080) {
276 /* A possible match in comp_table_data. We have to buffer it. */
277 /* We know it's a JISX 0213 plane 1 character. */
278 if (jch & 0x8000) abort();
279 conv->ostate = (s1 << 8) | s2;
280 return count+0;
282 /* Output the shifted representation. */
283 if (n >= count+2) {
284 r[0] = s1;
285 r[1] = s2;
286 conv->ostate = 0;
287 return count+2;
288 } else
289 return RET_TOOSMALL;
291 return RET_ILUNI;
295 static int
296 shift_jisx0213_reset (conv_t conv, unsigned char *r, size_t n)
298 state_t lasttwo = conv->ostate;
300 if (lasttwo) {
301 if (n < 2)
302 return RET_TOOSMALL;
303 r[0] = (lasttwo >> 8) & 0xff;
304 r[1] = lasttwo & 0xff;
305 /* conv->ostate = 0; will be done by the caller */
306 return 2;
307 } else
308 return 0;