2 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, see <https://www.gnu.org/licenses/>.
24 /* The structure of Shift_JISX0213 is as follows:
26 0x00..0x7F: ISO646-JP, an ASCII variant
28 0x{A1..DF}: JISX0201 Katakana.
30 0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1.
32 0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping.
34 Note that some JISX0213 characters are not contained in Unicode 3.2
35 and are therefore best represented as sequences of Unicode characters.
42 shift_jisx0213_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, size_t n
)
44 ucs4_t last_wc
= conv
->istate
;
46 /* Output the buffered character. */
49 return 0; /* Don't advance the input pointer. */
53 /* Plain ISO646-JP character. */
55 *pwc
= (ucs4_t
) 0x00a5;
57 *pwc
= (ucs4_t
) 0x203e;
61 } else if (c
>= 0xa1 && c
<= 0xdf) {
65 if ((c
>= 0x81 && c
<= 0x9f) || (c
>= 0xe0 && c
<= 0xfc)) {
66 /* Two byte character. */
68 unsigned char c2
= s
[1];
69 if ((c2
>= 0x40 && c2
<= 0x7e) || (c2
>= 0x80 && c2
<= 0xfc)) {
72 /* Convert to row and column. */
81 /* Now 0 <= c <= 0x3b, 0 <= c2 <= 0xbb. */
87 /* Handling of JISX 0213 plane 2 rows. */
90 else if (c1
>= 0x63 || c1
== 0x5f)
95 wc
= jisx0213_to_ucs4(0x121+c1
,c2
);
98 /* It's a combining character. */
99 ucs4_t wc1
= jisx0213_to_ucs_combining
[wc
- 1][0];
100 ucs4_t wc2
= jisx0213_to_ucs_combining
[wc
- 1][1];
101 /* We cannot output two Unicode characters at once. So,
102 output the first character and buffer the second one. */
111 return RET_TOOFEW(0);
118 #define shift_jisx0213_flushwc normal_flushwc
120 /* Composition tables for each of the relevant combining characters. */
121 static const struct { unsigned short base
; unsigned short composed
; } shift_jisx0213_comp_table_data
[] = {
122 #define shift_jisx0213_comp_table02e5_idx 0
123 #define shift_jisx0213_comp_table02e5_len 1
124 { 0x8684, 0x8685 }, /* 0x12B65 = 0x12B64 U+02E5 */
125 #define shift_jisx0213_comp_table02e9_idx (shift_jisx0213_comp_table02e5_idx+shift_jisx0213_comp_table02e5_len)
126 #define shift_jisx0213_comp_table02e9_len 1
127 { 0x8680, 0x8686 }, /* 0x12B66 = 0x12B60 U+02E9 */
128 #define shift_jisx0213_comp_table0300_idx (shift_jisx0213_comp_table02e9_idx+shift_jisx0213_comp_table02e9_len)
129 #define shift_jisx0213_comp_table0300_len 5
130 { 0x857b, 0x8663 }, /* 0x12B44 = 0x1295C U+0300 */
131 { 0x8657, 0x8667 }, /* 0x12B48 = 0x12B38 U+0300 */
132 { 0x8656, 0x8669 }, /* 0x12B4A = 0x12B37 U+0300 */
133 { 0x864f, 0x866b }, /* 0x12B4C = 0x12B30 U+0300 */
134 { 0x8662, 0x866d }, /* 0x12B4E = 0x12B43 U+0300 */
135 #define shift_jisx0213_comp_table0301_idx (shift_jisx0213_comp_table0300_idx+shift_jisx0213_comp_table0300_len)
136 #define shift_jisx0213_comp_table0301_len 4
137 { 0x8657, 0x8668 }, /* 0x12B49 = 0x12B38 U+0301 */
138 { 0x8656, 0x866a }, /* 0x12B4B = 0x12B37 U+0301 */
139 { 0x864f, 0x866c }, /* 0x12B4D = 0x12B30 U+0301 */
140 { 0x8662, 0x866e }, /* 0x12B4F = 0x12B43 U+0301 */
141 #define shift_jisx0213_comp_table309a_idx (shift_jisx0213_comp_table0301_idx+shift_jisx0213_comp_table0301_len)
142 #define shift_jisx0213_comp_table309a_len 14
143 { 0x82a9, 0x82f5 }, /* 0x12477 = 0x1242B U+309A */
144 { 0x82ab, 0x82f6 }, /* 0x12478 = 0x1242D U+309A */
145 { 0x82ad, 0x82f7 }, /* 0x12479 = 0x1242F U+309A */
146 { 0x82af, 0x82f8 }, /* 0x1247A = 0x12431 U+309A */
147 { 0x82b1, 0x82f9 }, /* 0x1247B = 0x12433 U+309A */
148 { 0x834a, 0x8397 }, /* 0x12577 = 0x1252B U+309A */
149 { 0x834c, 0x8398 }, /* 0x12578 = 0x1252D U+309A */
150 { 0x834e, 0x8399 }, /* 0x12579 = 0x1252F U+309A */
151 { 0x8350, 0x839a }, /* 0x1257A = 0x12531 U+309A */
152 { 0x8352, 0x839b }, /* 0x1257B = 0x12533 U+309A */
153 { 0x835a, 0x839c }, /* 0x1257C = 0x1253B U+309A */
154 { 0x8363, 0x839d }, /* 0x1257D = 0x12544 U+309A */
155 { 0x8367, 0x839e }, /* 0x1257E = 0x12548 U+309A */
156 { 0x83f3, 0x83f6 }, /* 0x12678 = 0x12675 U+309A */
160 shift_jisx0213_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, size_t n
)
163 unsigned short lasttwo
= conv
->ostate
;
166 /* Attempt to combine the last character with this one. */
171 idx
= shift_jisx0213_comp_table02e5_idx
,
172 len
= shift_jisx0213_comp_table02e5_len
;
173 else if (wc
== 0x02e9)
174 idx
= shift_jisx0213_comp_table02e9_idx
,
175 len
= shift_jisx0213_comp_table02e9_len
;
176 else if (wc
== 0x0300)
177 idx
= shift_jisx0213_comp_table0300_idx
,
178 len
= shift_jisx0213_comp_table0300_len
;
179 else if (wc
== 0x0301)
180 idx
= shift_jisx0213_comp_table0301_idx
,
181 len
= shift_jisx0213_comp_table0301_len
;
182 else if (wc
== 0x309a)
183 idx
= shift_jisx0213_comp_table309a_idx
,
184 len
= shift_jisx0213_comp_table309a_len
;
189 if (shift_jisx0213_comp_table_data
[idx
].base
== lasttwo
)
191 while (++idx
, --len
> 0);
194 /* Output the combined character. */
196 lasttwo
= shift_jisx0213_comp_table_data
[idx
].composed
;
197 r
[0] = (lasttwo
>> 8) & 0xff;
198 r
[1] = lasttwo
& 0xff;
206 /* Output the buffered character. */
209 r
[0] = (lasttwo
>> 8) & 0xff;
210 r
[1] = lasttwo
& 0xff;
215 if (wc
< 0x80 && wc
!= 0x5c && wc
!= 0x7e) {
216 /* Plain ISO646-JP character. */
218 r
[0] = (unsigned char) wc
;
223 } else if (wc
== 0x00a5) {
230 } else if (wc
== 0x203e) {
237 } else if (wc
>= 0xff61 && wc
<= 0xff9f) {
238 /* Half-width katakana. */
247 unsigned short jch
= ucs4_to_jisx0213(wc
);
249 /* Convert it to shifted representation. */
255 /* Handling of JISX 0213 plane 2 rows. */
256 if (s1
>= 0xcd) /* rows 0x26E..0x27E */
258 else if (s1
>= 0x8b || s1
== 0x87) /* rows 0x228, 0x22C..0x22F */
260 else /* rows 0x221, 0x223..0x225 */
262 /* Now 0x5e <= s1 <= 0x77. */
276 /* A possible match in comp_table_data. We have to buffer it. */
277 /* We know it's a JISX 0213 plane 1 character. */
278 if (jch
& 0x8000) abort();
279 conv
->ostate
= (s1
<< 8) | s2
;
282 /* Output the shifted representation. */
296 shift_jisx0213_reset (conv_t conv
, unsigned char *r
, size_t n
)
298 state_t lasttwo
= conv
->ostate
;
303 r
[0] = (lasttwo
>> 8) & 0xff;
304 r
[1] = lasttwo
& 0xff;
305 /* conv->ostate = 0; will be done by the caller */