/*
 * Copyright (C) 1999-2002 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */
/* The structure of Shift_JISX0213 is as follows:

   0x00..0x7F: ISO646-JP, an ASCII variant

   0x{A1..DF}: JISX0201 Katakana.

   0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1.

   0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping.

   Note that some JISX0213 characters are not contained in Unicode 3.2
   and are therefore best represented as sequences of Unicode characters.
*/
/* shift_jisx0213_mbtowc: decoding step of the Shift_JISX0213 converter.
   Looks at the byte(s) at s and stores a Unicode code point through *pwc.

     conv  conversion descriptor; conv->istate is read into last_wc below.
     pwc   receives the decoded code point.
     s     input bytes; s[0] is classified by value range and s[1] is read
           as the trail byte of a two-byte character.
     n     presumably the number of input bytes available at s - TODO confirm;
           no use of n is visible in this listing.

   NOTE(review): this listing is incomplete.  The leading numbers are
   residual source line numbers and they jump (43, 45, 47, 50, ...), so the
   return type, the braces and a number of statements and conditions are not
   visible here.  The text cannot be compiled as it stands; restore the
   missing lines from the authoritative source before relying on the control
   flow described by the comments below.  */
43 shift_jisx0213_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, int n
)
/* Pick up whatever is pending in conv->istate (presumably the second half of
   a two-code-point sequence left by an earlier call - confirm).  */
45 ucs4_t last_wc
= conv
->istate
;
47 /* Output the buffered character. */
50 return 0; /* Don't advance the input pointer. */
54 /* Plain ISO646-JP character. */
/* U+00A5; the condition selecting this assignment is not visible here.  */
56 *pwc
= (ucs4_t
) 0x00a5;
/* U+203E; the condition selecting this assignment is not visible here.  */
58 *pwc
= (ucs4_t
) 0x203e;
/* Single bytes 0xA1..0xDF are the JISX0201 Katakana range according to the
   layout comment at the top of the file; the branch body is not visible.  */
62 } else if (c
>= 0xa1 && c
<= 0xdf) {
/* Lead-byte ranges of two-byte characters (planes 1 and 2).  */
66 if ((c
>= 0x81 && c
<= 0x9f) || (c
>= 0xe0 && c
<= 0xfc)) {
67 /* Two byte character. */
69 unsigned char c2
= s
[1];
/* Valid trail bytes are 0x40..0x7E and 0x80..0xFC; 0x7F is excluded.  */
70 if ((c2
>= 0x40 && c2
<= 0x7e) || (c2
>= 0x80 && c2
<= 0xfc)) {
73 /* Convert to row and column. */
82 /* Now 0 <= c <= 0x3b, 0 <= c2 <= 0xbb. */
88 /* Handling of JISX 0213 plane 2 rows. */
91 else if (c1
>= 0x63 || c1
== 0x5f)
/* Look up the code point for row value 0x121+c1 and column c2; the meaning
   of the row encoding is defined by jisx0213_to_ucs4, outside this listing.  */
96 wc
= jisx0213_to_ucs4(0x121+c1
,c2
);
99 /* It's a combining character. */
/* Here wc is used as a 1-based index into jisx0213_to_ucs_combining, whose
   entries hold the two code points of the sequence.  */
100 ucs4_t wc1
= jisx0213_to_ucs_combining
[wc
- 1][0];
101 ucs4_t wc2
= jisx0213_to_ucs_combining
[wc
- 1][1];
102 /* We cannot output two Unicode characters at once. So,
103 output the first character and buffer the second one. */
/* NOTE(review): the condition guarding this return is not visible;
   presumably taken when the trail byte is not yet available - confirm.  */
112 return RET_TOOFEW(0);
/* The flushwc entry point of this encoding is the shared normal_flushwc
   routine, which is defined outside this file.  */
#define shift_jisx0213_flushwc normal_flushwc
/* Composition tables for each of the relevant combining characters.

   All tables live in one array.  For every combining character U+XXXX there
   is a pair of macros, shift_jisx0213_comp_tableXXXX_idx (index of its first
   entry) and shift_jisx0213_comp_tableXXXX_len (number of entries); each
   _idx is derived from the preceding _idx + _len so the slices stay
   contiguous.

   An entry pairs the two-byte Shift_JISX0213 code of a base character with
   the two-byte code of the character obtained by combining that base with
   the slice's combining character.  The encoder compares `base' against the
   two-byte code it has buffered and, on a match, writes `composed' instead.

   The trailing comment on each entry reads "composed = base combining-char";
   the 0x12xxx values are presumably JISX0213 plane 1 row/column codes
   (TODO confirm against the JISX0213 tables).  */
static const struct {
  unsigned short base;     /* code of the base character */
  unsigned short composed; /* code of base + combining character */
} shift_jisx0213_comp_table_data[] = {
#define shift_jisx0213_comp_table02e5_idx 0
#define shift_jisx0213_comp_table02e5_len 1
  { 0x8684, 0x8685 }, /* 0x12B65 = 0x12B64 U+02E5 */
#define shift_jisx0213_comp_table02e9_idx (shift_jisx0213_comp_table02e5_idx+shift_jisx0213_comp_table02e5_len)
#define shift_jisx0213_comp_table02e9_len 1
  { 0x8680, 0x8686 }, /* 0x12B66 = 0x12B60 U+02E9 */
#define shift_jisx0213_comp_table0300_idx (shift_jisx0213_comp_table02e9_idx+shift_jisx0213_comp_table02e9_len)
#define shift_jisx0213_comp_table0300_len 5
  { 0x857b, 0x8663 }, /* 0x12B44 = 0x1295C U+0300 */
  { 0x8657, 0x8667 }, /* 0x12B48 = 0x12B38 U+0300 */
  { 0x8656, 0x8669 }, /* 0x12B4A = 0x12B37 U+0300 */
  { 0x864f, 0x866b }, /* 0x12B4C = 0x12B30 U+0300 */
  { 0x8662, 0x866d }, /* 0x12B4E = 0x12B43 U+0300 */
#define shift_jisx0213_comp_table0301_idx (shift_jisx0213_comp_table0300_idx+shift_jisx0213_comp_table0300_len)
#define shift_jisx0213_comp_table0301_len 4
  { 0x8657, 0x8668 }, /* 0x12B49 = 0x12B38 U+0301 */
  { 0x8656, 0x866a }, /* 0x12B4B = 0x12B37 U+0301 */
  { 0x864f, 0x866c }, /* 0x12B4D = 0x12B30 U+0301 */
  { 0x8662, 0x866e }, /* 0x12B4F = 0x12B43 U+0301 */
#define shift_jisx0213_comp_table309a_idx (shift_jisx0213_comp_table0301_idx+shift_jisx0213_comp_table0301_len)
#define shift_jisx0213_comp_table309a_len 14
  { 0x82a9, 0x82f5 }, /* 0x12477 = 0x1242B U+309A */
  { 0x82ab, 0x82f6 }, /* 0x12478 = 0x1242D U+309A */
  { 0x82ad, 0x82f7 }, /* 0x12479 = 0x1242F U+309A */
  { 0x82af, 0x82f8 }, /* 0x1247A = 0x12431 U+309A */
  { 0x82b1, 0x82f9 }, /* 0x1247B = 0x12433 U+309A */
  { 0x834a, 0x8397 }, /* 0x12577 = 0x1252B U+309A */
  { 0x834c, 0x8398 }, /* 0x12578 = 0x1252D U+309A */
  { 0x834e, 0x8399 }, /* 0x12579 = 0x1252F U+309A */
  { 0x8350, 0x839a }, /* 0x1257A = 0x12531 U+309A */
  { 0x8352, 0x839b }, /* 0x1257B = 0x12533 U+309A */
  { 0x835a, 0x839c }, /* 0x1257C = 0x1253B U+309A */
  { 0x8363, 0x839d }, /* 0x1257D = 0x12544 U+309A */
  { 0x8367, 0x839e }, /* 0x1257E = 0x12548 U+309A */
  { 0x83f3, 0x83f6 }, /* 0x12678 = 0x12675 U+309A */
};
/* shift_jisx0213_wctomb: encoding step of the Shift_JISX0213 converter.
   Converts the Unicode code point wc to Shift_JISX0213 bytes written at r.

     conv  conversion descriptor; conv->ostate holds a two-byte code that was
           held back because it might still combine with the next character.
     r     output buffer.
     wc    code point to encode.
     n     presumably the space available at r - TODO confirm; no use of n is
           visible in this listing.

   Visible outline: (1) if a code is buffered, try to merge it with wc via
   shift_jisx0213_comp_table_data, otherwise write the buffered code out;
   (2) encode wc itself - single byte for ISO646-JP and half-width katakana,
   otherwise via ucs4_to_jisx0213 and conversion to the shifted two-byte form,
   which is either buffered in conv->ostate or written.

   NOTE(review): this listing is incomplete.  The leading numbers are
   residual source line numbers and they jump (161, 164, 167, 172, ...), so
   the return type, braces, local declarations, size checks, arithmetic and
   return statements are not visible here.  Restore the missing lines from
   the authoritative source before relying on this function.  */
161 shift_jisx0213_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, int n
)
164 unsigned short lasttwo
= conv
->ostate
;
167 /* Attempt to combine the last character with this one. */
/* Each branch selects the slice (start index, length) of the composition
   table that belongs to the combining character wc.  The condition of this
   first branch is not visible (presumably wc == 0x02e5 - confirm).  */
172 idx
= shift_jisx0213_comp_table02e5_idx
,
173 len
= shift_jisx0213_comp_table02e5_len
;
174 else if (wc
== 0x02e9)
175 idx
= shift_jisx0213_comp_table02e9_idx
,
176 len
= shift_jisx0213_comp_table02e9_len
;
177 else if (wc
== 0x0300)
178 idx
= shift_jisx0213_comp_table0300_idx
,
179 len
= shift_jisx0213_comp_table0300_len
;
180 else if (wc
== 0x0301)
181 idx
= shift_jisx0213_comp_table0301_idx
,
182 len
= shift_jisx0213_comp_table0301_len
;
183 else if (wc
== 0x309a)
184 idx
= shift_jisx0213_comp_table309a_idx
,
185 len
= shift_jisx0213_comp_table309a_len
;
/* Walk the selected slice looking for an entry whose base is the buffered
   code; idx advances and len counts down on every step.  */
190 if (shift_jisx0213_comp_table_data
[idx
].base
== lasttwo
)
192 while (++idx
, --len
> 0);
195 /* Output the combined character. */
/* The 16-bit code is written high byte first.  */
197 lasttwo
= shift_jisx0213_comp_table_data
[idx
].composed
;
198 r
[0] = (lasttwo
>> 8) & 0xff;
199 r
[1] = lasttwo
& 0xff;
207 /* Output the buffered character. */
210 r
[0] = (lasttwo
>> 8) & 0xff;
211 r
[1] = lasttwo
& 0xff;
/* 0x5c and 0x7e are excluded from the direct single-byte path; U+00A5 and
   U+203E get their own branches below (bodies not visible).  */
216 if (wc
< 0x80 && wc
!= 0x5c && wc
!= 0x7e) {
217 /* Plain ISO646-JP character. */
219 r
[0] = (unsigned char) wc
;
224 } else if (wc
== 0x00a5) {
231 } else if (wc
== 0x203e) {
238 } else if (wc
>= 0xff61 && wc
<= 0xff9f) {
239 /* Half-width katakana. */
/* Everything else goes through the JISX0213 lookup.  */
248 unsigned short jch
= ucs4_to_jisx0213(wc
);
250 /* Convert it to shifted representation. */
256 /* Handling of JISX 0213 plane 2 rows. */
/* The adjustments applied to s1 in each of these three cases are not
   visible in this listing.  */
257 if (s1
>= 0xcd) /* rows 0x26E..0x27E */
259 else if (s1
>= 0x8b || s1
== 0x87) /* rows 0x228, 0x22C..0x22F */
261 else /* rows 0x221, 0x223..0x225 */
263 /* Now 0x5e <= s1 <= 0x77. */
277 /* A possible match in comp_table_data. We have to buffer it. */
278 /* We know it's a JISX 0213 plane 1 character. */
/* Sanity check: bit 0x8000 of jch must be clear here (presumably that bit
   marks plane 2 - confirm against ucs4_to_jisx0213).  */
279 if (jch
& 0x8000) abort();
/* Hold the two-byte code back in conv->ostate (s1 high, s2 low) instead of
   writing it.  */
280 conv
->ostate
= (s1
<< 8) | s2
;
283 /* Output the shifted representation. */
/* shift_jisx0213_reset: writes out the two-byte code that is still buffered
   in conv->ostate, high byte first, into r.

     conv  conversion descriptor; only conv->ostate is read here.
     r     output buffer; r[0] and r[1] are written.
     n     presumably the space available at r - TODO confirm; no use of n is
           visible in this listing.

   conv->ostate is deliberately not cleared here; per the comment at the end
   of the body the caller does that.

   NOTE(review): this listing is incomplete.  The residual source line
   numbers jump (297, 299, 304, ...), so the return type, the braces, the
   condition guarding the writes and the return statements are not visible.
   Restore them from the authoritative source before relying on this
   function.  */
297 shift_jisx0213_reset (conv_t conv
, unsigned char *r
, int n
)
299 state_t lasttwo
= conv
->ostate
;
304 r
[0] = (lasttwo
>> 8) & 0xff;
305 r
[1] = lasttwo
& 0xff;
306 /* conv->ostate = 0; will be done by the caller */