/*
 * Copyright (C) 1999-2001, 2004 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */
/* CP1258 bytes of the five Vietnamese combining marks, in the order
   U+0300, U+0301, U+0303, U+0309, U+0323 (the same bytes that
   cp1258_2uni maps to those code points).  Indexed by the comb1 field of
   a viet_decomp_table entry. */
static const unsigned char cp1258_comb_table[] = {
  0xcc, 0xec, 0xde, 0xd2, 0xf2,
};
/* Bitmap of the characters that can occur as a base in
   viet_comp_table_data.  For a character wc in 0x0041..0x01b0, bit
   (wc & 0x1f) of word ((wc - 0x0040) >> 5) is set when wc is a possible
   base.  The possible bases are:
   0x0041..0x0045, 0x0047..0x0049, 0x004B..0x0050, 0x0052..0x0057,
   0x0059..0x005A, 0x0061..0x0065, 0x0067..0x0069, 0x006B..0x0070,
   0x0072..0x0077, 0x0079..0x007A, 0x00A5, 0x00A8, 0x00C2, 0x00C5..0x00C7,
   0x00CA, 0x00CF, 0x00D3..0x00D4, 0x00D6, 0x00D8, 0x00DA, 0x00DC, 0x00E2,
   0x00E5..0x00E7, 0x00EA, 0x00EF, 0x00F3..0x00F4, 0x00F6, 0x00F8, 0x00FA,
   0x00FC, 0x0102..0x0103, 0x01A0..0x01A1, 0x01AF..0x01B0. */
static const unsigned int cp1258_comp_bases[] = {
  0x06fdfbbe, 0x06fdfbbe, 0x00000000, 0x00000120, 0x155884e4, 0x155884e4,
  0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00018003
};
/* Unicode values for the CP1258 bytes 0x80..0xff, indexed by (byte - 0x80).
   Positions holding 0xfffd have no assigned character in this table;
   NOTE(review): presumably the decoder rejects those bytes - confirm
   against cp1258_mbtowc. */
static const unsigned short cp1258_2uni[128] = {
  /* 0x80 */
  0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
  0x02c6, 0x2030, 0xfffd, 0x2039, 0x0152, 0xfffd, 0xfffd, 0xfffd,
  /* 0x90 */
  0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
  0x02dc, 0x2122, 0xfffd, 0x203a, 0x0153, 0xfffd, 0xfffd, 0x0178,
  /* 0xa0 */
  0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
  0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
  /* 0xb0 */
  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
  0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
  /* 0xc0 */
  0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
  0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x0300, 0x00cd, 0x00ce, 0x00cf,
  /* 0xd0 */
  0x0110, 0x00d1, 0x0309, 0x00d3, 0x00d4, 0x01a0, 0x00d6, 0x00d7,
  0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x01af, 0x0303, 0x00df,
  /* 0xe0 */
  0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
  0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x0301, 0x00ed, 0x00ee, 0x00ef,
  /* 0xf0 */
  0x0111, 0x00f1, 0x0323, 0x00f3, 0x00f4, 0x01a1, 0x00f6, 0x00f7,
  0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x01b0, 0x20ab, 0x00ff,
};
/* In the CP1258 to Unicode direction, the state contains a buffered
   character, or 0 if none. */
/* cp1258_mbtowc: one step of the CP1258 -> Unicode conversion.
   What the statements visible here show:
     - a byte c is mapped through cp1258_2uni[c-0x80];
     - conv->istate is read into last_wc (the buffered character);
     - when the new character wc lies in 0x0300..0x033f, the mark selects
       a slice of viet_comp_table_data via viet_comp_table[k].idx/.len and
       last_wc is compared against the .base fields of that slice; on a
       match the .composed value is stored in *pwc;
     - on another path the buffered character itself is stored in *pwc and
       0 is returned so that the input pointer is not advanced;
     - a wc in 0x0041..0x01b0 whose bit is set in cp1258_comp_bases is
       treated as a possible base and RET_TOOFEW(1) is returned.
   NOTE(review): this listing omits a number of source lines of the
   function (for example no declaration of c, wc, k, i, i1 or i2 is shown,
   nor the return type, the braces, or the statements between the visible
   comparisons).  Restore them from the upstream file before compiling;
   do not infer them from this fragment. */
75 cp1258_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, int n
)
79 unsigned short last_wc
;
83 wc
= cp1258_2uni
[c
-0x80];
87 last_wc
= conv
->istate
;
89 if (wc
>= 0x0300 && wc
< 0x0340) {
90 /* See whether last_wc and wc can be combined. */
94 case 0x0300: k
= 0; break;
95 case 0x0301: k
= 1; break;
96 case 0x0303: k
= 2; break;
97 case 0x0309: k
= 3; break;
98 case 0x0323: k
= 4; break;
101 i1
= viet_comp_table
[k
].idx
;
102 i2
= i1
+ viet_comp_table
[k
].len
-1;
103 if (last_wc
>= viet_comp_table_data
[i1
].base
104 && last_wc
<= viet_comp_table_data
[i2
].base
) {
108 if (last_wc
== viet_comp_table_data
[i
].base
)
110 if (last_wc
< viet_comp_table_data
[i
].base
) {
119 if (last_wc
== viet_comp_table_data
[i
].base
)
125 last_wc
= viet_comp_table_data
[i
].composed
;
126 /* Output the combined character. */
128 *pwc
= (ucs4_t
) last_wc
;
133 /* Output the buffered character. */
135 *pwc
= (ucs4_t
) last_wc
;
136 return 0; /* Don't advance the input pointer. */
138 if (wc
>= 0x0041 && wc
<= 0x01b0
139 && ((cp1258_comp_bases
[(wc
- 0x0040) >> 5] >> (wc
& 0x1f)) & 1)) {
140 /* wc is a possible match in viet_comp_table_data. Buffer it. */
142 return RET_TOOFEW(1);
144 /* Output wc immediately. */
/* cp1258_flushwc is an alias for normal_flushwc; no CP1258-specific flush
   routine is defined here. */
#define cp1258_flushwc normal_flushwc
/* Unicode -> CP1258 bytes for U+00C0..U+0117, indexed by (wc - 0x00c0).
   NOTE(review): 0x00 presumably means "no single-byte encoding" - confirm
   against cp1258_wctomb. */
static const unsigned char cp1258_page00[88] = {
  0xc0, 0xc1, 0xc2, 0x00, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, /* 0xd0-0xd7 */
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */
  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, /* 0xe8-0xef */
  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, /* 0xf0-0xf7 */
  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, /* 0xf8-0xff */
  /* 0x0100 */
  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
  0xd0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
};
/* Unicode -> CP1258 bytes for U+0150..U+01B7, indexed by (wc - 0x0150).
   NOTE(review): 0x00 presumably means "no single-byte encoding" - confirm
   against cp1258_wctomb. */
static const unsigned char cp1258_page01[104] = {
  0x00, 0x00, 0x8c, 0x9c, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */
  0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
  0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
  0xd5, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdd, /* 0xa8-0xaf */
  0xfd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */
};
/* Unicode -> CP1258 bytes for U+02C0..U+02DF, indexed by (wc - 0x02c0).
   NOTE(review): 0x00 presumably means "no single-byte encoding" - confirm
   against cp1258_wctomb. */
static const unsigned char cp1258_page02[32] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
  0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
};
/* Unicode -> CP1258 bytes for the combining marks U+0300..U+0327, indexed
   by (wc - 0x0300).  cp1258_wctomb also indexes this table with
   (wc - 0x0340) for the deprecated tone marks U+0340 and U+0341, which
   therefore share the entries of U+0300 and U+0301.
   NOTE(review): 0x00 presumably means "no single-byte encoding" - confirm
   against cp1258_wctomb. */
static const unsigned char cp1258_page03[40] = {
  0xcc, 0xec, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
  0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */
  0x00, 0x00, 0x00, 0xf2, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */
};
/* Unicode -> CP1258 bytes for U+2010..U+203F, indexed by (wc - 0x2010).
   NOTE(review): 0x00 presumably means "no single-byte encoding" - confirm
   against cp1258_wctomb. */
static const unsigned char cp1258_page20[48] = {
  0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */
  0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */
  0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
  0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */
};
204 cp1258_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, int n
)
211 else if (wc
>= 0x00a0 && wc
< 0x00c0)
213 else if (wc
>= 0x00c0 && wc
< 0x0118)
214 c
= cp1258_page00
[wc
-0x00c0];
215 else if (wc
>= 0x0150 && wc
< 0x01b8)
216 c
= cp1258_page01
[wc
-0x0150];
217 else if (wc
>= 0x02c0 && wc
< 0x02e0)
218 c
= cp1258_page02
[wc
-0x02c0];
219 else if (wc
>= 0x0300 && wc
< 0x0328)
220 c
= cp1258_page03
[wc
-0x0300];
221 else if (wc
>= 0x0340 && wc
< 0x0342) /* deprecated Vietnamese tone marks */
222 c
= cp1258_page03
[wc
-0x0340];
223 else if (wc
>= 0x2010 && wc
< 0x2040)
224 c
= cp1258_page20
[wc
-0x2010];
225 else if (wc
== 0x20ab)
227 else if (wc
== 0x20ac)
229 else if (wc
== 0x2122)
235 /* Try canonical decomposition. */
237 /* Binary search through viet_decomp_table. */
239 unsigned int i2
= sizeof(viet_decomp_table
)/sizeof(viet_decomp_table
[0])-1;
240 if (wc
>= viet_decomp_table
[i1
].composed
241 && wc
<= viet_decomp_table
[i2
].composed
) {
244 /* Here i2 - i1 > 0. */
246 if (wc
== viet_decomp_table
[i
].composed
)
248 if (wc
< viet_decomp_table
[i
].composed
) {
251 /* Here i1 < i < i2. */
254 /* Here i1 <= i < i2. */
258 /* Here i2 - i1 = 1. */
260 if (wc
== viet_decomp_table
[i
].composed
)
267 /* Found a canonical decomposition. */
268 wc
= viet_decomp_table
[i
].base
;
269 /* wc is one of 0x0020, 0x0041..0x005a, 0x0061..0x007a, 0x00a5, 0x00a8,
270 0x00c2, 0x00c5..0x00c7, 0x00ca, 0x00cf, 0x00d3, 0x00d4, 0x00d6,
271 0x00d8, 0x00da, 0x00dc, 0x00e2, 0x00e5..0x00e7, 0x00ea, 0x00ef,
272 0x00f3, 0x00f4, 0x00f6, 0x00f8, 0x00fc, 0x0102, 0x0103, 0x01a0,
273 0x01a1, 0x01af, 0x01b0. */
276 else if (wc
< 0x0118)
277 c
= cp1258_page00
[wc
-0x00c0];
279 c
= cp1258_page01
[wc
-0x0150];
283 r
[1] = cp1258_comb_table
[viet_decomp_table
[i
].comb1
];