/*
 * Copyright (C) 1999-2001, 2004 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */
/* Combining characters used in Hebrew encoding CP1255. */

/* Relevant combining characters:
   0x05b4, 0x05b7, 0x05b8, 0x05b9, 0x05bc, 0x05bf, 0x05c1, 0x05c2. */
/*
 * Composition tables for each of the relevant combining characters.
 *
 * cp1255_comp_table_data holds (base, composed) pairs grouped by combining
 * character.  Within a group the pairs are sorted by ascending base so that
 * the group can be binary-searched.  The cp1255_comp_table05XX_idx/_len
 * macros give the first index and the entry count of each group.
 *
 * The pairs are derived from cp1255_decomp_table: every single-mark
 * decomposition (composed = base + mark) yields one pair in that mark's
 * group, and every two-mark decomposition is reachable through either
 * intermediate form (U+FB2A/U+FB2B + dagesh, or U+FB49 + shin/sin dot).
 */
static const struct {
  unsigned short base;
  unsigned short composed;
} cp1255_comp_table_data[] = {
#define cp1255_comp_table05b4_idx 0
#define cp1255_comp_table05b4_len 1
  { 0x05D9, 0xFB1D },
#define cp1255_comp_table05b7_idx (cp1255_comp_table05b4_idx+cp1255_comp_table05b4_len)
#define cp1255_comp_table05b7_len 2
  { 0x05D0, 0xFB2E },
  { 0x05F2, 0xFB1F },
#define cp1255_comp_table05b8_idx (cp1255_comp_table05b7_idx+cp1255_comp_table05b7_len)
#define cp1255_comp_table05b8_len 1
  { 0x05D0, 0xFB2F },
#define cp1255_comp_table05b9_idx (cp1255_comp_table05b8_idx+cp1255_comp_table05b8_len)
#define cp1255_comp_table05b9_len 1
  { 0x05D5, 0xFB4B },
#define cp1255_comp_table05bc_idx (cp1255_comp_table05b9_idx+cp1255_comp_table05b9_len)
#define cp1255_comp_table05bc_len 24
  { 0x05D0, 0xFB30 },
  { 0x05D1, 0xFB31 },
  { 0x05D2, 0xFB32 },
  { 0x05D3, 0xFB33 },
  { 0x05D4, 0xFB34 },
  { 0x05D5, 0xFB35 },
  { 0x05D6, 0xFB36 },
  { 0x05D8, 0xFB38 },
  { 0x05D9, 0xFB39 },
  { 0x05DA, 0xFB3A },
  { 0x05DB, 0xFB3B },
  { 0x05DC, 0xFB3C },
  { 0x05DE, 0xFB3E },
  { 0x05E0, 0xFB40 },
  { 0x05E1, 0xFB41 },
  { 0x05E3, 0xFB43 },
  { 0x05E4, 0xFB44 },
  { 0x05E6, 0xFB46 },
  { 0x05E7, 0xFB47 },
  { 0x05E8, 0xFB48 },
  { 0x05E9, 0xFB49 },
  { 0x05EA, 0xFB4A },
  { 0xFB2A, 0xFB2C },
  { 0xFB2B, 0xFB2D },
#define cp1255_comp_table05bf_idx (cp1255_comp_table05bc_idx+cp1255_comp_table05bc_len)
#define cp1255_comp_table05bf_len 3
  { 0x05D1, 0xFB4C },
  { 0x05DB, 0xFB4D },
  { 0x05E4, 0xFB4E },
#define cp1255_comp_table05c1_idx (cp1255_comp_table05bf_idx+cp1255_comp_table05bf_len)
#define cp1255_comp_table05c1_len 2
  { 0x05E9, 0xFB2A },
  { 0xFB49, 0xFB2C },
#define cp1255_comp_table05c2_idx (cp1255_comp_table05c1_idx+cp1255_comp_table05c1_len)
#define cp1255_comp_table05c2_len 2
  { 0x05E9, 0xFB2B },
  { 0xFB49, 0xFB2D },
};

/*
 * Directory into cp1255_comp_table_data: entry k describes the group of the
 * k-th relevant combining character, in the order
 * 0x05b4, 0x05b7, 0x05b8, 0x05b9, 0x05bc, 0x05bf, 0x05c1, 0x05c2.
 */
static const struct {
  unsigned int len;
  unsigned int idx;
} cp1255_comp_table[] = {
  { cp1255_comp_table05b4_len, cp1255_comp_table05b4_idx },
  { cp1255_comp_table05b7_len, cp1255_comp_table05b7_idx },
  { cp1255_comp_table05b8_len, cp1255_comp_table05b8_idx },
  { cp1255_comp_table05b9_len, cp1255_comp_table05b9_idx },
  { cp1255_comp_table05bc_len, cp1255_comp_table05bc_idx },
  { cp1255_comp_table05bf_len, cp1255_comp_table05bf_idx },
  { cp1255_comp_table05c1_len, cp1255_comp_table05c1_idx },
  { cp1255_comp_table05c2_len, cp1255_comp_table05c2_idx },
};
/*
 * Decomposition table for the relevant Unicode characters.
 *
 * Each entry maps a composed character to its base character followed by one
 * or two combining marks.  comb1 and comb2 are indices into
 * cp1255_comb_table (0..7); comb2 is -1 when only one mark is needed.
 * Entries are sorted by ascending 'composed' so the table can be
 * binary-searched.
 */
struct cp1255_decomp {
  unsigned short composed;
  unsigned short base;
  int comb1 : 8;
  signed int comb2 : 8;   /* explicitly signed: must be able to hold -1 */
};
static const struct cp1255_decomp cp1255_decomp_table[] = {
  { 0xFB1D, 0x05D9, 0, -1 },
  { 0xFB1F, 0x05F2, 1, -1 },
  { 0xFB2A, 0x05E9, 6, -1 },
  { 0xFB2B, 0x05E9, 7, -1 },
  { 0xFB2C, 0x05E9, 4, 6 },
  { 0xFB2D, 0x05E9, 4, 7 },
  { 0xFB2E, 0x05D0, 1, -1 },
  { 0xFB2F, 0x05D0, 2, -1 },
  { 0xFB30, 0x05D0, 4, -1 },
  { 0xFB31, 0x05D1, 4, -1 },
  { 0xFB32, 0x05D2, 4, -1 },
  { 0xFB33, 0x05D3, 4, -1 },
  { 0xFB34, 0x05D4, 4, -1 },
  { 0xFB35, 0x05D5, 4, -1 },
  { 0xFB36, 0x05D6, 4, -1 },
  { 0xFB38, 0x05D8, 4, -1 },
  { 0xFB39, 0x05D9, 4, -1 },
  { 0xFB3A, 0x05DA, 4, -1 },
  { 0xFB3B, 0x05DB, 4, -1 },
  { 0xFB3C, 0x05DC, 4, -1 },
  { 0xFB3E, 0x05DE, 4, -1 },
  { 0xFB40, 0x05E0, 4, -1 },
  { 0xFB41, 0x05E1, 4, -1 },
  { 0xFB43, 0x05E3, 4, -1 },
  { 0xFB44, 0x05E4, 4, -1 },
  { 0xFB46, 0x05E6, 4, -1 },
  { 0xFB47, 0x05E7, 4, -1 },
  { 0xFB48, 0x05E8, 4, -1 },
  { 0xFB49, 0x05E9, 4, -1 },
  { 0xFB4A, 0x05EA, 4, -1 },
  { 0xFB4B, 0x05D5, 3, -1 },
  { 0xFB4C, 0x05D1, 5, -1 },
  { 0xFB4D, 0x05DB, 5, -1 },
  { 0xFB4E, 0x05E4, 5, -1 },
};
/*
 * CP1255 byte for each relevant combining character, indexed by the comb1 /
 * comb2 values of cp1255_decomp_table.  In order, the bytes encode
 * U+05B4, U+05B7, U+05B8, U+05B9, U+05BC, U+05BF, U+05C1, U+05C2
 * (compare with cp1255_2uni).
 */
static const unsigned char cp1255_comb_table[] = {
  0xc4, 0xc7, 0xc8, 0xc9, 0xcc, 0xcf, 0xd1, 0xd2,
};
/*
 * CP1255 -> Unicode mapping for the bytes 0x80..0xff, indexed by (byte - 0x80).
 * 0xfffd marks a byte that has no Unicode mapping in this table.
 */
static const unsigned short cp1255_2uni[128] = {
  /* 0x80 */
  0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
  0x02c6, 0x2030, 0xfffd, 0x2039, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  /* 0x90 */
  0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
  0x02dc, 0x2122, 0xfffd, 0x203a, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  /* 0xa0 */
  0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20aa, 0x00a5, 0x00a6, 0x00a7,
  0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
  /* 0xb0 */
  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
  0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
  /* 0xc0 */
  0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7,
  0x05b8, 0x05b9, 0xfffd, 0x05bb, 0x05bc, 0x05bd, 0x05be, 0x05bf,
  /* 0xd0 */
  0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f0, 0x05f1, 0x05f2, 0x05f3,
  0x05f4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  /* 0xe0 */
  0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
  0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
  /* 0xf0 */
  0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
  0x05e8, 0x05e9, 0x05ea, 0xfffd, 0xfffd, 0x200e, 0x200f, 0xfffd,
};
/* In the CP1255 to Unicode direction, the state contains a buffered
   character, or 0 if none. */
/*
 * cp1255_mbtowc: one step of the CP1255 -> Unicode conversion.
 *
 * From the visible statements: reads the byte at *s, maps it through
 * cp1255_2uni, and consults conv->istate (the buffered character) to decide
 * whether the buffered character and the new one form a composed character.
 * Results are stored through pwc.
 *
 * NOTE(review): this listing is damaged.  Each statement carries a stray
 * leading number, tokens are split across lines, and many lines (the return
 * type, braces, the declarations of wc/k/i1/i2/i, the switch header, the
 * search loop, several returns) are absent.  Restore the function from a
 * pristine copy before compiling; the comments below describe only what is
 * visible here.
 */
172 cp1255_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, int n
)
174 unsigned char c
= *s
;
176 unsigned short last_wc
;
/* Table lookup for bytes 0x80..0xff (index c-0x80 into a 128-entry table).
   NOTE(review): the guard that keeps c < 0x80 away from this lookup is not
   visible in this listing. */
180 wc
= cp1255_2uni
[c
-0x80];
/* Fetch the character buffered by a previous call. */
184 last_wc
= conv
->istate
;
/* Only U+05B0..U+05C4 can be a combining mark of interest. */
186 if (wc
>= 0x05b0 && wc
< 0x05c5) {
187 /* See whether last_wc and wc can be combined. */
/* k is the row of cp1255_comp_table that belongs to this combining mark;
   any other mark leaves via not_combining. */
191 case 0x05b4: k
= 0; break;
192 case 0x05b7: k
= 1; break;
193 case 0x05b8: k
= 2; break;
194 case 0x05b9: k
= 3; break;
195 case 0x05bc: k
= 4; break;
196 case 0x05bf: k
= 5; break;
197 case 0x05c1: k
= 6; break;
198 case 0x05c2: k
= 7; break;
199 default: goto not_combining
;
/* [i1, i2] is the inclusive index range of this mark's entries in
   cp1255_comp_table_data. */
201 i1
= cp1255_comp_table
[k
].idx
;
202 i2
= i1
+ cp1255_comp_table
[k
].len
-1;
/* Quick reject: last_wc must lie between the first and last base of the
   group. */
203 if (last_wc
>= cp1255_comp_table_data
[i1
].base
204 && last_wc
<= cp1255_comp_table_data
[i2
].base
) {
/* Fragments of the search for last_wc among the group's bases.
   NOTE(review): the loop and the updates of i/i1/i2 are missing here. */
208 if (last_wc
== cp1255_comp_table_data
[i
].base
)
210 if (last_wc
< cp1255_comp_table_data
[i
].base
) {
219 if (last_wc
== cp1255_comp_table_data
[i
].base
)
/* Match found: replace the buffered character by the composed form. */
225 last_wc
= cp1255_comp_table_data
[i
].composed
;
/* These three composed forms stay buffered instead of being emitted --
   presumably because a further mark can still combine with them
   (NOTE(review): confirm against cp1255_comp_table_data). */
226 if (last_wc
== 0xfb2a || last_wc
== 0xfb2b || last_wc
== 0xfb49) {
227 /* Buffer the combined character. */
228 conv
->istate
= last_wc
;
229 return RET_TOOFEW(1);
231 /* Output the combined character. */
233 *pwc
= (ucs4_t
) last_wc
;
239 /* Output the buffered character. */
241 *pwc
= (ucs4_t
) last_wc
;
242 return 0; /* Don't advance the input pointer. */
/* 0x07db5f7f has one bit per letter U+05D0..U+05EA; the bit for (wc - 0x05d0)
   decides whether the letter passes this test. */
244 if ((wc
>= 0x05d0 && wc
<= 0x05ea && ((0x07db5f7f >> (wc
- 0x05d0)) & 1))
246 /* wc is a possible match in cp1255_comp_table_data. Buffer it. */
248 return RET_TOOFEW(1);
250 /* Output wc immediately. */
/* cp1255_flushwc is an alias for normal_flushwc. */
#define cp1255_flushwc normal_flushwc
/*
 * Unicode -> CP1255 for U+00A0..U+00F7, indexed by (wc - 0x00a0).
 * NOTE(review): 0x00 presumably marks "no CP1255 byte"; the check that
 * consumes it is not visible in this listing.
 */
static const unsigned char cp1255_page00[88] = {
  0xa0, 0xa1, 0xa2, 0xa3, 0x00, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
  0xa8, 0xa9, 0x00, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
  0xb8, 0xb9, 0x00, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, /* 0xd0-0xd7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xba, /* 0xf0-0xf7 */
};
/*
 * Unicode -> CP1255 for U+02C0..U+02DF, indexed by (wc - 0x02c0).
 * NOTE(review): 0x00 presumably marks "no CP1255 byte"; the check that
 * consumes it is not visible in this listing.
 */
static const unsigned char cp1255_page02[32] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
  0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
};
/*
 * Unicode -> CP1255 for the Hebrew block U+05B0..U+05F7, indexed by
 * (wc - 0x05b0).
 * NOTE(review): 0x00 presumably marks "no CP1255 byte"; the check that
 * consumes it is not visible in this listing.
 */
static const unsigned char cp1255_page05[72] = {
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xb0-0xb7 */
  0xc8, 0xc9, 0x00, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xb8-0xbf */
  0xd0, 0xd1, 0xd2, 0xd3, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xd0-0xd7 */
  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xd8-0xdf */
  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xe0-0xe7 */
  0xf8, 0xf9, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
  0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */
};
/*
 * Unicode -> CP1255 for U+2008..U+203F, indexed by (wc - 0x2008).
 * NOTE(review): 0x00 presumably marks "no CP1255 byte"; the check that
 * consumes it is not visible in this listing.
 */
static const unsigned char cp1255_page20[56] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfd, 0xfe, /* 0x08-0x0f */
  0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */
  0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */
  0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
  0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */
};
/*
 * cp1255_wctomb: one step of the Unicode -> CP1255 conversion.
 *
 * From the visible statements: wc is first looked up in the page tables
 * (cp1255_page00/02/05/20) or matched against a few single code points; the
 * function then tries a canonical decomposition via cp1255_decomp_table and
 * writes combining-mark bytes taken from cp1255_comb_table into r[1] (and
 * r[2] when there are two marks).
 *
 * NOTE(review): this listing is damaged.  Each statement carries a stray
 * leading number, tokens are split across lines, and many lines (the return
 * type, braces, the declarations of c/i1/i, the assignments for the
 * single-code-point branches, the search loop, the output-size checks and
 * all returns) are absent.  Restore the function from a pristine copy before
 * compiling; the comments below describe only what is visible here.
 */
299 cp1255_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, int n
)
/* Direct lookups: each page table is indexed by wc minus the first code
   point of its range. */
306 else if (wc
>= 0x00a0 && wc
< 0x00f8)
307 c
= cp1255_page00
[wc
-0x00a0];
/* NOTE(review): the statement belonging to this branch is missing. */
308 else if (wc
== 0x0192)
310 else if (wc
>= 0x02c0 && wc
< 0x02e0)
311 c
= cp1255_page02
[wc
-0x02c0];
312 else if (wc
>= 0x05b0 && wc
< 0x05f8)
313 c
= cp1255_page05
[wc
-0x05b0];
314 else if (wc
>= 0x2008 && wc
< 0x2040)
315 c
= cp1255_page20
[wc
-0x2008];
/* NOTE(review): the statements belonging to the next three branches are
   missing. */
316 else if (wc
== 0x20aa)
318 else if (wc
== 0x20ac)
320 else if (wc
== 0x2122)
326 /* Try canonical decomposition. */
328 /* Binary search through cp1255_decomp_table. */
/* i2 starts at the index of the last table entry. */
330 unsigned int i2
= sizeof(cp1255_decomp_table
)/sizeof(cp1255_decomp_table
[0])-1;
/* Quick reject: wc must lie between the first and last composed value. */
331 if (wc
>= cp1255_decomp_table
[i1
].composed
332 && wc
<= cp1255_decomp_table
[i2
].composed
) {
335 /* Here i2 - i1 > 0. */
337 if (wc
== cp1255_decomp_table
[i
].composed
)
339 if (wc
< cp1255_decomp_table
[i
].composed
) {
342 /* Here i1 < i < i2. */
345 /* Here i1 <= i < i2. */
349 /* Here i2 - i1 = 1. */
351 if (wc
== cp1255_decomp_table
[i
].composed
)
358 /* Found a canonical decomposition. */
/* Continue with the base character of the decomposition. */
359 wc
= cp1255_decomp_table
[i
].base
;
360 /* wc is one of 0x05d0..0x05d6, 0x05d8..0x05dc, 0x05de, 0x05e0..0x05e1,
361 0x05e3..0x05e4, 0x05e6..0x05ea, 0x05f2. */
362 c
= cp1255_page05
[wc
-0x05b0];
/* comb2 < 0: a single combining mark follows the base (only r[1] is
   written); otherwise two marks follow (r[1] and r[2]). */
363 if (cp1255_decomp_table
[i
].comb2
< 0) {
367 r
[1] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb1
];
373 r
[1] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb1
];
374 r
[2] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb2
];