2 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
27 /* Combining characters used in Hebrew encoding CP1255. */
29 /* Relevant combining characters:
30 0x05b4, 0x05b7, 0x05b8, 0x05b9, 0x05bc, 0x05bf, 0x05c1, 0x05c2. */
/* Composition tables for each of the relevant combining characters.

   All groups share one array.  The group for combining character U+XXXX
   occupies the slice starting at cp1255_comp_tableXXXX_idx and spanning
   cp1255_comp_tableXXXX_len entries.  Inside a group the entries are in
   ascending order of 'base': the lookup in cp1255_mbtowc range-checks
   against the first and last 'base' of the group before searching.

   NOTE(review): the entry rows were absent from the damaged listing.  They
   were rebuilt from cp1255_decomp_table: every single-combiner row
   { composed, base, k, -1 } yields { base, composed } in group k, and the
   two-combiner rows 0xFB2C / 0xFB2D yield the second-step pairs starting
   from 0xFB2A, 0xFB2B and 0xFB49.  The result matches every declared group
   length; confirm against the upstream table before release. */
static const struct { unsigned short base; unsigned short composed; } cp1255_comp_table_data[] = {
#define cp1255_comp_table05b4_idx 0
#define cp1255_comp_table05b4_len 1
  { 0x05D9, 0xFB1D },
#define cp1255_comp_table05b7_idx (cp1255_comp_table05b4_idx+cp1255_comp_table05b4_len)
#define cp1255_comp_table05b7_len 2
  { 0x05D0, 0xFB2E },
  { 0x05F2, 0xFB1F },
#define cp1255_comp_table05b8_idx (cp1255_comp_table05b7_idx+cp1255_comp_table05b7_len)
#define cp1255_comp_table05b8_len 1
  { 0x05D0, 0xFB2F },
#define cp1255_comp_table05b9_idx (cp1255_comp_table05b8_idx+cp1255_comp_table05b8_len)
#define cp1255_comp_table05b9_len 1
  { 0x05D5, 0xFB4B },
#define cp1255_comp_table05bc_idx (cp1255_comp_table05b9_idx+cp1255_comp_table05b9_len)
#define cp1255_comp_table05bc_len 24
  { 0x05D0, 0xFB30 },
  { 0x05D1, 0xFB31 },
  { 0x05D2, 0xFB32 },
  { 0x05D3, 0xFB33 },
  { 0x05D4, 0xFB34 },
  { 0x05D5, 0xFB35 },
  { 0x05D6, 0xFB36 },
  { 0x05D8, 0xFB38 },
  { 0x05D9, 0xFB39 },
  { 0x05DA, 0xFB3A },
  { 0x05DB, 0xFB3B },
  { 0x05DC, 0xFB3C },
  { 0x05DE, 0xFB3E },
  { 0x05E0, 0xFB40 },
  { 0x05E1, 0xFB41 },
  { 0x05E3, 0xFB43 },
  { 0x05E4, 0xFB44 },
  { 0x05E6, 0xFB46 },
  { 0x05E7, 0xFB47 },
  { 0x05E8, 0xFB48 },
  { 0x05E9, 0xFB49 },
  { 0x05EA, 0xFB4A },
  { 0xFB2A, 0xFB2C },
  { 0xFB2B, 0xFB2D },
#define cp1255_comp_table05bf_idx (cp1255_comp_table05bc_idx+cp1255_comp_table05bc_len)
#define cp1255_comp_table05bf_len 3
  { 0x05D1, 0xFB4C },
  { 0x05DB, 0xFB4D },
  { 0x05E4, 0xFB4E },
#define cp1255_comp_table05c1_idx (cp1255_comp_table05bf_idx+cp1255_comp_table05bf_len)
#define cp1255_comp_table05c1_len 2
  { 0x05E9, 0xFB2A },
  { 0xFB49, 0xFB2C },
#define cp1255_comp_table05c2_idx (cp1255_comp_table05c1_idx+cp1255_comp_table05c1_len)
#define cp1255_comp_table05c2_len 2
  { 0x05E9, 0xFB2B },
  { 0xFB49, 0xFB2D },
};
87 static const struct { unsigned int len
; unsigned int idx
; } cp1255_comp_table
[] = {
88 { cp1255_comp_table05b4_len
, cp1255_comp_table05b4_idx
},
89 { cp1255_comp_table05b7_len
, cp1255_comp_table05b7_idx
},
90 { cp1255_comp_table05b8_len
, cp1255_comp_table05b8_idx
},
91 { cp1255_comp_table05b9_len
, cp1255_comp_table05b9_idx
},
92 { cp1255_comp_table05bc_len
, cp1255_comp_table05bc_idx
},
93 { cp1255_comp_table05bf_len
, cp1255_comp_table05bf_idx
},
94 { cp1255_comp_table05c1_len
, cp1255_comp_table05c1_idx
},
95 { cp1255_comp_table05c2_len
, cp1255_comp_table05c2_idx
},
/* Decomposition table for the relevant Unicode characters. */

/* One decomposition rule.
   composed - the precomposed code point that is being decomposed.
   base     - the base letter it decomposes to.
   comb1    - index into cp1255_comb_table of the first combining byte
              (the table in this file only uses the values 0..7).
   comb2    - index of a second combining byte, or a negative value when
              there is none; declared 'signed int' so that -1 is
              representable whatever the signedness of plain-int bit-fields. */
struct cp1255_decomp {
  unsigned short composed;
  unsigned short base;
  int comb1 : 8;
  signed int comb2 : 8;
};
100 static const struct cp1255_decomp cp1255_decomp_table
[] = {
101 { 0xFB1D, 0x05D9, 0, -1 },
102 { 0xFB1F, 0x05F2, 1, -1 },
103 { 0xFB2A, 0x05E9, 6, -1 },
104 { 0xFB2B, 0x05E9, 7, -1 },
105 { 0xFB2C, 0x05E9, 4, 6 },
106 { 0xFB2D, 0x05E9, 4, 7 },
107 { 0xFB2E, 0x05D0, 1, -1 },
108 { 0xFB2F, 0x05D0, 2, -1 },
109 { 0xFB30, 0x05D0, 4, -1 },
110 { 0xFB31, 0x05D1, 4, -1 },
111 { 0xFB32, 0x05D2, 4, -1 },
112 { 0xFB33, 0x05D3, 4, -1 },
113 { 0xFB34, 0x05D4, 4, -1 },
114 { 0xFB35, 0x05D5, 4, -1 },
115 { 0xFB36, 0x05D6, 4, -1 },
116 { 0xFB38, 0x05D8, 4, -1 },
117 { 0xFB39, 0x05D9, 4, -1 },
118 { 0xFB3A, 0x05DA, 4, -1 },
119 { 0xFB3B, 0x05DB, 4, -1 },
120 { 0xFB3C, 0x05DC, 4, -1 },
121 { 0xFB3E, 0x05DE, 4, -1 },
122 { 0xFB40, 0x05E0, 4, -1 },
123 { 0xFB41, 0x05E1, 4, -1 },
124 { 0xFB43, 0x05E3, 4, -1 },
125 { 0xFB44, 0x05E4, 4, -1 },
126 { 0xFB46, 0x05E6, 4, -1 },
127 { 0xFB47, 0x05E7, 4, -1 },
128 { 0xFB48, 0x05E8, 4, -1 },
129 { 0xFB49, 0x05E9, 4, -1 },
130 { 0xFB4A, 0x05EA, 4, -1 },
131 { 0xFB4B, 0x05D5, 3, -1 },
132 { 0xFB4C, 0x05D1, 5, -1 },
133 { 0xFB4D, 0x05DB, 5, -1 },
134 { 0xFB4E, 0x05E4, 5, -1 },
/* CP1255 byte for each combining character, indexed by the same k used in
   cp1255_comp_table and in the comb1/comb2 fields of cp1255_decomp_table
   (0 = U+05B4 ... 7 = U+05C2). */
static const unsigned char cp1255_comb_table[] = {
  0xc4, 0xc7, 0xc8, 0xc9, 0xcc, 0xcf, 0xd1, 0xd2,
};
/* Unicode value for each CP1255 byte 0x80..0xff, indexed by (byte - 0x80).
   0xfffd marks a byte with no mapping in this table. */
static const unsigned short cp1255_2uni[128] = {
  /* 0x80 */
  0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
  0x02c6, 0x2030, 0xfffd, 0x2039, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  /* 0x90 */
  0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
  0x02dc, 0x2122, 0xfffd, 0x203a, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  /* 0xa0 */
  0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20aa, 0x00a5, 0x00a6, 0x00a7,
  0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
  /* 0xb0 */
  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
  0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
  /* 0xc0 */
  0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7,
  0x05b8, 0x05b9, 0xfffd, 0x05bb, 0x05bc, 0x05bd, 0x05be, 0x05bf,
  /* 0xd0 */
  0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f0, 0x05f1, 0x05f2, 0x05f3,
  0x05f4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  /* 0xe0 */
  0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
  0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
  /* 0xf0 */
  0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
  0x05e8, 0x05e9, 0x05ea, 0xfffd, 0xfffd, 0x200e, 0x200f, 0xfffd,
};
168 /* In the CP1255 to Unicode direction, the state contains a buffered
169 character, or 0 if none. */
/* cp1255_mbtowc: one step of the CP1255 -> Unicode direction.

   NOTE(review): this listing of the function is incomplete.  Each line
   starts with a stray original line number and those numbers skip, so the
   return type, several declarations, the search loop, closing braces and
   most return statements are not visible here.  The notes below describe
   only the fragments that are visible; restore the missing lines before
   changing any logic.

   Visible behaviour:
   - The input byte c is read from *s and wc is taken from
     cp1255_2uni[c-0x80] (presumably only for c >= 0x80; the guard is not
     visible).
   - last_wc is loaded from conv->istate, the buffered character.
   - When wc is in 0x05b0..0x05c4 it is mapped to a group number k
     (0x05b4 -> 0 ... 0x05c2 -> 7); any other value jumps to not_combining.
   - The group's slice [i1, i2] of cp1255_comp_table_data is searched for
     an entry whose 'base' equals last_wc; on a hit last_wc is replaced by
     that entry's 'composed' value.
   - A composed result of 0xfb2a, 0xfb2b or 0xfb49 is buffered again in
     conv->istate and RET_TOOFEW(1) is returned; other results are stored
     through pwc.
   - On the non-combining path the buffered character is stored through
     pwc and 0 is returned so that the input pointer is not advanced.
   - With nothing to combine, a wc in 0x05d0..0x05f2 is buffered
     (RET_TOOFEW(1)); anything else is output immediately. */
172 cp1255_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, int n
)
174 unsigned char c
= *s
;
176 unsigned short last_wc
;
/* Table lookup for the high half; the table is indexed by (byte - 0x80). */
180 wc
= cp1255_2uni
[c
-0x80];
/* Pick up the character buffered by a previous call. */
184 last_wc
= conv
->istate
;
186 if (wc
>= 0x05b0 && wc
< 0x05c5) {
187 /* See whether last_wc and wc can be combined. */
/* k selects the row of cp1255_comp_table for this combining character. */
191 case 0x05b4: k
= 0; break;
192 case 0x05b7: k
= 1; break;
193 case 0x05b8: k
= 2; break;
194 case 0x05b9: k
= 3; break;
195 case 0x05bc: k
= 4; break;
196 case 0x05bf: k
= 5; break;
197 case 0x05c1: k
= 6; break;
198 case 0x05c2: k
= 7; break;
199 default: goto not_combining
;
/* [i1, i2] is the inclusive index range of group k in the data table. */
201 i1
= cp1255_comp_table
[k
].idx
;
202 i2
= i1
+ cp1255_comp_table
[k
].len
-1;
/* Quick reject: last_wc must lie between the first and last base of the group. */
203 if (last_wc
>= cp1255_comp_table_data
[i1
].base
204 && last_wc
<= cp1255_comp_table_data
[i2
].base
) {
/* NOTE(review): the comparisons below look like the steps of a binary
   search over 'base'; the loop itself is not visible - confirm. */
208 if (last_wc
== cp1255_comp_table_data
[i
].base
)
210 if (last_wc
< cp1255_comp_table_data
[i
].base
) {
219 if (last_wc
== cp1255_comp_table_data
[i
].base
)
/* Match found: replace the buffered character by the composed one. */
225 last_wc
= cp1255_comp_table_data
[i
].composed
;
/* These three composed values are themselves a 'base' in
   cp1255_comp_table_data, so they are kept buffered. */
226 if (last_wc
== 0xfb2a || last_wc
== 0xfb2b || last_wc
== 0xfb49) {
227 /* Buffer the combined character. */
228 conv
->istate
= last_wc
;
229 return RET_TOOFEW(1);
231 /* Output the combined character. */
233 *pwc
= (ucs4_t
) last_wc
;
239 /* Output the buffered character. */
241 *pwc
= (ucs4_t
) last_wc
;
242 return 0; /* Don't advance the input pointer. */
244 if (wc
>= 0x05d0 && wc
<= 0x05f2) {
245 /* wc is a possible match in cp1255_comp_table_data. Buffer it. */
247 return RET_TOOFEW(1);
249 /* Output wc immediately. */
/* The flush entry point for CP1255 is an alias for normal_flushwc, which is
   defined outside this listing (presumably the routine that emits a
   character still held in the conversion state - confirm). */
#define cp1255_flushwc normal_flushwc
/* CP1255 byte for Unicode 0x00a0..0x00f7, indexed by (wc - 0x00a0).
   A 0x00 entry means the code point has no CP1255 byte in this table. */
static const unsigned char cp1255_page00[88] = {
  0xa0, 0xa1, 0xa2, 0xa3, 0x00, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
  0xa8, 0xa9, 0x00, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
  0xb8, 0xb9, 0x00, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, /* 0xd0-0xd7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xba, /* 0xf0-0xf7 */
};
/* CP1255 byte for Unicode 0x02c0..0x02df, indexed by (wc - 0x02c0).
   A 0x00 entry means the code point has no CP1255 byte in this table. */
static const unsigned char cp1255_page02[32] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
  0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
};
/* CP1255 byte for Unicode 0x05b0..0x05f7, indexed by (wc - 0x05b0).
   A 0x00 entry means the code point has no CP1255 byte in this table.
   cp1255_wctomb also uses this table to encode the base letter of a
   canonical decomposition. */
static const unsigned char cp1255_page05[72] = {
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xb0-0xb7 */
  0xc8, 0xc9, 0x00, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xb8-0xbf */
  0xd0, 0xd1, 0xd2, 0xd3, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xd0-0xd7 */
  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xd8-0xdf */
  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xe0-0xe7 */
  0xf8, 0xf9, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
  0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */
};
/* CP1255 byte for Unicode 0x2008..0x203f, indexed by (wc - 0x2008).
   A 0x00 entry means the code point has no CP1255 byte in this table. */
static const unsigned char cp1255_page20[56] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfd, 0xfe, /* 0x08-0x0f */
  0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */
  0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */
  0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
  0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */
};
/* cp1255_wctomb: one step of the Unicode -> CP1255 direction.

   NOTE(review): this listing of the function is incomplete.  Each line
   starts with a stray original line number and those numbers skip, so the
   return type, local declarations, the first branch of the if-chain, the
   bodies of the single-value branches, the search loop and every return
   statement are not visible here.  The notes below describe only the
   fragments that are visible; restore the missing lines before changing
   any logic.

   Visible behaviour:
   - The candidate byte c is taken from a per-range table:
       0x00a0..0x00f7 -> cp1255_page00,  0x02c0..0x02df -> cp1255_page02,
       0x05b0..0x05f7 -> cp1255_page05,  0x2008..0x203f -> cp1255_page20.
     0x0192, 0x20aa, 0x20ac and 0x2122 have branches of their own whose
     bodies are not visible.
   - As a fallback a canonical decomposition is looked up by binary search
     on the 'composed' field of cp1255_decomp_table.  On a hit wc is
     replaced by the entry's base letter, which is encoded through
     cp1255_page05, and the combining bytes are written after it:
     r[1] alone when comb2 is negative, otherwise r[1] and r[2]. */
298 cp1255_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, int n
)
305 else if (wc
>= 0x00a0 && wc
< 0x00f8)
306 c
= cp1255_page00
[wc
-0x00a0];
307 else if (wc
== 0x0192)
309 else if (wc
>= 0x02c0 && wc
< 0x02e0)
310 c
= cp1255_page02
[wc
-0x02c0];
311 else if (wc
>= 0x05b0 && wc
< 0x05f8)
312 c
= cp1255_page05
[wc
-0x05b0];
313 else if (wc
>= 0x2008 && wc
< 0x2040)
314 c
= cp1255_page20
[wc
-0x2008];
315 else if (wc
== 0x20aa)
317 else if (wc
== 0x20ac)
319 else if (wc
== 0x2122)
325 /* Try canonical decomposition. */
327 /* Binary search through cp1255_decomp_table. */
/* i2 starts at the index of the last table entry. */
329 unsigned int i2
= sizeof(cp1255_decomp_table
)/sizeof(cp1255_decomp_table
[0])-1;
/* Quick reject: wc must lie between the first and last 'composed' value. */
330 if (wc
>= cp1255_decomp_table
[i1
].composed
331 && wc
<= cp1255_decomp_table
[i2
].composed
) {
334 /* Here i2 - i1 > 0. */
336 if (wc
== cp1255_decomp_table
[i
].composed
)
338 if (wc
< cp1255_decomp_table
[i
].composed
) {
341 /* Here i1 < i < i2. */
344 /* Here i1 <= i < i2. */
348 /* Here i2 - i1 = 1. */
350 if (wc
== cp1255_decomp_table
[i
].composed
)
357 /* Found a canonical decomposition. */
358 wc
= cp1255_decomp_table
[i
].base
;
359 /* wc is one of 0x05d0..0x05d6, 0x05d8..0x05dc, 0x05de, 0x05e0..0x05e1,
360 0x05e3..0x05e4, 0x05e6..0x05ea, 0x05f2. */
/* Encode the base letter; the combining bytes follow it in r[1] and r[2]. */
361 c
= cp1255_page05
[wc
-0x05b0];
/* A negative comb2 means there is a single combining character. */
362 if (cp1255_decomp_table
[i
].comb2
< 0) {
366 r
[1] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb1
];
/* Two combining characters: emit comb1 and then comb2. */
372 r
[1] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb1
];
373 r
[2] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb2
];