2 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 59 Temple Place -
18 * Suite 330, Boston, MA 02111-1307, USA.
25 /* Combining characters used in Hebrew encoding CP1255. */
29 /* Relevant combining characters, as Unicode code points: the Hebrew
   points U+05B4, U+05B7, U+05B8, U+05B9, U+05BC, U+05BF, U+05C1 and U+05C2.
   The position of a character in this list is its combining index;
   cp1255_comp_table and cp1255_comb_table list the same eight characters
   in the same order. */
30 static const unsigned short cp1255_comb_table_uni
[] = {
31 0x05b4, 0x05b7, 0x05b8, 0x05b9, 0x05bc, 0x05bf, 0x05c1, 0x05c2,
34 /* Composition tables for each of the relevant combining characters.
   The array holds pairs of 16-bit values.  For the combining character
   U+XXXX, cp1255_comp_tableXXXX_idx is the index of its first pair and
   cp1255_comp_tableXXXX_len is its number of pairs.  Each _idx is defined
   as the previous character's _idx plus its _len, so the per-character
   runs are contiguous and follow the order of cp1255_comb_table_uni
   (1 + 2 + 1 + 1 + 24 + 3 + 2 + 2 = 36 pairs in total).
   NOTE(review): each pair is presumably { base, composed } -- confirm
   against the data rows. */
35 static const unsigned short cp1255_comp_table_data
[][2] = {
36 #define cp1255_comp_table05b4_idx 0
37 #define cp1255_comp_table05b4_len 1
39 #define cp1255_comp_table05b7_idx (cp1255_comp_table05b4_idx+cp1255_comp_table05b4_len)
40 #define cp1255_comp_table05b7_len 2
43 #define cp1255_comp_table05b8_idx (cp1255_comp_table05b7_idx+cp1255_comp_table05b7_len)
44 #define cp1255_comp_table05b8_len 1
46 #define cp1255_comp_table05b9_idx (cp1255_comp_table05b8_idx+cp1255_comp_table05b8_len)
47 #define cp1255_comp_table05b9_len 1
49 #define cp1255_comp_table05bc_idx (cp1255_comp_table05b9_idx+cp1255_comp_table05b9_len)
50 #define cp1255_comp_table05bc_len 24
75 #define cp1255_comp_table05bf_idx (cp1255_comp_table05bc_idx+cp1255_comp_table05bc_len)
76 #define cp1255_comp_table05bf_len 3
80 #define cp1255_comp_table05c1_idx (cp1255_comp_table05bf_idx+cp1255_comp_table05bf_len)
81 #define cp1255_comp_table05c1_len 2
84 #define cp1255_comp_table05c2_idx (cp1255_comp_table05c1_idx+cp1255_comp_table05c1_len)
85 #define cp1255_comp_table05c2_len 2
/* One entry per relevant combining character, in the order of
   cp1255_comb_table_uni: len is the number of composition pairs for that
   character and offset is the index of its first pair, both taken from the
   cp1255_comp_tableXXXX_len / cp1255_comp_tableXXXX_idx macros.
   NOTE(review): offset is presumably an index into cp1255_comp_table_data
   -- confirm at the point of use. */
89 static const struct { unsigned int len
; unsigned int offset
; } cp1255_comp_table
[] = {
90 { cp1255_comp_table05b4_len
, cp1255_comp_table05b4_idx
},
91 { cp1255_comp_table05b7_len
, cp1255_comp_table05b7_idx
},
92 { cp1255_comp_table05b8_len
, cp1255_comp_table05b8_idx
},
93 { cp1255_comp_table05b9_len
, cp1255_comp_table05b9_idx
},
94 { cp1255_comp_table05bc_len
, cp1255_comp_table05bc_idx
},
95 { cp1255_comp_table05bf_len
, cp1255_comp_table05bf_idx
},
96 { cp1255_comp_table05c1_len
, cp1255_comp_table05c1_idx
},
97 { cp1255_comp_table05c2_len
, cp1255_comp_table05c2_idx
},
102 /* Decomposition table for the relevant Unicode characters. */
/* One canonical decomposition: `composed` is a precomposed Unicode
   character and `base` is its base character.  comb1 and comb2 are indices
   into cp1255_comb_table selecting the combining characters that follow the
   base; comb2 is negative when there is only one combining character.

   comb2 is declared `signed int` because it is implementation-defined
   whether a plain `int` bit-field is signed.  Where it is unsigned, the -1
   sentinel would read back as 255, the `comb2 < 0` test would never succeed
   and 255 would be used as a table index.  comb1 only ever holds small
   non-negative indices, so its signedness does not matter. */
struct cp1255_decomp {
  unsigned short composed;
  unsigned short base;
  int comb1 : 8;
  signed int comb2 : 8;
};
/* Each row is { composed, base, comb1, comb2 }.  Rows are in ascending
   order of `composed`, which the binary search in cp1255_wctomb relies on.
   comb1 and comb2 are indices into cp1255_comb_table; comb2 is -1 when the
   decomposition has only one combining character. */
104 static const struct cp1255_decomp cp1255_decomp_table
[] = {
105 { 0xFB1D, 0x05D9, 0, -1 },
106 { 0xFB1F, 0x05F2, 1, -1 },
107 { 0xFB2A, 0x05E9, 6, -1 },
108 { 0xFB2B, 0x05E9, 7, -1 },
109 { 0xFB2C, 0x05E9, 4, 6 },
110 { 0xFB2D, 0x05E9, 4, 7 },
111 { 0xFB2E, 0x05D0, 1, -1 },
112 { 0xFB2F, 0x05D0, 2, -1 },
113 { 0xFB30, 0x05D0, 4, -1 },
114 { 0xFB31, 0x05D1, 4, -1 },
115 { 0xFB32, 0x05D2, 4, -1 },
116 { 0xFB33, 0x05D3, 4, -1 },
117 { 0xFB34, 0x05D4, 4, -1 },
118 { 0xFB35, 0x05D5, 4, -1 },
119 { 0xFB36, 0x05D6, 4, -1 },
120 { 0xFB38, 0x05D8, 4, -1 },
121 { 0xFB39, 0x05D9, 4, -1 },
122 { 0xFB3A, 0x05DA, 4, -1 },
123 { 0xFB3B, 0x05DB, 4, -1 },
124 { 0xFB3C, 0x05DC, 4, -1 },
125 { 0xFB3E, 0x05DE, 4, -1 },
126 { 0xFB40, 0x05E0, 4, -1 },
127 { 0xFB41, 0x05E1, 4, -1 },
128 { 0xFB43, 0x05E3, 4, -1 },
129 { 0xFB44, 0x05E4, 4, -1 },
130 { 0xFB46, 0x05E6, 4, -1 },
131 { 0xFB47, 0x05E7, 4, -1 },
132 { 0xFB48, 0x05E8, 4, -1 },
133 { 0xFB49, 0x05E9, 4, -1 },
134 { 0xFB4A, 0x05EA, 4, -1 },
135 { 0xFB4B, 0x05D5, 3, -1 },
136 { 0xFB4C, 0x05D1, 5, -1 },
137 { 0xFB4D, 0x05DB, 5, -1 },
138 { 0xFB4E, 0x05E4, 5, -1 },
/* CP1255 bytes of the relevant combining characters, in the same order as
   cp1255_comb_table_uni (0xc4 is U+05B4, ..., 0xd2 is U+05C2).  Indexed by
   the comb1 / comb2 fields of cp1255_decomp_table. */
141 static const unsigned char cp1255_comb_table
[] = {
142 0xc4, 0xc7, 0xc8, 0xc9, 0xcc, 0xcf, 0xd1, 0xd2,
/* Unicode value for each CP1255 byte 0x80..0xff, indexed by byte - 0x80
   (eight entries per row, so the first row covers 0x80..0x87).
   NOTE(review): 0xfffd presumably marks bytes with no assigned character
   -- confirm how the reader of this table treats it. */
145 static const unsigned short cp1255_2uni
[128] = {
147 0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
148 0x02c6, 0x2030, 0xfffd, 0x2039, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
150 0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
151 0x02dc, 0x2122, 0xfffd, 0x203a, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
153 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20aa, 0x00a5, 0x00a6, 0x00a7,
154 0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
156 0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
157 0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
159 0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7,
160 0x05b8, 0x05b9, 0xfffd, 0x05bb, 0x05bc, 0x05bd, 0x05be, 0x05bf,
162 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f0, 0x05f1, 0x05f2, 0x05f3,
163 0x05f4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
165 0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
166 0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
168 0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
169 0x05e8, 0x05e9, 0x05ea, 0xfffd, 0xfffd, 0x200e, 0x200f, 0xfffd,
172 /* CP1255 as a stateless encoding. Suitable for locales, but it has
173 the drawback that it can produce Unicode strings which are not
174 in Normalization Form C and therefore not suitable for interchange.
175 FIXME: It should produce Normalization Form C instead. */
/* CP1255 -> Unicode direction.  The statements below read the byte at *s
   and look it up in cp1255_2uni at index c - 0x80.
   NOTE(review): that index is only in range for c >= 0x80, so a guard on c
   presumably precedes the lookup -- confirm.  The roles of conv, pwc and n
   are presumably converter state, output character and input length --
   confirm against the callers. */
178 cp1255_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, int n
)
/* First input byte. */
180 unsigned char c
= *s
;
/* Table lookup for the upper half of the code page. */
186 unsigned short wc
= cp1255_2uni
[c
-0x80];
/* CP1255 byte for U+00A0..U+00F7, indexed by wc - 0x00a0 (see the matching
   range test in cp1255_wctomb).  The row comments give the low byte of the
   Unicode values covered by each row.
   NOTE(review): 0x00 presumably means "no CP1255 equivalent" -- confirm in
   cp1255_wctomb. */
195 static const unsigned char cp1255_page00
[88] = {
196 0xa0, 0xa1, 0xa2, 0xa3, 0x00, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
197 0xa8, 0xa9, 0x00, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
198 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
199 0xb8, 0xb9, 0x00, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
200 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */
201 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
202 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, /* 0xd0-0xd7 */
203 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
204 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */
205 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
206 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xba, /* 0xf0-0xf7 */
/* CP1255 byte for U+02C0..U+02DF, indexed by wc - 0x02c0 (see the matching
   range test in cp1255_wctomb).  Only U+02C6 (0x88) and U+02DC (0x98) have
   non-zero entries.
   NOTE(review): 0x00 presumably means "no CP1255 equivalent" -- confirm in
   cp1255_wctomb. */
208 static const unsigned char cp1255_page02
[32] = {
209 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, /* 0xc0-0xc7 */
210 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
211 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
212 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
/* CP1255 byte for U+05B0..U+05F7 (the Hebrew block entries used here),
   indexed by wc - 0x05b0.  cp1255_wctomb uses it both for direct lookups
   and for the base character of a canonical decomposition.
   NOTE(review): 0x00 presumably means "no CP1255 equivalent" -- confirm in
   cp1255_wctomb. */
214 static const unsigned char cp1255_page05
[72] = {
215 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xb0-0xb7 */
216 0xc8, 0xc9, 0x00, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xb8-0xbf */
217 0xd0, 0xd1, 0xd2, 0xd3, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */
218 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
219 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xd0-0xd7 */
220 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xd8-0xdf */
221 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xe0-0xe7 */
222 0xf8, 0xf9, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
223 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */
/* CP1255 byte for U+2008..U+203F, indexed by wc - 0x2008 (see the matching
   range test in cp1255_wctomb).  The row comments give the low byte of the
   Unicode values covered by each row.
   NOTE(review): 0x00 presumably means "no CP1255 equivalent" -- confirm in
   cp1255_wctomb. */
225 static const unsigned char cp1255_page20
[56] = {
226 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfd, 0xfe, /* 0x08-0x0f */
227 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */
228 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */
229 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */
230 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */
231 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
232 0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */
/* Unicode -> CP1255 direction: converts the character wc into bytes
   stored through r.  The range tests below pick the page table that covers
   wc.  The later part looks wc up in cp1255_decomp_table by binary search
   and, when found, emits the base character followed by one or two
   combining characters.
   NOTE(review): n is presumably the space available in r and the return
   value the number of bytes written -- confirm against the callers. */
236 cp1255_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, int n
)
/* U+00A0..U+00F7: Latin-1 punctuation and symbols. */
243 else if (wc
>= 0x00a0 && wc
< 0x00f8)
244 c
= cp1255_page00
[wc
-0x00a0];
245 else if (wc
== 0x0192)
/* U+02C0..U+02DF: spacing modifier letters. */
247 else if (wc
>= 0x02c0 && wc
< 0x02e0)
248 c
= cp1255_page02
[wc
-0x02c0];
/* U+05B0..U+05F7: Hebrew points and letters. */
249 else if (wc
>= 0x05b0 && wc
< 0x05f8)
250 c
= cp1255_page05
[wc
-0x05b0];
/* U+2008..U+203F: general punctuation. */
251 else if (wc
>= 0x2008 && wc
< 0x2040)
252 c
= cp1255_page20
[wc
-0x2008];
253 else if (wc
== 0x20aa)
255 else if (wc
== 0x20ac)
257 else if (wc
== 0x2122)
263 /* Try canonical decomposition. */
265 /* Binary search through cp1255_decomp_table. */
/* i2 starts at the index of the last table entry. */
267 unsigned int i2
= sizeof(cp1255_decomp_table
)/sizeof(cp1255_decomp_table
[0])-1;
/* Search only when wc lies within the `composed` values at i1 and i2. */
268 if (wc
>= cp1255_decomp_table
[i1
].composed
269 && wc
<= cp1255_decomp_table
[i2
].composed
) {
272 /* Here i2 - i1 > 0. */
274 if (wc
== cp1255_decomp_table
[i
].composed
)
276 if (wc
< cp1255_decomp_table
[i
].composed
) {
279 /* Here i1 < i < i2. */
282 /* Here i1 <= i < i2. */
286 /* Here i2 - i1 = 1. */
288 if (wc
== cp1255_decomp_table
[i
].composed
)
295 /* Found a canonical decomposition. */
296 wc
= cp1255_decomp_table
[i
].base
;
297 /* wc is one of 0x05d0..0x05d6, 0x05d8..0x05dc, 0x05de, 0x05e0..0x05e1,
298 0x05e3..0x05e4, 0x05e6..0x05ea, 0x05f2. */
/* Every base character listed above lies inside the range covered by
   cp1255_page05, so this index is in bounds. */
299 c
= cp1255_page05
[wc
-0x05b0];
/* A negative comb2 means the decomposition has a single combining
   character. */
300 if (cp1255_decomp_table
[i
].comb2
< 0) {
/* Second output byte: CP1255 code of the only combining character. */
304 r
[1] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb1
];
/* Two combining characters: they go to r[1] and r[2], in that order. */
310 r
[1] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb1
];
311 r
[2] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb2
];