/*
 * Copyright (C) 1999-2001, 2004, 2016 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, see <https://www.gnu.org/licenses/>.
 */
/* Combining characters used in Hebrew encoding CP1255. */

/* Relevant combining characters:
   0x05b4, 0x05b7, 0x05b8, 0x05b9, 0x05bc, 0x05bf, 0x05c1, 0x05c2. */

/* Composition tables for each of the relevant combining characters. */
/* Flat array of (base, composed) pairs.  The *_idx / *_len macros defined
   alongside it describe one contiguous slice per relevant combining
   character (0x05b4, 0x05b7, 0x05b8, 0x05b9, 0x05bc, 0x05bf, 0x05c1, 0x05c2).
   NOTE(review): the initializer rows belonging to each slice and the closing
   brace are not visible in this listing -- confirm against the upstream file. */
32 static const struct { unsigned short base
; unsigned short composed
; } cp1255_comp_table_data
[] = {
/* Slice for 0x05b4: starts at index 0 and holds 1 entry. */
33 #define cp1255_comp_table05b4_idx 0
34 #define cp1255_comp_table05b4_len 1
/* Each following slice starts where the previous one ends (idx + len). */
/* Slice for 0x05b7: 2 entries. */
36 #define cp1255_comp_table05b7_idx (cp1255_comp_table05b4_idx+cp1255_comp_table05b4_len)
37 #define cp1255_comp_table05b7_len 2
/* Slice for 0x05b8: 1 entry. */
40 #define cp1255_comp_table05b8_idx (cp1255_comp_table05b7_idx+cp1255_comp_table05b7_len)
41 #define cp1255_comp_table05b8_len 1
/* Slice for 0x05b9: 1 entry. */
43 #define cp1255_comp_table05b9_idx (cp1255_comp_table05b8_idx+cp1255_comp_table05b8_len)
44 #define cp1255_comp_table05b9_len 1
/* Slice for 0x05bc: 24 entries. */
46 #define cp1255_comp_table05bc_idx (cp1255_comp_table05b9_idx+cp1255_comp_table05b9_len)
47 #define cp1255_comp_table05bc_len 24
/* Slice for 0x05bf: 3 entries. */
72 #define cp1255_comp_table05bf_idx (cp1255_comp_table05bc_idx+cp1255_comp_table05bc_len)
73 #define cp1255_comp_table05bf_len 3
/* Slice for 0x05c1: 2 entries. */
77 #define cp1255_comp_table05c1_idx (cp1255_comp_table05bf_idx+cp1255_comp_table05bf_len)
78 #define cp1255_comp_table05c1_len 2
/* Slice for 0x05c2: 2 entries. */
81 #define cp1255_comp_table05c2_idx (cp1255_comp_table05c1_idx+cp1255_comp_table05c1_len)
82 #define cp1255_comp_table05c2_len 2
86 static const struct { unsigned int len
; unsigned int idx
; } cp1255_comp_table
[] = {
87 { cp1255_comp_table05b4_len
, cp1255_comp_table05b4_idx
},
88 { cp1255_comp_table05b7_len
, cp1255_comp_table05b7_idx
},
89 { cp1255_comp_table05b8_len
, cp1255_comp_table05b8_idx
},
90 { cp1255_comp_table05b9_len
, cp1255_comp_table05b9_idx
},
91 { cp1255_comp_table05bc_len
, cp1255_comp_table05bc_idx
},
92 { cp1255_comp_table05bf_len
, cp1255_comp_table05bf_idx
},
93 { cp1255_comp_table05c1_len
, cp1255_comp_table05c1_idx
},
94 { cp1255_comp_table05c2_len
, cp1255_comp_table05c2_idx
},
/* Decomposition table for the relevant Unicode characters.
   Rows are sorted by ascending 'composed' value so that the table can be
   binary-searched. */
struct cp1255_decomp {
  unsigned short composed;  /* precomposed Unicode character */
  unsigned short base;      /* its base character */
  int comb1 : 8;            /* index into cp1255_comb_table */
  signed int comb2 : 8;     /* second index into cp1255_comb_table, or -1 */
};
static const struct cp1255_decomp cp1255_decomp_table[] = {
  { 0xFB1D, 0x05D9, 0, -1 },
  { 0xFB1F, 0x05F2, 1, -1 },
  { 0xFB2A, 0x05E9, 6, -1 },
  { 0xFB2B, 0x05E9, 7, -1 },
  { 0xFB2C, 0x05E9, 4, 6 },
  { 0xFB2D, 0x05E9, 4, 7 },
  { 0xFB2E, 0x05D0, 1, -1 },
  { 0xFB2F, 0x05D0, 2, -1 },
  { 0xFB30, 0x05D0, 4, -1 },
  { 0xFB31, 0x05D1, 4, -1 },
  { 0xFB32, 0x05D2, 4, -1 },
  { 0xFB33, 0x05D3, 4, -1 },
  { 0xFB34, 0x05D4, 4, -1 },
  { 0xFB35, 0x05D5, 4, -1 },
  { 0xFB36, 0x05D6, 4, -1 },
  { 0xFB38, 0x05D8, 4, -1 },
  { 0xFB39, 0x05D9, 4, -1 },
  { 0xFB3A, 0x05DA, 4, -1 },
  { 0xFB3B, 0x05DB, 4, -1 },
  { 0xFB3C, 0x05DC, 4, -1 },
  { 0xFB3E, 0x05DE, 4, -1 },
  { 0xFB40, 0x05E0, 4, -1 },
  { 0xFB41, 0x05E1, 4, -1 },
  { 0xFB43, 0x05E3, 4, -1 },
  { 0xFB44, 0x05E4, 4, -1 },
  { 0xFB46, 0x05E6, 4, -1 },
  { 0xFB47, 0x05E7, 4, -1 },
  { 0xFB48, 0x05E8, 4, -1 },
  { 0xFB49, 0x05E9, 4, -1 },
  { 0xFB4A, 0x05EA, 4, -1 },
  { 0xFB4B, 0x05D5, 3, -1 },
  { 0xFB4C, 0x05D1, 5, -1 },
  { 0xFB4D, 0x05DB, 5, -1 },
  { 0xFB4E, 0x05E4, 5, -1 },
};
/* CP1255 byte values of the eight relevant combining characters
   (0x05b4, 0x05b7, 0x05b8, 0x05b9, 0x05bc, 0x05bf, 0x05c1, 0x05c2),
   indexed by the comb1 / comb2 fields of cp1255_decomp_table. */
static const unsigned char cp1255_comb_table[] = {
  0xc4, 0xc7, 0xc8, 0xc9, 0xcc, 0xcf, 0xd1, 0xd2,
};
/* CP1255 byte -> Unicode mapping for the bytes 0x80..0xff; the entry for
   byte c is cp1255_2uni[c-0x80].  Entries equal to 0xfffd mark bytes with
   no Unicode assignment in this table. */
static const unsigned short cp1255_2uni[128] = {
  /* 0x80 */
  0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
  0x02c6, 0x2030, 0xfffd, 0x2039, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  /* 0x90 */
  0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
  0x02dc, 0x2122, 0xfffd, 0x203a, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  /* 0xa0 */
  0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x20aa, 0x00a5, 0x00a6, 0x00a7,
  0x00a8, 0x00a9, 0x00d7, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
  /* 0xb0 */
  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
  0x00b8, 0x00b9, 0x00f7, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
  /* 0xc0 */
  0x05b0, 0x05b1, 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7,
  0x05b8, 0x05b9, 0x05ba, 0x05bb, 0x05bc, 0x05bd, 0x05be, 0x05bf,
  /* 0xd0 */
  0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f0, 0x05f1, 0x05f2, 0x05f3,
  0x05f4, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd,
  /* 0xe0 */
  0x05d0, 0x05d1, 0x05d2, 0x05d3, 0x05d4, 0x05d5, 0x05d6, 0x05d7,
  0x05d8, 0x05d9, 0x05da, 0x05db, 0x05dc, 0x05dd, 0x05de, 0x05df,
  /* 0xf0 */
  0x05e0, 0x05e1, 0x05e2, 0x05e3, 0x05e4, 0x05e5, 0x05e6, 0x05e7,
  0x05e8, 0x05e9, 0x05ea, 0xfffd, 0xfffd, 0x200e, 0x200f, 0xfffd,
};
/* cp1255_mbtowc: CP1255 -> Unicode conversion step with one character of
   look-behind kept in conv->istate, so that a base letter followed by a
   combining character can be emitted as a single precomposed character.
   Parameters: conv (conversion state), pwc (receives the output character),
   s (input bytes), n (input length).
   NOTE(review): several declarations, branches and return statements of this
   function are not visible in this listing; the comments below describe only
   the statements that are present. */
167 /* In the CP1255 to Unicode direction, the state contains a buffered
168 character, or 0 if none. */
171 cp1255_mbtowc (conv_t conv
, ucs4_t
*pwc
, const unsigned char *s
, size_t n
)
/* c is the first input byte. */
173 unsigned char c
= *s
;
175 unsigned short last_wc
;
/* Table lookup; the index c-0x80 implies this path is for bytes >= 0x80
   (the guarding condition is not visible here). */
179 wc
= cp1255_2uni
[c
-0x80];
/* Fetch the character buffered by a previous call (0 if none). */
183 last_wc
= conv
->istate
;
/* Only characters in 0x05b0..0x05c4 are candidates for combining. */
185 if (wc
>= 0x05b0 && wc
< 0x05c5) {
186 /* See whether last_wc and wc can be combined. */
/* Map the combining character to its slice number k in cp1255_comp_table. */
190 case 0x05b4: k
= 0; break;
191 case 0x05b7: k
= 1; break;
192 case 0x05b8: k
= 2; break;
193 case 0x05b9: k
= 3; break;
194 case 0x05bc: k
= 4; break;
195 case 0x05bf: k
= 5; break;
196 case 0x05c1: k
= 6; break;
197 case 0x05c2: k
= 7; break;
198 default: goto not_combining
;
/* i1 and i2 are the first and last index of slice k in
   cp1255_comp_table_data. */
200 i1
= cp1255_comp_table
[k
].idx
;
201 i2
= i1
+ cp1255_comp_table
[k
].len
-1;
/* Quick reject: last_wc must lie between the slice's first and last base. */
202 if (last_wc
>= cp1255_comp_table_data
[i1
].base
203 && last_wc
<= cp1255_comp_table_data
[i2
].base
) {
/* Search the slice for an entry whose base equals last_wc (presumably a
   binary search; the loop structure is not visible here). */
207 if (last_wc
== cp1255_comp_table_data
[i
].base
)
209 if (last_wc
< cp1255_comp_table_data
[i
].base
) {
218 if (last_wc
== cp1255_comp_table_data
[i
].base
)
/* Match found: replace the buffered character by the precomposed one. */
224 last_wc
= cp1255_comp_table_data
[i
].composed
;
/* These three results are kept buffered instead of being output; presumably
   because they can still take a second combining character (compare the
   two-mark rows 0xFB2C / 0xFB2D in cp1255_decomp_table). */
225 if (last_wc
== 0xfb2a || last_wc
== 0xfb2b || last_wc
== 0xfb49) {
226 /* Buffer the combined character. */
227 conv
->istate
= last_wc
;
228 return RET_TOOFEW(1);
230 /* Output the combined character. */
232 *pwc
= (ucs4_t
) last_wc
;
238 /* Output the buffered character. */
240 *pwc
= (ucs4_t
) last_wc
;
241 return 0; /* Don't advance the input pointer. */
/* Bit (wc - 0x05d0) of the mask 0x07db5f7f selects which of the characters
   0x05d0..0x05ea are treated as possible base characters. */
243 if ((wc
>= 0x05d0 && wc
<= 0x05ea && ((0x07db5f7f >> (wc
- 0x05d0)) & 1))
245 /* wc is a possible match in cp1255_comp_table_data. Buffer it. */
247 return RET_TOOFEW(1);
249 /* Output wc immediately. */
/* The flush routine for this converter is the shared normal_flushwc. */
255 #define cp1255_flushwc normal_flushwc
/* Unicode -> CP1255 for U+00A0..U+00F7; the entry for wc is
   cp1255_page00[wc-0x00a0].  Entries of 0x00 are left unassigned in this
   table. */
static const unsigned char cp1255_page00[88] = {
  0xa0, 0xa1, 0xa2, 0xa3, 0x00, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
  0xa8, 0xa9, 0x00, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
  0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
  0xb8, 0xb9, 0x00, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xaa, /* 0xd0-0xd7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xba, /* 0xf0-0xf7 */
};
/* Unicode -> CP1255 for U+02C0..U+02DF; the entry for wc is
   cp1255_page02[wc-0x02c0].  Entries of 0x00 are left unassigned in this
   table. */
static const unsigned char cp1255_page02[32] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
  0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
};
/* Unicode -> CP1255 for U+05B0..U+05F7; the entry for wc is
   cp1255_page05[wc-0x05b0].  Entries of 0x00 are left unassigned in this
   table. */
static const unsigned char cp1255_page05[72] = {
  0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xb0-0xb7 */
  0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xb8-0xbf */
  0xd0, 0xd1, 0xd2, 0xd3, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xd0-0xd7 */
  0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xd8-0xdf */
  0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xe0-0xe7 */
  0xf8, 0xf9, 0xfa, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xe8-0xef */
  0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0x00, 0x00, 0x00, /* 0xf0-0xf7 */
};
/* Unicode -> CP1255 for U+2008..U+203F; the entry for wc is
   cp1255_page20[wc-0x2008].  Entries of 0x00 are left unassigned in this
   table. */
static const unsigned char cp1255_page20[56] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xfd, 0xfe, /* 0x08-0x0f */
  0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */
  0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */
  0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
  0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */
};
/* cp1255_wctomb: Unicode -> CP1255 conversion of one character wc into the
   output buffer r.  A direct table lookup is tried first; if that yields
   nothing, wc is looked up in cp1255_decomp_table and written as a base
   byte followed by one or two combining-character bytes.
   Parameters: conv (conversion state), r (output buffer), wc (character to
   convert), n (output buffer size).
   NOTE(review): several declarations, assignments and return statements of
   this function are not visible in this listing; the comments below describe
   only the statements that are present. */
298 cp1255_wctomb (conv_t conv
, unsigned char *r
, ucs4_t wc
, size_t n
)
/* Direct lookup: pick the page table that covers wc. */
305 else if (wc
>= 0x00a0 && wc
< 0x00f8)
306 c
= cp1255_page00
[wc
-0x00a0];
/* Single-character special cases (the assigned byte values for these
   branches are not visible here). */
307 else if (wc
== 0x0192)
309 else if (wc
>= 0x02c0 && wc
< 0x02e0)
310 c
= cp1255_page02
[wc
-0x02c0];
311 else if (wc
>= 0x05b0 && wc
< 0x05f8)
312 c
= cp1255_page05
[wc
-0x05b0];
313 else if (wc
>= 0x2008 && wc
< 0x2040)
314 c
= cp1255_page20
[wc
-0x2008];
315 else if (wc
== 0x20aa)
317 else if (wc
== 0x20ac)
319 else if (wc
== 0x2122)
325 /* Try canonical decomposition. */
327 /* Binary search through cp1255_decomp_table. */
/* i2 starts at the index of the table's last row. */
329 unsigned int i2
= sizeof(cp1255_decomp_table
)/sizeof(cp1255_decomp_table
[0])-1;
/* Quick reject: wc must lie between the first and last 'composed' value. */
330 if (wc
>= cp1255_decomp_table
[i1
].composed
331 && wc
<= cp1255_decomp_table
[i2
].composed
) {
334 /* Here i2 - i1 > 0. */
336 if (wc
== cp1255_decomp_table
[i
].composed
)
338 if (wc
< cp1255_decomp_table
[i
].composed
) {
341 /* Here i1 < i < i2. */
344 /* Here i1 <= i < i2. */
348 /* Here i2 - i1 = 1. */
350 if (wc
== cp1255_decomp_table
[i
].composed
)
357 /* Found a canonical decomposition. */
/* Continue with the base character of the matching row. */
358 wc
= cp1255_decomp_table
[i
].base
;
359 /* wc is one of 0x05d0..0x05d6, 0x05d8..0x05dc, 0x05de, 0x05e0..0x05e1,
360 0x05e3..0x05e4, 0x05e6..0x05ea, 0x05f2. */
/* The base character is always inside the range covered by cp1255_page05. */
361 c
= cp1255_page05
[wc
-0x05b0];
/* comb2 < 0 means the row has only one combining character. */
362 if (cp1255_decomp_table
[i
].comb2
< 0) {
/* One combining character: its CP1255 byte goes to r[1]. */
366 r
[1] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb1
];
/* Two combining characters: their CP1255 bytes go to r[1] and r[2]. */
372 r
[1] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb1
];
373 r
[2] = cp1255_comb_table
[cp1255_decomp_table
[i
].comb2
];