1 /* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018, 2020 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Library.
4 The GNU LIBICONV Library is free software; you can redistribute it
5 and/or modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either version 2.1
7 of the License, or (at your option) any later version.
9 The GNU LIBICONV Library is distributed in the hope that it will be
10 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16 If not, see <https://www.gnu.org/licenses/>. */
19 * Generates a table of small strings, used for transliteration, from a table
20 * containing lines of the form
21 * Unicode <tab> utf-8 replacement <tab> # comment
28 int main (int argc
, char *argv
[])
37 data
= malloc(0x100000 * sizeof(*data
));
38 uni2index
= malloc(0x110000 * sizeof(*uni2index
));
39 if (data
== NULL
|| uni2index
== NULL
) {
40 fprintf(stderr
, "out of memory\n");
45 printf(" * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n");
46 printf(" * This file is part of the GNU LIBICONV Library.\n");
48 printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
49 printf(" * and/or modify it under the terms of the GNU Lesser General Public\n");
50 printf(" * License as published by the Free Software Foundation; either version 2\n");
51 printf(" * of the License, or (at your option) any later version.\n");
53 printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
54 printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
55 printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
56 printf(" * Lesser General Public License for more details.\n");
58 printf(" * You should have received a copy of the GNU Lesser General Public\n");
59 printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
60 printf(" * If not, see <https://www.gnu.org/licenses/>.\n");
64 printf(" * Transliteration table\n");
70 for (j
= 0; j
< 0x110000; j
++)
78 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
82 if (scanf("%x",&j
) != 1)
89 if (c
== EOF
|| c
== '\n')
93 if (uni2index
[j
] < 0) {
98 /* Finish reading a UTF-8 character. */
102 unsigned int i
= (c
< 0xe0 ? 2 : c
< 0xf0 ? 3 : c
< 0xf8 ? 4 : c
< 0xfc ? 5 : 6);
103 c
&= (1 << (8-i
)) - 1;
105 int cc
= getc(stdin
);
106 if (!(cc
>= 0x80 && cc
< 0xc0))
108 c
<<= 6; c
|= (cc
& 0x3f);
112 data
[index
++] = (unsigned int) c
;
114 if (uni2index
[j
] >= 0)
115 data
[uni2index
[j
]] = index
- uni2index
[j
] - 1;
116 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
119 printf("static const unsigned int translit_data[%d] = {",index
);
122 for (i
= 0; i
< index
; i
++) {
124 printf("\n %3d,",data
[i
]);
125 else if (data
[i
] == '\'')
127 else if (data
[i
] == '\\')
129 else if (data
[i
] < 127)
130 printf(" '%c',",data
[i
]);
131 else if (data
[i
] < 256)
132 printf("0x%02X,",data
[i
]);
134 printf("0x%04X,",data
[i
]);
142 struct { int minline
; int maxline
; int usecount
; const char* suffix
; } tables
[0x2000];
143 int i
, j
, p
, j1
, j2
, t
;
145 for (j1
= 0; j1
< 0x22000; j1
++) {
146 bool all_invalid
= true;
147 for (j2
= 0; j2
< 8; j2
++) {
149 if (uni2index
[j
] >= 0)
158 for (j1
= 0; j1
< 0x22000; j1
++) {
161 && ((j1
> 0 && line
[j1
-1] == tableno
-1)
162 || ((tables
[tableno
-1].maxline
>> 5) == (j1
>> 5)
163 && j1
- tables
[tableno
-1].maxline
<= 8))) {
164 line
[j1
] = tableno
-1;
165 tables
[tableno
-1].maxline
= j1
;
168 line
[j1
] = tableno
-1;
169 tables
[tableno
-1].minline
= tables
[tableno
-1].maxline
= j1
;
173 for (t
= 0; t
< tableno
; t
++) {
174 tables
[t
].usecount
= 0;
175 j1
= 8*tables
[t
].minline
;
176 j2
= 8*(tables
[t
].maxline
+1);
177 for (j
= j1
; j
< j2
; j
++)
178 if (uni2index
[j
] >= 0)
179 tables
[t
].usecount
++;
181 for (t
= 0, p
= -1, i
= 0; t
< tableno
; t
++) {
182 if (tables
[t
].usecount
> 1) {
184 if (p
== tables
[t
].minline
>> 5) {
186 /* i is the number of tables with the same (tables[t].minline >> 5)
187 that we have seen so far. Since the tables[t].minline values are
188 strictly monotonically increasing, there are at most 32 of them. */
189 if (!(i
>= 0 && i
<= 32)) abort();
190 s
= (char*) malloc(4+1+2+1);
191 sprintf(s
, "%02x_%d", p
, i
);
193 p
= tables
[t
].minline
>> 5;
195 s
= (char*) malloc(4+1);
196 sprintf(s
, "%02x", p
);
198 tables
[t
].suffix
= s
;
200 tables
[t
].suffix
= NULL
;
204 for (t
= 0; t
< tableno
; t
++)
205 if (tables
[t
].usecount
> 1) {
207 printf("static const short translit_page%s[%d] = {\n", tables
[t
].suffix
, 8*(tables
[t
].maxline
-tables
[t
].minline
+1));
208 for (j1
= tables
[t
].minline
; j1
<= tables
[t
].maxline
; j1
++) {
209 if ((j1
% 0x20) == 0 && j1
> tables
[t
].minline
)
210 printf(" /* 0x%04x */\n", 8*j1
);
212 for (j2
= 0; j2
< 8; j2
++) {
214 printf(" %4d,", uni2index
[j
]);
216 printf(" /* 0x%02x-0x%02x */\n", 8*(j1
% 0x20), 8*(j1
% 0x20)+7);
223 printf("#define translit_index(wc) \\\n (");
224 for (j1
= 0; j1
< 0x22000;) {
226 for (j2
= j1
; j2
< 0x22000 && line
[j2
] == t
; j2
++);
228 if (j1
!= tables
[t
].minline
) abort();
229 if (j2
> tables
[t
].maxline
+1) abort();
230 j2
= tables
[t
].maxline
+1;
234 if (t
>= 0 && tables
[t
].usecount
== 0) abort();
235 if (t
>= 0 && tables
[t
].usecount
== 1) {
236 if (j2
!= j1
+1) abort();
237 for (j
= 8*j1
; j
< 8*j2
; j
++)
238 if (uni2index
[j
] >= 0) {
239 printf("wc == 0x%04x ? %d", j
, uni2index
[j
]);
244 printf("wc < 0x%04x", 8*j2
);
246 printf("wc >= 0x%04x && wc < 0x%04x", 8*j1
, 8*j2
);
248 printf(" ? translit_page%s[wc", tables
[t
].suffix
);
249 if (tables
[t
].minline
> 0)
250 printf("-0x%04x", 8*j1
);
260 if (ferror(stdout
) || fclose(stdout
))