1 /* Copyright (C) 1999-2001 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Library.
4 The GNU LIBICONV Library is free software; you can redistribute it
5 and/or modify it under the terms of the GNU Library General Public
6 License as published by the Free Software Foundation; either version 2
7 of the License, or (at your option) any later version.
9 The GNU LIBICONV Library is distributed in the hope that it will be
10 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public
15 License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16 If not, write to the Free Software Foundation, Inc., 59 Temple Place -
17 Suite 330, Boston, MA 02111-1307, USA. */
/*
 * Generates a table of small strings, used for transliteration, from a table
 * containing lines of the form
 *   Unicode <tab> utf-8 replacement <tab> # comment
 */
29 int main (int argc
, char *argv
[])
31 unsigned short data
[0x100000];
32 int uni2index
[0x10000];
39 printf(" * Copyright (C) 1999-2001 Free Software Foundation, Inc.\n");
40 printf(" * This file is part of the GNU LIBICONV Library.\n");
42 printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
43 printf(" * and/or modify it under the terms of the GNU Library General Public\n");
44 printf(" * License as published by the Free Software Foundation; either version 2\n");
45 printf(" * of the License, or (at your option) any later version.\n");
47 printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
48 printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
49 printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
50 printf(" * Library General Public License for more details.\n");
52 printf(" * You should have received a copy of the GNU Library General Public\n");
53 printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
54 printf(" * If not, write to the Free Software Foundation, Inc., 59 Temple Place -\n");
55 printf(" * Suite 330, Boston, MA 02111-1307, USA.\n");
59 printf(" * Transliteration table\n");
65 for (j
= 0; j
< 0x10000; j
++)
73 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
77 if (scanf("%x",&j
) != 1)
84 if (c
== EOF
|| c
== '\n')
88 if (uni2index
[j
] < 0) {
93 /* Finish reading an UTF-8 character. */
97 unsigned int i
= (c
< 0xe0 ? 2 : c
< 0xf0 ? 3 : c
< 0xf8 ? 4 : c
< 0xfc ? 5 : 6);
98 c
&= (1 << (8-i
)) - 1;
100 int cc
= getc(stdin
);
101 if (!(cc
>= 0x80 && cc
< 0xc0))
103 c
<<= 6; c
|= (cc
& 0x3f);
107 data
[index
++] = (unsigned short) c
;
109 if (uni2index
[j
] >= 0)
110 data
[uni2index
[j
]] = index
- uni2index
[j
] - 1;
111 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
114 printf("static const unsigned short translit_data[%d] = {",index
);
117 for (i
= 0; i
< index
; i
++) {
119 printf("\n %3d,",data
[i
]);
120 else if (data
[i
] == '\'')
122 else if (data
[i
] == '\\')
124 else if (data
[i
] < 127)
125 printf(" '%c',",data
[i
]);
126 else if (data
[i
] < 256)
127 printf("0x%02X,",data
[i
]);
129 printf("0x%04X,",data
[i
]);
138 struct { int minline
; int maxline
; int usecount
; const char* suffix
; } tables
[0x2000];
139 int i
, j
, p
, j1
, j2
, t
;
141 for (p
= 0; p
< 0x100; p
++)
143 for (j
= 0; j
< 0x10000; j
++)
144 if (uni2index
[j
] >= 0)
146 for (j1
= 0; j1
< 0x2000; j1
++) {
147 bool all_invalid
= true;
148 for (j2
= 0; j2
< 8; j2
++) {
150 if (uni2index
[j
] >= 0)
159 for (j1
= 0; j1
< 0x2000; j1
++) {
162 && ((j1
> 0 && line
[j1
-1] == tableno
-1)
163 || ((tables
[tableno
-1].maxline
>> 5) == (j1
>> 5)
164 && j1
- tables
[tableno
-1].maxline
<= 8))) {
165 line
[j1
] = tableno
-1;
166 tables
[tableno
-1].maxline
= j1
;
169 line
[j1
] = tableno
-1;
170 tables
[tableno
-1].minline
= tables
[tableno
-1].maxline
= j1
;
174 for (t
= 0; t
< tableno
; t
++) {
175 tables
[t
].usecount
= 0;
176 j1
= 8*tables
[t
].minline
;
177 j2
= 8*(tables
[t
].maxline
+1);
178 for (j
= j1
; j
< j2
; j
++)
179 if (uni2index
[j
] >= 0)
180 tables
[t
].usecount
++;
182 for (t
= 0, p
= -1, i
= 0; t
< tableno
; t
++) {
183 if (tables
[t
].usecount
> 1) {
185 if (p
== tables
[t
].minline
>> 5) {
186 s
= (char*) malloc(5+1);
187 sprintf(s
, "%02x_%d", p
, ++i
);
189 p
= tables
[t
].minline
>> 5;
190 s
= (char*) malloc(2+1);
191 sprintf(s
, "%02x", p
);
193 tables
[t
].suffix
= s
;
195 tables
[t
].suffix
= NULL
;
199 for (t
= 0; t
< tableno
; t
++)
200 if (tables
[t
].usecount
> 1) {
202 printf("static const short translit_page%s[%d] = {\n", tables
[t
].suffix
, 8*(tables
[t
].maxline
-tables
[t
].minline
+1));
203 for (j1
= tables
[t
].minline
; j1
<= tables
[t
].maxline
; j1
++) {
204 if ((j1
% 0x20) == 0 && j1
> tables
[t
].minline
)
205 printf(" /* 0x%04x */\n", 8*j1
);
207 for (j2
= 0; j2
< 8; j2
++) {
209 printf(" %4d,", uni2index
[j
]);
211 printf(" /* 0x%02x-0x%02x */\n", 8*(j1
% 0x20), 8*(j1
% 0x20)+7);
218 printf("#define translit_index(wc) \\\n (");
219 for (j1
= 0; j1
< 0x2000;) {
221 for (j2
= j1
; j2
< 0x2000 && line
[j2
] == t
; j2
++);
223 if (j1
!= tables
[t
].minline
) abort();
224 if (j2
> tables
[t
].maxline
+1) abort();
225 j2
= tables
[t
].maxline
+1;
229 if (t
>= 0 && tables
[t
].usecount
== 0) abort();
230 if (t
>= 0 && tables
[t
].usecount
== 1) {
231 if (j2
!= j1
+1) abort();
232 for (j
= 8*j1
; j
< 8*j2
; j
++)
233 if (uni2index
[j
] >= 0) {
234 printf("wc == 0x%04x ? %d", j
, uni2index
[j
]);
239 printf("wc < 0x%04x", 8*j2
);
241 printf("wc >= 0x%04x && wc < 0x%04x", 8*j1
, 8*j2
);
243 printf(" ? translit_page%s[wc", tables
[t
].suffix
);
244 if (tables
[t
].minline
> 0)
245 printf("-0x%04x", 8*j1
);