Update after gnulib changed.
[libiconv.git] / lib / gentranslit.c
blobb9ee780bb4c61c0c399bc3f9dfc4ab80b58e7c70
1 /* Copyright (C) 1999-2003, 2005, 2011-2012, 2016, 2018, 2020 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Library.
4 The GNU LIBICONV Library is free software; you can redistribute it
5 and/or modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either version 2.1
7 of the License, or (at your option) any later version.
9 The GNU LIBICONV Library is distributed in the hope that it will be
10 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU LIBICONV Library; see the file COPYING.LIB.
16 If not, see <https://www.gnu.org/licenses/>. */
19 * Generates a table of small strings, used for transliteration, from a table
20 * containing lines of the form
21 * Unicode <tab> utf-8 replacement <tab> # comment
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <stdbool.h>
/* Capacities of the fixed-size working tables.  */
enum {
  UNICODE_LIMIT  = 0x110000,          /* number of entries in uni2index[] */
  DATA_CAPACITY  = 0x100000,          /* number of entries in data[] */
  LINE_COUNT     = UNICODE_LIMIT / 8, /* lines of 8 code points each (0x22000) */
  TABLE_CAPACITY = 0x2000,            /* number of entries in tables[] */
  SUFFIX_SIZE    = 4+1+2+1            /* room for "%02x_%d" plus the NUL */
};

/* A run of lines (8 code points per line) that is emitted either as one
   translit_page array or, when it holds a single entry, as one comparison.  */
struct table {
  int minline;          /* first line covered by the table */
  int maxline;          /* last line covered by the table */
  int usecount;         /* number of code points in the run that have data */
  const char *suffix;   /* name suffix of the page array, NULL if none */
};

/* Prints MESSAGE and a newline to stderr, then exits with status 1.  */
static void fatal (const char *message)
{
  fprintf(stderr, "%s\n", message);
  exit(1);
}

/* Writes the fixed comment header of the generated file to stdout.  */
static void print_header (void)
{
  static const char *const text[] = {
    "/*\n",
    " * Copyright (C) 1999-2003 Free Software Foundation, Inc.\n",
    " * This file is part of the GNU LIBICONV Library.\n",
    " *\n",
    " * The GNU LIBICONV Library is free software; you can redistribute it\n",
    " * and/or modify it under the terms of the GNU Lesser General Public\n",
    " * License as published by the Free Software Foundation; either version 2\n",
    " * of the License, or (at your option) any later version.\n",
    " *\n",
    " * The GNU LIBICONV Library is distributed in the hope that it will be\n",
    " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n",
    " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n",
    " * Lesser General Public License for more details.\n",
    " *\n",
    " * You should have received a copy of the GNU Lesser General Public\n",
    " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n",
    " * If not, see <https://www.gnu.org/licenses/>.\n",
    " */\n",
    "\n",
    "/*\n",
    " * Transliteration table\n",
    " */\n",
    "\n"
  };
  size_t k;

  for (k = 0; k < sizeof(text) / sizeof(text[0]); k++)
    fputs(text[k], stdout);
}

/* Stores VALUE at data[*index] and advances *index.
   Exits with a diagnostic instead of writing past DATA_CAPACITY entries.  */
static void append_data (unsigned int *data, int *index, unsigned int value)
{
  if (*index >= DATA_CAPACITY)
    fatal("too many transliteration data entries");
  data[(*index)++] = value;
}

/* Reads the transliteration table from stdin.
   Lines starting with '#' are skipped.  Every other line must have the form
     hexadecimal code point <tab> UTF-8 replacement <tab> rest of line
   For each code point with a non-empty replacement, uni2index[code point]
   becomes the position in data[] of a length entry, which is followed by
   the replacement's decoded characters.  All other uni2index[] entries are -1.
   Malformed input terminates the program with exit status 1.
   Returns the number of data[] entries used.  */
static int read_table (unsigned int *data, int *uni2index)
{
  int index = 0;
  int j;

  for (j = 0; j < UNICODE_LIMIT; j++)
    uni2index[j] = -1;

  for (;;) {
    unsigned int code;
    int c = getc(stdin);

    if (c == EOF)
      break;
    if (c == '#') {
      do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
      continue;
    }
    ungetc(c, stdin);
    /* %x stores through an 'unsigned int *'.  */
    if (scanf("%x", &code) != 1)
      exit(1);
    /* The value is used as an index into uni2index[]; reject anything that
       would fall outside the array.  */
    if (code >= UNICODE_LIMIT)
      exit(1);
    j = (int) code;
    if (getc(stdin) != '\t')
      exit(1);

    for (;;) {
      c = getc(stdin);
      if (c == EOF || c == '\n')
        exit(1);
      if (c == '\t')
        break;
      if (uni2index[j] < 0) {
        /* First character of the replacement: reserve the length entry,
           which is filled in once the whole replacement has been read.  */
        uni2index[j] = index;
        append_data(data, &index, 0);
      }
      if (c >= 0x80) {
        /* Finish reading an UTF-8 character. */
        unsigned int i;

        /* Bytes below 0xc0 are continuation bytes.  Bytes 0xfe and 0xff
           would leave two payload bits in a 6-byte sequence, and shifting
           those left by 30 overflows a signed int.  */
        if (c < 0xc0 || c >= 0xfe)
          exit(1);
        i = (c < 0xe0 ? 2 : c < 0xf0 ? 3 : c < 0xf8 ? 4 : c < 0xfc ? 5 : 6);
        c &= (1 << (8-i)) - 1;
        while (--i > 0) {
          int cc = getc(stdin);
          if (!(cc >= 0x80 && cc < 0xc0))
            exit(1);
          c <<= 6; c |= (cc & 0x3f);
        }
      }
      append_data(data, &index, (unsigned int) c);
    }
    if (uni2index[j] >= 0)
      data[uni2index[j]] = index - uni2index[j] - 1;
    /* Skip the remainder of the line.  */
    do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
  }
  return index;
}

/* Prints the translit_data[] array definition with COUNT entries.
   Values below 32 start a new output line and are printed in decimal,
   other values below 127 as character constants, the rest in hexadecimal.  */
static void print_data (const unsigned int *data, int count)
{
  int i;

  printf("static const unsigned int translit_data[%d] = {", count);
  for (i = 0; i < count; i++) {
    unsigned int value = data[i];

    if (value < 32)
      printf("\n %3d,", (int) value);
    else if (value == '\'')
      printf("'\\'',");
    else if (value == '\\')
      printf("'\\\\',");
    else if (value < 127)
      printf(" '%c',", (int) value);
    else if (value < 256)
      printf("0x%02X,", value);
    else
      printf("0x%04X,", value);
  }
  printf("\n};\n");
}

/* Prints the translit_page* arrays and the translit_index(wc) macro that
   maps a code point to its uni2index[] value, or to -1 if it has none.  */
static void print_pages_and_index (const int *uni2index)
{
  /* Static storage: these arrays are too large to place comfortably on
     the stack.  Every element is written before it is read.  */
  static int line[LINE_COUNT];
  static struct table tables[TABLE_CAPACITY];
  int tableno;
  int i, j, p, j1, j2, t;
  bool any_page;

  /* Mark each line of 8 code points: -1 if none has data, 0 otherwise.  */
  for (j1 = 0; j1 < LINE_COUNT; j1++) {
    bool all_invalid = true;
    for (j2 = 0; j2 < 8; j2++) {
      j = 8*j1+j2;
      if (uni2index[j] >= 0)
        all_invalid = false;
    }
    line[j1] = (all_invalid ? -1 : 0);
  }

  /* Group the marked lines into tables.  A line joins the previous table
     if it directly follows it, or if it lies in the same block of 32 lines
     and at most 8 lines after that table's last line.  */
  tableno = 0;
  for (j1 = 0; j1 < LINE_COUNT; j1++) {
    if (line[j1] >= 0) {
      if (tableno > 0
          && ((j1 > 0 && line[j1-1] == tableno-1)
              || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                  && j1 - tables[tableno-1].maxline <= 8))) {
        line[j1] = tableno-1;
        tables[tableno-1].maxline = j1;
      } else {
        if (tableno >= TABLE_CAPACITY)
          fatal("too many index tables");
        tableno++;
        line[j1] = tableno-1;
        tables[tableno-1].minline = tables[tableno-1].maxline = j1;
      }
    }
  }

  /* Count the code points with data in each table.  */
  for (t = 0; t < tableno; t++) {
    tables[t].usecount = 0;
    j1 = 8*tables[t].minline;
    j2 = 8*(tables[t].maxline+1);
    for (j = j1; j < j2; j++)
      if (uni2index[j] >= 0)
        tables[t].usecount++;
  }

  /* Choose the name suffix of every table that gets a page array.  The
     strings are never freed; the program exits soon after using them.  */
  for (t = 0, p = -1, i = 0; t < tableno; t++) {
    if (tables[t].usecount > 1) {
      char *s = malloc(SUFFIX_SIZE);
      if (s == NULL)
        fatal("out of memory");
      if (p == tables[t].minline >> 5) {
        i++;
        /* i is the number of tables with the same (tables[t].minline >> 5)
           that we have seen so far. Since the tables[t].minline values are
           strongly monotonically increasing, there are at most 32 of them. */
        if (!(i >= 0 && i <= 32)) abort();
        snprintf(s, SUFFIX_SIZE, "%02x_%d", p, i);
      } else {
        p = tables[t].minline >> 5;
        i = 0;
        snprintf(s, SUFFIX_SIZE, "%02x", p);
      }
      tables[t].suffix = s;
    } else
      tables[t].suffix = NULL;
  }

  /* Emit one page array per table that holds more than one entry.  */
  any_page = false;
  for (t = 0; t < tableno; t++)
    if (tables[t].usecount > 1) {
      any_page = true;
      printf("static const short translit_page%s[%d] = {\n", tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
      for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
        if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
          printf(" /* 0x%04x */\n", 8*j1);
        printf(" ");
        for (j2 = 0; j2 < 8; j2++) {
          j = 8*j1+j2;
          printf(" %4d,", uni2index[j]);
        }
        printf(" /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
      }
      printf("};\n");
    }
  if (any_page)
    printf("\n");

  /* Emit the lookup macro as a chain of conditional expressions, one
     alternative per table, ending in -1.  */
  printf("#define translit_index(wc) \\\n (");
  for (j1 = 0; j1 < LINE_COUNT;) {
    t = line[j1];
    for (j2 = j1; j2 < LINE_COUNT && line[j2] == t; j2++);
    /* line[] holds either -1 or a table number; runs of -1 emit nothing.  */
    if (t >= 0) {
      if (j1 != tables[t].minline) abort();
      if (j2 > tables[t].maxline+1) abort();
      /* A table may contain unmarked lines; continue after its last line.  */
      j2 = tables[t].maxline+1;
      if (tables[t].usecount == 0) abort();
      if (tables[t].usecount == 1) {
        if (j2 != j1+1) abort();
        for (j = 8*j1; j < 8*j2; j++)
          if (uni2index[j] >= 0) {
            printf("wc == 0x%04x ? %d", j, uni2index[j]);
            break;
          }
      } else {
        if (j1 == 0) {
          printf("wc < 0x%04x", 8*j2);
        } else {
          printf("wc >= 0x%04x && wc < 0x%04x", 8*j1, 8*j2);
        }
        printf(" ? translit_page%s[wc", tables[t].suffix);
        if (tables[t].minline > 0)
          printf("-0x%04x", 8*j1);
        printf("]");
      }
      printf(" : \\\n ");
    }
    j1 = j2;
  }
  printf("-1)\n");
}

/* Reads a transliteration table from stdin and writes the generated C
   tables to stdout.  Takes no command-line arguments.  Exits with status 1
   on a usage error, malformed input, memory exhaustion, table overflow or
   output failure, and with status 0 otherwise.  */
int main (int argc, char *argv[])
{
  unsigned int *data;
  int *uni2index;
  int index;

  (void) argv;
  if (argc != 1)
    exit(1);

  data = malloc(DATA_CAPACITY * sizeof(*data));
  uni2index = malloc(UNICODE_LIMIT * sizeof(*uni2index));
  if (data == NULL || uni2index == NULL)
    fatal("out of memory");

  print_header();
  index = read_table(data, uni2index);
  print_data(data, index);
  printf("\n");
  print_pages_and_index(uni2index);

  if (ferror(stdout) || fclose(stdout))
    exit(1);
  return 0;
}