Upgrade to libtool-1.4.
[libiconv.git] / tools / 8bit_tab_to_h.c
bloba2d62e81e6a7a4026767073edfc390dbb0e0988e
1 /* Copyright (C) 1999-2001 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Tools.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
7 any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 * Generates an 8-bit character set table from a .TXT table as found on
20 * ftp.unicode.org or from a table containing the 256 Unicode values as
21 * hexadecimal integers.
22 * Examples:
24 * ./8bit_tab_to_h ISO-8859-1 iso8859_1 < tab8859_1
25 * ./8bit_tab_to_h ISO-8859-2 iso8859_2 < tab8859_2
26 * ./8bit_tab_to_h ISO-8859-3 iso8859_3 < tab8859_3
27 * ./8bit_tab_to_h ISO-8859-4 iso8859_4 < tab8859_4
28 * ./8bit_tab_to_h ISO-8859-5 iso8859_5 < tab8859_5
29 * ./8bit_tab_to_h ISO-8859-6 iso8859_6 < tab8859_6
30 * ./8bit_tab_to_h ISO-8859-7 iso8859_7 < tab8859_7
31 * ./8bit_tab_to_h ISO-8859-8 iso8859_8 < tab8859_8
32 * ./8bit_tab_to_h ISO-8859-9 iso8859_9 < tab8859_9
33 * ./8bit_tab_to_h ISO-8859-10 iso8859_10 < tab8859_10
34 * ./8bit_tab_to_h ISO-8859-14 iso8859_14 < tab8859_14
35 * ./8bit_tab_to_h ISO-8859-15 iso8859_15 < tab8859_15
36 * ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < jis0201
37 * ./8bit_tab_to_h TIS620.2533-1 tis620 < tabtis620
38 * ./8bit_tab_to_h KOI8-R koi8_r < tabkoi8_r
39 * ./8bit_tab_to_h KOI8-U koi8_u < tabkoi8_u
40 * ./8bit_tab_to_h ARMSCII-8 armscii_8 < tabarmscii_8
41 * ./8bit_tab_to_h CP1133 cp1133 < tabibm_cp1133
42 * ./8bit_tab_to_h MULELAO-1 mulelao < tabmulelao_1
43 * ./8bit_tab_to_h VISCII1.1-1 viscii1 < tabviscii
44 * ./8bit_tab_to_h TCVN-5712 tcvn < tabtcvn
45 * ./8bit_tab_to_h GEORGIAN-ACADEMY georgian_ac < tabgeorgian_academy
46 * ./8bit_tab_to_h GEORGIAN-PS georgian_ps < tabgeorgian_ps
48 * ./8bit_tab_to_h ISO-8859-1 iso8859_1 < 8859-1.TXT
49 * ./8bit_tab_to_h ISO-8859-2 iso8859_2 < 8859-2.TXT
50 * ./8bit_tab_to_h ISO-8859-3 iso8859_3 < 8859-3.TXT
51 * ./8bit_tab_to_h ISO-8859-4 iso8859_4 < 8859-4.TXT
52 * ./8bit_tab_to_h ISO-8859-5 iso8859_5 < 8859-5.TXT
53 * ./8bit_tab_to_h ISO-8859-6 iso8859_6 < 8859-6.TXT
54 * ./8bit_tab_to_h ISO-8859-7 iso8859_7 < 8859-7.TXT
55 * ./8bit_tab_to_h ISO-8859-8 iso8859_8 < 8859-8.TXT
56 * ./8bit_tab_to_h ISO-8859-9 iso8859_9 < 8859-9.TXT
57 * ./8bit_tab_to_h ISO-8859-10 iso8859_10 < 8859-10.TXT
58 * ./8bit_tab_to_h ISO-8859-14 iso8859_14 < 8859-14.TXT
59 * ./8bit_tab_to_h ISO-8859-15 iso8859_15 < 8859-15.TXT
60 * ./8bit_tab_to_h JISX0201.1976-0 jisx0201 < JIS0201.TXT
61 * ./8bit_tab_to_h KOI8-R koi8_r < KOI8-R.TXT
64 #include <stdio.h>
65 #include <stdlib.h>
66 #include <stdbool.h>
67 #include <string.h>
/* Prints MESSAGE to stderr and terminates the program with exit status 1. */
static void die (const char *message)
{
  fprintf(stderr, "8bit_tab_to_h: %s\n", message);
  exit(1);
}

/* Like malloc, but terminates the program when the allocation fails. */
static void *xmalloc (size_t size)
{
  void *p = malloc(size);
  if (p == NULL)
    die("out of memory");
  return p;
}

/*
 * Fills charset2uni[0..255] from stdin.
 * If the input starts with '#', it is parsed as a unicode.org style .TXT
 * file ("0xNN 0xNNNN" pairs with '#' comments); bytes without a mapping
 * keep the value 0xfffd.  Otherwise the input must consist of exactly 256
 * hexadecimal values; 0xffff and values that would be negative as an int
 * are stored as 0xfffd.
 * Every stored value lies in 0..0xffff; anything larger is rejected
 * because it is later used as an index into a 0x10000 entry table.
 */
static void read_charset_table (int charset2uni[0x100])
{
  /* %x requires an unsigned int target. */
  unsigned int value;
  int i, c;

  c = getc(stdin);
  ungetc(c,stdin);
  if (c == '#') {
    /* Read a unicode.org style .TXT file. */
    for (i = 0; i < 0x100; i++)
      charset2uni[i] = 0xfffd;
    for (;;) {
      unsigned int index;

      c = getc(stdin);
      if (c == EOF)
        break;
      if (c == '\n' || c == ' ' || c == '\t')
        continue;
      if (c == '#') {
        do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
        continue;
      }
      ungetc(c,stdin);
      if (scanf("0x%x", &index) != 1 || index >= 0x100)
        die("expected a byte value 0x00..0xff at the start of a line");
      do { c = getc(stdin); } while (c == ' ' || c == '\t');
      /* A byte value without mapping on an unterminated last line is
         treated like one followed by a newline. */
      if (c == EOF)
        break;
      ungetc(c,stdin);
      if (c == '\n' || c == '#')
        continue;
      if (scanf("0x%x", &value) != 1)
        die("expected a 0x... Unicode value after the byte value");
      if (value > 0xffff)
        die("Unicode value beyond 0xffff is not supported");
      charset2uni[index] = (int) value;
    }
  } else {
    /* Read a table of hexadecimal Unicode values. */
    for (i = 0; i < 0x100; i++) {
      if (scanf("%x", &value) != 1)
        die("expected 256 hexadecimal values");
      /* value > (~0u >> 1) is the unsigned form of "negative as an int". */
      if (value == 0xffff || value > (~0u >> 1))
        charset2uni[i] = 0xfffd;
      else if (value > 0xffff)
        die("Unicode value beyond 0xffff is not supported");
      else
        charset2uni[i] = (int) value;
    }
    /* Anything after the 256th value is an error. */
    if (scanf("%x", &value) != EOF)
      die("unexpected data after the 256th value");
  }
}

/* Writes the licence comment and the charset name comment to F. */
static void write_license_header (FILE *f, const char *charsetname)
{
  fprintf(f, "/*\n");
  fprintf(f, " * Copyright (C) 1999-2001 Free Software Foundation, Inc.\n");
  fprintf(f, " * This file is part of the GNU LIBICONV Library.\n");
  fprintf(f, " *\n");
  fprintf(f, " * The GNU LIBICONV Library is free software; you can redistribute it\n");
  fprintf(f, " * and/or modify it under the terms of the GNU Library General Public\n");
  fprintf(f, " * License as published by the Free Software Foundation; either version 2\n");
  fprintf(f, " * of the License, or (at your option) any later version.\n");
  fprintf(f, " *\n");
  fprintf(f, " * The GNU LIBICONV Library is distributed in the hope that it will be\n");
  fprintf(f, " * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  fprintf(f, " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
  fprintf(f, " * Library General Public License for more details.\n");
  fprintf(f, " *\n");
  fprintf(f, " * You should have received a copy of the GNU Library General Public\n");
  fprintf(f, " * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
  fprintf(f, " * If not, write to the Free Software Foundation, Inc., 59 Temple Place -\n");
  fprintf(f, " * Suite 330, Boston, MA 02111-1307, USA.\n");
  fprintf(f, " */\n");
  fprintf(f, "\n");
  fprintf(f, "/*\n");
  fprintf(f, " * %s\n", charsetname);
  fprintf(f, " */\n");
  fprintf(f, "\n");
}

/*
 * Writes the <c_charsetname>_2uni tables and the <c_charsetname>_mbtowc
 * function to F.
 * The 256 bytes are handled as 16 rows of 16.  A row is classified as
 * -2 (every entry is 0xfffd), -1 (every entry equals its byte value) or
 * as belonging to table number >= 0; adjacent table rows share a table.
 */
static void write_mbtowc (FILE *f, const char *c_charsetname,
                          const int charset2uni[0x100])
{
  int i, i1, i2, i3;
  int line[16];
  int tableno;
  struct { int minline; int maxline; } tables[16];
  bool some_invalid;
  bool final_ret_reached;

  /* Classify each row. */
  for (i1 = 0; i1 < 16; i1++) {
    bool all_invalid = true;
    bool all_identity = true;
    for (i2 = 0; i2 < 16; i2++) {
      i = 16*i1+i2;
      if (charset2uni[i] != 0xfffd)
        all_invalid = false;
      if (charset2uni[i] != i)
        all_identity = false;
    }
    if (all_invalid)
      line[i1] = -2;
    else if (all_identity)
      line[i1] = -1;
    else
      line[i1] = 0;
  }
  /* Number the tables; a row directly after a table row extends it. */
  tableno = 0;
  for (i1 = 0; i1 < 16; i1++) {
    if (line[i1] >= 0) {
      if (i1 > 0 && tableno > 0 && line[i1-1] == tableno-1) {
        line[i1] = tableno-1;
        tables[tableno-1].maxline = i1;
      } else {
        tableno++;
        line[i1] = tableno-1;
        tables[tableno-1].minline = tables[tableno-1].maxline = i1;
      }
    }
  }
  some_invalid = false;
  for (i = 0; i < 0x100; i++)
    if (charset2uni[i] == 0xfffd)
      some_invalid = true;
  /* Emit the lookup tables. */
  if (tableno > 0) {
    int t;
    for (t = 0; t < tableno; t++) {
      fprintf(f, "static const unsigned short %s_2uni", c_charsetname);
      if (tableno > 1)
        fprintf(f, "_%d", t+1);
      fprintf(f, "[%d] = {\n", 16*(tables[t].maxline-tables[t].minline+1));
      for (i1 = tables[t].minline; i1 <= tables[t].maxline; i1++) {
        fprintf(f, " /* 0x%02x */\n", 16*i1);
        for (i2 = 0; i2 < 2; i2++) {
          fprintf(f, " ");
          for (i3 = 0; i3 < 8; i3++) {
            i = 16*i1+8*i2+i3;
            fprintf(f, " 0x%04x,", charset2uni[i]);
          }
          fprintf(f, "\n");
        }
      }
      fprintf(f, "};\n");
    }
    fprintf(f, "\n");
  }
  /* Emit the function; each run of equally classified rows gets a branch. */
  final_ret_reached = false;
  fprintf(f, "static int\n%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", c_charsetname);
  fprintf(f, "{\n");
  fprintf(f, " unsigned char c = *s;\n");
  if (some_invalid) {
    for (i1 = 0; i1 < 16;) {
      int t = line[i1];
      const char* indent;
      bool range_has_invalid;

      i2 = i1;
      while (i2 < 16 && line[i2] == t)
        i2++;
      /* NOTE(review): both alternatives are the same string in this copy of
         the file; presumably the branch form was indented deeper - confirm
         against the upstream source. */
      indent = (i1 == 0 && i2 == 16 ? " " : " ");
      if (i1 == 0) {
        if (i2 != 16)
          fprintf(f, " if (c < 0x%02x) {\n", 16*i2);
      } else {
        if (i2 == 16)
          fprintf(f, " else {\n");
        else
          fprintf(f, " else if (c < 0x%02x) {\n", 16*i2);
      }
      if (t == -2) {
        /* Nothing valid in this range: fall through to the final return. */
        final_ret_reached = true;
      } else if (t == -1) {
        fprintf(f, "%s*pwc = (ucs4_t) c;\n", indent);
        fprintf(f, "%sreturn 1;\n", indent);
      } else {
        fprintf(f, "%s", indent);
        range_has_invalid = false;
        for (i = 16*i1; i < 16*i2; i++)
          if (charset2uni[i] == 0xfffd)
            range_has_invalid = true;
        if (range_has_invalid)
          fprintf(f, "unsigned short wc = ");
        else
          fprintf(f, "*pwc = (ucs4_t) ");
        fprintf(f, "%s_2uni", c_charsetname);
        if (tableno > 1)
          fprintf(f, "_%d", t+1);
        fprintf(f, "[c");
        if (tables[t].minline > 0)
          fprintf(f, "-0x%02x", 16*tables[t].minline);
        fprintf(f, "];\n");
        if (range_has_invalid) {
          fprintf(f, "%sif (wc != 0xfffd) {\n", indent);
          fprintf(f, "%s *pwc = (ucs4_t) wc;\n", indent);
          fprintf(f, "%s return 1;\n", indent);
          fprintf(f, "%s}\n", indent);
          final_ret_reached = true;
        } else {
          fprintf(f, "%sreturn 1;\n", indent);
        }
      }
      if (!(i1 == 0 && i2 == 16))
        fprintf(f, " }\n");
      i1 = i2;
    }
    if (final_ret_reached)
      fprintf(f, " return RET_ILSEQ;\n");
  } else {
    /* Every byte is valid: plain assignments followed by one return. */
    for (i1 = 0; i1 < 16;) {
      int t = line[i1];

      i2 = i1;
      while (i2 < 16 && line[i2] == t)
        i2++;
      if (i1 == 0) {
        if (i2 == 16)
          fprintf(f, " ");
        else
          fprintf(f, " if (c < 0x%02x)\n ", 16*i2);
      } else {
        if (i2 == 16)
          fprintf(f, " else\n ");
        else
          fprintf(f, " else if (c < 0x%02x)\n ", 16*i2);
      }
      if (t == -1)
        fprintf(f, "*pwc = (ucs4_t) c;\n");
      else {
        fprintf(f, "*pwc = (ucs4_t) %s_2uni", c_charsetname);
        if (tableno > 1)
          fprintf(f, "_%d", t+1);
        fprintf(f, "[c");
        if (tables[t].minline > 0)
          fprintf(f, "-0x%02x", 16*tables[t].minline);
        fprintf(f, "];\n");
      }
      i1 = i2;
    }
    fprintf(f, " return 1;\n");
  }
  fprintf(f, "}\n");
}

/*
 * Writes the <c_charsetname>_page* tables and the <c_charsetname>_wctomb
 * function to F.
 * The range 0..0xffff is handled as 0x2000 rows of 8 code points, each
 * classified as -2 (no entry maps to a byte), -1 (every entry maps to
 * itself) or as belonging to table number >= 0.
 */
static void write_wctomb (FILE *f, const char *c_charsetname,
                          const int charset2uni[0x100])
{
  /* Static because together these arrays take several hundred kilobytes,
     which is a lot to put on the stack. */
  static int uni2charset[0x10000];
  static int line[0x2000];
  static struct { int minline; int maxline; int usecount; const char* suffix; } tables[0x2000];
  int tableno;
  bool need_c;
  bool fix_0000;
  bool any_page_written;
  int i, j, p, j1, j2, t;

  /* Invert the mapping; 0 means "no byte maps to this code point". */
  for (j = 0; j < 0x10000; j++)
    uni2charset[j] = 0;
  for (i = 0; i < 0x100; i++) {
    j = charset2uni[i];
    if (j != 0xfffd) {
      if (j < 0 || j >= 0x10000) abort();
      uni2charset[j] = i;
    }
  }
  /* Classify each row. */
  for (j1 = 0; j1 < 0x2000; j1++) {
    bool all_invalid = true;
    bool all_identity = true;
    for (j2 = 0; j2 < 8; j2++) {
      j = 8*j1+j2;
      if (uni2charset[j] != 0)
        all_invalid = false;
      if (uni2charset[j] != j)
        all_identity = false;
    }
    if (all_invalid)
      line[j1] = -2;
    else if (all_identity)
      line[j1] = -1;
    else
      line[j1] = 0;
  }
  /* Number the tables.  A row extends the previous table when it directly
     follows it, or when it lies in the same block of 32 rows and at most
     8 rows after the table's current end. */
  tableno = 0;
  for (j1 = 0; j1 < 0x2000; j1++) {
    if (line[j1] >= 0) {
      if (tableno > 0
          && ((j1 > 0 && line[j1-1] == tableno-1)
              || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
                  && j1 - tables[tableno-1].maxline <= 8))) {
        line[j1] = tableno-1;
        tables[tableno-1].maxline = j1;
      } else {
        tableno++;
        line[j1] = tableno-1;
        tables[tableno-1].minline = tables[tableno-1].maxline = j1;
      }
    }
  }
  /* Count the non-zero entries of each table. */
  for (t = 0; t < tableno; t++) {
    tables[t].usecount = 0;
    j1 = 8*tables[t].minline;
    j2 = 8*(tables[t].maxline+1);
    for (j = j1; j < j2; j++)
      if (uni2charset[j] != 0)
        tables[t].usecount++;
  }
  /* Name the tables that get emitted (more than one entry in use): "XX"
     for the first one in a block of 32 rows, "XX_N" for further ones. */
  for (t = 0, p = -1, i = 0; t < tableno; t++) {
    if (tables[t].usecount > 1) {
      char* s;
      if (p == tables[t].minline >> 5) {
        /* The counter can reach two or more digits, so measure first. */
        int len = snprintf(NULL, 0, "%02x_%d", p, i+1);
        if (len < 0)
          die("cannot format table suffix");
        s = (char*) xmalloc((size_t) len + 1);
        sprintf(s, "%02x_%d", p, ++i);
      } else {
        p = tables[t].minline >> 5;
        s = (char*) xmalloc(2+1);
        sprintf(s, "%02x", p);
      }
      tables[t].suffix = s;
    } else
      tables[t].suffix = NULL;
  }
  /* Emit the page tables. */
  any_page_written = false;
  for (t = 0; t < tableno; t++)
    if (tables[t].usecount > 1) {
      any_page_written = true;
      fprintf(f, "static const unsigned char %s_page%s[%d] = {\n", c_charsetname, tables[t].suffix, 8*(tables[t].maxline-tables[t].minline+1));
      for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
        if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
          fprintf(f, " /* 0x%04x */\n", 8*j1);
        fprintf(f, " ");
        for (j2 = 0; j2 < 8; j2++) {
          j = 8*j1+j2;
          fprintf(f, " 0x%02x,", uni2charset[j]);
        }
        fprintf(f, " /* 0x%02x-0x%02x */\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
      }
      fprintf(f, "};\n");
    }
  if (any_page_written)
    fprintf(f, "\n");
  /* The emitted function needs a local 'c' unless every non-empty range
     is the identity range starting at 0, which returns directly. */
  need_c = false;
  for (j1 = 0; j1 < 0x2000;) {
    t = line[j1];
    j2 = j1;
    while (j2 < 0x2000 && line[j2] == t)
      j2++;
    if (t >= 0)
      j2 = tables[t].maxline+1;
    if (!(t == -2 || (t == -1 && j1 == 0)))
      need_c = true;
    j1 = j2;
  }
  /* Emit the function. */
  fix_0000 = false;
  fprintf(f, "static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", c_charsetname);
  fprintf(f, "{\n");
  if (need_c)
    fprintf(f, " unsigned char c = 0;\n");
  for (j1 = 0; j1 < 0x2000;) {
    t = line[j1];
    j2 = j1;
    while (j2 < 0x2000 && line[j2] == t)
      j2++;
    if (t >= 0) {
      if (j1 != tables[t].minline) abort();
      if (j2 > tables[t].maxline+1) abort();
      j2 = tables[t].maxline+1;
    }
    if (t != -2) {
      if (j1 == 0)
        fprintf(f, " ");
      else
        fprintf(f, " else ");
      if (t >= 0 && tables[t].usecount == 0) abort();
      if (t >= 0 && tables[t].usecount == 1) {
        /* A single entry is emitted as a comparison instead of a table. */
        if (j2 != j1+1) abort();
        for (j = 8*j1; j < 8*j2; j++)
          if (uni2charset[j] != 0) {
            fprintf(f, "if (wc == 0x%04x)\n c = 0x%02x;\n", j, uni2charset[j]);
            break;
          }
      } else {
        if (j1 == 0) {
          fprintf(f, "if (wc < 0x%04x)", 8*j2);
        } else {
          fprintf(f, "if (wc >= 0x%04x && wc < 0x%04x)", 8*j1, 8*j2);
        }
        if (t == -1) {
          if (j1 == 0)
            /* If wc == 0, the function must return 1, not -1. */
            fprintf(f, " {\n *r = wc;\n return 1;\n }\n");
          else
            fprintf(f, "\n c = wc;\n");
        } else {
          fprintf(f, "\n c = %s_page%s[wc", c_charsetname, tables[t].suffix);
          if (tables[t].minline > 0)
            fprintf(f, "-0x%04x", 8*j1);
          fprintf(f, "];\n");
          if (j1 == 0 && uni2charset[0] == 0)
            /* If wc == 0, the function must return 1, not -1. */
            fix_0000 = true;
        }
      }
    }
    j1 = j2;
  }
  if (need_c) {
    if (fix_0000)
      fprintf(f, " if (c != 0 || wc == 0) {\n");
    else
      fprintf(f, " if (c != 0) {\n");
    fprintf(f, " *r = c;\n");
    fprintf(f, " return 1;\n");
    fprintf(f, " }\n");
  }
  fprintf(f, " return RET_ILUNI;\n");
  fprintf(f, "}\n");
}

/*
 * Usage: 8bit_tab_to_h charsetname c_charsetname [filename [directory]]
 * Reads the character set table from stdin and writes the generated
 * header to <directory><filename>; filename defaults to
 * <c_charsetname>.h and directory to the empty string.
 * Exits with status 1 on any usage, input or output error.
 */
int main (int argc, char *argv[])
{
  const char* charsetname;
  const char* c_charsetname;
  const char* filename;
  const char* directory;
  char* default_filename = NULL;
  char* fname;
  int charset2uni[0x100];
  FILE* f;

  if (argc != 3 && argc != 4 && argc != 5) {
    fprintf(stderr, "Usage: 8bit_tab_to_h charsetname c_charsetname [filename [directory]] < table\n");
    exit(1);
  }
  charsetname = argv[1];
  c_charsetname = argv[2];
  if (argc > 3) {
    filename = argv[3];
  } else {
    default_filename = (char*) xmalloc(strlen(c_charsetname)+strlen(".h")+1);
    strcpy(default_filename,c_charsetname); strcat(default_filename,".h");
    filename = default_filename;
  }
  directory = (argc > 4 ? argv[4] : "");

  fprintf(stderr, "Creating %s%s\n", directory, filename);

  read_charset_table(charset2uni);

  /* Write the output file. */
  fname = (char*) xmalloc(strlen(directory)+strlen(filename)+1);
  strcpy(fname,directory); strcat(fname,filename);
  f = fopen(fname,"w");
  if (f == NULL) {
    perror(fname);
    exit(1);
  }
  free(fname);

  write_license_header(f, charsetname);
  write_mbtowc(f, c_charsetname, charset2uni);
  fprintf(f, "\n");
  write_wctomb(f, c_charsetname, charset2uni);

  if (ferror(f) || fclose(f))
    die("error writing the output file");

  free(default_filename);
  return 0;
}