Correct PPTP server firewall rules chain.
[tomato/davidwu.git] / release / src / router / libiconv / tools / cjk_tab_to_h.c
blobdb96ffd17c084af3ce6b538ae84174829091a53f
/* Copyright (C) 1999-2004, 2006-2007, 2010 Free Software Foundation, Inc.
   This file is part of the GNU LIBICONV Tools.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program. If not, see <http://www.gnu.org/licenses/>. */
/*
 * Generates a CJK character set table from a .TXT table as found on
 * ftp.unicode.org or in the X nls directory.
 * Examples:
 *
 *   ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < gb2312
 *   ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < jis0208
 *   ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < ksc5601
 *
 *   ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < GB2312.TXT
 *   ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < JIS0208.TXT
 *   ./cjk_tab_to_h JISX0212.1990-0 jisx0212 > jisx0212.h < JIS0212.TXT
 *   ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < KSC5601.TXT
 *   ./cjk_tab_to_h KSX1001.1992-0 ksc5601 > ksc5601.h < KSX1001.TXT
 *
 *   ./cjk_tab_to_h BIG5 big5 > big5.h < BIG5.TXT
 *
 *   ./cjk_tab_to_h JOHAB johab > johab.h < JOHAB.TXT
 *
 *   ./cjk_tab_to_h JISX0213:2004 jisx0213 > jisx0213.h < JISX0213.TXT
 */
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include <stdbool.h>
43 #include <string.h>
44 #include <ctype.h>
45 #include <assert.h>
/* A half-open range [start, end) of linear table indices. */
typedef struct {
  int start; /* first index that belongs to the block */
  int end;   /* one past the last index that belongs to the block */
} Block;
52 typedef struct {
53 int rows; /* number of possible values for the 1st byte */
54 int cols; /* number of possible values for the 2nd byte */
55 int (*row_byte) (int row); /* returns the 1st byte value for a given row */
56 int (*col_byte) (int col); /* returns the 2nd byte value for a given col */
57 int (*byte_row) (int byte); /* converts a 1st byte value to a row, else -1 */
58 int (*byte_col) (int byte); /* converts a 2nd byte value to a col, else -1 */
59 const char* check_row_expr; /* format string for 1st byte value checking */
60 const char* check_col_expr; /* format string for 2nd byte value checking */
61 const char* byte_row_expr; /* format string for 1st byte value to row */
62 const char* byte_col_expr; /* format string for 2nd byte value to col */
63 int** charset2uni; /* charset2uni[0..rows-1][0..cols-1] is valid */
64 /* You'll understand the terms "row" and "col" when you buy Ken Lunde's book.
65 Once a row is fixed, choosing a "col" is the same as choosing a "cell". */
66 int* charsetpage; /* charsetpage[0..rows]: how large is a page for a row */
67 int ncharsetblocks;
68 Block* charsetblocks; /* blocks[0..nblocks-1] */
69 int* uni2charset; /* uni2charset[0x0000..0xffff] */
70 int fffd; /* uni representation of the invalid character */
71 } Encoding;
/*
 * Outputs the file title: the license notice of the generated header,
 * followed by a comment that names the character set.
 *
 * charsetname: text printed verbatim inside the second comment.
 */
static void output_title (const char *charsetname)
{
  printf("/*\n");
  printf(" * Copyright (C) 1999-2010 Free Software Foundation, Inc.\n");
  printf(" * This file is part of the GNU LIBICONV Library.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
  printf(" * and/or modify it under the terms of the GNU Library General Public\n");
  printf(" * License as published by the Free Software Foundation; either version 2\n");
  printf(" * of the License, or (at your option) any later version.\n");
  printf(" *\n");
  printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
  printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
  printf(" * Library General Public License for more details.\n");
  printf(" *\n");
  printf(" * You should have received a copy of the GNU Library General Public\n");
  printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
  printf(" * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n");
  printf(" * Fifth Floor, Boston, MA 02110-1301, USA.\n");
  printf(" */\n");
  printf("\n");
  printf("/*\n");
  printf(" * %s\n", charsetname);
  printf(" */\n");
  printf("\n");
}
105 * Reads the charset2uni table from standard input.
107 static void read_table (Encoding* enc)
109 int row, col, i, i1, i2, c, j;
111 enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));
112 for (row = 0; row < enc->rows; row++)
113 enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));
115 for (row = 0; row < enc->rows; row++)
116 for (col = 0; col < enc->cols; col++)
117 enc->charset2uni[row][col] = 0xfffd;
119 c = getc(stdin);
120 ungetc(c,stdin);
121 if (c == '#') {
122 /* Read a unicode.org style .TXT file. */
123 for (;;) {
124 c = getc(stdin);
125 if (c == EOF)
126 break;
127 if (c == '\n' || c == ' ' || c == '\t')
128 continue;
129 if (c == '#') {
130 do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
131 continue;
133 ungetc(c,stdin);
134 if (scanf("0x%x", &j) != 1)
135 exit(1);
136 i1 = j >> 8;
137 i2 = j & 0xff;
138 row = enc->byte_row(i1);
139 col = enc->byte_col(i2);
140 if (row < 0 || col < 0) {
141 fprintf(stderr, "lost entry for %02x %02x\n", i1, i2);
142 exit(1);
144 if (scanf(" 0x%x", &enc->charset2uni[row][col]) != 1)
145 exit(1);
147 } else {
148 /* Read a table of hexadecimal Unicode values. */
149 for (i1 = 32; i1 < 132; i1++)
150 for (i2 = 32; i2 < 132; i2++) {
151 i = scanf("%x", &j);
152 if (i == EOF)
153 goto read_done;
154 if (i != 1)
155 exit(1);
156 if (j < 0 || j == 0xffff)
157 j = 0xfffd;
158 if (j != 0xfffd) {
159 if (enc->byte_row(i1) < 0 || enc->byte_col(i2) < 0) {
160 fprintf(stderr, "lost entry at %02x %02x\n", i1, i2);
161 exit (1);
163 enc->charset2uni[enc->byte_row(i1)][enc->byte_col(i2)] = j;
166 read_done: ;
171 * Determine whether the Unicode range goes outside the BMP.
173 static bool is_charset2uni_large (Encoding* enc)
175 int row, col;
177 for (row = 0; row < enc->rows; row++)
178 for (col = 0; col < enc->cols; col++)
179 if (enc->charset2uni[row][col] >= 0x10000)
180 return true;
181 return false;
185 * Compactify the Unicode range by use of an auxiliary table,
186 * so 16 bits suffice to store each value.
188 static int compact_large_charset2uni (Encoding* enc, unsigned int **urows, unsigned int *urowshift)
190 unsigned int shift;
192 for (shift = 8; ; shift--) {
193 int *upages = (int *) malloc((0x110000>>shift) * sizeof(int));
194 int i, row, col, nurows;
196 for (i = 0; i < 0x110000>>shift; i++)
197 upages[i] = -1;
199 for (row = 0; row < enc->rows; row++)
200 for (col = 0; col < enc->cols; col++)
201 upages[enc->charset2uni[row][col] >> shift] = 0;
203 nurows = 0;
204 for (i = 0; i < 0x110000>>shift; i++)
205 if (upages[i] == 0)
206 nurows++;
208 /* We want all table entries to fit in an 'unsigned short'. */
209 if (nurows <= 1<<(16-shift)) {
210 int** old_charset2uni;
212 *urows = (unsigned int *) malloc(nurows * sizeof(unsigned int));
213 *urowshift = shift;
215 nurows = 0;
216 for (i = 0; i < 0x110000>>shift; i++)
217 if (upages[i] == 0) {
218 upages[i] = nurows;
219 (*urows)[nurows] = i;
220 nurows++;
223 old_charset2uni = enc->charset2uni;
224 enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));
225 for (row = 0; row < enc->rows; row++)
226 enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));
227 for (row = 0; row < enc->rows; row++)
228 for (col = 0; col < enc->cols; col++) {
229 int u = old_charset2uni[row][col];
230 enc->charset2uni[row][col] =
231 (upages[u >> shift] << shift) | (u & ((1 << shift) - 1));
233 enc->fffd =
234 (upages[0xfffd >> shift] << shift) | (0xfffd & ((1 << shift) - 1));
236 return nurows;
239 abort();
243 * Computes the charsetpage[0..rows] array.
245 static void find_charset2uni_pages (Encoding* enc)
247 int row, col;
249 enc->charsetpage = (int*) malloc((enc->rows+1)*sizeof(int));
251 for (row = 0; row <= enc->rows; row++)
252 enc->charsetpage[row] = 0;
254 for (row = 0; row < enc->rows; row++) {
255 int used = 0;
256 for (col = 0; col < enc->cols; col++)
257 if (enc->charset2uni[row][col] != enc->fffd)
258 used = col+1;
259 enc->charsetpage[row] = used;
264 * Fills in nblocks and blocks.
266 static void find_charset2uni_blocks (Encoding* enc)
268 int n, row, lastrow;
270 enc->charsetblocks = (Block*) malloc(enc->rows*sizeof(Block));
272 n = 0;
273 for (row = 0; row < enc->rows; row++)
274 if (enc->charsetpage[row] > 0 && (row == 0 || enc->charsetpage[row-1] == 0)) {
275 for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
276 enc->charsetblocks[n].start = row * enc->cols;
277 enc->charsetblocks[n].end = lastrow * enc->cols + enc->charsetpage[lastrow];
278 n++;
280 enc->ncharsetblocks = n;
284 * Outputs the charset to unicode table and function.
286 static void output_charset2uni (const char* name, Encoding* enc)
288 int nurows, row, col, lastrow, col_max, i, i1_min, i1_max;
289 bool is_large;
290 unsigned int* urows;
291 unsigned int urowshift;
292 Encoding tmpenc;
294 is_large = is_charset2uni_large(enc);
295 if (is_large) {
296 /* Use a temporary copy of enc. */
297 tmpenc = *enc;
298 enc = &tmpenc;
299 nurows = compact_large_charset2uni(enc,&urows,&urowshift);
300 } else {
301 nurows = 0; urows = NULL; urowshift = 0; enc->fffd = 0xfffd;
304 find_charset2uni_pages(enc);
306 find_charset2uni_blocks(enc);
308 for (row = 0; row < enc->rows; row++)
309 if (enc->charsetpage[row] > 0) {
310 if (row == 0 || enc->charsetpage[row-1] == 0) {
311 /* Start a new block. */
312 for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
313 printf("static const unsigned short %s_2uni_page%02x[%d] = {\n",
314 name, enc->row_byte(row),
315 (lastrow-row) * enc->cols + enc->charsetpage[lastrow]);
317 printf(" /""* 0x%02x *""/\n ", enc->row_byte(row));
318 col_max = (enc->charsetpage[row+1] > 0 ? enc->cols : enc->charsetpage[row]);
319 for (col = 0; col < col_max; col++) {
320 printf(" 0x%04x,", enc->charset2uni[row][col]);
321 if ((col % 8) == 7 && (col+1 < col_max)) printf("\n ");
323 printf("\n");
324 if (enc->charsetpage[row+1] == 0) {
325 /* End a block. */
326 printf("};\n");
329 printf("\n");
331 if (is_large) {
332 printf("static const ucs4_t %s_2uni_upages[%d] = {\n ", name, nurows);
333 for (i = 0; i < nurows; i++) {
334 printf(" 0x%05x,", urows[i] << urowshift);
335 if ((i % 8) == 7 && (i+1 < nurows)) printf("\n ");
337 printf("\n");
338 printf("};\n");
339 printf("\n");
342 printf("static int\n");
343 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
344 printf("{\n");
345 printf(" unsigned char c1 = s[0];\n");
346 printf(" if (");
347 for (i = 0; i < enc->ncharsetblocks; i++) {
348 i1_min = enc->row_byte(enc->charsetblocks[i].start / enc->cols);
349 i1_max = enc->row_byte((enc->charsetblocks[i].end-1) / enc->cols);
350 if (i > 0)
351 printf(" || ");
352 if (i1_min == i1_max)
353 printf("(c1 == 0x%02x)", i1_min);
354 else
355 printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min, i1_max);
357 printf(") {\n");
358 printf(" if (n >= 2) {\n");
359 printf(" unsigned char c2 = s[1];\n");
360 printf(" if (");
361 printf(enc->check_col_expr, "c2");
362 printf(") {\n");
363 printf(" unsigned int i = %d * (", enc->cols);
364 printf(enc->byte_row_expr, "c1");
365 printf(") + (");
366 printf(enc->byte_col_expr, "c2");
367 printf(");\n");
368 printf(" %s wc = 0xfffd;\n", is_large ? "ucs4_t" : "unsigned short");
369 if (is_large) printf(" unsigned short swc;\n");
370 for (i = 0; i < enc->ncharsetblocks; i++) {
371 printf(" ");
372 if (i > 0)
373 printf("} else ");
374 if (i < enc->ncharsetblocks-1)
375 printf("if (i < %d) ", enc->charsetblocks[i+1].start);
376 printf("{\n");
377 printf(" if (i < %d)\n", enc->charsetblocks[i].end);
378 printf(" %s = ", is_large ? "swc" : "wc");
379 printf("%s_2uni_page%02x[i", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols));
380 if (enc->charsetblocks[i].start > 0)
381 printf("-%d", enc->charsetblocks[i].start);
382 printf("]");
383 if (is_large) printf(",\n wc = %s_2uni_upages[swc>>%d] | (swc & 0x%x)", name, urowshift, (1 << urowshift) - 1);
384 printf(";\n");
386 printf(" }\n");
387 printf(" if (wc != 0xfffd) {\n");
388 printf(" *pwc = %swc;\n", is_large ? "" : "(ucs4_t) ");
389 printf(" return 2;\n");
390 printf(" }\n");
391 printf(" }\n");
392 printf(" return RET_ILSEQ;\n");
393 printf(" }\n");
394 printf(" return RET_TOOFEW(0);\n");
395 printf(" }\n");
396 printf(" return RET_ILSEQ;\n");
397 printf("}\n");
398 printf("\n");
402 * Outputs the charset to unicode table and function.
403 * (Suitable if the mapping function is well defined, i.e. has no holes, and
404 * is monotonically increasing with small gaps only.)
406 static void output_charset2uni_noholes_monotonic (const char* name, Encoding* enc)
408 int row, col, lastrow, r, col_max, i, i1_min, i1_max;
410 /* Choose stepsize so that stepsize*steps_per_row >= enc->cols, and
411 enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]
412 is always < 0x100. */
413 int steps_per_row = 2;
414 int stepsize = (enc->cols + steps_per_row-1) / steps_per_row;
416 find_charset2uni_pages(enc);
418 find_charset2uni_blocks(enc);
420 for (row = 0; row < enc->rows; row++)
421 if (enc->charsetpage[row] > 0) {
422 if (row == 0 || enc->charsetpage[row-1] == 0) {
423 /* Start a new block. */
424 for (lastrow = row; enc->charsetpage[lastrow+1] > 0; lastrow++);
425 printf("static const unsigned short %s_2uni_main_page%02x[%d] = {\n ",
426 name, enc->row_byte(row),
427 steps_per_row*(lastrow-row+1));
428 for (r = row; r <= lastrow; r++) {
429 for (i = 0; i < steps_per_row; i++)
430 printf(" 0x%04x,", enc->charset2uni[r][i*stepsize]);
431 if (((r-row) % 4) == 3 && (r < lastrow)) printf("\n ");
433 printf("\n");
434 printf("};\n");
435 printf("static const unsigned char %s_2uni_page%02x[%d] = {\n",
436 name, enc->row_byte(row),
437 (lastrow-row) * enc->cols + enc->charsetpage[lastrow]);
439 printf(" /""* 0x%02x *""/\n ", enc->row_byte(row));
440 col_max = (enc->charsetpage[row+1] > 0 ? enc->cols : enc->charsetpage[row]);
441 for (col = 0; col < col_max; col++) {
442 printf(" 0x%02x,", enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]);
443 if ((col % 8) == 7 && (col+1 < col_max)) printf("\n ");
445 printf("\n");
446 if (enc->charsetpage[row+1] == 0) {
447 /* End a block. */
448 printf("};\n");
451 printf("\n");
453 printf("static int\n");
454 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
455 printf("{\n");
456 printf(" unsigned char c1 = s[0];\n");
457 printf(" if (");
458 for (i = 0; i < enc->ncharsetblocks; i++) {
459 i1_min = enc->row_byte(enc->charsetblocks[i].start / enc->cols);
460 i1_max = enc->row_byte((enc->charsetblocks[i].end-1) / enc->cols);
461 if (i > 0)
462 printf(" || ");
463 if (i1_min == i1_max)
464 printf("(c1 == 0x%02x)", i1_min);
465 else
466 printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min, i1_max);
468 printf(") {\n");
469 printf(" if (n >= 2) {\n");
470 printf(" unsigned char c2 = s[1];\n");
471 printf(" if (");
472 printf(enc->check_col_expr, "c2");
473 printf(") {\n");
474 printf(" unsigned int row = ");
475 printf(enc->byte_row_expr, "c1");
476 printf(";\n");
477 printf(" unsigned int col = ");
478 printf(enc->byte_col_expr, "c2");
479 printf(";\n");
480 printf(" unsigned int i = %d * row + col;\n", enc->cols);
481 printf(" unsigned short wc = 0xfffd;\n");
482 for (i = 0; i < enc->ncharsetblocks; i++) {
483 printf(" ");
484 if (i > 0)
485 printf("} else ");
486 if (i < enc->ncharsetblocks-1)
487 printf("if (i < %d) ", enc->charsetblocks[i+1].start);
488 printf("{\n");
489 printf(" if (i < %d)\n", enc->charsetblocks[i].end);
490 printf(" wc = %s_2uni_main_page%02x[%d*", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols), steps_per_row);
491 if (enc->charsetblocks[i].start > 0)
492 printf("(row-%d)", enc->charsetblocks[i].start / enc->cols);
493 else
494 printf("row");
495 printf("+");
496 if (steps_per_row == 2)
497 printf("(col>=%d?1:0)", stepsize);
498 else
499 printf("col/%d", stepsize);
500 printf("] + %s_2uni_page%02x[i", name, enc->row_byte(enc->charsetblocks[i].start / enc->cols));
501 if (enc->charsetblocks[i].start > 0)
502 printf("-%d", enc->charsetblocks[i].start);
503 printf("];\n");
505 printf(" }\n");
506 printf(" if (wc != 0xfffd) {\n");
507 printf(" *pwc = (ucs4_t) wc;\n");
508 printf(" return 2;\n");
509 printf(" }\n");
510 printf(" }\n");
511 printf(" return RET_ILSEQ;\n");
512 printf(" }\n");
513 printf(" return RET_TOOFEW(0);\n");
514 printf(" }\n");
515 printf(" return RET_ILSEQ;\n");
516 printf("}\n");
517 printf("\n");
521 * Computes the uni2charset[0x0000..0x2ffff] array.
523 static void invert (Encoding* enc)
525 int row, col, j;
527 enc->uni2charset = (int*) malloc(0x30000*sizeof(int));
529 for (j = 0; j < 0x30000; j++)
530 enc->uni2charset[j] = 0;
532 for (row = 0; row < enc->rows; row++)
533 for (col = 0; col < enc->cols; col++) {
534 j = enc->charset2uni[row][col];
535 if (j != 0xfffd)
536 enc->uni2charset[j] = 0x100 * enc->row_byte(row) + enc->col_byte(col);
541 * Outputs the unicode to charset table and function, using a linear array.
542 * (Suitable if the table is dense.)
544 static void output_uni2charset_dense (const char* name, Encoding* enc)
546 /* Like in 8bit_tab_to_h.c */
547 bool pages[0x300];
548 int line[0x6000];
549 int tableno;
550 struct { int minline; int maxline; int usecount; } tables[0x6000];
551 bool first;
552 int row, col, j, p, j1, j2, t;
554 for (p = 0; p < 0x300; p++)
555 pages[p] = false;
556 for (row = 0; row < enc->rows; row++)
557 for (col = 0; col < enc->cols; col++) {
558 j = enc->charset2uni[row][col];
559 if (j != 0xfffd)
560 pages[j>>8] = true;
562 for (j1 = 0; j1 < 0x6000; j1++) {
563 bool all_invalid = true;
564 for (j2 = 0; j2 < 8; j2++) {
565 j = 8*j1+j2;
566 if (enc->uni2charset[j] != 0)
567 all_invalid = false;
569 if (all_invalid)
570 line[j1] = -1;
571 else
572 line[j1] = 0;
574 tableno = 0;
575 for (j1 = 0; j1 < 0x6000; j1++) {
576 if (line[j1] >= 0) {
577 if (tableno > 0
578 && ((j1 > 0 && line[j1-1] == tableno-1)
579 || ((tables[tableno-1].maxline >> 5) == (j1 >> 5)
580 && j1 - tables[tableno-1].maxline <= 8))) {
581 line[j1] = tableno-1;
582 tables[tableno-1].maxline = j1;
583 } else {
584 tableno++;
585 line[j1] = tableno-1;
586 tables[tableno-1].minline = tables[tableno-1].maxline = j1;
590 for (t = 0; t < tableno; t++) {
591 tables[t].usecount = 0;
592 j1 = 8*tables[t].minline;
593 j2 = 8*(tables[t].maxline+1);
594 for (j = j1; j < j2; j++)
595 if (enc->uni2charset[j] != 0)
596 tables[t].usecount++;
599 p = -1;
600 for (t = 0; t < tableno; t++)
601 if (tables[t].usecount > 1) {
602 p = tables[t].minline >> 5;
603 printf("static const unsigned short %s_page%02x[%d] = {\n", name, p, 8*(tables[t].maxline-tables[t].minline+1));
604 for (j1 = tables[t].minline; j1 <= tables[t].maxline; j1++) {
605 if ((j1 % 0x20) == 0 && j1 > tables[t].minline)
606 printf(" /* 0x%04x */\n", 8*j1);
607 printf(" ");
608 for (j2 = 0; j2 < 8; j2++) {
609 j = 8*j1+j2;
610 printf(" 0x%04x,", enc->uni2charset[j]);
612 printf(" /*0x%02x-0x%02x*/\n", 8*(j1 % 0x20), 8*(j1 % 0x20)+7);
614 printf("};\n");
616 if (p >= 0)
617 printf("\n");
619 printf("static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
620 printf("{\n");
621 printf(" if (n >= 2) {\n");
622 printf(" unsigned short c = 0;\n");
623 first = true;
624 for (j1 = 0; j1 < 0x6000;) {
625 t = line[j1];
626 for (j2 = j1; j2 < 0x6000 && line[j2] == t; j2++);
627 if (t >= 0) {
628 if (j1 != tables[t].minline) abort();
629 if (j2 > tables[t].maxline+1) abort();
630 j2 = tables[t].maxline+1;
631 if (first)
632 printf(" ");
633 else
634 printf(" else ");
635 first = false;
636 if (tables[t].usecount == 0) abort();
637 if (tables[t].usecount == 1) {
638 if (j2 != j1+1) abort();
639 for (j = 8*j1; j < 8*j2; j++)
640 if (enc->uni2charset[j] != 0) {
641 printf("if (wc == 0x%04x)\n c = 0x%02x;\n", j, enc->uni2charset[j]);
642 break;
644 } else {
645 if (j1 == 0) {
646 printf("if (wc < 0x%04x)", 8*j2);
647 } else {
648 printf("if (wc >= 0x%04x && wc < 0x%04x)", 8*j1, 8*j2);
650 printf("\n c = %s_page%02x[wc", name, j1 >> 5);
651 if (tables[t].minline > 0)
652 printf("-0x%04x", 8*j1);
653 printf("];\n");
656 j1 = j2;
658 printf(" if (c != 0) {\n");
659 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
660 printf(" return 2;\n");
661 printf(" }\n");
662 printf(" return RET_ILUNI;\n");
663 printf(" }\n");
664 printf(" return RET_TOOSMALL;\n");
665 printf("}\n");
669 * Outputs the unicode to charset table and function, using a packed array.
670 * (Suitable if the table is sparse.)
671 * The argument 'monotonic' may be set to true if the mapping is monotonically
672 * increasing with small gaps only.
674 static void output_uni2charset_sparse (const char* name, Encoding* enc, bool monotonic)
676 bool pages[0x300];
677 Block pageblocks[0x300]; int npageblocks;
678 int indx2charset[0x30000];
679 int summary_indx[0x3000];
680 int summary_used[0x3000];
681 int i, row, col, j, p, j1, j2, indx;
682 bool is_large;
683 /* for monotonic: */
684 int log2_stepsize = (!strcmp(name,"uhc_2") ? 6 : 7);
685 int stepsize = 1 << log2_stepsize;
686 int indxsteps;
688 /* Fill pages[0x300]. */
689 for (p = 0; p < 0x300; p++)
690 pages[p] = false;
691 for (row = 0; row < enc->rows; row++)
692 for (col = 0; col < enc->cols; col++) {
693 j = enc->charset2uni[row][col];
694 if (j != 0xfffd)
695 pages[j>>8] = true;
698 /* Determine whether two or three bytes are needed for each character. */
699 is_large = false;
700 for (j = 0; j < 0x30000; j++)
701 if (enc->uni2charset[j] >= 0x10000)
702 is_large = true;
704 #if 0
705 for (p = 0; p < 0x300; p++)
706 if (pages[p]) {
707 printf("static const unsigned short %s_page%02x[256] = {\n", name, p);
708 for (j1 = 0; j1 < 32; j1++) {
709 printf(" ");
710 for (j2 = 0; j2 < 8; j2++)
711 printf("0x%04x, ", enc->uni2charset[256*p+8*j1+j2]);
712 printf("/""*0x%02x-0x%02x*""/\n", 8*j1, 8*j1+7);
714 printf("};\n");
716 printf("\n");
717 #endif
719 /* Fill summary_indx[] and summary_used[]. */
720 indx = 0;
721 for (j1 = 0; j1 < 0x3000; j1++) {
722 summary_indx[j1] = indx;
723 summary_used[j1] = 0;
724 for (j2 = 0; j2 < 16; j2++) {
725 j = 16*j1+j2;
726 if (enc->uni2charset[j] != 0) {
727 indx2charset[indx++] = enc->uni2charset[j];
728 summary_used[j1] |= (1 << j2);
733 /* Fill npageblocks and pageblocks[]. */
734 npageblocks = 0;
735 for (p = 0; p < 0x300; ) {
736 if (pages[p] && (p == 0 || !pages[p-1])) {
737 pageblocks[npageblocks].start = 16*p;
738 do p++; while (p < 0x300 && pages[p]);
739 j1 = 16*p;
740 while (summary_used[j1-1] == 0) j1--;
741 pageblocks[npageblocks].end = j1;
742 npageblocks++;
743 } else
744 p++;
747 if (monotonic) {
748 indxsteps = (indx + stepsize-1) / stepsize;
749 printf("static const unsigned short %s_2charset_main[%d] = {\n", name, indxsteps);
750 for (i = 0; i < indxsteps; ) {
751 if ((i % 8) == 0) printf(" ");
752 printf(" 0x%04x,", indx2charset[i*stepsize]);
753 i++;
754 if ((i % 8) == 0 || i == indxsteps) printf("\n");
756 printf("};\n");
757 printf("static const unsigned char %s_2charset[%d] = {\n", name, indx);
758 for (i = 0; i < indx; ) {
759 if ((i % 8) == 0) printf(" ");
760 printf(" 0x%02x,", indx2charset[i] - indx2charset[i/stepsize*stepsize]);
761 i++;
762 if ((i % 8) == 0 || i == indx) printf("\n");
764 printf("};\n");
765 } else {
766 if (is_large) {
767 printf("static const unsigned char %s_2charset[3*%d] = {\n", name, indx);
768 for (i = 0; i < indx; ) {
769 if ((i % 4) == 0) printf(" ");
770 printf(" 0x%1x,0x%02x,0x%02x,", indx2charset[i] >> 16,
771 (indx2charset[i] >> 8) & 0xff, indx2charset[i] & 0xff);
772 i++;
773 if ((i % 4) == 0 || i == indx) printf("\n");
775 printf("};\n");
776 } else {
777 printf("static const unsigned short %s_2charset[%d] = {\n", name, indx);
778 for (i = 0; i < indx; ) {
779 if ((i % 8) == 0) printf(" ");
780 printf(" 0x%04x,", indx2charset[i]);
781 i++;
782 if ((i % 8) == 0 || i == indx) printf("\n");
784 printf("};\n");
787 printf("\n");
788 for (i = 0; i < npageblocks; i++) {
789 printf("static const Summary16 %s_uni2indx_page%02x[%d] = {\n", name,
790 pageblocks[i].start/16, pageblocks[i].end-pageblocks[i].start);
791 for (j1 = pageblocks[i].start; j1 < pageblocks[i].end; ) {
792 if (((16*j1) % 0x100) == 0) printf(" /""* 0x%04x *""/\n", 16*j1);
793 if ((j1 % 4) == 0) printf(" ");
794 printf(" { %4d, 0x%04x },", summary_indx[j1], summary_used[j1]);
795 j1++;
796 if ((j1 % 4) == 0 || j1 == pageblocks[i].end) printf("\n");
798 printf("};\n");
800 printf("\n");
802 printf("static int\n");
803 printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
804 printf("{\n");
805 printf(" if (n >= 2) {\n");
806 printf(" const Summary16 *summary = NULL;\n");
807 for (i = 0; i < npageblocks; i++) {
808 printf(" ");
809 if (i > 0)
810 printf("else ");
811 printf("if (wc >= 0x%04x && wc < 0x%04x)\n",
812 16*pageblocks[i].start, 16*pageblocks[i].end);
813 printf(" summary = &%s_uni2indx_page%02x[(wc>>4)", name,
814 pageblocks[i].start/16);
815 if (pageblocks[i].start > 0)
816 printf("-0x%03x", pageblocks[i].start);
817 printf("];\n");
819 printf(" if (summary) {\n");
820 printf(" unsigned short used = summary->used;\n");
821 printf(" unsigned int i = wc & 0x0f;\n");
822 printf(" if (used & ((unsigned short) 1 << i)) {\n");
823 if (monotonic || !is_large)
824 printf(" unsigned short c;\n");
825 printf(" /* Keep in `used' only the bits 0..i-1. */\n");
826 printf(" used &= ((unsigned short) 1 << i) - 1;\n");
827 printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
828 printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
829 printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
830 printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
831 printf(" used = (used & 0x00ff) + (used >> 8);\n");
832 if (monotonic) {
833 printf(" used += summary->indx;\n");
834 printf(" c = %s_2charset_main[used>>%d] + %s_2charset[used];\n", name, log2_stepsize, name);
835 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
836 printf(" return 2;\n");
837 } else {
838 if (is_large) {
839 printf(" used += summary->indx;\n");
840 printf(" r[0] = %s_2charset[3*used];\n", name);
841 printf(" r[1] = %s_2charset[3*used+1];\n", name);
842 printf(" r[2] = %s_2charset[3*used+2];\n", name);
843 printf(" return 3;\n");
844 } else {
845 printf(" c = %s_2charset[summary->indx + used];\n", name);
846 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
847 printf(" return 2;\n");
850 printf(" }\n");
851 printf(" }\n");
852 printf(" return RET_ILUNI;\n");
853 printf(" }\n");
854 printf(" return RET_TOOSMALL;\n");
855 printf("}\n");
/* ISO-2022/EUC specifics */

/* Returns the 1st byte value for a row: rows start at byte 0x21. */
static int row_byte_normal (int row) { return 0x21+row; }
/* Returns the 2nd byte value for a column: columns start at byte 0x21. */
static int col_byte_normal (int col) { return 0x21+col; }
/* Converts a 1st byte value to a row. No upper bound check is performed;
   bytes below 0x21 yield a negative result. */
static int byte_row_normal (int byte) { return byte-0x21; }
/* Converts a 2nd byte value to a column. No upper bound check is performed;
   bytes below 0x21 yield a negative result. */
static int byte_col_normal (int byte) { return byte-0x21; }
865 static void do_normal (const char* name)
867 Encoding enc;
869 enc.rows = 94;
870 enc.cols = 94;
871 enc.row_byte = row_byte_normal;
872 enc.col_byte = col_byte_normal;
873 enc.byte_row = byte_row_normal;
874 enc.byte_col = byte_col_normal;
875 enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
876 enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
877 enc.byte_row_expr = "%1$s - 0x21";
878 enc.byte_col_expr = "%1$s - 0x21";
880 read_table(&enc);
881 output_charset2uni(name,&enc);
882 invert(&enc); output_uni2charset_sparse(name,&enc,false);
885 /* Note: On first sight, the jisx0212_2charset[] table seems to be in order,
886 starting from the charset=0x3021/uni=0x4e02 pair. But it's only mostly in
887 order. There are 75 out-of-order values, scattered all throughout the table.
890 static void do_normal_only_charset2uni (const char* name)
892 Encoding enc;
894 enc.rows = 94;
895 enc.cols = 94;
896 enc.row_byte = row_byte_normal;
897 enc.col_byte = col_byte_normal;
898 enc.byte_row = byte_row_normal;
899 enc.byte_col = byte_col_normal;
900 enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
901 enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
902 enc.byte_row_expr = "%1$s - 0x21";
903 enc.byte_col_expr = "%1$s - 0x21";
905 read_table(&enc);
906 output_charset2uni(name,&enc);
/* CNS 11643 specifics - trick to put two tables into one */

/* Maps a combined row to a "byte" value: the table number (row / 94) goes
   into bits 8 and up, the row within the table plus 0x21 into the low byte. */
static int row_byte_cns11643 (int row) {
  return 0x100 * (row / 94) + (row % 94) + 0x21;
}
/* Inverse of row_byte_cns11643: bits 8 and up select the table, the low
   byte minus 0x21 is the row within it. No range check is performed. */
static int byte_row_cns11643 (int byte) {
  return (byte >> 8) * 94 + (byte & 0xff) - 0x21;
}
918 static void do_cns11643_only_uni2charset (const char* name)
920 Encoding enc;
922 enc.rows = 16*94;
923 enc.cols = 94;
924 enc.row_byte = row_byte_cns11643;
925 enc.col_byte = col_byte_normal;
926 enc.byte_row = byte_row_cns11643;
927 enc.byte_col = byte_col_normal;
928 enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
929 enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
930 enc.byte_row_expr = "%1$s - 0x21";
931 enc.byte_col_expr = "%1$s - 0x21";
933 read_table(&enc);
934 invert(&enc);
935 output_uni2charset_sparse(name,&enc,false);
/* GBK specifics */

/* Returns the 1st byte value for a row: rows start at byte 0x81. */
static int row_byte_gbk1 (int row) {
  return 0x81+row;
}
/* Returns the 2nd byte value for a column. Columns 0..0x3e map to bytes
   0x40..0x7e; columns from 0x3f on map to bytes from 0x80 on, which skips
   byte 0x7f. */
static int col_byte_gbk1 (int col) {
  return (col >= 0x3f ? 0x41 : 0x40) + col;
}
/* Converts a 1st byte value in 0x81..0xfe to a row, else returns -1. */
static int byte_row_gbk1 (int byte) {
  if (byte >= 0x81 && byte < 0xff)
    return byte-0x81;
  else
    return -1;
}
/* Converts a 2nd byte value to a column: 0x40..0x7e map to 0..0x3e and
   0x80..0xfe map to 0x3f..0xbd. Any other byte returns -1. */
static int byte_col_gbk1 (int byte) {
  if (byte >= 0x40 && byte < 0x7f)
    return byte-0x40;
  else if (byte >= 0x80 && byte < 0xff)
    return byte-0x41;
  else
    return -1;
}
961 static void do_gbk1 (const char* name)
963 Encoding enc;
965 enc.rows = 126;
966 enc.cols = 190;
967 enc.row_byte = row_byte_gbk1;
968 enc.col_byte = col_byte_gbk1;
969 enc.byte_row = byte_row_gbk1;
970 enc.byte_col = byte_col_gbk1;
971 enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
972 enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
973 enc.byte_row_expr = "%1$s - 0x81";
974 enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
976 read_table(&enc);
977 output_charset2uni(name,&enc);
978 invert(&enc); output_uni2charset_dense(name,&enc);
981 static void do_gbk1_only_charset2uni (const char* name)
983 Encoding enc;
985 enc.rows = 126;
986 enc.cols = 190;
987 enc.row_byte = row_byte_gbk1;
988 enc.col_byte = col_byte_gbk1;
989 enc.byte_row = byte_row_gbk1;
990 enc.byte_col = byte_col_gbk1;
991 enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
992 enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
993 enc.byte_row_expr = "%1$s - 0x81";
994 enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
996 read_table(&enc);
997 output_charset2uni(name,&enc);
/* Returns the 1st byte value for a row: rows start at byte 0x81. */
static int row_byte_gbk2 (int row) {
  return 0x81+row;
}
/* Returns the 2nd byte value for a column. Columns 0..0x3e map to bytes
   0x40..0x7e; columns from 0x3f on map to bytes from 0x80 on, which skips
   byte 0x7f. */
static int col_byte_gbk2 (int col) {
  return (col >= 0x3f ? 0x41 : 0x40) + col;
}
/* Converts a 1st byte value in 0x81..0xfe to a row, else returns -1. */
static int byte_row_gbk2 (int byte) {
  if (byte >= 0x81 && byte < 0xff)
    return byte-0x81;
  else
    return -1;
}
/* Converts a 2nd byte value to a column: 0x40..0x7e map to 0..0x3e and
   0x80..0xa0 map to 0x3f..0x5f. Any other byte returns -1. */
static int byte_col_gbk2 (int byte) {
  if (byte >= 0x40 && byte < 0x7f)
    return byte-0x40;
  else if (byte >= 0x80 && byte < 0xa1)
    return byte-0x41;
  else
    return -1;
}
1021 static void do_gbk2_only_charset2uni (const char* name)
1023 Encoding enc;
1025 enc.rows = 126;
1026 enc.cols = 96;
1027 enc.row_byte = row_byte_gbk2;
1028 enc.col_byte = col_byte_gbk2;
1029 enc.byte_row = byte_row_gbk2;
1030 enc.byte_col = byte_col_gbk2;
1031 enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
1032 enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xa1)";
1033 enc.byte_row_expr = "%1$s - 0x81";
1034 enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
1036 read_table(&enc);
1037 output_charset2uni(name,&enc);
1040 static void do_gbk1_only_uni2charset (const char* name)
1042 Encoding enc;
1044 enc.rows = 126;
1045 enc.cols = 190;
1046 enc.row_byte = row_byte_gbk1;
1047 enc.col_byte = col_byte_gbk1;
1048 enc.byte_row = byte_row_gbk1;
1049 enc.byte_col = byte_col_gbk1;
1050 enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
1051 enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
1052 enc.byte_row_expr = "%1$s - 0x81";
1053 enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
1055 read_table(&enc);
1056 invert(&enc); output_uni2charset_sparse(name,&enc,false);
1059 /* KSC 5601 specifics */
1062 * Reads the charset2uni table from standard input.
1064 static void read_table_ksc5601 (Encoding* enc)
1066 int row, col, i, i1, i2, c, j;
1068 enc->charset2uni = (int**) malloc(enc->rows*sizeof(int*));
1069 for (row = 0; row < enc->rows; row++)
1070 enc->charset2uni[row] = (int*) malloc(enc->cols*sizeof(int));
1072 for (row = 0; row < enc->rows; row++)
1073 for (col = 0; col < enc->cols; col++)
1074 enc->charset2uni[row][col] = 0xfffd;
1076 c = getc(stdin);
1077 ungetc(c,stdin);
1078 if (c == '#') {
1079 /* Read a unicode.org style .TXT file. */
1080 for (;;) {
1081 c = getc(stdin);
1082 if (c == EOF)
1083 break;
1084 if (c == '\n' || c == ' ' || c == '\t')
1085 continue;
1086 if (c == '#') {
1087 do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
1088 continue;
1090 ungetc(c,stdin);
1091 if (scanf("0x%x", &j) != 1)
1092 exit(1);
1093 i1 = j >> 8;
1094 i2 = j & 0xff;
1095 if (scanf(" 0x%x", &j) != 1)
1096 exit(1);
1097 /* Take only the range covered by KS C 5601.1987-0 = KS C 5601.1989-0
1098 = KS X 1001.1992, ignore the rest. */
1099 if (!(i1 >= 128+33 && i1 < 128+127 && i2 >= 128+33 && i2 < 128+127))
1100 continue; /* KSC5601 specific */
1101 i1 &= 0x7f; /* KSC5601 specific */
1102 i2 &= 0x7f; /* KSC5601 specific */
1103 row = enc->byte_row(i1);
1104 col = enc->byte_col(i2);
1105 if (row < 0 || col < 0) {
1106 fprintf(stderr, "lost entry for %02x %02x\n", i1, i2);
1107 exit(1);
1109 enc->charset2uni[row][col] = j;
1111 } else {
1112 /* Read a table of hexadecimal Unicode values. */
1113 for (i1 = 33; i1 < 127; i1++)
1114 for (i2 = 33; i2 < 127; i2++) {
1115 i = scanf("%x", &j);
1116 if (i == EOF)
1117 goto read_done;
1118 if (i != 1)
1119 exit(1);
1120 if (j < 0 || j == 0xffff)
1121 j = 0xfffd;
1122 if (j != 0xfffd) {
1123 if (enc->byte_row(i1) < 0 || enc->byte_col(i2) < 0) {
1124 fprintf(stderr, "lost entry at %02x %02x\n", i1, i2);
1125 exit (1);
1127 enc->charset2uni[enc->byte_row(i1)][enc->byte_col(i2)] = j;
1130 read_done: ;
1134 static void do_ksc5601 (const char* name)
1136 Encoding enc;
1138 enc.rows = 94;
1139 enc.cols = 94;
1140 enc.row_byte = row_byte_normal;
1141 enc.col_byte = col_byte_normal;
1142 enc.byte_row = byte_row_normal;
1143 enc.byte_col = byte_col_normal;
1144 enc.check_row_expr = "%1$s >= 0x21 && %1$s < 0x7f";
1145 enc.check_col_expr = "%1$s >= 0x21 && %1$s < 0x7f";
1146 enc.byte_row_expr = "%1$s - 0x21";
1147 enc.byte_col_expr = "%1$s - 0x21";
1149 read_table_ksc5601(&enc);
1150 output_charset2uni(name,&enc);
1151 invert(&enc); output_uni2charset_sparse(name,&enc,false);
1154 /* UHC specifics */
1156 /* UHC part 1: 0x{81..A0}{41..5A,61..7A,81..FE} */
/* Lead byte for row ROW of UHC part 1; row 0 corresponds to byte 0x81. */
static int row_byte_uhc_1 (int row)
{
  return row + 0x81;
}
/* Trail byte for column COL of UHC part 1.  The columns are laid out in
   three runs: columns 0..25 start at 0x41, columns 26..51 start at 0x61,
   and columns from 52 on start at 0x81. */
static int col_byte_uhc_1 (int col)
{
  int offset;

  if (col < 0x1a)
    offset = 0x41;
  else if (col < 0x34)
    offset = 0x47;
  else
    offset = 0x4d;
  return offset + col;
}
/* Row index for lead byte BYTE of UHC part 1: 0x81..0xA0 give rows
   0..31, anything else gives -1. */
static int byte_row_uhc_1 (int byte)
{
  if (byte < 0x81 || byte > 0xa0)
    return -1;
  return byte - 0x81;
}
/* Column index for trail byte BYTE of UHC part 1:
   0x41..0x5A -> 0..25, 0x61..0x7A -> 26..51, 0x81..0xFE -> 52..177.
   Returns -1 for bytes outside those three runs. */
static int byte_col_uhc_1 (int byte)
{
  return (byte >= 0x41 && byte <= 0x5a) ? byte - 0x41
       : (byte >= 0x61 && byte <= 0x7a) ? byte - 0x47
       : (byte >= 0x81 && byte <= 0xfe) ? byte - 0x4d
       : -1;
}
1181 static void do_uhc_1 (const char* name)
1183 Encoding enc;
1185 enc.rows = 32;
1186 enc.cols = 178;
1187 enc.row_byte = row_byte_uhc_1;
1188 enc.col_byte = col_byte_uhc_1;
1189 enc.byte_row = byte_row_uhc_1;
1190 enc.byte_col = byte_col_uhc_1;
1191 enc.check_row_expr = "(%1$s >= 0x81 && %1$s < 0xa1)";
1192 enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xff)";
1193 enc.byte_row_expr = "%1$s - 0x81";
1194 enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
1196 read_table(&enc);
1197 output_charset2uni_noholes_monotonic(name,&enc);
1198 invert(&enc); output_uni2charset_sparse(name,&enc,true);
1201 /* UHC part 2: 0x{A1..C6}{41..5A,61..7A,81..A0} */
/* Lead byte for row ROW of UHC part 2; row 0 corresponds to byte 0xA1. */
static int row_byte_uhc_2 (int row)
{
  return row + 0xa1;
}
/* Trail byte for column COL of UHC part 2.  Columns 0..25 start at 0x41,
   columns 26..51 start at 0x61, and columns from 52 on start at 0x81. */
static int col_byte_uhc_2 (int col)
{
  int offset = 0x41;

  if (col >= 0x34)
    offset = 0x4d;
  else if (col >= 0x1a)
    offset = 0x47;
  return offset + col;
}
/* Row index for lead byte BYTE of UHC part 2: 0xA1..0xFE give rows
   0..93, anything else gives -1. */
static int byte_row_uhc_2 (int byte)
{
  int row = -1;

  if (byte >= 0xa1 && byte <= 0xfe)
    row = byte - 0xa1;
  return row;
}
/* Column index for trail byte BYTE of UHC part 2:
   0x41..0x5A -> 0..25, 0x61..0x7A -> 26..51, 0x81..0xA0 -> 52..83.
   Returns -1 for bytes outside those three runs. */
static int byte_col_uhc_2 (int byte)
{
  if (byte >= 0x81)
    return (byte <= 0xa0) ? byte - 0x4d : -1;
  if (byte >= 0x61)
    return (byte <= 0x7a) ? byte - 0x47 : -1;
  if (byte >= 0x41)
    return (byte <= 0x5a) ? byte - 0x41 : -1;
  return -1;
}
1226 static void do_uhc_2 (const char* name)
1228 Encoding enc;
1230 enc.rows = 94;
1231 enc.cols = 84;
1232 enc.row_byte = row_byte_uhc_2;
1233 enc.col_byte = col_byte_uhc_2;
1234 enc.byte_row = byte_row_uhc_2;
1235 enc.byte_col = byte_col_uhc_2;
1236 enc.check_row_expr = "(%1$s >= 0xa1 && %1$s < 0xff)";
1237 enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xa1)";
1238 enc.byte_row_expr = "%1$s - 0xa1";
1239 enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
1241 read_table(&enc);
1242 output_charset2uni_noholes_monotonic(name,&enc);
1243 invert(&enc); output_uni2charset_sparse(name,&enc,true);
1246 /* Big5 specifics */
/* Lead byte for Big5 table row ROW; row 0 corresponds to byte 0xA1. */
static int row_byte_big5 (int row)
{
  return row + 0xa1;
}
/* Trail byte for Big5 table column COL.  Columns 0..62 start at byte
   0x40; columns from 63 on start at byte 0xA1. */
static int col_byte_big5 (int col)
{
  if (col >= 0x3f)
    return 0x62 + col;
  return 0x40 + col;
}
/* Row index for Big5 lead byte BYTE: 0xA1..0xFE give rows 0..93,
   anything else gives -1. */
static int byte_row_big5 (int byte)
{
  return (byte < 0xa1 || byte > 0xfe) ? -1 : byte - 0xa1;
}
/* Column index for Big5 trail byte BYTE: 0x40..0x7E give columns 0..62,
   0xA1..0xFE give columns 63..156, anything else gives -1. */
static int byte_col_big5 (int byte)
{
  int col = -1;

  if (byte >= 0xa1) {
    if (byte <= 0xfe)
      col = byte - 0x62;
  } else if (byte >= 0x40 && byte <= 0x7e) {
    col = byte - 0x40;
  }
  return col;
}
1269 static void do_big5 (const char* name)
1271 Encoding enc;
1273 enc.rows = 94;
1274 enc.cols = 157;
1275 enc.row_byte = row_byte_big5;
1276 enc.col_byte = col_byte_big5;
1277 enc.byte_row = byte_row_big5;
1278 enc.byte_col = byte_col_big5;
1279 enc.check_row_expr = "%1$s >= 0xa1 && %1$s < 0xff";
1280 enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
1281 enc.byte_row_expr = "%1$s - 0xa1";
1282 enc.byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
1284 read_table(&enc);
1285 output_charset2uni(name,&enc);
1286 invert(&enc); output_uni2charset_sparse(name,&enc,false);
1289 /* HKSCS specifics */
/* Lead byte for HKSCS table row ROW; row 0 corresponds to byte 0x80. */
static int row_byte_hkscs (int row)
{
  return row + 0x80;
}
/* Row index for HKSCS lead byte BYTE: 0x80..0xFE give rows 0..126,
   anything else gives -1. */
static int byte_row_hkscs (int byte)
{
  if (byte < 0x80 || byte > 0xfe)
    return -1;
  return byte - 0x80;
}
1301 static void do_hkscs (const char* name)
1303 Encoding enc;
1305 enc.rows = 128;
1306 enc.cols = 157;
1307 enc.row_byte = row_byte_hkscs;
1308 enc.col_byte = col_byte_big5;
1309 enc.byte_row = byte_row_hkscs;
1310 enc.byte_col = byte_col_big5;
1311 enc.check_row_expr = "%1$s >= 0x80 && %1$s < 0xff";
1312 enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
1313 enc.byte_row_expr = "%1$s - 0x80";
1314 enc.byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
1316 read_table(&enc);
1317 output_charset2uni(name,&enc);
1318 invert(&enc); output_uni2charset_sparse(name,&enc,false);
1321 /* Johab Hangul specifics */
/* Lead byte for Johab Hangul table row ROW; row 0 corresponds to 0x84. */
static int row_byte_johab_hangul (int row)
{
  return row + 0x84;
}
/* Trail byte for Johab Hangul table column COL.  Columns 0..61 start at
   byte 0x41; columns from 62 on start at byte 0x81. */
static int col_byte_johab_hangul (int col)
{
  int offset = 0x41;

  if (col >= 0x3e)
    offset = 0x43;
  return offset + col;
}
/* Row index for Johab Hangul lead byte BYTE: 0x84..0xD3 give rows
   0..79, anything else gives -1. */
static int byte_row_johab_hangul (int byte)
{
  return (byte >= 0x84 && byte <= 0xd3) ? byte - 0x84 : -1;
}
/* Column index for Johab Hangul trail byte BYTE: 0x41..0x7E give
   columns 0..61, 0x81..0xFE give columns 62..187, anything else
   gives -1. */
static int byte_col_johab_hangul (int byte)
{
  if (byte >= 0x81)
    return (byte <= 0xfe) ? byte - 0x43 : -1;
  if (byte >= 0x41 && byte <= 0x7e)
    return byte - 0x41;
  return -1;
}
1344 static void do_johab_hangul (const char* name)
1346 Encoding enc;
1348 enc.rows = 80;
1349 enc.cols = 188;
1350 enc.row_byte = row_byte_johab_hangul;
1351 enc.col_byte = col_byte_johab_hangul;
1352 enc.byte_row = byte_row_johab_hangul;
1353 enc.byte_col = byte_col_johab_hangul;
1354 enc.check_row_expr = "%1$s >= 0x84 && %1$s < 0xd4";
1355 enc.check_col_expr = "(%1$s >= 0x41 && %1$s < 0x7f) || (%1$s >= 0x81 && %1$s < 0xff)";
1356 enc.byte_row_expr = "%1$s - 0x84";
1357 enc.byte_col_expr = "%1$s - (%1$s >= 0x81 ? 0x43 : 0x41)";
1359 read_table(&enc);
1360 output_charset2uni(name,&enc);
1361 invert(&enc); output_uni2charset_dense(name,&enc);
1364 /* SJIS specifics */
/* Lead byte for Shift_JIS table row ROW.  Rows 0..30 start at byte 0x81;
   rows from 31 on start at byte 0xE0. */
static int row_byte_sjis (int row)
{
  if (row >= 0x1f)
    return 0xc1 + row;
  return 0x81 + row;
}
/* Trail byte for Shift_JIS table column COL.  Columns 0..62 start at
   byte 0x40; from column 63 on the result is shifted by one so that
   0x7F is skipped. */
static int col_byte_sjis (int col)
{
  int byte = col + 0x40;

  if (col >= 0x3f)
    byte++;
  return byte;
}
/* Row index for Shift_JIS lead byte BYTE: 0x81..0x9F give rows 0..30,
   and every value from 0xE0 upwards gives BYTE - 0xC1 (there is no
   upper bound check).  Anything else gives -1. */
static int byte_row_sjis (int byte)
{
  if (byte >= 0xe0)
    return byte - 0xc1;
  if (byte >= 0x81 && byte <= 0x9f)
    return byte - 0x81;
  return -1;
}
/* Column index for Shift_JIS trail byte BYTE: 0x40..0x7E give columns
   0..62, 0x80..0xFC give columns 63..187, anything else gives -1. */
static int byte_col_sjis (int byte)
{
  return (byte >= 0x40 && byte <= 0x7e) ? byte - 0x40
       : (byte >= 0x80 && byte <= 0xfc) ? byte - 0x41
       : -1;
}
1389 static void do_sjis (const char* name)
1391 Encoding enc;
1393 enc.rows = 94;
1394 enc.cols = 188;
1395 enc.row_byte = row_byte_sjis;
1396 enc.col_byte = col_byte_sjis;
1397 enc.byte_row = byte_row_sjis;
1398 enc.byte_col = byte_col_sjis;
1399 enc.check_row_expr = "(%1$s >= 0x81 && %1$s < 0xa0) || (%1$s >= 0xe0)";
1400 enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xfd)";
1401 enc.byte_row_expr = "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)";
1402 enc.byte_col_expr = "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
1404 read_table(&enc);
1405 output_charset2uni(name,&enc);
1406 invert(&enc); output_uni2charset_sparse(name,&enc,false);
1409 /* GB18030 Unicode specifics */
/* Generates the tables and the two converter functions for four-byte
   GB18030 codes that map to Unicode values below 0x10000.

   Input (standard input) is a unicode.org style .TXT file: '#' starts a
   comment, and every other line holds "0xB1B2B3B4 0xUUUU".  Each of the
   four bytes must lie in 0x81..0x84, 0x30..0x39, 0x81..0xFE, 0x30..0x39
   respectively; they are folded into the linear index
     i = (((b1-0x81)*10 + (b2-0x30))*126 + (b3-0x81))*10 + (b4-0x30).

   The mapping i -> unicode is then compressed into at most 256 ranges of
   constant difference (unicode - i), plus a bitmap that records which
   indices inside each range are actually mapped.  Written to standard
   output, with NAME as identifier prefix, are:
     NAME_charset2uni_ranges[], NAME_uni2charset_ranges[], NAME_ranges[],
     NAME_bitmap[], and the functions NAME_mbtowc and NAME_wctomb.

   The process exits with status 1 on malformed or unexpected input,
   including input that contains no mappings at all. */
static void do_gb18030uni (const char* name)
{
  int c;
  unsigned int bytes;
  unsigned int ucs;   /* scanf's %x conversion stores an unsigned int */
  int i1, i2, i3, i4, i, j, k;
  int charset2uni[4*10*126*10];
  struct { int low; int high; int diff; int total; } ranges[256];
  int ranges_count, ranges_total;

  /* 0 marks "no mapping for this index". */
  for (i = 0; i < 4*10*126*10; i++)
    charset2uni[i] = 0;

  /* Read a unicode.org style .TXT file. */
  for (;;) {
    c = getc(stdin);
    if (c == EOF)
      break;
    if (c == '\n' || c == ' ' || c == '\t')
      continue;
    if (c == '#') {
      do { c = getc(stdin); } while (!(c == EOF || c == '\n'));
      continue;
    }
    ungetc(c,stdin);
    if (scanf("0x%x", &bytes) != 1)
      exit(1);
    i1 = (bytes >> 24) & 0xff;
    i2 = (bytes >> 16) & 0xff;
    i3 = (bytes >> 8) & 0xff;
    i4 = bytes & 0xff;
    if (!(i1 >= 0x81 && i1 <= 0x84
          && i2 >= 0x30 && i2 <= 0x39
          && i3 >= 0x81 && i3 <= 0xfe
          && i4 >= 0x30 && i4 <= 0x39)) {
      fprintf(stderr, "lost entry for %02x %02x %02x %02x\n", i1, i2, i3, i4);
      exit(1);
    }
    i = (((i1-0x81) * 10 + (i2-0x30)) * 126 + (i3-0x81)) * 10 + (i4-0x30);
    if (scanf(" 0x%x", &ucs) != 1)
      exit(1);
    if (!(ucs < 0x10000))
      exit(1);
    charset2uni[i] = (int) ucs;
  }

  /* Verify that the mapping i -> j is monotonically increasing and
     of the form
        low[k] <= i <= high[k]  =>  j = diff[k] + i
     with a set of disjoint intervals (low[k], high[k]). */
  ranges_count = 0;
  for (i = 0; i < 4*10*126*10; i++)
    if (charset2uni[i] != 0) {
      int diff;
      j = charset2uni[i];
      diff = j - i;
      if (ranges_count > 0) {
        if (!(i > ranges[ranges_count-1].high))
          exit(1);
        if (!(j > ranges[ranges_count-1].high + ranges[ranges_count-1].diff))
          exit(1);
        /* Additional property: The diffs are also increasing. */
        if (!(diff >= ranges[ranges_count-1].diff))
          exit(1);
      }
      if (ranges_count > 0 && diff == ranges[ranges_count-1].diff)
        ranges[ranges_count-1].high = i;
      else {
        if (ranges_count == 256)
          exit(1);
        ranges[ranges_count].low = i;
        ranges[ranges_count].high = i;
        ranges[ranges_count].diff = diff;
        ranges_count++;
      }
    }

  /* Everything below reads ranges[0] and ranges[ranges_count-1]; with no
     mappings at all those would be uninitialised / out of bounds. */
  if (ranges_count == 0)
    exit(1);

  /* Determine size of bitmap. */
  ranges_total = 0;
  for (k = 0; k < ranges_count; k++) {
    ranges[k].total = ranges_total;
    ranges_total += ranges[k].high - ranges[k].low + 1;
  }

  printf("static const unsigned short %s_charset2uni_ranges[%d] = {\n", name, 2*ranges_count);
  for (k = 0; k < ranges_count; k++) {
    printf(" 0x%04x, 0x%04x", ranges[k].low, ranges[k].high);
    if (k+1 < ranges_count) printf(",");
    if ((k % 4) == 3 && k+1 < ranges_count) printf("\n");
  }
  printf("\n");
  printf("};\n");

  printf("\n");

  printf("static const unsigned short %s_uni2charset_ranges[%d] = {\n", name, 2*ranges_count);
  for (k = 0; k < ranges_count; k++) {
    printf(" 0x%04x, 0x%04x", ranges[k].low + ranges[k].diff, ranges[k].high + ranges[k].diff);
    if (k+1 < ranges_count) printf(",");
    if ((k % 4) == 3 && k+1 < ranges_count) printf("\n");
  }
  printf("\n");
  printf("};\n");

  printf("\n");

  printf("static const struct { unsigned short diff; unsigned short bitmap_offset; } %s_ranges[%d] = {\n ", name, ranges_count);
  for (k = 0; k < ranges_count; k++) {
    printf(" { %5d, 0x%04x }", ranges[k].diff, ranges[k].total);
    if (k+1 < ranges_count) printf(",");
    if ((k % 4) == 3 && k+1 < ranges_count) printf("\n ");
  }
  printf("\n");
  printf("};\n");

  printf("\n");

  printf("static const unsigned char %s_bitmap[%d] = {\n ", name, (ranges_total + 7) / 8);
  {
    /* Bits are collected least-significant first and flushed every 8;
       here i counts bitmap positions, not charset indices. */
    int accu = 0;
    for (k = 0; k < ranges_count; k++) {
      for (i = ranges[k].total; i <= ranges[k].total + (ranges[k].high - ranges[k].low);) {
        if (charset2uni[i - ranges[k].total + ranges[k].low] != 0)
          accu |= (1 << (i % 8));
        i++;
        if ((i % 8) == 0) {
          printf(" 0x%02x", accu);
          if ((i / 8) < (ranges_total + 7) / 8) printf(",");
          if (((i / 8) % 12) == 0)
            printf("\n ");
          accu = 0;
        }
      }
      /* Consistency check: ranges must tile the bitmap without gaps. */
      if (i != (k+1 < ranges_count ? ranges[k+1].total : ranges_total)) abort();
    }
    if ((ranges_total % 8) != 0)
      printf(" 0x%02x", accu);
    printf("\n");
  }
  printf("};\n");

  printf("\n");

  printf("static int\n");
  printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name);
  printf("{\n");
  printf(" unsigned char c1 = s[0];\n");
  printf(" if (c1 >= 0x81 && c1 <= 0x84) {\n");
  printf(" if (n >= 2) {\n");
  printf(" unsigned char c2 = s[1];\n");
  printf(" if (c2 >= 0x30 && c2 <= 0x39) {\n");
  printf(" if (n >= 3) {\n");
  printf(" unsigned char c3 = s[2];\n");
  printf(" if (c3 >= 0x81 && c3 <= 0xfe) {\n");
  printf(" if (n >= 4) {\n");
  printf(" unsigned char c4 = s[3];\n");
  printf(" if (c4 >= 0x30 && c4 <= 0x39) {\n");
  printf(" unsigned int i = (((c1 - 0x81) * 10 + (c2 - 0x30)) * 126 + (c3 - 0x81)) * 10 + (c4 - 0x30);\n");
  printf(" if (i >= %d && i <= %d) {\n", ranges[0].low, ranges[ranges_count-1].high);
  printf(" unsigned int k1 = 0;\n");
  printf(" unsigned int k2 = %d;\n", ranges_count-1);
  printf(" while (k1 < k2) {\n");
  printf(" unsigned int k = (k1 + k2) / 2;\n");
  printf(" if (i <= %s_charset2uni_ranges[2*k+1])\n", name);
  printf(" k2 = k;\n");
  printf(" else if (i >= %s_charset2uni_ranges[2*k+2])\n", name);
  printf(" k1 = k + 1;\n");
  printf(" else\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" {\n");
  printf(" unsigned int bitmap_index = i - %s_charset2uni_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name, name);
  printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name);
  printf(" unsigned int diff = %s_ranges[k1].diff;\n", name);
  printf(" *pwc = (ucs4_t) (i + diff);\n");
  printf(" return 4;\n");
  printf(" }\n");
  printf(" }\n");
  printf(" }\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" return RET_TOOFEW(0);\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" return RET_TOOFEW(0);\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf(" }\n");
  printf(" return RET_TOOFEW(0);\n");
  printf(" }\n");
  printf(" return RET_ILSEQ;\n");
  printf("}\n");

  printf("\n");

  printf("static int\n");
  printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name);
  printf("{\n");
  printf(" if (n >= 4) {\n");
  printf(" unsigned int i = wc;\n");
  printf(" if (i >= 0x%04x && i <= 0x%04x) {\n", ranges[0].low + ranges[0].diff, ranges[ranges_count-1].high + ranges[ranges_count-1].diff);
  printf(" unsigned int k1 = 0;\n");
  printf(" unsigned int k2 = %d;\n", ranges_count-1);
  printf(" while (k1 < k2) {\n");
  printf(" unsigned int k = (k1 + k2) / 2;\n");
  printf(" if (i <= %s_uni2charset_ranges[2*k+1])\n", name);
  printf(" k2 = k;\n");
  printf(" else if (i >= %s_uni2charset_ranges[2*k+2])\n", name);
  printf(" k1 = k + 1;\n");
  printf(" else\n");
  printf(" return RET_ILUNI;\n");
  printf(" }\n");
  printf(" {\n");
  printf(" unsigned int bitmap_index = i - %s_uni2charset_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name, name);
  printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name);
  printf(" unsigned int diff = %s_ranges[k1].diff;\n", name);
  printf(" i -= diff;\n");
  printf(" r[3] = (i %% 10) + 0x30; i = i / 10;\n");
  printf(" r[2] = (i %% 126) + 0x81; i = i / 126;\n");
  printf(" r[1] = (i %% 10) + 0x30; i = i / 10;\n");
  printf(" r[0] = i + 0x81;\n");
  printf(" return 4;\n");
  printf(" }\n");
  printf(" }\n");
  printf(" }\n");
  printf(" return RET_ILUNI;\n");
  printf(" }\n");
  printf(" return RET_TOOSMALL;\n");
  printf("}\n");
}
1647 /* JISX0213 specifics */
/* Generates the complete jisx0213.h header on standard output.

   Input (standard input) is read twice, so it must be seekable.  Every
   line has one of two fixed layouts:
     "0xPRRCC<TAB>0xUUUU"          a single Unicode value (hex digits from
                                   column 10 on), or
     "0xPRRCC<TAB>0xUUUU 0xVVVV"   (exactly 21 characters) a base character
                                   followed by a combining character,
   where P is the plane digit and RR / CC are the row and column bytes.

   Pass 1 builds the charset -> Unicode direction: the
   jisx0213_to_ucs_combining, jisx0213_to_ucs_main and
   jisx0213_to_ucs_pagestart tables.  Combining pairs are stored in the
   main table as small consecutive numbers starting at 1.
   Pass 2 builds the Unicode -> charset direction: the
   jisx0213_from_ucs_level1 table and the level-2 tables.
   Finally the accessor functions jisx0213_to_ucs4 and ucs4_to_jisx0213
   are written.

   NAME is not used; all generated identifiers are hard-coded.
   Malformed input trips an assert; an unseekable or empty input makes the
   process exit with status 1. */
static void do_jisx0213 (const char* name)
{
  (void) name;

  printf("#ifndef _JISX0213_H\n");
  printf("#define _JISX0213_H\n");
  printf("\n");
  printf("/* JISX0213 plane 1 (= ISO-IR-233) characters are in the range\n");
  printf(" 0x{21..7E}{21..7E}.\n");
  printf(" JISX0213 plane 2 (= ISO-IR-229) characters are in the range\n");
  printf(" 0x{21,23..25,28,2C..2F,6E..7E}{21..7E}.\n");
  printf(" Together this makes 120 rows of 94 characters.\n");
  printf("*/\n");
  printf("\n");
  {
    /* Maps a plane+row value (0x121..0x17E, 0x221, ...) to a compact row
       index 0..119, or to -1 if the row does not exist. */
#define row_convert(row) \
      ((row) >= 0x121 && (row) <= 0x17E ? (row)-289 : /* 0..93 */ \
       (row) == 0x221 ? (row)-451 : /* 94 */ \
       (row) >= 0x223 && (row) <= 0x225 ? (row)-452 : /* 95..97 */ \
       (row) == 0x228 ? (row)-454 : /* 98 */ \
       (row) >= 0x22C && (row) <= 0x22F ? (row)-457 : /* 99..102 */ \
       (row) >= 0x26E && (row) <= 0x27E ? (row)-519 : /* 103..119 */ \
       -1)
    unsigned int table[120][94];
    int pagemin[0x1100];
    int pagemax[0x1100];
    int pageidx[0x1100];
    unsigned int pagestart[0x1100];
    unsigned int pagestart_len = 0;
    {
      unsigned int rowc, colc;
      for (rowc = 0; rowc < 120; rowc++)
        for (colc = 0; colc < 94; colc++)
          table[rowc][colc] = 0;
    }
    {
      unsigned int page;
      for (page = 0; page < 0x1100; page++)
        pagemin[page] = -1;
      for (page = 0; page < 0x1100; page++)
        pagemax[page] = -1;
      for (page = 0; page < 0x1100; page++)
        pageidx[page] = -1;
    }
    printf("static const unsigned short jisx0213_to_ucs_combining[][2] = {\n");
    {
      int private_use = 0x0001;
      for (;;) {
        char line[30];
        unsigned int row, col;
        unsigned int ucs;
        memset(line,0,sizeof(line));
        /* The field width keeps an over-long line from overflowing line[]. */
        if (scanf("%29[^\n]\n",line) < 1)
          break;
        assert(line[0]=='0');
        assert(line[1]=='x');
        assert(isxdigit((unsigned char) line[2]));
        assert(isxdigit((unsigned char) line[3]));
        assert(isxdigit((unsigned char) line[4]));
        assert(isxdigit((unsigned char) line[5]));
        assert(isxdigit((unsigned char) line[6]));
        assert(line[7]=='\t');
        /* Cut the code into "PRR" (row) and "CC" (col) in place. */
        line[7] = '\0';
        col = strtoul(&line[5],NULL,16);
        line[5] = '\0';
        row = strtoul(&line[2],NULL,16);
        if (line[20] != '\0' && line[21] == '\0') {
          /* Exactly 21 characters: a base + combining character pair. */
          unsigned int u1, u2;
          assert(line[8]=='0');
          assert(line[9]=='x');
          assert(isxdigit((unsigned char) line[10]));
          assert(isxdigit((unsigned char) line[11]));
          assert(isxdigit((unsigned char) line[12]));
          assert(isxdigit((unsigned char) line[13]));
          assert(line[14]==' ');
          assert(line[15]=='0');
          assert(line[16]=='x');
          assert(isxdigit((unsigned char) line[17]));
          assert(isxdigit((unsigned char) line[18]));
          assert(isxdigit((unsigned char) line[19]));
          assert(isxdigit((unsigned char) line[20]));
          u2 = strtoul(&line[17],NULL,16);
          line[14] = '\0';
          u1 = strtoul(&line[10],NULL,16);
          printf(" { 0x%04x, 0x%04x },\n", u1, u2);
          ucs = private_use++;
        } else {
          assert(line[8]=='0');
          assert(line[9]=='x');
          assert(isxdigit((unsigned char) line[10]));
          assert(isxdigit((unsigned char) line[11]));
          assert(isxdigit((unsigned char) line[12]));
          assert(isxdigit((unsigned char) line[13]));
          ucs = strtoul(&line[10],NULL,16);
        }
        /* ucs >> 8 is later used to index the 0x1100-entry page arrays. */
        assert(ucs < 0x110000);
        assert((unsigned int) row_convert(row) < 120);
        assert((unsigned int) (col-0x21) < 94);
        table[row_convert(row)][col-0x21] = ucs;
      }
    }
    printf("};\n");
    printf("\n");
    {
      /* Record, per 256-value page, the smallest and largest low byte used. */
      unsigned int rowc, colc;
      for (rowc = 0; rowc < 120; rowc++) {
        for (colc = 0; colc < 94; colc++) {
          unsigned int value = table[rowc][colc];
          unsigned int page = value >> 8;
          unsigned int rest = value & 0xff;
          if (pagemin[page] < 0 || pagemin[page] > rest) pagemin[page] = rest;
          if (pagemax[page] < 0 || pagemax[page] < rest) pagemax[page] = rest;
        }
      }
    }
    {
      unsigned int index = 0;
      unsigned int i;
      for (i = 0; i < 0x1100; ) {
        if (pagemin[i] >= 0) {
          /* The i+1 bound keeps the look-ahead inside the arrays. */
          if (i+1 < 0x1100
              && pagemin[i+1] >= 0 && pagemin[i] >= 0x80 && pagemax[i+1] < 0x80) {
            /* Combine two pages into a single one. */
            assert(pagestart_len < sizeof(pagestart)/sizeof(pagestart[0]));
            pagestart[pagestart_len++] = (i<<8)+0x80;
            pageidx[i] = index;
            pageidx[i+1] = index;
            index++;
            i += 2;
          } else {
            /* A single page. */
            assert(pagestart_len < sizeof(pagestart)/sizeof(pagestart[0]));
            pagestart[pagestart_len++] = i<<8;
            pageidx[i] = index;
            index++;
            i += 1;
          }
        } else
          i++;
      }
    }
    printf("static const unsigned short jisx0213_to_ucs_main[120 * 94] = {\n");
    {
      unsigned int row;
      for (row = 0; row < 0x300; row++) {
        unsigned int rowc = row_convert(row);
        if (rowc != (unsigned int) (-1)) {
          printf(" /* 0x%X21..0x%X7E */\n",row,row);
          {
            unsigned int count = 0;
            unsigned int colc;
            for (colc = 0; colc < 94; colc++) {
              if ((count % 8) == 0) printf(" ");
              {
                /* Each cell is (page index << 8) | offset from page start. */
                unsigned int value = table[rowc][colc];
                unsigned int page = value >> 8;
                unsigned int index = pageidx[page];
                assert(value-pagestart[index] < 0x100);
                printf(" 0x%04x,",(index<<8)|(value-pagestart[index]));
              }
              count++;
              if ((count % 8) == 0) printf("\n");
            }
          }
          printf("\n");
        }
      }
    }
    printf("};\n");
    printf("\n");
    printf("static const ucs4_t jisx0213_to_ucs_pagestart[] = {\n");
    {
      unsigned int count = 0;
      unsigned int i;
      for (i = 0; i < pagestart_len; i++) {
        char buf[10];
        if ((count % 8) == 0) printf(" ");
        printf(" ");
        snprintf(buf,sizeof(buf),"0x%04x",pagestart[i]);
        if (strlen(buf) < 7) printf("%*s",(int)(7-strlen(buf)),"");
        printf("%s,",buf);
        count++;
        if ((count % 8) == 0) printf("\n");
      }
    }
    printf("\n");
    printf("};\n");
#undef row_convert
  }
  /* Second pass over the same input.  Unlike rewind(), fseek() reports
     failure, e.g. when standard input is a pipe. */
  if (fseek(stdin, 0L, SEEK_SET) != 0) {
    fprintf(stderr, "cannot seek back to the start of standard input\n");
    exit(1);
  }
  printf("\n");
  {
    /* Static storage: at more than 4 MiB this table is too large to put
       on the stack safely. */
    static int table[0x110000];
    bool pages[0x4400];
    int maxpage = -1;
    unsigned int combining_prefixes[100];
    unsigned int combining_prefixes_len = 0;
    {
      unsigned int i;
      for (i = 0; i < 0x110000; i++)
        table[i] = -1;
      for (i = 0; i < 0x4400; i++)
        pages[i] = false;
    }
    for (;;) {
      char line[30];
      unsigned int plane, row, col;
      memset(line,0,sizeof(line));
      if (scanf("%29[^\n]\n",line) < 1)
        break;
      assert(line[0]=='0');
      assert(line[1]=='x');
      assert(isxdigit((unsigned char) line[2]));
      assert(isxdigit((unsigned char) line[3]));
      assert(isxdigit((unsigned char) line[4]));
      assert(isxdigit((unsigned char) line[5]));
      assert(isxdigit((unsigned char) line[6]));
      assert(line[7]=='\t');
      /* Cut the code into "P" (plane), "RR" (row) and "CC" (col) in place. */
      line[7] = '\0';
      col = strtoul(&line[5],NULL,16);
      line[5] = '\0';
      row = strtoul(&line[3],NULL,16);
      line[3] = '\0';
      plane = strtoul(&line[2],NULL,16) - 1;
      if (line[20] != '\0' && line[21] == '\0') {
        unsigned int u1, u2;
        assert(line[8]=='0');
        assert(line[9]=='x');
        assert(isxdigit((unsigned char) line[10]));
        assert(isxdigit((unsigned char) line[11]));
        assert(isxdigit((unsigned char) line[12]));
        assert(isxdigit((unsigned char) line[13]));
        assert(line[14]==' ');
        assert(line[15]=='0');
        assert(line[16]=='x');
        assert(isxdigit((unsigned char) line[17]));
        assert(isxdigit((unsigned char) line[18]));
        assert(isxdigit((unsigned char) line[19]));
        assert(isxdigit((unsigned char) line[20]));
        u2 = strtoul(&line[17],NULL,16);
        line[14] = '\0';
        u1 = strtoul(&line[10],NULL,16);
        assert(u2 == 0x02E5 || u2 == 0x02E9 || u2 == 0x0300 || u2 == 0x0301
               || u2 == 0x309A);
        assert(combining_prefixes_len < sizeof(combining_prefixes)/sizeof(combining_prefixes[0]));
        combining_prefixes[combining_prefixes_len++] = u1;
      } else {
        unsigned int ucs;
        assert(line[8]=='0');
        assert(line[9]=='x');
        assert(isxdigit((unsigned char) line[10]));
        assert(isxdigit((unsigned char) line[11]));
        assert(isxdigit((unsigned char) line[12]));
        assert(isxdigit((unsigned char) line[13]));
        ucs = strtoul(&line[10],NULL,16);
        /* ucs indexes table[] and ucs >> 6 indexes pages[]. */
        assert(ucs < 0x110000);
        /* Add an entry. */
        assert(plane <= 1);
        assert(row <= 0x7f);
        assert(col <= 0x7f);
        table[ucs] = (plane << 15) | (row << 8) | col;
        pages[ucs>>6] = true;
        if (maxpage < 0 || (ucs>>6) > maxpage) maxpage = ucs>>6;
      }
    }
    /* The loops below compare an unsigned counter against maxpage; with
       maxpage still -1 they would never terminate. */
    if (maxpage < 0) {
      fprintf(stderr, "no mappings found on standard input\n");
      exit(1);
    }
    {
      /* Flag every character that can start a combining sequence. */
      unsigned int i;
      for (i = 0; i < combining_prefixes_len; i++) {
        unsigned int u1 = combining_prefixes[i];
        assert(table[u1] >= 0);
        table[u1] |= 0x0080;
      }
    }
    printf("static const short jisx0213_from_ucs_level1[%d] = {\n",maxpage+1);
    {
      unsigned int index = 0;
      unsigned int i;
      for (i = 0; i <= maxpage; i++) {
        if ((i % 8) == 0) printf(" ");
        if (pages[i]) {
          printf(" %3u,",index);
          index++;
        } else {
          printf(" %3d,",-1);
        }
        if (((i+1) % 8) == 0) printf("\n");
      }
    }
    printf("\n");
    printf("};\n");
    printf("\n");
#if 0 /* Dense array */
    printf("static const unsigned short jisx0213_from_ucs_level2[] = {\n");
    {
      unsigned int i;
      for (i = 0; i <= maxpage; i++) {
        if (pages[i]) {
          printf(" /* 0x%04X */\n",i<<6);
          {
            unsigned int j;
            for (j = 0; j < 0x40; ) {
              unsigned int ucs = (i<<6)+j;
              int value = table[ucs];
              if (value < 0) value = 0;
              if ((j % 8) == 0) printf(" ");
              printf(" 0x%04x,",value);
              j++;
              if ((j % 8) == 0) printf("\n");
            }
          }
        }
      }
    }
    printf("};\n");
#else /* Sparse array */
    {
      /* Static storage for the same reason as table[] above. */
      static int summary_indx[0x11000];
      static int summary_used[0x11000];
      unsigned int i, k, indx;
      printf("static const unsigned short jisx0213_from_ucs_level2_data[] = {\n");
      /* Fill summary_indx[] and summary_used[]. */
      indx = 0;
      for (i = 0, k = 0; i <= maxpage; i++) {
        if (pages[i]) {
          unsigned int j1, j2;
          unsigned int count = 0;
          printf(" /* 0x%04X */\n",i<<6);
          for (j1 = 0; j1 < 4; j1++) {
            summary_indx[4*k+j1] = indx;
            summary_used[4*k+j1] = 0;
            for (j2 = 0; j2 < 16; j2++) {
              unsigned int j = 16*j1+j2;
              unsigned int ucs = (i<<6)+j;
              int value = table[ucs];
              if (value < 0) value = 0;
              if (value > 0) {
                summary_used[4*k+j1] |= (1 << j2);
                if ((count % 8) == 0) printf(" ");
                printf(" 0x%04x,",value);
                count++;
                if ((count % 8) == 0) printf("\n");
                indx++;
              }
            }
          }
          if ((count % 8) > 0)
            printf("\n");
          k++;
        }
      }
      printf("};\n");
      printf("\n");
      printf("static const Summary16 jisx0213_from_ucs_level2_2indx[] = {\n");
      for (i = 0, k = 0; i <= maxpage; i++) {
        if (pages[i]) {
          unsigned int j1;
          printf(" /* 0x%04X */\n",i<<6);
          printf(" ");
          for (j1 = 0; j1 < 4; j1++) {
            printf(" { %4d, 0x%04x },", summary_indx[4*k+j1], summary_used[4*k+j1]);
          }
          printf("\n");
          k++;
        }
      }
      printf("};\n");
    }
#endif
    printf("\n");
  }
  printf("#ifdef __GNUC__\n");
  printf("__inline\n");
  printf("#else\n");
  printf("#ifdef __cplusplus\n");
  printf("inline\n");
  printf("#endif\n");
  printf("#endif\n");
  printf("static ucs4_t jisx0213_to_ucs4 (unsigned int row, unsigned int col)\n");
  printf("{\n");
  printf(" ucs4_t val;\n");
  printf("\n");
  printf(" if (row >= 0x121 && row <= 0x17e)\n");
  printf(" row -= 289;\n");
  printf(" else if (row == 0x221)\n");
  printf(" row -= 451;\n");
  printf(" else if (row >= 0x223 && row <= 0x225)\n");
  printf(" row -= 452;\n");
  printf(" else if (row == 0x228)\n");
  printf(" row -= 454;\n");
  printf(" else if (row >= 0x22c && row <= 0x22f)\n");
  printf(" row -= 457;\n");
  printf(" else if (row >= 0x26e && row <= 0x27e)\n");
  printf(" row -= 519;\n");
  printf(" else\n");
  printf(" return 0x0000;\n");
  printf("\n");
  printf(" if (col >= 0x21 && col <= 0x7e)\n");
  printf(" col -= 0x21;\n");
  printf(" else\n");
  printf(" return 0x0000;\n");
  printf("\n");
  printf(" val = jisx0213_to_ucs_main[row * 94 + col];\n");
  printf(" val = jisx0213_to_ucs_pagestart[val >> 8] + (val & 0xff);\n");
  printf(" if (val == 0xfffd)\n");
  printf(" val = 0x0000;\n");
  printf(" return val;\n");
  printf("}\n");
  printf("\n");
  printf("#ifdef __GNUC__\n");
  printf("__inline\n");
  printf("#else\n");
  printf("#ifdef __cplusplus\n");
  printf("inline\n");
  printf("#endif\n");
  printf("#endif\n");
  printf("static unsigned short ucs4_to_jisx0213 (ucs4_t ucs)\n");
  printf("{\n");
  printf(" if (ucs < (sizeof(jisx0213_from_ucs_level1)/sizeof(jisx0213_from_ucs_level1[0])) << 6) {\n");
  printf(" int index1 = jisx0213_from_ucs_level1[ucs >> 6];\n");
  printf(" if (index1 >= 0)");
#if 0 /* Dense array */
  printf("\n");
  printf(" return jisx0213_from_ucs_level2[(index1 << 6) + (ucs & 0x3f)];\n");
#else /* Sparse array */
  printf(" {\n");
  printf(" const Summary16 *summary = &jisx0213_from_ucs_level2_2indx[((index1 << 6) + (ucs & 0x3f)) >> 4];\n");
  printf(" unsigned short used = summary->used;\n");
  printf(" unsigned int i = ucs & 0x0f;\n");
  printf(" if (used & ((unsigned short) 1 << i)) {\n");
  printf(" /* Keep in `used' only the bits 0..i-1. */\n");
  printf(" used &= ((unsigned short) 1 << i) - 1;\n");
  printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
  printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
  printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
  printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
  printf(" used = (used & 0x00ff) + (used >> 8);\n");
  printf(" return jisx0213_from_ucs_level2_data[summary->indx + used];\n");
  printf(" };\n");
  printf(" };\n");
#endif
  printf(" }\n");
  printf(" return 0x0000;\n");
  printf("}\n");
  printf("\n");
  printf("#endif /* _JISX0213_H */\n");
}
2091 /* Main program */
2093 int main (int argc, char *argv[])
2095 const char* charsetname;
2096 const char* name;
2098 if (argc != 3)
2099 exit(1);
2100 charsetname = argv[1];
2101 name = argv[2];
2103 output_title(charsetname);
2105 if (!strcmp(name,"gb2312")
2106 || !strcmp(name,"isoir165ext") || !strcmp(name,"gb12345ext")
2107 || !strcmp(name,"jisx0208") || !strcmp(name,"jisx0212"))
2108 do_normal(name);
2109 else if (!strcmp(name,"cns11643_1") || !strcmp(name,"cns11643_2")
2110 || !strcmp(name,"cns11643_3") || !strcmp(name,"cns11643_4a")
2111 || !strcmp(name,"cns11643_4b") || !strcmp(name,"cns11643_5")
2112 || !strcmp(name,"cns11643_6") || !strcmp(name,"cns11643_7")
2113 || !strcmp(name,"cns11643_15"))
2114 do_normal_only_charset2uni(name);
2115 else if (!strcmp(name,"cns11643_inv"))
2116 do_cns11643_only_uni2charset(name);
2117 else if (!strcmp(name,"gbkext1"))
2118 do_gbk1_only_charset2uni(name);
2119 else if (!strcmp(name,"gbkext2"))
2120 do_gbk2_only_charset2uni(name);
2121 else if (!strcmp(name,"gbkext_inv"))
2122 do_gbk1_only_uni2charset(name);
2123 else if (!strcmp(name,"cp936ext") || !strcmp(name,"gb18030ext"))
2124 do_gbk1(name);
2125 else if (!strcmp(name,"ksc5601"))
2126 do_ksc5601(name);
2127 else if (!strcmp(name,"uhc_1"))
2128 do_uhc_1(name);
2129 else if (!strcmp(name,"uhc_2"))
2130 do_uhc_2(name);
2131 else if (!strcmp(name,"big5") || !strcmp(name,"cp950ext"))
2132 do_big5(name);
2133 else if (!strcmp(name,"hkscs1999") || !strcmp(name,"hkscs2001")
2134 || !strcmp(name,"hkscs2004") || !strcmp(name,"hkscs2008"))
2135 do_hkscs(name);
2136 else if (!strcmp(name,"johab_hangul"))
2137 do_johab_hangul(name);
2138 else if (!strcmp(name,"cp932ext"))
2139 do_sjis(name);
2140 else if (!strcmp(name,"gb18030uni"))
2141 do_gb18030uni(name);
2142 else if (!strcmp(name,"jisx0213"))
2143 do_jisx0213(name);
2144 else
2145 exit(1);
2147 return 0;