1 /* Copyright (C) 1999-2004, 2006-2007, 2010, 2012, 2016, 2018 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Tools.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, see <https://www.gnu.org/licenses/>. */
18 * Generates a CJK character set table from a .TXT table as found on
19 * ftp.unicode.org or in the X nls directory.
22 * ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < gb2312
23 * ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < jis0208
24 * ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < ksc5601
26 * ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < GB2312.TXT
27 * ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < JIS0208.TXT
28 * ./cjk_tab_to_h JISX0212.1990-0 jisx0212 > jisx0212.h < JIS0212.TXT
29 * ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < KSC5601.TXT
30 * ./cjk_tab_to_h KSX1001.1992-0 ksc5601 > ksc5601.h < KSX1001.TXT
32 * ./cjk_tab_to_h BIG5 big5 > big5.h < BIG5.TXT
34 * ./cjk_tab_to_h JOHAB johab > johab.h < JOHAB.TXT
36 * ./cjk_tab_to_h JISX0213:2004 jisx0213 > jisx0213.h < JISX0213.TXT
/* Fields describing one two-byte character set together with the lookup
   tables that the generator derives from it. */
52 int rows
; /* number of possible values for the 1st byte */
53 int cols
; /* number of possible values for the 2nd byte */
54 int (*row_byte
) (int row
); /* returns the 1st byte value for a given row */
55 int (*col_byte
) (int col
); /* returns the 2nd byte value for a given col */
56 int (*byte_row
) (int byte
); /* converts a 1st byte value to a row, else -1 */
57 int (*byte_col
) (int byte
); /* converts a 2nd byte value to a col, else -1 */
58 const char* check_row_expr
; /* printf format (takes the C variable name) for 1st byte value checking */
59 const char* check_col_expr
; /* printf format (takes the C variable name) for 2nd byte value checking */
60 const char* byte_row_expr
; /* printf format (takes the C variable name) for 1st byte value to row */
61 const char* byte_col_expr
; /* printf format (takes the C variable name) for 2nd byte value to col */
62 int** charset2uni
; /* charset2uni[0..rows-1][0..cols-1] is valid */
63 /* You'll understand the terms "row" and "col" when you buy Ken Lunde's book.
64 Once a row is fixed, choosing a "col" is the same as choosing a "cell". */
65 int* charsetpage
; /* charsetpage[0..rows]: how large is a page for a row; entry [rows] stays 0 */
67 Block
* charsetblocks
; /* charsetblocks[0..ncharsetblocks-1], set up by find_charset2uni_blocks */
68 int* uni2charset
; /* uni2charset[0x0000..0x2ffff], allocated and filled in by invert */
69 int fffd
; /* uni representation of the invalid character */
73 * Outputs the file title.
75 static void output_title (const char *charsetname
)
78 printf(" * Copyright (C) 1999-2016 Free Software Foundation, Inc.\n");
79 printf(" * This file is part of the GNU LIBICONV Library.\n");
81 printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
82 printf(" * and/or modify it under the terms of the GNU Lesser General Public\n");
83 printf(" * License as published by the Free Software Foundation; either version 2\n");
84 printf(" * of the License, or (at your option) any later version.\n");
86 printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
87 printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
88 printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
89 printf(" * Lesser General Public License for more details.\n");
91 printf(" * You should have received a copy of the GNU Lesser General Public\n");
92 printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
93 printf(" * If not, see <https://www.gnu.org/licenses/>.\n");
97 printf(" * %s\n", charsetname
);
103 * Reads the charset2uni table from standard input.
105 static void read_table (Encoding
* enc
)
107 int row
, col
, i
, i1
, i2
, c
, j
;
109 enc
->charset2uni
= (int**) malloc(enc
->rows
*sizeof(int*));
110 for (row
= 0; row
< enc
->rows
; row
++)
111 enc
->charset2uni
[row
] = (int*) malloc(enc
->cols
*sizeof(int));
113 for (row
= 0; row
< enc
->rows
; row
++)
114 for (col
= 0; col
< enc
->cols
; col
++)
115 enc
->charset2uni
[row
][col
] = 0xfffd;
120 /* Read a unicode.org style .TXT file. */
125 if (c
== '\n' || c
== ' ' || c
== '\t')
128 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
132 if (scanf("0x%x", &j
) != 1)
136 row
= enc
->byte_row(i1
);
137 col
= enc
->byte_col(i2
);
138 if (row
< 0 || col
< 0) {
139 fprintf(stderr
, "lost entry for %02x %02x\n", i1
, i2
);
142 if (scanf(" 0x%x", &enc
->charset2uni
[row
][col
]) != 1)
146 /* Read a table of hexadecimal Unicode values. */
147 for (i1
= 32; i1
< 132; i1
++)
148 for (i2
= 32; i2
< 132; i2
++) {
154 if (j
< 0 || j
== 0xffff)
157 if (enc
->byte_row(i1
) < 0 || enc
->byte_col(i2
) < 0) {
158 fprintf(stderr
, "lost entry at %02x %02x\n", i1
, i2
);
161 enc
->charset2uni
[enc
->byte_row(i1
)][enc
->byte_col(i2
)] = j
;
169 * Determine whether the Unicode range goes outside the BMP.
/* Walks every charset2uni[row][col] cell (rows x cols) and tests whether the
   stored value is >= 0x10000, i.e. does not fit in 16 bits.
   NOTE(review): the return statements are not visible in this excerpt;
   presumably true on the first such cell and false otherwise -- confirm. */
171 static bool is_charset2uni_large (Encoding
* enc
)
175 for (row
= 0; row
< enc
->rows
; row
++)
176 for (col
= 0; col
< enc
->cols
; col
++)
177 if (enc
->charset2uni
[row
][col
] >= 0x10000)
183 * Compactify the Unicode range by use of an auxiliary table,
184 * so 16 bits suffice to store each value.
186 static int compact_large_charset2uni (Encoding
* enc
, unsigned int **urows
, unsigned int *urowshift
)
190 for (shift
= 8; ; shift
--) {
191 int *upages
= (int *) malloc((0x110000>>shift
) * sizeof(int));
192 int i
, row
, col
, nurows
;
194 for (i
= 0; i
< 0x110000>>shift
; i
++)
197 for (row
= 0; row
< enc
->rows
; row
++)
198 for (col
= 0; col
< enc
->cols
; col
++)
199 upages
[enc
->charset2uni
[row
][col
] >> shift
] = 0;
202 for (i
= 0; i
< 0x110000>>shift
; i
++)
206 /* We want all table entries to fit in an 'unsigned short'. */
207 if (nurows
<= 1<<(16-shift
)) {
208 int** old_charset2uni
;
210 *urows
= (unsigned int *) malloc(nurows
* sizeof(unsigned int));
214 for (i
= 0; i
< 0x110000>>shift
; i
++)
215 if (upages
[i
] == 0) {
217 (*urows
)[nurows
] = i
;
221 old_charset2uni
= enc
->charset2uni
;
222 enc
->charset2uni
= (int**) malloc(enc
->rows
*sizeof(int*));
223 for (row
= 0; row
< enc
->rows
; row
++)
224 enc
->charset2uni
[row
] = (int*) malloc(enc
->cols
*sizeof(int));
225 for (row
= 0; row
< enc
->rows
; row
++)
226 for (col
= 0; col
< enc
->cols
; col
++) {
227 int u
= old_charset2uni
[row
][col
];
228 enc
->charset2uni
[row
][col
] =
229 (upages
[u
>> shift
] << shift
) | (u
& ((1 << shift
) - 1));
232 (upages
[0xfffd >> shift
] << shift
) | (0xfffd & ((1 << shift
) - 1));
241 * Computes the charsetpage[0..rows] array.
/* Allocates enc->charsetpage with rows+1 entries and fills in one value per
   row, based on which cells of that row differ from enc->fffd. */
243 static void find_charset2uni_pages (Encoding
* enc
)
/* One entry per row plus one extra entry at index rows. */
247 enc
->charsetpage
= (int*) malloc((enc
->rows
+1)*sizeof(int));
/* Clear all rows+1 entries.  The extra entry is never overwritten below, so
   it stays 0 and stops the "charsetpage[lastrow+1] > 0" scans used by the
   block-finding and output code in this file. */
249 for (row
= 0; row
<= enc
->rows
; row
++)
250 enc
->charsetpage
[row
] = 0;
252 for (row
= 0; row
< enc
->rows
; row
++) {
/* A cell counts as occupied when it differs from the invalid marker. */
254 for (col
= 0; col
< enc
->cols
; col
++)
255 if (enc
->charset2uni
[row
][col
] != enc
->fffd
)
/* NOTE(review): how 'used' is computed is not visible in this excerpt;
   presumably one past the last occupied column -- confirm. */
257 enc
->charsetpage
[row
] = used
;
262 * Fills in nblocks and blocks.
/* Groups runs of consecutive rows with charsetpage[row] > 0 into blocks and
   records them in enc->charsetblocks; the final count goes to
   enc->ncharsetblocks.  Requires enc->charsetpage to be filled in already. */
264 static void find_charset2uni_blocks (Encoding
* enc
)
/* At most one block can start per row, so rows entries always suffice. */
268 enc
->charsetblocks
= (Block
*) malloc(enc
->rows
*sizeof(Block
));
271 for (row
= 0; row
< enc
->rows
; row
++)
/* A block starts at a non-empty row that is row 0 or follows an empty row. */
272 if (enc
->charsetpage
[row
] > 0 && (row
== 0 || enc
->charsetpage
[row
-1] == 0)) {
/* Advance lastrow to the last non-empty row of this run. */
273 for (lastrow
= row
; enc
->charsetpage
[lastrow
+1] > 0; lastrow
++);
/* start and end are linear cell indices (row * cols + col); end adds the
   page size of the last row instead of a full row of cols. */
274 enc
->charsetblocks
[n
].start
= row
* enc
->cols
;
275 enc
->charsetblocks
[n
].end
= lastrow
* enc
->cols
+ enc
->charsetpage
[lastrow
];
/* NOTE(review): the initialisation and increment of n are not visible in
   this excerpt -- confirm n counts the blocks stored above. */
278 enc
->ncharsetblocks
= n
;
282 * Outputs the charset to unicode table and function.
284 static void output_charset2uni (const char* name
, Encoding
* enc
)
286 int nurows
, row
, col
, lastrow
, col_max
, i
, i1_min
, i1_max
;
289 unsigned int urowshift
;
292 is_large
= is_charset2uni_large(enc
);
294 /* Use a temporary copy of enc. */
297 nurows
= compact_large_charset2uni(enc
,&urows
,&urowshift
);
299 nurows
= 0; urows
= NULL
; urowshift
= 0; enc
->fffd
= 0xfffd;
302 find_charset2uni_pages(enc
);
304 find_charset2uni_blocks(enc
);
306 for (row
= 0; row
< enc
->rows
; row
++)
307 if (enc
->charsetpage
[row
] > 0) {
308 if (row
== 0 || enc
->charsetpage
[row
-1] == 0) {
309 /* Start a new block. */
310 for (lastrow
= row
; enc
->charsetpage
[lastrow
+1] > 0; lastrow
++);
311 printf("static const unsigned short %s_2uni_page%02x[%d] = {\n",
312 name
, enc
->row_byte(row
),
313 (lastrow
-row
) * enc
->cols
+ enc
->charsetpage
[lastrow
]);
315 printf(" /""* 0x%02x *""/\n ", enc
->row_byte(row
));
316 col_max
= (enc
->charsetpage
[row
+1] > 0 ? enc
->cols
: enc
->charsetpage
[row
]);
317 for (col
= 0; col
< col_max
; col
++) {
318 printf(" 0x%04x,", enc
->charset2uni
[row
][col
]);
319 if ((col
% 8) == 7 && (col
+1 < col_max
)) printf("\n ");
322 if (enc
->charsetpage
[row
+1] == 0) {
330 printf("static const ucs4_t %s_2uni_upages[%d] = {\n ", name
, nurows
);
331 for (i
= 0; i
< nurows
; i
++) {
332 printf(" 0x%05x,", urows
[i
] << urowshift
);
333 if ((i
% 8) == 7 && (i
+1 < nurows
)) printf("\n ");
340 printf("static int\n");
341 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)\n", name
);
343 printf(" unsigned char c1 = s[0];\n");
345 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
346 i1_min
= enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
);
347 i1_max
= enc
->row_byte((enc
->charsetblocks
[i
].end
-1) / enc
->cols
);
350 if (i1_min
== i1_max
)
351 printf("(c1 == 0x%02x)", i1_min
);
353 printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min
, i1_max
);
356 printf(" if (n >= 2) {\n");
357 printf(" unsigned char c2 = s[1];\n");
359 printf(enc
->check_col_expr
, "c2");
361 printf(" unsigned int i = %d * (", enc
->cols
);
362 printf(enc
->byte_row_expr
, "c1");
364 printf(enc
->byte_col_expr
, "c2");
366 printf(" %s wc = 0xfffd;\n", is_large
? "ucs4_t" : "unsigned short");
367 if (is_large
) printf(" unsigned short swc;\n");
368 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
372 if (i
< enc
->ncharsetblocks
-1)
373 printf("if (i < %d) ", enc
->charsetblocks
[i
+1].start
);
375 printf(" if (i < %d)\n", enc
->charsetblocks
[i
].end
);
376 printf(" %s = ", is_large
? "swc" : "wc");
377 printf("%s_2uni_page%02x[i", name
, enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
));
378 if (enc
->charsetblocks
[i
].start
> 0)
379 printf("-%d", enc
->charsetblocks
[i
].start
);
381 if (is_large
) printf(",\n wc = %s_2uni_upages[swc>>%d] | (swc & 0x%x)", name
, urowshift
, (1 << urowshift
) - 1);
385 printf(" if (wc != 0xfffd) {\n");
386 printf(" *pwc = %swc;\n", is_large
? "" : "(ucs4_t) ");
387 printf(" return 2;\n");
390 printf(" return RET_ILSEQ;\n");
392 printf(" return RET_TOOFEW(0);\n");
394 printf(" return RET_ILSEQ;\n");
400 * Outputs the charset to unicode table and function.
401 * (Suitable if the mapping function is well defined, i.e. has no holes, and
402 * is monotonically increasing with small gaps only.)
404 static void output_charset2uni_noholes_monotonic (const char* name
, Encoding
* enc
)
406 int row
, col
, lastrow
, r
, col_max
, i
, i1_min
, i1_max
;
408 /* Choose stepsize so that stepsize*steps_per_row >= enc->cols, and
409 enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]
410 is always < 0x100. */
411 int steps_per_row
= 2;
412 int stepsize
= (enc
->cols
+ steps_per_row
-1) / steps_per_row
;
414 find_charset2uni_pages(enc
);
416 find_charset2uni_blocks(enc
);
418 for (row
= 0; row
< enc
->rows
; row
++)
419 if (enc
->charsetpage
[row
] > 0) {
420 if (row
== 0 || enc
->charsetpage
[row
-1] == 0) {
421 /* Start a new block. */
422 for (lastrow
= row
; enc
->charsetpage
[lastrow
+1] > 0; lastrow
++);
423 printf("static const unsigned short %s_2uni_main_page%02x[%d] = {\n ",
424 name
, enc
->row_byte(row
),
425 steps_per_row
*(lastrow
-row
+1));
426 for (r
= row
; r
<= lastrow
; r
++) {
427 for (i
= 0; i
< steps_per_row
; i
++)
428 printf(" 0x%04x,", enc
->charset2uni
[r
][i
*stepsize
]);
429 if (((r
-row
) % 4) == 3 && (r
< lastrow
)) printf("\n ");
433 printf("static const unsigned char %s_2uni_page%02x[%d] = {\n",
434 name
, enc
->row_byte(row
),
435 (lastrow
-row
) * enc
->cols
+ enc
->charsetpage
[lastrow
]);
437 printf(" /""* 0x%02x *""/\n ", enc
->row_byte(row
));
438 col_max
= (enc
->charsetpage
[row
+1] > 0 ? enc
->cols
: enc
->charsetpage
[row
]);
439 for (col
= 0; col
< col_max
; col
++) {
440 printf(" 0x%02x,", enc
->charset2uni
[row
][col
] - enc
->charset2uni
[row
][col
/stepsize
*stepsize
]);
441 if ((col
% 8) == 7 && (col
+1 < col_max
)) printf("\n ");
444 if (enc
->charsetpage
[row
+1] == 0) {
451 printf("static int\n");
452 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)\n", name
);
454 printf(" unsigned char c1 = s[0];\n");
456 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
457 i1_min
= enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
);
458 i1_max
= enc
->row_byte((enc
->charsetblocks
[i
].end
-1) / enc
->cols
);
461 if (i1_min
== i1_max
)
462 printf("(c1 == 0x%02x)", i1_min
);
464 printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min
, i1_max
);
467 printf(" if (n >= 2) {\n");
468 printf(" unsigned char c2 = s[1];\n");
470 printf(enc
->check_col_expr
, "c2");
472 printf(" unsigned int row = ");
473 printf(enc
->byte_row_expr
, "c1");
475 printf(" unsigned int col = ");
476 printf(enc
->byte_col_expr
, "c2");
478 printf(" unsigned int i = %d * row + col;\n", enc
->cols
);
479 printf(" unsigned short wc = 0xfffd;\n");
480 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
484 if (i
< enc
->ncharsetblocks
-1)
485 printf("if (i < %d) ", enc
->charsetblocks
[i
+1].start
);
487 printf(" if (i < %d)\n", enc
->charsetblocks
[i
].end
);
488 printf(" wc = %s_2uni_main_page%02x[%d*", name
, enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
), steps_per_row
);
489 if (enc
->charsetblocks
[i
].start
> 0)
490 printf("(row-%d)", enc
->charsetblocks
[i
].start
/ enc
->cols
);
494 if (steps_per_row
== 2)
495 printf("(col>=%d?1:0)", stepsize
);
497 printf("col/%d", stepsize
);
498 printf("] + %s_2uni_page%02x[i", name
, enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
));
499 if (enc
->charsetblocks
[i
].start
> 0)
500 printf("-%d", enc
->charsetblocks
[i
].start
);
504 printf(" if (wc != 0xfffd) {\n");
505 printf(" *pwc = (ucs4_t) wc;\n");
506 printf(" return 2;\n");
509 printf(" return RET_ILSEQ;\n");
511 printf(" return RET_TOOFEW(0);\n");
513 printf(" return RET_ILSEQ;\n");
519 * Computes the uni2charset[0x0000..0x2ffff] array.
/* Builds the reverse mapping enc->uni2charset from enc->charset2uni.  Each
   stored value is 0x100 * (1st byte) + (2nd byte); 0 means "no mapping". */
521 static void invert (Encoding
* enc
)
/* The table covers the 0x30000 indices 0x00000..0x2ffff. */
525 enc
->uni2charset
= (int*) malloc(0x30000*sizeof(int));
527 for (j
= 0; j
< 0x30000; j
++)
528 enc
->uni2charset
[j
] = 0;
530 for (row
= 0; row
< enc
->rows
; row
++)
531 for (col
= 0; col
< enc
->cols
; col
++) {
532 j
= enc
->charset2uni
[row
][col
];
/* NOTE(review): a line is missing from this excerpt just before the store;
   presumably it skips cells holding the invalid marker -- confirm. */
534 enc
->uni2charset
[j
] = 0x100 * enc
->row_byte(row
) + enc
->col_byte(col
);
539 * Outputs the unicode to charset table and function, using a linear array.
540 * (Suitable if the table is dense.)
542 static void output_uni2charset_dense (const char* name
, Encoding
* enc
)
544 /* Like in 8bit_tab_to_h.c */
548 struct { int minline
; int maxline
; int usecount
; } tables
[0x6000];
550 int row
, col
, j
, p
, j1
, j2
, t
;
552 for (p
= 0; p
< 0x300; p
++)
554 for (row
= 0; row
< enc
->rows
; row
++)
555 for (col
= 0; col
< enc
->cols
; col
++) {
556 j
= enc
->charset2uni
[row
][col
];
560 for (j1
= 0; j1
< 0x6000; j1
++) {
561 bool all_invalid
= true;
562 for (j2
= 0; j2
< 8; j2
++) {
564 if (enc
->uni2charset
[j
] != 0)
573 for (j1
= 0; j1
< 0x6000; j1
++) {
576 && ((j1
> 0 && line
[j1
-1] == tableno
-1)
577 || ((tables
[tableno
-1].maxline
>> 5) == (j1
>> 5)
578 && j1
- tables
[tableno
-1].maxline
<= 8))) {
579 line
[j1
] = tableno
-1;
580 tables
[tableno
-1].maxline
= j1
;
583 line
[j1
] = tableno
-1;
584 tables
[tableno
-1].minline
= tables
[tableno
-1].maxline
= j1
;
588 for (t
= 0; t
< tableno
; t
++) {
589 tables
[t
].usecount
= 0;
590 j1
= 8*tables
[t
].minline
;
591 j2
= 8*(tables
[t
].maxline
+1);
592 for (j
= j1
; j
< j2
; j
++)
593 if (enc
->uni2charset
[j
] != 0)
594 tables
[t
].usecount
++;
598 for (t
= 0; t
< tableno
; t
++)
599 if (tables
[t
].usecount
> 1) {
600 p
= tables
[t
].minline
>> 5;
601 printf("static const unsigned short %s_page%02x[%d] = {\n", name
, p
, 8*(tables
[t
].maxline
-tables
[t
].minline
+1));
602 for (j1
= tables
[t
].minline
; j1
<= tables
[t
].maxline
; j1
++) {
603 if ((j1
% 0x20) == 0 && j1
> tables
[t
].minline
)
604 printf(" /* 0x%04x */\n", 8*j1
);
606 for (j2
= 0; j2
< 8; j2
++) {
608 printf(" 0x%04x,", enc
->uni2charset
[j
]);
610 printf(" /*0x%02x-0x%02x*/\n", 8*(j1
% 0x20), 8*(j1
% 0x20)+7);
617 printf("static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)\n", name
);
619 printf(" if (n >= 2) {\n");
620 printf(" unsigned short c = 0;\n");
622 for (j1
= 0; j1
< 0x6000;) {
624 for (j2
= j1
; j2
< 0x6000 && line
[j2
] == t
; j2
++);
626 if (j1
!= tables
[t
].minline
) abort();
627 if (j2
> tables
[t
].maxline
+1) abort();
628 j2
= tables
[t
].maxline
+1;
634 if (tables
[t
].usecount
== 0) abort();
635 if (tables
[t
].usecount
== 1) {
636 if (j2
!= j1
+1) abort();
637 for (j
= 8*j1
; j
< 8*j2
; j
++)
638 if (enc
->uni2charset
[j
] != 0) {
639 printf("if (wc == 0x%04x)\n c = 0x%02x;\n", j
, enc
->uni2charset
[j
]);
644 printf("if (wc < 0x%04x)", 8*j2
);
646 printf("if (wc >= 0x%04x && wc < 0x%04x)", 8*j1
, 8*j2
);
648 printf("\n c = %s_page%02x[wc", name
, j1
>> 5);
649 if (tables
[t
].minline
> 0)
650 printf("-0x%04x", 8*j1
);
656 printf(" if (c != 0) {\n");
657 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
658 printf(" return 2;\n");
660 printf(" return RET_ILUNI;\n");
662 printf(" return RET_TOOSMALL;\n");
667 * Outputs the unicode to charset table and function, using a packed array.
668 * (Suitable if the table is sparse.)
669 * The argument 'monotonic' may be set to true if the mapping is monotonically
670 * increasing with small gaps only.
672 static void output_uni2charset_sparse (const char* name
, Encoding
* enc
, bool monotonic
)
675 Block pageblocks
[0x300]; int npageblocks
;
676 int indx2charset
[0x30000];
677 int summary_indx
[0x3000];
678 int summary_used
[0x3000];
679 int i
, row
, col
, j
, p
, j1
, j2
, indx
;
682 int log2_stepsize
= (!strcmp(name
,"uhc_2") ? 6 : 7);
683 int stepsize
= 1 << log2_stepsize
;
686 /* Fill pages[0x300]. */
687 for (p
= 0; p
< 0x300; p
++)
689 for (row
= 0; row
< enc
->rows
; row
++)
690 for (col
= 0; col
< enc
->cols
; col
++) {
691 j
= enc
->charset2uni
[row
][col
];
696 /* Determine whether two or three bytes are needed for each character. */
698 for (j
= 0; j
< 0x30000; j
++)
699 if (enc
->uni2charset
[j
] >= 0x10000)
703 for (p
= 0; p
< 0x300; p
++)
705 printf("static const unsigned short %s_page%02x[256] = {\n", name
, p
);
706 for (j1
= 0; j1
< 32; j1
++) {
708 for (j2
= 0; j2
< 8; j2
++)
709 printf("0x%04x, ", enc
->uni2charset
[256*p
+8*j1
+j2
]);
710 printf("/""*0x%02x-0x%02x*""/\n", 8*j1
, 8*j1
+7);
717 /* Fill summary_indx[] and summary_used[]. */
719 for (j1
= 0; j1
< 0x3000; j1
++) {
720 summary_indx
[j1
] = indx
;
721 summary_used
[j1
] = 0;
722 for (j2
= 0; j2
< 16; j2
++) {
724 if (enc
->uni2charset
[j
] != 0) {
725 indx2charset
[indx
++] = enc
->uni2charset
[j
];
726 summary_used
[j1
] |= (1 << j2
);
731 /* Fill npageblocks and pageblocks[]. */
733 for (p
= 0; p
< 0x300; ) {
734 if (pages
[p
] && (p
== 0 || !pages
[p
-1])) {
735 pageblocks
[npageblocks
].start
= 16*p
;
736 do p
++; while (p
< 0x300 && pages
[p
]);
738 while (summary_used
[j1
-1] == 0) j1
--;
739 pageblocks
[npageblocks
].end
= j1
;
746 indxsteps
= (indx
+ stepsize
-1) / stepsize
;
747 printf("static const unsigned short %s_2charset_main[%d] = {\n", name
, indxsteps
);
748 for (i
= 0; i
< indxsteps
; ) {
749 if ((i
% 8) == 0) printf(" ");
750 printf(" 0x%04x,", indx2charset
[i
*stepsize
]);
752 if ((i
% 8) == 0 || i
== indxsteps
) printf("\n");
755 printf("static const unsigned char %s_2charset[%d] = {\n", name
, indx
);
756 for (i
= 0; i
< indx
; ) {
757 if ((i
% 8) == 0) printf(" ");
758 printf(" 0x%02x,", indx2charset
[i
] - indx2charset
[i
/stepsize
*stepsize
]);
760 if ((i
% 8) == 0 || i
== indx
) printf("\n");
765 printf("static const unsigned char %s_2charset[3*%d] = {\n", name
, indx
);
766 for (i
= 0; i
< indx
; ) {
767 if ((i
% 4) == 0) printf(" ");
768 printf(" 0x%1x,0x%02x,0x%02x,", indx2charset
[i
] >> 16,
769 (indx2charset
[i
] >> 8) & 0xff, indx2charset
[i
] & 0xff);
771 if ((i
% 4) == 0 || i
== indx
) printf("\n");
775 printf("static const unsigned short %s_2charset[%d] = {\n", name
, indx
);
776 for (i
= 0; i
< indx
; ) {
777 if ((i
% 8) == 0) printf(" ");
778 printf(" 0x%04x,", indx2charset
[i
]);
780 if ((i
% 8) == 0 || i
== indx
) printf("\n");
786 for (i
= 0; i
< npageblocks
; i
++) {
787 printf("static const Summary16 %s_uni2indx_page%02x[%d] = {\n", name
,
788 pageblocks
[i
].start
/16, pageblocks
[i
].end
-pageblocks
[i
].start
);
789 for (j1
= pageblocks
[i
].start
; j1
< pageblocks
[i
].end
; ) {
790 if (((16*j1
) % 0x100) == 0) printf(" /""* 0x%04x *""/\n", 16*j1
);
791 if ((j1
% 4) == 0) printf(" ");
792 printf(" { %4d, 0x%04x },", summary_indx
[j1
], summary_used
[j1
]);
794 if ((j1
% 4) == 0 || j1
== pageblocks
[i
].end
) printf("\n");
800 printf("static int\n");
801 printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)\n", name
);
803 printf(" if (n >= 2) {\n");
804 printf(" const Summary16 *summary = NULL;\n");
805 for (i
= 0; i
< npageblocks
; i
++) {
809 printf("if (wc >= 0x%04x && wc < 0x%04x)\n",
810 16*pageblocks
[i
].start
, 16*pageblocks
[i
].end
);
811 printf(" summary = &%s_uni2indx_page%02x[(wc>>4)", name
,
812 pageblocks
[i
].start
/16);
813 if (pageblocks
[i
].start
> 0)
814 printf("-0x%03x", pageblocks
[i
].start
);
817 printf(" if (summary) {\n");
818 printf(" unsigned short used = summary->used;\n");
819 printf(" unsigned int i = wc & 0x0f;\n");
820 printf(" if (used & ((unsigned short) 1 << i)) {\n");
821 if (monotonic
|| !is_large
)
822 printf(" unsigned short c;\n");
823 printf(" /* Keep in 'used' only the bits 0..i-1. */\n");
824 printf(" used &= ((unsigned short) 1 << i) - 1;\n");
825 printf(" /* Add 'summary->indx' and the number of bits set in 'used'. */\n");
826 printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
827 printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
828 printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
829 printf(" used = (used & 0x00ff) + (used >> 8);\n");
831 printf(" used += summary->indx;\n");
832 printf(" c = %s_2charset_main[used>>%d] + %s_2charset[used];\n", name
, log2_stepsize
, name
);
833 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
834 printf(" return 2;\n");
837 printf(" used += summary->indx;\n");
838 printf(" r[0] = %s_2charset[3*used];\n", name
);
839 printf(" r[1] = %s_2charset[3*used+1];\n", name
);
840 printf(" r[2] = %s_2charset[3*used+2];\n", name
);
841 printf(" return 3;\n");
843 printf(" c = %s_2charset[summary->indx + used];\n", name
);
844 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
845 printf(" return 2;\n");
850 printf(" return RET_ILUNI;\n");
852 printf(" return RET_TOOSMALL;\n");
856 /* ISO-2022/EUC specifics */
/* Returns the 1st byte value for a zero-based row: row 0 maps to 0x21. */
static int row_byte_normal (int row)
{
  const int first_byte = 0x21;

  return first_byte + row;
}
/* Returns the 2nd byte value for a zero-based col: col 0 maps to 0x21. */
static int col_byte_normal (int col)
{
  const int first_byte = 0x21;

  return first_byte + col;
}
/* Converts a 1st byte value to a zero-based row, or returns -1 when the byte
   lies outside 0x21..0x7e (the range stated by check_row_expr for this
   layout).  Callers only test for a negative result, so an unchecked
   "byte - 0x21" would turn bytes >= 0x7f into a row index past the end of
   the 94-row table. */
static int byte_row_normal (int byte)
{
  if (byte >= 0x21 && byte < 0x7f)
    return byte - 0x21;
  else
    return -1;
}
/* Converts a 2nd byte value to a zero-based col, or returns -1 when the byte
   lies outside 0x21..0x7e (the range stated by check_col_expr for this
   layout).  Callers only test for a negative result, so an unchecked
   "byte - 0x21" would turn bytes >= 0x7f into a column index past the end of
   a 94-column row. */
static int byte_col_normal (int byte)
{
  if (byte >= 0x21 && byte < 0x7f)
    return byte - 0x21;
  else
    return -1;
}
863 static void do_normal (const char* name
)
869 enc
.row_byte
= row_byte_normal
;
870 enc
.col_byte
= col_byte_normal
;
871 enc
.byte_row
= byte_row_normal
;
872 enc
.byte_col
= byte_col_normal
;
873 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
874 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
875 enc
.byte_row_expr
= "%1$s - 0x21";
876 enc
.byte_col_expr
= "%1$s - 0x21";
879 output_charset2uni(name
,&enc
);
880 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
883 /* Note: On first sight, the jisx0212_2charset[] table seems to be in order,
884 starting from the charset=0x3021/uni=0x4e02 pair. But it's only mostly in
885 order. There are 75 out-of-order values, scattered all throughout the table.
888 static void do_normal_only_charset2uni (const char* name
)
894 enc
.row_byte
= row_byte_normal
;
895 enc
.col_byte
= col_byte_normal
;
896 enc
.byte_row
= byte_row_normal
;
897 enc
.byte_col
= byte_col_normal
;
898 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
899 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
900 enc
.byte_row_expr
= "%1$s - 0x21";
901 enc
.byte_col_expr
= "%1$s - 0x21";
904 output_charset2uni(name
,&enc
);
907 /* CNS 11643 specifics - trick to put two tables into one */
/* Maps a combined row index to a pseudo "1st byte": each group of 94 rows
   gets its own multiple of 0x100, and the position inside the group is
   offset by 0x21. */
static int row_byte_cns11643 (int row)
{
  int group = row / 94;
  int within = row % 94;

  return 0x100 * group + within + 0x21;
}
/* Inverse of the combined-row encoding: the bits above the low 8 select a
   group of 94 rows, and the low 8 bits minus 0x21 give the row inside it. */
static int byte_row_cns11643 (int byte)
{
  int group = byte >> 8;
  int low = byte & 0xff;

  return group * 94 + low - 0x21;
}
916 static void do_cns11643_only_uni2charset (const char* name
)
922 enc
.row_byte
= row_byte_cns11643
;
923 enc
.col_byte
= col_byte_normal
;
924 enc
.byte_row
= byte_row_cns11643
;
925 enc
.byte_col
= byte_col_normal
;
926 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
927 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
928 enc
.byte_row_expr
= "%1$s - 0x21";
929 enc
.byte_col_expr
= "%1$s - 0x21";
933 output_uni2charset_sparse(name
,&enc
,false);
938 static int row_byte_gbk1 (int row
) {
/* Returns the 2nd byte value for a col: cols below 0x3f start at 0x40, and
   cols from 0x3f on are shifted by one more so that byte 0x7f is skipped. */
static int col_byte_gbk1 (int col)
{
  if (col >= 0x3f)
    return col + 0x41;
  return col + 0x40;
}
944 static int byte_row_gbk1 (int byte
) {
945 if (byte
>= 0x81 && byte
< 0xff)
950 static int byte_col_gbk1 (int byte
) {
951 if (byte
>= 0x40 && byte
< 0x7f)
953 else if (byte
>= 0x80 && byte
< 0xff)
959 static void do_gbk1 (const char* name
)
965 enc
.row_byte
= row_byte_gbk1
;
966 enc
.col_byte
= col_byte_gbk1
;
967 enc
.byte_row
= byte_row_gbk1
;
968 enc
.byte_col
= byte_col_gbk1
;
969 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
970 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
971 enc
.byte_row_expr
= "%1$s - 0x81";
972 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
975 output_charset2uni(name
,&enc
);
976 invert(&enc
); output_uni2charset_dense(name
,&enc
);
979 static void do_gbk1_only_charset2uni (const char* name
)
985 enc
.row_byte
= row_byte_gbk1
;
986 enc
.col_byte
= col_byte_gbk1
;
987 enc
.byte_row
= byte_row_gbk1
;
988 enc
.byte_col
= byte_col_gbk1
;
989 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
990 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
991 enc
.byte_row_expr
= "%1$s - 0x81";
992 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
995 output_charset2uni(name
,&enc
);
998 static int row_byte_gbk2 (int row
) {
/* Returns the 2nd byte value for a col: cols below 0x3f start at 0x40, and
   cols from 0x3f on are shifted by one more so that byte 0x7f is skipped. */
static int col_byte_gbk2 (int col)
{
  int base = 0x40;

  if (col >= 0x3f)
    base = 0x41;
  return base + col;
}
1004 static int byte_row_gbk2 (int byte
) {
1005 if (byte
>= 0x81 && byte
< 0xff)
1010 static int byte_col_gbk2 (int byte
) {
1011 if (byte
>= 0x40 && byte
< 0x7f)
1013 else if (byte
>= 0x80 && byte
< 0xa1)
1019 static void do_gbk2_only_charset2uni (const char* name
)
1025 enc
.row_byte
= row_byte_gbk2
;
1026 enc
.col_byte
= col_byte_gbk2
;
1027 enc
.byte_row
= byte_row_gbk2
;
1028 enc
.byte_col
= byte_col_gbk2
;
1029 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
1030 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xa1)";
1031 enc
.byte_row_expr
= "%1$s - 0x81";
1032 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
1035 output_charset2uni(name
,&enc
);
1038 static void do_gbk1_only_uni2charset (const char* name
)
1044 enc
.row_byte
= row_byte_gbk1
;
1045 enc
.col_byte
= col_byte_gbk1
;
1046 enc
.byte_row
= byte_row_gbk1
;
1047 enc
.byte_col
= byte_col_gbk1
;
1048 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
1049 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
1050 enc
.byte_row_expr
= "%1$s - 0x81";
1051 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
1054 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1057 /* KSC 5601 specifics */
1060 * Reads the charset2uni table from standard input.
1062 static void read_table_ksc5601 (Encoding
* enc
)
1064 int row
, col
, i
, i1
, i2
, c
, j
;
1066 enc
->charset2uni
= (int**) malloc(enc
->rows
*sizeof(int*));
1067 for (row
= 0; row
< enc
->rows
; row
++)
1068 enc
->charset2uni
[row
] = (int*) malloc(enc
->cols
*sizeof(int));
1070 for (row
= 0; row
< enc
->rows
; row
++)
1071 for (col
= 0; col
< enc
->cols
; col
++)
1072 enc
->charset2uni
[row
][col
] = 0xfffd;
1077 /* Read a unicode.org style .TXT file. */
1082 if (c
== '\n' || c
== ' ' || c
== '\t')
1085 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
1089 if (scanf("0x%x", &j
) != 1)
1093 if (scanf(" 0x%x", &j
) != 1)
1095 /* Take only the range covered by KS C 5601.1987-0 = KS C 5601.1989-0
1096 = KS X 1001.1992, ignore the rest. */
1097 if (!(i1
>= 128+33 && i1
< 128+127 && i2
>= 128+33 && i2
< 128+127))
1098 continue; /* KSC5601 specific */
1099 i1
&= 0x7f; /* KSC5601 specific */
1100 i2
&= 0x7f; /* KSC5601 specific */
1101 row
= enc
->byte_row(i1
);
1102 col
= enc
->byte_col(i2
);
1103 if (row
< 0 || col
< 0) {
1104 fprintf(stderr
, "lost entry for %02x %02x\n", i1
, i2
);
1107 enc
->charset2uni
[row
][col
] = j
;
1110 /* Read a table of hexadecimal Unicode values. */
1111 for (i1
= 33; i1
< 127; i1
++)
1112 for (i2
= 33; i2
< 127; i2
++) {
1113 i
= scanf("%x", &j
);
1118 if (j
< 0 || j
== 0xffff)
1121 if (enc
->byte_row(i1
) < 0 || enc
->byte_col(i2
) < 0) {
1122 fprintf(stderr
, "lost entry at %02x %02x\n", i1
, i2
);
1125 enc
->charset2uni
[enc
->byte_row(i1
)][enc
->byte_col(i2
)] = j
;
1132 static void do_ksc5601 (const char* name
)
1138 enc
.row_byte
= row_byte_normal
;
1139 enc
.col_byte
= col_byte_normal
;
1140 enc
.byte_row
= byte_row_normal
;
1141 enc
.byte_col
= byte_col_normal
;
1142 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
1143 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
1144 enc
.byte_row_expr
= "%1$s - 0x21";
1145 enc
.byte_col_expr
= "%1$s - 0x21";
1147 read_table_ksc5601(&enc
);
1148 output_charset2uni(name
,&enc
);
1149 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1154 /* UHC part 1: 0x{81..A0}{41..5A,61..7A,81..FE} */
1156 static int row_byte_uhc_1 (int row
) {
/* Returns the 2nd byte value for a col.  The columns fall into three
   consecutive runs whose bytes start at 0x41, 0x61 and 0x81. */
static int col_byte_uhc_1 (int col)
{
  if (col >= 0x34)
    return col + 0x4d;
  if (col >= 0x1a)
    return col + 0x47;
  return col + 0x41;
}
1162 static int byte_row_uhc_1 (int byte
) {
1163 if (byte
>= 0x81 && byte
< 0xa1)
1168 static int byte_col_uhc_1 (int byte
) {
1169 if (byte
>= 0x41 && byte
< 0x5b)
1171 else if (byte
>= 0x61 && byte
< 0x7b)
1173 else if (byte
>= 0x81 && byte
< 0xff)
1179 static void do_uhc_1 (const char* name
)
1185 enc
.row_byte
= row_byte_uhc_1
;
1186 enc
.col_byte
= col_byte_uhc_1
;
1187 enc
.byte_row
= byte_row_uhc_1
;
1188 enc
.byte_col
= byte_col_uhc_1
;
1189 enc
.check_row_expr
= "(%1$s >= 0x81 && %1$s < 0xa1)";
1190 enc
.check_col_expr
= "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xff)";
1191 enc
.byte_row_expr
= "%1$s - 0x81";
1192 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
1195 output_charset2uni_noholes_monotonic(name
,&enc
);
1196 invert(&enc
); output_uni2charset_sparse(name
,&enc
,true);
1199 /* UHC part 2: 0x{A1..C6}{41..5A,61..7A,81..A0} */
1201 static int row_byte_uhc_2 (int row
) {
/* Returns the 2nd byte value for a col.  The columns fall into three
   consecutive runs whose bytes start at 0x41, 0x61 and 0x81. */
static int col_byte_uhc_2 (int col)
{
  int offset;

  if (col >= 0x34)
    offset = 0x4d;
  else if (col >= 0x1a)
    offset = 0x47;
  else
    offset = 0x41;
  return offset + col;
}
/* UHC part 2: converts a 1st byte value to a row, else -1.
   Accepts 0xA1..0xFE, matching the expression "%1$s - 0xa1" used for the
   generated code. */
static int byte_row_uhc_2 (int byte) {
  if (byte >= 0xa1 && byte < 0xff)
    return byte - 0xa1;
  else
    return -1;
}
/* UHC part 2: converts a 2nd byte value to a column, else -1.
   The three accepted byte ranges are packed into consecutive columns,
   matching "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)". */
static int byte_col_uhc_2 (int byte) {
  if (byte >= 0x41 && byte < 0x5b)
    return byte - 0x41;
  else if (byte >= 0x61 && byte < 0x7b)
    return byte - 0x47;
  else if (byte >= 0x81 && byte < 0xa1)
    return byte - 0x4d;
  else
    return -1;
}
/* Table generation entry point for UHC part 2 under the prefix `name`.
   Visible steps: install the uhc_2 row/column <-> byte converters in `enc`,
   set the matching expression templates, call
   output_charset2uni_noholes_monotonic(), then invert() the mapping and
   call output_uni2charset_sparse().
   NOTE(review): the declaration of `enc`, its row/column counts and the
   call that reads the input table are not visible in this excerpt. */
1224 static void do_uhc_2 (const char* name
)
1230 enc
.row_byte
= row_byte_uhc_2
;
1231 enc
.col_byte
= col_byte_uhc_2
;
1232 enc
.byte_row
= byte_row_uhc_2
;
1233 enc
.byte_col
= byte_col_uhc_2
;
/* Textual counterparts of the range tests in byte_row_uhc_2 and
   byte_col_uhc_2, written as templates with the positional argument %1$s. */
1234 enc
.check_row_expr
= "(%1$s >= 0xa1 && %1$s < 0xff)";
1235 enc
.check_col_expr
= "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xa1)";
1236 enc
.byte_row_expr
= "%1$s - 0xa1";
1237 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
/* Emit the forward table, then invert and emit the reverse table. */
1240 output_charset2uni_noholes_monotonic(name
,&enc
);
1241 invert(&enc
); output_uni2charset_sparse(name
,&enc
,true);
/* Big5 specifics */
/* Big5: returns the 1st byte value for a given row.
   Inverse of byte_row_big5 (byte - 0xa1), so row 0 maps to 0xA1. */
static int row_byte_big5 (int row) {
  return 0xa1 + row;
}
/* Big5: returns the 2nd byte value for a given column.
   Columns 0x00..0x3e map to 0x40..0x7E, columns from 0x3f on map to 0xA1
   onwards. */
static int col_byte_big5 (int col) {
  return (col >= 0x3f ? 0x62 : 0x40) + col;
}
/* Big5: converts a 1st byte value to a row, else -1.
   Accepts 0xA1..0xFE, matching the expression "%1$s - 0xa1" used for the
   generated code. */
static int byte_row_big5 (int byte) {
  if (byte >= 0xa1 && byte < 0xff)
    return byte - 0xa1;
  else
    return -1;
}
/* Big5: converts a 2nd byte value to a column, else -1.
   Accepts 0x40..0x7E and 0xA1..0xFE, matching
   "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)". */
static int byte_col_big5 (int byte) {
  if (byte >= 0x40 && byte < 0x7f)
    return byte - 0x40;
  else if (byte >= 0xa1 && byte < 0xff)
    return byte - 0x62;
  else
    return -1;
}
/* Table generation entry point for Big5-style tables under the prefix
   `name`.  Visible steps: install the big5 row/column <-> byte converters
   in `enc`, set the matching expression templates, call
   output_charset2uni(), then invert() the mapping and call
   output_uni2charset_sparse().
   NOTE(review): the declaration of `enc`, its row/column counts and the
   call that reads the input table are not visible in this excerpt. */
1267 static void do_big5 (const char* name
)
1273 enc
.row_byte
= row_byte_big5
;
1274 enc
.col_byte
= col_byte_big5
;
1275 enc
.byte_row
= byte_row_big5
;
1276 enc
.byte_col
= byte_col_big5
;
/* Textual counterparts of the range tests in byte_row_big5 and
   byte_col_big5, written as templates with the positional argument %1$s. */
1277 enc
.check_row_expr
= "%1$s >= 0xa1 && %1$s < 0xff";
1278 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
1279 enc
.byte_row_expr
= "%1$s - 0xa1";
1280 enc
.byte_col_expr
= "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
/* Emit the forward table, then invert and emit the reverse table. */
1283 output_charset2uni(name
,&enc
);
1284 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
/* HKSCS specifics */
/* HKSCS: returns the 1st byte value for a given row.
   Inverse of byte_row_hkscs (byte - 0x80), so row 0 maps to 0x80. */
static int row_byte_hkscs (int row) {
  return 0x80 + row;
}
/* HKSCS: converts a 1st byte value to a row, else -1.
   Accepts 0x80..0xFE, matching the expression "%1$s - 0x80" used for the
   generated code. */
static int byte_row_hkscs (int byte) {
  if (byte >= 0x80 && byte < 0xff)
    return byte - 0x80;
  else
    return -1;
}
/* Table generation entry point for HKSCS tables under the prefix `name`.
   Rows use the hkscs converters (1st byte 0x80..0xFE) while columns reuse
   the big5 column converters.  Visible steps: fill in `enc`, call
   output_charset2uni(), then invert() the mapping and call
   output_uni2charset_sparse().
   NOTE(review): the declaration of `enc`, its row/column counts and the
   call that reads the input table are not visible in this excerpt. */
1299 static void do_hkscs (const char* name
)
1305 enc
.row_byte
= row_byte_hkscs
;
1306 enc
.col_byte
= col_byte_big5
;
1307 enc
.byte_row
= byte_row_hkscs
;
1308 enc
.byte_col
= byte_col_big5
;
/* Expression templates with the positional argument %1$s; the column
   expressions are the same strings as used for big5. */
1309 enc
.check_row_expr
= "%1$s >= 0x80 && %1$s < 0xff";
1310 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
1311 enc
.byte_row_expr
= "%1$s - 0x80";
1312 enc
.byte_col_expr
= "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
/* Emit the forward table, then invert and emit the reverse table. */
1315 output_charset2uni(name
,&enc
);
1316 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
/* Johab Hangul specifics */
/* Johab Hangul: returns the 1st byte value for a given row.
   Inverse of byte_row_johab_hangul (byte - 0x84), so row 0 maps to 0x84. */
static int row_byte_johab_hangul (int row) {
  return 0x84 + row;
}
/* Johab Hangul: returns the 2nd byte value for a given column.
   Columns 0x00..0x3d map to 0x41..0x7E, columns from 0x3e on map to 0x81
   onwards. */
static int col_byte_johab_hangul (int col) {
  return (col >= 0x3e ? 0x43 : 0x41) + col;
}
/* Johab Hangul: converts a 1st byte value to a row, else -1.
   Accepts 0x84..0xD3, matching the expression "%1$s - 0x84" used for the
   generated code. */
static int byte_row_johab_hangul (int byte) {
  if (byte >= 0x84 && byte < 0xd4)
    return byte - 0x84;
  else
    return -1;
}
/* Johab Hangul: converts a 2nd byte value to a column, else -1.
   Accepts 0x41..0x7E and 0x81..0xFE, matching
   "%1$s - (%1$s >= 0x81 ? 0x43 : 0x41)". */
static int byte_col_johab_hangul (int byte) {
  if (byte >= 0x41 && byte < 0x7f)
    return byte - 0x41;
  else if (byte >= 0x81 && byte < 0xff)
    return byte - 0x43;
  else
    return -1;
}
/* Table generation entry point for the Johab Hangul table under the prefix
   `name`.  Visible steps: install the johab_hangul row/column <-> byte
   converters in `enc`, set the matching expression templates, call
   output_charset2uni(), then invert() the mapping and call
   output_uni2charset_dense() (the other do_* routines visible nearby call
   the sparse variant instead).
   NOTE(review): the declaration of `enc`, its row/column counts and the
   call that reads the input table are not visible in this excerpt. */
1342 static void do_johab_hangul (const char* name
)
1348 enc
.row_byte
= row_byte_johab_hangul
;
1349 enc
.col_byte
= col_byte_johab_hangul
;
1350 enc
.byte_row
= byte_row_johab_hangul
;
1351 enc
.byte_col
= byte_col_johab_hangul
;
/* Textual counterparts of the range tests in byte_row_johab_hangul and
   byte_col_johab_hangul, written as templates with the positional
   argument %1$s. */
1352 enc
.check_row_expr
= "%1$s >= 0x84 && %1$s < 0xd4";
1353 enc
.check_col_expr
= "(%1$s >= 0x41 && %1$s < 0x7f) || (%1$s >= 0x81 && %1$s < 0xff)";
1354 enc
.byte_row_expr
= "%1$s - 0x84";
1355 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x81 ? 0x43 : 0x41)";
/* Emit the forward table, then invert and emit the reverse table. */
1358 output_charset2uni(name
,&enc
);
1359 invert(&enc
); output_uni2charset_dense(name
,&enc
);
/* SJIS specifics */
/* SJIS: returns the 1st byte value for a given row.
   Rows 0x00..0x1e map to 0x81..0x9F, rows from 0x1f on map to 0xE0
   onwards. */
static int row_byte_sjis (int row) {
  return (row >= 0x1f ? 0xc1 : 0x81) + row;
}
/* SJIS: returns the 2nd byte value for a given column.
   Columns 0x00..0x3e map to 0x40..0x7E, columns from 0x3f on map to 0x80
   onwards (0x7F is skipped). */
static int col_byte_sjis (int col) {
  return (col >= 0x3f ? 0x41 : 0x40) + col;
}
/* SJIS: converts a 1st byte value to a row, else -1.
   Accepts 0x81..0x9F and everything from 0xE0 up (no upper bound is
   tested), matching "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)". */
static int byte_row_sjis (int byte) {
  if (byte >= 0x81 && byte < 0xa0)
    return byte - 0x81;
  else if (byte >= 0xe0)
    return byte - 0xc1;
  else
    return -1;
}
/* SJIS: converts a 2nd byte value to a column, else -1.
   Accepts 0x40..0x7E and 0x80..0xFC, matching
   "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)". */
static int byte_col_sjis (int byte) {
  if (byte >= 0x40 && byte < 0x7f)
    return byte - 0x40;
  else if (byte >= 0x80 && byte < 0xfd)
    return byte - 0x41;
  else
    return -1;
}
/* Table generation entry point for Shift_JIS-style tables under the prefix
   `name`.  Visible steps: install the sjis row/column <-> byte converters
   in `enc`, set the matching expression templates, call
   output_charset2uni(), then invert() the mapping and call
   output_uni2charset_sparse().
   NOTE(review): the declaration of `enc`, its row/column counts and the
   call that reads the input table are not visible in this excerpt. */
1387 static void do_sjis (const char* name
)
1393 enc
.row_byte
= row_byte_sjis
;
1394 enc
.col_byte
= col_byte_sjis
;
1395 enc
.byte_row
= byte_row_sjis
;
1396 enc
.byte_col
= byte_col_sjis
;
/* Textual counterparts of the range tests in byte_row_sjis and
   byte_col_sjis, written as templates with the positional argument %1$s. */
1397 enc
.check_row_expr
= "(%1$s >= 0x81 && %1$s < 0xa0) || (%1$s >= 0xe0)";
1398 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xfd)";
1399 enc
.byte_row_expr
= "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)";
1400 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
/* Emit the forward table, then invert and emit the reverse table. */
1403 output_charset2uni(name
,&enc
);
1404 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
/* GB18030 Unicode specifics */
/* Generator for the GB18030 four-byte <-> Unicode mapping, using `name` as
   prefix of every emitted identifier.
   Visible steps:
     - parse "0x<bytes> 0x<ucs>" pairs from stdin; the four bytes must lie
       in 81..84, 30..39, 81..FE, 30..39 and the Unicode value must be
       below 0x10000;
     - turn each byte sequence into a linear index i;
     - group the mappings into at most 256 ranges low..high that share one
       difference `diff` between Unicode value and index;
     - print the range tables, a bitmap of the mapped positions, and the C
       source of <name>_mbtowc and <name>_wctomb.
   NOTE(review): many statements of this function are not visible in this
   excerpt (loop bodies, the assignment of i4, error exits), so this
   description covers only what can be seen. */
1409 static void do_gb18030uni (const char* name
)
1413 int i1
, i2
, i3
, i4
, i
, j
, k
;
1414 int charset2uni
[4*10*126*10];
1415 int uni2charset
[0x10000];
1416 struct { int low
; int high
; int diff
; int total
; } ranges
[256];
1417 int ranges_count
, ranges_total
;
1419 for (i
= 0; i
< 4*10*126*10; i
++)
1421 for (j
= 0; j
< 0x10000; j
++)
1424 /* Read a unicode.org style .TXT file. */
1429 if (c
== '\n' || c
== ' ' || c
== '\t')
1432 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
1436 if (scanf("0x%x", &bytes
) != 1)
1438 i1
= (bytes
>> 24) & 0xff;
1439 i2
= (bytes
>> 16) & 0xff;
1440 i3
= (bytes
>> 8) & 0xff;
1442 if (!(i1
>= 0x81 && i1
<= 0x84
1443 && i2
>= 0x30 && i2
<= 0x39
1444 && i3
>= 0x81 && i3
<= 0xfe
1445 && i4
>= 0x30 && i4
<= 0x39)) {
1446 fprintf(stderr
, "lost entry for %02x %02x %02x %02x\n", i1
, i2
, i3
, i4
);
/* Linearize the four bytes into one index, treating them as mixed-radix
   digits with radices 10, 126 and 10. */
1449 i
= (((i1
-0x81) * 10 + (i2
-0x30)) * 126 + (i3
-0x81)) * 10 + (i4
-0x30);
1450 if (scanf(" 0x%x", &j
) != 1)
1452 if (!(j
>= 0 && j
< 0x10000))
1458 /* Verify that the mapping i -> j is monotonically increasing and
1460 low[k] <= i <= high[k] => j = diff[k] + i
1461 with a set of disjoint intervals (low[k], high[k]). */
1463 for (i
= 0; i
< 4*10*126*10; i
++)
1464 if (charset2uni
[i
] != 0) {
1468 if (ranges_count
> 0) {
1469 if (!(i
> ranges
[ranges_count
-1].high
))
1471 if (!(j
> ranges
[ranges_count
-1].high
+ ranges
[ranges_count
-1].diff
))
1473 /* Additional property: The diffs are also increasing. */
1474 if (!(diff
>= ranges
[ranges_count
-1].diff
))
1477 if (ranges_count
> 0 && diff
== ranges
[ranges_count
-1].diff
)
1478 ranges
[ranges_count
-1].high
= i
;
1480 if (ranges_count
== 256)
1482 ranges
[ranges_count
].low
= i
;
1483 ranges
[ranges_count
].high
= i
;
1484 ranges
[ranges_count
].diff
= diff
;
1489 /* Determine size of bitmap. */
1491 for (k
= 0; k
< ranges_count
; k
++) {
1492 ranges
[k
].total
= ranges_total
;
1493 ranges_total
+= ranges
[k
].high
- ranges
[k
].low
+ 1;
/* Emit the low/high bounds of every range, in linear-index space. */
1496 printf("static const unsigned short %s_charset2uni_ranges[%d] = {\n", name
, 2*ranges_count
);
1497 for (k
= 0; k
< ranges_count
; k
++) {
1498 printf(" 0x%04x, 0x%04x", ranges
[k
].low
, ranges
[k
].high
);
1499 if (k
+1 < ranges_count
) printf(",");
1500 if ((k
% 4) == 3 && k
+1 < ranges_count
) printf("\n");
/* Emit the same bounds shifted by each range's diff, i.e. in Unicode
   space. */
1507 printf("static const unsigned short %s_uni2charset_ranges[%d] = {\n", name
, 2*ranges_count
);
1508 for (k
= 0; k
< ranges_count
; k
++) {
1509 printf(" 0x%04x, 0x%04x", ranges
[k
].low
+ ranges
[k
].diff
, ranges
[k
].high
+ ranges
[k
].diff
);
1510 if (k
+1 < ranges_count
) printf(",");
1511 if ((k
% 4) == 3 && k
+1 < ranges_count
) printf("\n");
/* Emit, per range, its diff and its starting offset into the bitmap (the
   running total computed above). */
1518 printf("static const struct { unsigned short diff; unsigned short bitmap_offset; } %s_ranges[%d] = {\n ", name
, ranges_count
);
1519 for (k
= 0; k
< ranges_count
; k
++) {
1520 printf(" { %5d, 0x%04x }", ranges
[k
].diff
, ranges
[k
].total
);
1521 if (k
+1 < ranges_count
) printf(",");
1522 if ((k
% 4) == 3 && k
+1 < ranges_count
) printf("\n ");
/* Emit the bitmap: one bit per position covered by a range, set when the
   corresponding charset2uni[] entry is non-zero. */
1529 printf("static const unsigned char %s_bitmap[%d] = {\n ", name
, (ranges_total
+ 7) / 8);
1532 for (k
= 0; k
< ranges_count
; k
++) {
1533 for (i
= ranges
[k
].total
; i
<= ranges
[k
].total
+ (ranges
[k
].high
- ranges
[k
].low
);) {
1534 if (charset2uni
[i
- ranges
[k
].total
+ ranges
[k
].low
] != 0)
1535 accu
|= (1 << (i
% 8));
1538 printf(" 0x%02x", accu
);
1539 if ((i
/ 8) < (ranges_total
+ 7) / 8) printf(",");
1540 if (((i
/ 8) % 12) == 0)
1545 if (i
!= (k
+1 < ranges_count
? ranges
[k
+1].total
: ranges_total
)) abort();
1547 if ((ranges_total
% 8) != 0)
1548 printf(" 0x%02x", accu
);
/* Emit the source of the decoder <name>_mbtowc: it validates the four
   bytes, binary-searches the range table and consults the bitmap. */
1555 printf("static int\n");
1556 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, size_t n)\n", name
);
1558 printf(" unsigned char c1 = s[0];\n");
1559 printf(" if (c1 >= 0x81 && c1 <= 0x84) {\n");
1560 printf(" if (n >= 2) {\n");
1561 printf(" unsigned char c2 = s[1];\n");
1562 printf(" if (c2 >= 0x30 && c2 <= 0x39) {\n");
1563 printf(" if (n >= 3) {\n");
1564 printf(" unsigned char c3 = s[2];\n");
1565 printf(" if (c3 >= 0x81 && c3 <= 0xfe) {\n");
1566 printf(" if (n >= 4) {\n");
1567 printf(" unsigned char c4 = s[3];\n");
1568 printf(" if (c4 >= 0x30 && c4 <= 0x39) {\n");
1569 printf(" unsigned int i = (((c1 - 0x81) * 10 + (c2 - 0x30)) * 126 + (c3 - 0x81)) * 10 + (c4 - 0x30);\n");
1570 printf(" if (i >= %d && i <= %d) {\n", ranges
[0].low
, ranges
[ranges_count
-1].high
);
1571 printf(" unsigned int k1 = 0;\n");
1572 printf(" unsigned int k2 = %d;\n", ranges_count
-1);
1573 printf(" while (k1 < k2) {\n");
1574 printf(" unsigned int k = (k1 + k2) / 2;\n");
1575 printf(" if (i <= %s_charset2uni_ranges[2*k+1])\n", name
);
1576 printf(" k2 = k;\n");
1577 printf(" else if (i >= %s_charset2uni_ranges[2*k+2])\n", name
);
1578 printf(" k1 = k + 1;\n");
1580 printf(" return RET_ILSEQ;\n");
1583 printf(" unsigned int bitmap_index = i - %s_charset2uni_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name
, name
);
1584 printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name
);
1585 printf(" unsigned int diff = %s_ranges[k1].diff;\n", name
);
1586 printf(" *pwc = (ucs4_t) (i + diff);\n");
1587 printf(" return 4;\n");
1592 printf(" return RET_ILSEQ;\n");
1594 printf(" return RET_TOOFEW(0);\n");
1596 printf(" return RET_ILSEQ;\n");
1598 printf(" return RET_TOOFEW(0);\n");
1600 printf(" return RET_ILSEQ;\n");
1602 printf(" return RET_TOOFEW(0);\n");
1604 printf(" return RET_ILSEQ;\n");
/* Emit the source of the encoder <name>_wctomb: the same search in Unicode
   space, followed by splitting the linear index back into four bytes. */
1609 printf("static int\n");
1610 printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, size_t n)\n", name
);
1612 printf(" if (n >= 4) {\n");
1613 printf(" unsigned int i = wc;\n");
1614 printf(" if (i >= 0x%04x && i <= 0x%04x) {\n", ranges
[0].low
+ ranges
[0].diff
, ranges
[ranges_count
-1].high
+ ranges
[ranges_count
-1].diff
);
1615 printf(" unsigned int k1 = 0;\n");
1616 printf(" unsigned int k2 = %d;\n", ranges_count
-1);
1617 printf(" while (k1 < k2) {\n");
1618 printf(" unsigned int k = (k1 + k2) / 2;\n");
1619 printf(" if (i <= %s_uni2charset_ranges[2*k+1])\n", name
);
1620 printf(" k2 = k;\n");
1621 printf(" else if (i >= %s_uni2charset_ranges[2*k+2])\n", name
);
1622 printf(" k1 = k + 1;\n");
1624 printf(" return RET_ILUNI;\n");
1627 printf(" unsigned int bitmap_index = i - %s_uni2charset_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name
, name
);
1628 printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name
);
1629 printf(" unsigned int diff = %s_ranges[k1].diff;\n", name
);
1630 printf(" i -= diff;\n");
1631 printf(" r[3] = (i %% 10) + 0x30; i = i / 10;\n");
1632 printf(" r[2] = (i %% 126) + 0x81; i = i / 126;\n");
1633 printf(" r[1] = (i %% 10) + 0x30; i = i / 10;\n");
1634 printf(" r[0] = i + 0x81;\n");
1635 printf(" return 4;\n");
1639 printf(" return RET_ILUNI;\n");
1641 printf(" return RET_TOOSMALL;\n");
/* JISX0213 specifics */
/* Generator for the jisx0213.h header (the `name` argument is not used in
   any of the statements visible here).
   Visible structure:
     - print the include guard and an explanatory comment;
     - first pass over the input lines: each line is "0x" + 5 hex digits,
       a tab, "0x" + 4 hex digits and optionally " 0x" + 4 hex digits.
       Two-value lines are printed into jisx0213_to_ucs_combining and
       stored under consecutive numbers starting at 0x0001; the values are
       collected in table[120][94] and emitted as jisx0213_to_ucs_main plus
       jisx0213_to_ucs_pagestart;
     - second pass: build a Unicode-indexed table of
       (plane << 15) | (row << 8) | col values and emit
       jisx0213_from_ucs_level1 and the level2 tables;
     - print the inline accessors jisx0213_to_ucs4 and ucs4_to_jisx0213.
   NOTE(review): many statements of this function are not visible in this
   excerpt (declarations of line, ucs, pages, maxpage, loop bodies, how the
   input is re-read for the second pass); the description is limited to
   what can be seen. */
1647 static void do_jisx0213 (const char* name
)
1649 printf("#ifndef _JISX0213_H\n");
1650 printf("#define _JISX0213_H\n");
1652 printf("/* JISX0213 plane 1 (= ISO-IR-233) characters are in the range\n");
1653 printf(" 0x{21..7E}{21..7E}.\n");
1654 printf(" JISX0213 plane 2 (= ISO-IR-229) characters are in the range\n");
1655 printf(" 0x{21,23..25,28,2C..2F,6E..7E}{21..7E}.\n");
1656 printf(" Together this makes 120 rows of 94 characters.\n");
1660 #define row_convert(row) \
1661 ((row) >= 0x121 && (row) <= 0x17E ? row-289 : /* 0..93 */ \
1662 (row) == 0x221 ? row-451 : /* 94 */ \
1663 (row) >= 0x223 && (row) <= 0x225 ? row-452 : /* 95..97 */ \
1664 (row) == 0x228 ? row-454 : /* 98 */ \
1665 (row) >= 0x22C && (row) <= 0x22F ? row-457 : /* 99..102 */ \
1666 (row) >= 0x26E && (row) <= 0x27E ? row-519 : /* 103..119 */ \
1668 unsigned int table
[120][94];
1669 int pagemin
[0x1100];
1670 int pagemax
[0x1100];
1671 int pageidx
[0x1100];
1672 unsigned int pagestart
[0x1100];
1673 unsigned int pagestart_len
= 0;
1675 unsigned int rowc
, colc
;
1676 for (rowc
= 0; rowc
< 120; rowc
++)
1677 for (colc
= 0; colc
< 94; colc
++)
1678 table
[rowc
][colc
] = 0;
1682 for (page
= 0; page
< 0x1100; page
++)
1684 for (page
= 0; page
< 0x1100; page
++)
1686 for (page
= 0; page
< 0x1100; page
++)
/* First pass: fill table[][] and print the combining pairs on the fly. */
1689 printf("static const unsigned short jisx0213_to_ucs_combining[][2] = {\n");
1691 int private_use
= 0x0001;
1694 unsigned int row
, col
;
1696 memset(line
,0,sizeof(line
));
/* NOTE(review): the %[ conversion carries no field width; confirm that
   `line` is large enough for every input line. */
1697 if (scanf("%[^\n]\n",line
) < 1)
1699 assert(line
[0]=='0');
1700 assert(line
[1]=='x');
1701 assert(isxdigit(line
[2]));
1702 assert(isxdigit(line
[3]));
1703 assert(isxdigit(line
[4]));
1704 assert(isxdigit(line
[5]));
1705 assert(isxdigit(line
[6]));
1706 assert(line
[7]=='\t');
1708 col
= strtoul(&line
[5],NULL
,16);
1710 row
= strtoul(&line
[2],NULL
,16);
1711 if (line
[20] != '\0' && line
[21] == '\0') {
1712 unsigned int u1
, u2
;
1713 assert(line
[8]=='0');
1714 assert(line
[9]=='x');
1715 assert(isxdigit(line
[10]));
1716 assert(isxdigit(line
[11]));
1717 assert(isxdigit(line
[12]));
1718 assert(isxdigit(line
[13]));
1719 assert(line
[14]==' ');
1720 assert(line
[15]=='0');
1721 assert(line
[16]=='x');
1722 assert(isxdigit(line
[17]));
1723 assert(isxdigit(line
[18]));
1724 assert(isxdigit(line
[19]));
1725 assert(isxdigit(line
[20]));
1726 u2
= strtoul(&line
[17],NULL
,16);
1728 u1
= strtoul(&line
[10],NULL
,16);
1729 printf(" { 0x%04x, 0x%04x },\n", u1
, u2
);
1730 ucs
= private_use
++;
1732 assert(line
[8]=='0');
1733 assert(line
[9]=='x');
1734 assert(isxdigit(line
[10]));
1735 assert(isxdigit(line
[11]));
1736 assert(isxdigit(line
[12]));
1737 assert(isxdigit(line
[13]));
1738 ucs
= strtoul(&line
[10],NULL
,16);
1740 assert((unsigned int) row_convert(row
) < 120);
1741 assert((unsigned int) (col
-0x21) < 94);
1742 table
[row_convert(row
)][col
-0x21] = ucs
;
/* For every 256-value page (value >> 8), track the smallest and largest
   low byte that occurs in the table. */
1748 unsigned int rowc
, colc
;
1749 for (rowc
= 0; rowc
< 120; rowc
++) {
1750 for (colc
= 0; colc
< 94; colc
++) {
1751 unsigned int value
= table
[rowc
][colc
];
1752 unsigned int page
= value
>> 8;
1753 unsigned int rest
= value
& 0xff;
1754 if (pagemin
[page
] < 0 || pagemin
[page
] > rest
) pagemin
[page
] = rest
;
1755 if (pagemax
[page
] < 0 || pagemax
[page
] < rest
) pagemax
[page
] = rest
;
/* Assign page indices.  When page i only uses values >= 0x80 and page i+1
   only values < 0x80, both share one window starting at (i<<8)+0x80.
   NOTE(review): pagemin[i+1] and pagemax[i+1] are read while i can be as
   large as 0x10ff and the arrays have 0x1100 elements; confirm that the
   last page can never be in use. */
1760 unsigned int index
= 0;
1762 for (i
= 0; i
< 0x1100; ) {
1763 if (pagemin
[i
] >= 0) {
1764 if (pagemin
[i
+1] >= 0 && pagemin
[i
] >= 0x80 && pagemax
[i
+1] < 0x80) {
1765 /* Combine two pages into a single one. */
1766 assert(pagestart_len
< sizeof(pagestart
)/sizeof(pagestart
[0]));
1767 pagestart
[pagestart_len
++] = (i
<<8)+0x80;
1769 pageidx
[i
+1] = index
;
1773 /* A single page. */
1774 assert(pagestart_len
< sizeof(pagestart
)/sizeof(pagestart
[0]));
1775 pagestart
[pagestart_len
++] = i
<<8;
/* Emit the main table: each entry is (page index << 8) | offset of the
   value within that page's window. */
1784 printf("static const unsigned short jisx0213_to_ucs_main[120 * 94] = {\n");
1787 for (row
= 0; row
< 0x300; row
++) {
1788 unsigned int rowc
= row_convert(row
);
1789 if (rowc
!= (unsigned int) (-1)) {
1790 printf(" /* 0x%X21..0x%X7E */\n",row
,row
);
1792 unsigned int count
= 0;
1794 for (colc
= 0; colc
< 94; colc
++) {
1795 if ((count
% 8) == 0) printf(" ");
1797 unsigned int value
= table
[rowc
][colc
];
1798 unsigned int page
= value
>> 8;
1799 unsigned int index
= pageidx
[page
];
1800 assert(value
-pagestart
[index
] < 0x100);
1801 printf(" 0x%04x,",(index
<<8)|(value
-pagestart
[index
]));
1804 if ((count
% 8) == 0) printf("\n");
/* Emit the start value of every page window. */
1813 printf("static const ucs4_t jisx0213_to_ucs_pagestart[] = {\n");
1815 unsigned int count
= 0;
1817 for (i
= 0; i
< pagestart_len
; i
++) {
1819 if ((count
% 8) == 0) printf(" ");
1821 sprintf(buf
,"0x%04x",pagestart
[i
]);
1822 if (strlen(buf
) < 7) printf("%*s",(int)(7-strlen(buf
)),"");
1825 if ((count
% 8) == 0) printf("\n");
/* Second part: table indexed by Unicode code point.
   NOTE(review): as far as visible this is a 0x110000-element int array
   declared inside the function without `static`; confirm that the stack
   limit of the build host is sufficient. */
1835 int table
[0x110000];
1838 unsigned int combining_prefixes
[100];
1839 unsigned int combining_prefixes_len
= 0;
1842 for (i
= 0; i
< 0x110000; i
++)
1844 for (i
= 0; i
< 0x4400; i
++)
1849 unsigned int plane
, row
, col
;
1850 memset(line
,0,sizeof(line
));
1851 if (scanf("%[^\n]\n",line
) < 1)
1853 assert(line
[0]=='0');
1854 assert(line
[1]=='x');
1855 assert(isxdigit(line
[2]));
1856 assert(isxdigit(line
[3]));
1857 assert(isxdigit(line
[4]));
1858 assert(isxdigit(line
[5]));
1859 assert(isxdigit(line
[6]));
1860 assert(line
[7]=='\t');
1862 col
= strtoul(&line
[5],NULL
,16);
1864 row
= strtoul(&line
[3],NULL
,16);
1866 plane
= strtoul(&line
[2],NULL
,16) - 1;
1867 if (line
[20] != '\0' && line
[21] == '\0') {
1868 unsigned int u1
, u2
;
1869 assert(line
[8]=='0');
1870 assert(line
[9]=='x');
1871 assert(isxdigit(line
[10]));
1872 assert(isxdigit(line
[11]));
1873 assert(isxdigit(line
[12]));
1874 assert(isxdigit(line
[13]));
1875 assert(line
[14]==' ');
1876 assert(line
[15]=='0');
1877 assert(line
[16]=='x');
1878 assert(isxdigit(line
[17]));
1879 assert(isxdigit(line
[18]));
1880 assert(isxdigit(line
[19]));
1881 assert(isxdigit(line
[20]));
1882 u2
= strtoul(&line
[17],NULL
,16);
1884 u1
= strtoul(&line
[10],NULL
,16);
1885 assert(u2
== 0x02E5 || u2
== 0x02E9 || u2
== 0x0300 || u2
== 0x0301
1887 assert(combining_prefixes_len
< sizeof(combining_prefixes
)/sizeof(combining_prefixes
[0]));
1888 combining_prefixes
[combining_prefixes_len
++] = u1
;
1891 assert(line
[8]=='0');
1892 assert(line
[9]=='x');
1893 assert(isxdigit(line
[10]));
1894 assert(isxdigit(line
[11]));
1895 assert(isxdigit(line
[12]));
1896 assert(isxdigit(line
[13]));
1897 ucs
= strtoul(&line
[10],NULL
,16);
1900 assert(row
<= 0x7f);
1901 assert(col
<= 0x7f);
1902 table
[ucs
] = (plane
<< 15) | (row
<< 8) | col
;
1903 pages
[ucs
>>6] = true;
1904 if (maxpage
< 0 || (ucs
>>6) > maxpage
) maxpage
= ucs
>>6;
/* Flag every code point that was recorded as the first element of a
   combining pair by setting bit 0x0080 in its table entry. */
1909 for (i
= 0; i
< combining_prefixes_len
; i
++) {
1910 unsigned int u1
= combining_prefixes
[i
];
1911 assert(table
[u1
] >= 0);
1912 table
[u1
] |= 0x0080;
/* Emit the first-level index, one entry per block of 64 code points. */
1915 printf("static const short jisx0213_from_ucs_level1[%d] = {\n",maxpage
+1);
1917 unsigned int index
= 0;
1919 for (i
= 0; i
<= maxpage
; i
++) {
1920 if ((i
% 8) == 0) printf(" ");
1922 printf(" %3u,",index
);
1927 if (((i
+1) % 8) == 0) printf("\n");
1933 #if 0 /* Dense array */
1934 printf("static const unsigned short jisx0213_from_ucs_level2[] = {\n");
1937 for (i
= 0; i
<= maxpage
; i
++) {
1939 printf(" /* 0x%04X */\n",i
<<6);
1942 for (j
= 0; j
< 0x40; ) {
1943 unsigned int ucs
= (i
<<6)+j
;
1944 int value
= table
[ucs
];
1945 if (value
< 0) value
= 0;
1946 if ((j
% 8) == 0) printf(" ");
1947 printf(" 0x%04x,",value
);
1949 if ((j
% 8) == 0) printf("\n");
1956 #else /* Sparse array */
1958 int summary_indx
[0x11000];
1959 int summary_used
[0x11000];
1960 unsigned int i
, k
, indx
;
1961 printf("static const unsigned short jisx0213_from_ucs_level2_data[] = {\n");
1962 /* Fill summary_indx[] and summary_used[]. */
1964 for (i
= 0, k
= 0; i
<= maxpage
; i
++) {
1966 unsigned int j1
, j2
;
1967 unsigned int count
= 0;
1968 printf(" /* 0x%04X */\n",i
<<6);
1969 for (j1
= 0; j1
< 4; j1
++) {
1970 summary_indx
[4*k
+j1
] = indx
;
1971 summary_used
[4*k
+j1
] = 0;
1972 for (j2
= 0; j2
< 16; j2
++) {
1973 unsigned int j
= 16*j1
+j2
;
1974 unsigned int ucs
= (i
<<6)+j
;
1975 int value
= table
[ucs
];
1976 if (value
< 0) value
= 0;
1978 summary_used
[4*k
+j1
] |= (1 << j2
);
1979 if ((count
% 8) == 0) printf(" ");
1980 printf(" 0x%04x,",value
);
1982 if ((count
% 8) == 0) printf("\n");
1987 if ((count
% 8) > 0)
1994 printf("static const Summary16 jisx0213_from_ucs_level2_2indx[] = {\n");
1995 for (i
= 0, k
= 0; i
<= maxpage
; i
++) {
1998 printf(" /* 0x%04X */\n",i
<<6);
2000 for (j1
= 0; j1
< 4; j1
++) {
2001 printf(" { %4d, 0x%04x },", summary_indx
[4*k
+j1
], summary_used
[4*k
+j1
]);
/* Emit the source of the accessor jisx0213_to_ucs4 (row, col). */
2012 printf("#ifdef __GNUC__\n");
2013 printf("__inline\n");
2015 printf("#ifdef __cplusplus\n");
2019 printf("static ucs4_t jisx0213_to_ucs4 (unsigned int row, unsigned int col)\n");
2021 printf(" ucs4_t val;\n");
2023 printf(" if (row >= 0x121 && row <= 0x17e)\n");
2024 printf(" row -= 289;\n");
2025 printf(" else if (row == 0x221)\n");
2026 printf(" row -= 451;\n");
2027 printf(" else if (row >= 0x223 && row <= 0x225)\n");
2028 printf(" row -= 452;\n");
2029 printf(" else if (row == 0x228)\n");
2030 printf(" row -= 454;\n");
2031 printf(" else if (row >= 0x22c && row <= 0x22f)\n");
2032 printf(" row -= 457;\n");
2033 printf(" else if (row >= 0x26e && row <= 0x27e)\n");
2034 printf(" row -= 519;\n");
2036 printf(" return 0x0000;\n");
2038 printf(" if (col >= 0x21 && col <= 0x7e)\n");
2039 printf(" col -= 0x21;\n");
2041 printf(" return 0x0000;\n");
2043 printf(" val = jisx0213_to_ucs_main[row * 94 + col];\n");
2044 printf(" val = jisx0213_to_ucs_pagestart[val >> 8] + (val & 0xff);\n");
2045 printf(" if (val == 0xfffd)\n");
2046 printf(" val = 0x0000;\n");
2047 printf(" return val;\n");
/* Emit the source of the accessor ucs4_to_jisx0213 (ucs). */
2050 printf("#ifdef __GNUC__\n");
2051 printf("__inline\n");
2053 printf("#ifdef __cplusplus\n");
2057 printf("static unsigned short ucs4_to_jisx0213 (ucs4_t ucs)\n");
2059 printf(" if (ucs < (sizeof(jisx0213_from_ucs_level1)/sizeof(jisx0213_from_ucs_level1[0])) << 6) {\n");
2060 printf(" int index1 = jisx0213_from_ucs_level1[ucs >> 6];\n");
2061 printf(" if (index1 >= 0)");
2062 #if 0 /* Dense array */
2064 printf(" return jisx0213_from_ucs_level2[(index1 << 6) + (ucs & 0x3f)];\n");
2065 #else /* Sparse array */
2067 printf(" const Summary16 *summary = &jisx0213_from_ucs_level2_2indx[((index1 << 6) + (ucs & 0x3f)) >> 4];\n");
2068 printf(" unsigned short used = summary->used;\n");
2069 printf(" unsigned int i = ucs & 0x0f;\n");
2070 printf(" if (used & ((unsigned short) 1 << i)) {\n");
2071 printf(" /* Keep in 'used' only the bits 0..i-1. */\n");
2072 printf(" used &= ((unsigned short) 1 << i) - 1;\n");
2073 printf(" /* Add 'summary->indx' and the number of bits set in 'used'. */\n");
2074 printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
2075 printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
2076 printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
2077 printf(" used = (used & 0x00ff) + (used >> 8);\n");
2078 printf(" return jisx0213_from_ucs_level2_data[summary->indx + used];\n");
2083 printf(" return 0x0000;\n");
2086 printf("#endif /* _JISX0213_H */\n");
2091 int main (int argc
, char *argv
[])
2093 const char* charsetname
;
2098 charsetname
= argv
[1];
2101 output_title(charsetname
);
2103 if (!strcmp(name
,"gb2312")
2104 || !strcmp(name
,"isoir165ext") || !strcmp(name
,"gb12345ext")
2105 || !strcmp(name
,"jisx0208") || !strcmp(name
,"jisx0212"))
2107 else if (!strcmp(name
,"cns11643_1") || !strcmp(name
,"cns11643_2")
2108 || !strcmp(name
,"cns11643_3") || !strcmp(name
,"cns11643_4a")
2109 || !strcmp(name
,"cns11643_4b") || !strcmp(name
,"cns11643_5")
2110 || !strcmp(name
,"cns11643_6") || !strcmp(name
,"cns11643_7")
2111 || !strcmp(name
,"cns11643_15"))
2112 do_normal_only_charset2uni(name
);
2113 else if (!strcmp(name
,"cns11643_inv"))
2114 do_cns11643_only_uni2charset(name
);
2115 else if (!strcmp(name
,"gbkext1"))
2116 do_gbk1_only_charset2uni(name
);
2117 else if (!strcmp(name
,"gbkext2"))
2118 do_gbk2_only_charset2uni(name
);
2119 else if (!strcmp(name
,"gbkext_inv"))
2120 do_gbk1_only_uni2charset(name
);
2121 else if (!strcmp(name
,"cp936ext") || !strcmp(name
,"gb18030ext"))
2123 else if (!strcmp(name
,"ksc5601"))
2125 else if (!strcmp(name
,"uhc_1"))
2127 else if (!strcmp(name
,"uhc_2"))
2129 else if (!strcmp(name
,"big5") || !strcmp(name
,"cp950ext"))
2131 else if (!strcmp(name
,"hkscs1999") || !strcmp(name
,"hkscs2001")
2132 || !strcmp(name
,"hkscs2004") || !strcmp(name
,"hkscs2008"))
2134 else if (!strcmp(name
,"johab_hangul"))
2135 do_johab_hangul(name
);
2136 else if (!strcmp(name
,"cp932ext"))
2138 else if (!strcmp(name
,"gb18030uni"))
2139 do_gb18030uni(name
);
2140 else if (!strcmp(name
,"jisx0213"))