1 /* Copyright (C) 1999-2004, 2006-2007, 2010 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Tools.
4 This program is free software: you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 3 of the License, or
7 (at your option) any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program. If not, see
16 <http://www.gnu.org/licenses/>. */
19 * Generates a CJK character set table from a .TXT table as found on
20 * ftp.unicode.org or in the X nls directory.
23 * ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < gb2312
24 * ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < jis0208
25 * ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < ksc5601
27 * ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < GB2312.TXT
28 * ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < JIS0208.TXT
29 * ./cjk_tab_to_h JISX0212.1990-0 jisx0212 > jisx0212.h < JIS0212.TXT
30 * ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < KSC5601.TXT
31 * ./cjk_tab_to_h KSX1001.1992-0 ksc5601 > ksc5601.h < KSX1001.TXT
33 * ./cjk_tab_to_h BIG5 big5 > big5.h < BIG5.TXT
35 * ./cjk_tab_to_h JOHAB johab > johab.h < JOHAB.TXT
37 * ./cjk_tab_to_h JISX0213:2004 jisx0213 > jisx0213.h < JISX0213.TXT
53 int rows
; /* number of possible values for the 1st byte */
54 int cols
; /* number of possible values for the 2nd byte */
55 int (*row_byte
) (int row
); /* returns the 1st byte value for a given row */
56 int (*col_byte
) (int col
); /* returns the 2nd byte value for a given col */
57 int (*byte_row
) (int byte
); /* converts a 1st byte value to a row, else -1 */
58 int (*byte_col
) (int byte
); /* converts a 2nd byte value to a col, else -1 */
59 const char* check_row_expr
; /* format string for 1st byte value checking */
60 const char* check_col_expr
; /* format string for 2nd byte value checking */
61 const char* byte_row_expr
; /* format string for 1st byte value to row */
62 const char* byte_col_expr
; /* format string for 2nd byte value to col */
63 int** charset2uni
; /* charset2uni[0..rows-1][0..cols-1] is valid */
64 /* You'll understand the terms "row" and "col" when you buy Ken Lunde's book.
65 Once a row is fixed, choosing a "col" is the same as choosing a "cell". */
66 int* charsetpage
; /* charsetpage[0..rows]: how large is a page for a row */
68 Block
* charsetblocks
; /* charsetblocks[0..ncharsetblocks-1] */
69 int* uni2charset
; /* uni2charset[0x0000..0x2ffff] */
70 int fffd
; /* uni representation of the invalid character */
74 * Outputs the file title.
76 static void output_title (const char *charsetname
)
79 printf(" * Copyright (C) 1999-2010 Free Software Foundation, Inc.\n");
80 printf(" * This file is part of the GNU LIBICONV Library.\n");
82 printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
83 printf(" * and/or modify it under the terms of the GNU Library General Public\n");
84 printf(" * License as published by the Free Software Foundation; either version 2\n");
85 printf(" * of the License, or (at your option) any later version.\n");
87 printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
88 printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
89 printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
90 printf(" * Library General Public License for more details.\n");
92 printf(" * You should have received a copy of the GNU Library General Public\n");
93 printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
94 printf(" * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,\n");
95 printf(" * Fifth Floor, Boston, MA 02110-1301, USA.\n");
99 printf(" * %s\n", charsetname
);
105 * Reads the charset2uni table from standard input.
107 static void read_table (Encoding
* enc
)
109 int row
, col
, i
, i1
, i2
, c
, j
;
111 enc
->charset2uni
= (int**) malloc(enc
->rows
*sizeof(int*));
112 for (row
= 0; row
< enc
->rows
; row
++)
113 enc
->charset2uni
[row
] = (int*) malloc(enc
->cols
*sizeof(int));
115 for (row
= 0; row
< enc
->rows
; row
++)
116 for (col
= 0; col
< enc
->cols
; col
++)
117 enc
->charset2uni
[row
][col
] = 0xfffd;
122 /* Read a unicode.org style .TXT file. */
127 if (c
== '\n' || c
== ' ' || c
== '\t')
130 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
134 if (scanf("0x%x", &j
) != 1)
138 row
= enc
->byte_row(i1
);
139 col
= enc
->byte_col(i2
);
140 if (row
< 0 || col
< 0) {
141 fprintf(stderr
, "lost entry for %02x %02x\n", i1
, i2
);
144 if (scanf(" 0x%x", &enc
->charset2uni
[row
][col
]) != 1)
148 /* Read a table of hexadecimal Unicode values. */
149 for (i1
= 32; i1
< 132; i1
++)
150 for (i2
= 32; i2
< 132; i2
++) {
156 if (j
< 0 || j
== 0xffff)
159 if (enc
->byte_row(i1
) < 0 || enc
->byte_col(i2
) < 0) {
160 fprintf(stderr
, "lost entry at %02x %02x\n", i1
, i2
);
163 enc
->charset2uni
[enc
->byte_row(i1
)][enc
->byte_col(i2
)] = j
;
171 * Determine whether the Unicode range goes outside the BMP.
173 static bool is_charset2uni_large (Encoding
* enc
)
177 for (row
= 0; row
< enc
->rows
; row
++)
178 for (col
= 0; col
< enc
->cols
; col
++)
179 if (enc
->charset2uni
[row
][col
] >= 0x10000)
185 * Compactify the Unicode range by use of an auxiliary table,
186 * so 16 bits suffice to store each value.
188 static int compact_large_charset2uni (Encoding
* enc
, unsigned int **urows
, unsigned int *urowshift
)
192 for (shift
= 8; ; shift
--) {
193 int *upages
= (int *) malloc((0x110000>>shift
) * sizeof(int));
194 int i
, row
, col
, nurows
;
196 for (i
= 0; i
< 0x110000>>shift
; i
++)
199 for (row
= 0; row
< enc
->rows
; row
++)
200 for (col
= 0; col
< enc
->cols
; col
++)
201 upages
[enc
->charset2uni
[row
][col
] >> shift
] = 0;
204 for (i
= 0; i
< 0x110000>>shift
; i
++)
208 /* We want all table entries to fit in an 'unsigned short'. */
209 if (nurows
<= 1<<(16-shift
)) {
210 int** old_charset2uni
;
212 *urows
= (unsigned int *) malloc(nurows
* sizeof(unsigned int));
216 for (i
= 0; i
< 0x110000>>shift
; i
++)
217 if (upages
[i
] == 0) {
219 (*urows
)[nurows
] = i
;
223 old_charset2uni
= enc
->charset2uni
;
224 enc
->charset2uni
= (int**) malloc(enc
->rows
*sizeof(int*));
225 for (row
= 0; row
< enc
->rows
; row
++)
226 enc
->charset2uni
[row
] = (int*) malloc(enc
->cols
*sizeof(int));
227 for (row
= 0; row
< enc
->rows
; row
++)
228 for (col
= 0; col
< enc
->cols
; col
++) {
229 int u
= old_charset2uni
[row
][col
];
230 enc
->charset2uni
[row
][col
] =
231 (upages
[u
>> shift
] << shift
) | (u
& ((1 << shift
) - 1));
234 (upages
[0xfffd >> shift
] << shift
) | (0xfffd & ((1 << shift
) - 1));
243 * Computes the charsetpage[0..rows] array.
245 static void find_charset2uni_pages (Encoding
* enc
)
249 enc
->charsetpage
= (int*) malloc((enc
->rows
+1)*sizeof(int));
251 for (row
= 0; row
<= enc
->rows
; row
++)
252 enc
->charsetpage
[row
] = 0;
254 for (row
= 0; row
< enc
->rows
; row
++) {
256 for (col
= 0; col
< enc
->cols
; col
++)
257 if (enc
->charset2uni
[row
][col
] != enc
->fffd
)
259 enc
->charsetpage
[row
] = used
;
264 * Fills in ncharsetblocks and charsetblocks.
266 static void find_charset2uni_blocks (Encoding
* enc
)
270 enc
->charsetblocks
= (Block
*) malloc(enc
->rows
*sizeof(Block
));
273 for (row
= 0; row
< enc
->rows
; row
++)
274 if (enc
->charsetpage
[row
] > 0 && (row
== 0 || enc
->charsetpage
[row
-1] == 0)) {
275 for (lastrow
= row
; enc
->charsetpage
[lastrow
+1] > 0; lastrow
++);
276 enc
->charsetblocks
[n
].start
= row
* enc
->cols
;
277 enc
->charsetblocks
[n
].end
= lastrow
* enc
->cols
+ enc
->charsetpage
[lastrow
];
280 enc
->ncharsetblocks
= n
;
284 * Outputs the charset to unicode table and function.
286 static void output_charset2uni (const char* name
, Encoding
* enc
)
288 int nurows
, row
, col
, lastrow
, col_max
, i
, i1_min
, i1_max
;
291 unsigned int urowshift
;
294 is_large
= is_charset2uni_large(enc
);
296 /* Use a temporary copy of enc. */
299 nurows
= compact_large_charset2uni(enc
,&urows
,&urowshift
);
301 nurows
= 0; urows
= NULL
; urowshift
= 0; enc
->fffd
= 0xfffd;
304 find_charset2uni_pages(enc
);
306 find_charset2uni_blocks(enc
);
308 for (row
= 0; row
< enc
->rows
; row
++)
309 if (enc
->charsetpage
[row
] > 0) {
310 if (row
== 0 || enc
->charsetpage
[row
-1] == 0) {
311 /* Start a new block. */
312 for (lastrow
= row
; enc
->charsetpage
[lastrow
+1] > 0; lastrow
++);
313 printf("static const unsigned short %s_2uni_page%02x[%d] = {\n",
314 name
, enc
->row_byte(row
),
315 (lastrow
-row
) * enc
->cols
+ enc
->charsetpage
[lastrow
]);
317 printf(" /""* 0x%02x *""/\n ", enc
->row_byte(row
));
318 col_max
= (enc
->charsetpage
[row
+1] > 0 ? enc
->cols
: enc
->charsetpage
[row
]);
319 for (col
= 0; col
< col_max
; col
++) {
320 printf(" 0x%04x,", enc
->charset2uni
[row
][col
]);
321 if ((col
% 8) == 7 && (col
+1 < col_max
)) printf("\n ");
324 if (enc
->charsetpage
[row
+1] == 0) {
332 printf("static const ucs4_t %s_2uni_upages[%d] = {\n ", name
, nurows
);
333 for (i
= 0; i
< nurows
; i
++) {
334 printf(" 0x%05x,", urows
[i
] << urowshift
);
335 if ((i
% 8) == 7 && (i
+1 < nurows
)) printf("\n ");
342 printf("static int\n");
343 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name
);
345 printf(" unsigned char c1 = s[0];\n");
347 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
348 i1_min
= enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
);
349 i1_max
= enc
->row_byte((enc
->charsetblocks
[i
].end
-1) / enc
->cols
);
352 if (i1_min
== i1_max
)
353 printf("(c1 == 0x%02x)", i1_min
);
355 printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min
, i1_max
);
358 printf(" if (n >= 2) {\n");
359 printf(" unsigned char c2 = s[1];\n");
361 printf(enc
->check_col_expr
, "c2");
363 printf(" unsigned int i = %d * (", enc
->cols
);
364 printf(enc
->byte_row_expr
, "c1");
366 printf(enc
->byte_col_expr
, "c2");
368 printf(" %s wc = 0xfffd;\n", is_large
? "ucs4_t" : "unsigned short");
369 if (is_large
) printf(" unsigned short swc;\n");
370 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
374 if (i
< enc
->ncharsetblocks
-1)
375 printf("if (i < %d) ", enc
->charsetblocks
[i
+1].start
);
377 printf(" if (i < %d)\n", enc
->charsetblocks
[i
].end
);
378 printf(" %s = ", is_large
? "swc" : "wc");
379 printf("%s_2uni_page%02x[i", name
, enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
));
380 if (enc
->charsetblocks
[i
].start
> 0)
381 printf("-%d", enc
->charsetblocks
[i
].start
);
383 if (is_large
) printf(",\n wc = %s_2uni_upages[swc>>%d] | (swc & 0x%x)", name
, urowshift
, (1 << urowshift
) - 1);
387 printf(" if (wc != 0xfffd) {\n");
388 printf(" *pwc = %swc;\n", is_large
? "" : "(ucs4_t) ");
389 printf(" return 2;\n");
392 printf(" return RET_ILSEQ;\n");
394 printf(" return RET_TOOFEW(0);\n");
396 printf(" return RET_ILSEQ;\n");
402 * Outputs the charset to unicode table and function.
403 * (Suitable if the mapping function is well defined, i.e. has no holes, and
404 * is monotonically increasing with small gaps only.)
406 static void output_charset2uni_noholes_monotonic (const char* name
, Encoding
* enc
)
408 int row
, col
, lastrow
, r
, col_max
, i
, i1_min
, i1_max
;
410 /* Choose stepsize so that stepsize*steps_per_row >= enc->cols, and
411 enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]
412 is always < 0x100. */
413 int steps_per_row
= 2;
414 int stepsize
= (enc
->cols
+ steps_per_row
-1) / steps_per_row
;
416 find_charset2uni_pages(enc
);
418 find_charset2uni_blocks(enc
);
420 for (row
= 0; row
< enc
->rows
; row
++)
421 if (enc
->charsetpage
[row
] > 0) {
422 if (row
== 0 || enc
->charsetpage
[row
-1] == 0) {
423 /* Start a new block. */
424 for (lastrow
= row
; enc
->charsetpage
[lastrow
+1] > 0; lastrow
++);
425 printf("static const unsigned short %s_2uni_main_page%02x[%d] = {\n ",
426 name
, enc
->row_byte(row
),
427 steps_per_row
*(lastrow
-row
+1));
428 for (r
= row
; r
<= lastrow
; r
++) {
429 for (i
= 0; i
< steps_per_row
; i
++)
430 printf(" 0x%04x,", enc
->charset2uni
[r
][i
*stepsize
]);
431 if (((r
-row
) % 4) == 3 && (r
< lastrow
)) printf("\n ");
435 printf("static const unsigned char %s_2uni_page%02x[%d] = {\n",
436 name
, enc
->row_byte(row
),
437 (lastrow
-row
) * enc
->cols
+ enc
->charsetpage
[lastrow
]);
439 printf(" /""* 0x%02x *""/\n ", enc
->row_byte(row
));
440 col_max
= (enc
->charsetpage
[row
+1] > 0 ? enc
->cols
: enc
->charsetpage
[row
]);
441 for (col
= 0; col
< col_max
; col
++) {
442 printf(" 0x%02x,", enc
->charset2uni
[row
][col
] - enc
->charset2uni
[row
][col
/stepsize
*stepsize
]);
443 if ((col
% 8) == 7 && (col
+1 < col_max
)) printf("\n ");
446 if (enc
->charsetpage
[row
+1] == 0) {
453 printf("static int\n");
454 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name
);
456 printf(" unsigned char c1 = s[0];\n");
458 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
459 i1_min
= enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
);
460 i1_max
= enc
->row_byte((enc
->charsetblocks
[i
].end
-1) / enc
->cols
);
463 if (i1_min
== i1_max
)
464 printf("(c1 == 0x%02x)", i1_min
);
466 printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min
, i1_max
);
469 printf(" if (n >= 2) {\n");
470 printf(" unsigned char c2 = s[1];\n");
472 printf(enc
->check_col_expr
, "c2");
474 printf(" unsigned int row = ");
475 printf(enc
->byte_row_expr
, "c1");
477 printf(" unsigned int col = ");
478 printf(enc
->byte_col_expr
, "c2");
480 printf(" unsigned int i = %d * row + col;\n", enc
->cols
);
481 printf(" unsigned short wc = 0xfffd;\n");
482 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
486 if (i
< enc
->ncharsetblocks
-1)
487 printf("if (i < %d) ", enc
->charsetblocks
[i
+1].start
);
489 printf(" if (i < %d)\n", enc
->charsetblocks
[i
].end
);
490 printf(" wc = %s_2uni_main_page%02x[%d*", name
, enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
), steps_per_row
);
491 if (enc
->charsetblocks
[i
].start
> 0)
492 printf("(row-%d)", enc
->charsetblocks
[i
].start
/ enc
->cols
);
496 if (steps_per_row
== 2)
497 printf("(col>=%d?1:0)", stepsize
);
499 printf("col/%d", stepsize
);
500 printf("] + %s_2uni_page%02x[i", name
, enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
));
501 if (enc
->charsetblocks
[i
].start
> 0)
502 printf("-%d", enc
->charsetblocks
[i
].start
);
506 printf(" if (wc != 0xfffd) {\n");
507 printf(" *pwc = (ucs4_t) wc;\n");
508 printf(" return 2;\n");
511 printf(" return RET_ILSEQ;\n");
513 printf(" return RET_TOOFEW(0);\n");
515 printf(" return RET_ILSEQ;\n");
521 * Computes the uni2charset[0x0000..0x2ffff] array.
523 static void invert (Encoding
* enc
)
527 enc
->uni2charset
= (int*) malloc(0x30000*sizeof(int));
529 for (j
= 0; j
< 0x30000; j
++)
530 enc
->uni2charset
[j
] = 0;
532 for (row
= 0; row
< enc
->rows
; row
++)
533 for (col
= 0; col
< enc
->cols
; col
++) {
534 j
= enc
->charset2uni
[row
][col
];
536 enc
->uni2charset
[j
] = 0x100 * enc
->row_byte(row
) + enc
->col_byte(col
);
541 * Outputs the unicode to charset table and function, using a linear array.
542 * (Suitable if the table is dense.)
544 static void output_uni2charset_dense (const char* name
, Encoding
* enc
)
546 /* Like in 8bit_tab_to_h.c */
550 struct { int minline
; int maxline
; int usecount
; } tables
[0x6000];
552 int row
, col
, j
, p
, j1
, j2
, t
;
554 for (p
= 0; p
< 0x300; p
++)
556 for (row
= 0; row
< enc
->rows
; row
++)
557 for (col
= 0; col
< enc
->cols
; col
++) {
558 j
= enc
->charset2uni
[row
][col
];
562 for (j1
= 0; j1
< 0x6000; j1
++) {
563 bool all_invalid
= true;
564 for (j2
= 0; j2
< 8; j2
++) {
566 if (enc
->uni2charset
[j
] != 0)
575 for (j1
= 0; j1
< 0x6000; j1
++) {
578 && ((j1
> 0 && line
[j1
-1] == tableno
-1)
579 || ((tables
[tableno
-1].maxline
>> 5) == (j1
>> 5)
580 && j1
- tables
[tableno
-1].maxline
<= 8))) {
581 line
[j1
] = tableno
-1;
582 tables
[tableno
-1].maxline
= j1
;
585 line
[j1
] = tableno
-1;
586 tables
[tableno
-1].minline
= tables
[tableno
-1].maxline
= j1
;
590 for (t
= 0; t
< tableno
; t
++) {
591 tables
[t
].usecount
= 0;
592 j1
= 8*tables
[t
].minline
;
593 j2
= 8*(tables
[t
].maxline
+1);
594 for (j
= j1
; j
< j2
; j
++)
595 if (enc
->uni2charset
[j
] != 0)
596 tables
[t
].usecount
++;
600 for (t
= 0; t
< tableno
; t
++)
601 if (tables
[t
].usecount
> 1) {
602 p
= tables
[t
].minline
>> 5;
603 printf("static const unsigned short %s_page%02x[%d] = {\n", name
, p
, 8*(tables
[t
].maxline
-tables
[t
].minline
+1));
604 for (j1
= tables
[t
].minline
; j1
<= tables
[t
].maxline
; j1
++) {
605 if ((j1
% 0x20) == 0 && j1
> tables
[t
].minline
)
606 printf(" /* 0x%04x */\n", 8*j1
);
608 for (j2
= 0; j2
< 8; j2
++) {
610 printf(" 0x%04x,", enc
->uni2charset
[j
]);
612 printf(" /*0x%02x-0x%02x*/\n", 8*(j1
% 0x20), 8*(j1
% 0x20)+7);
619 printf("static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name
);
621 printf(" if (n >= 2) {\n");
622 printf(" unsigned short c = 0;\n");
624 for (j1
= 0; j1
< 0x6000;) {
626 for (j2
= j1
; j2
< 0x6000 && line
[j2
] == t
; j2
++);
628 if (j1
!= tables
[t
].minline
) abort();
629 if (j2
> tables
[t
].maxline
+1) abort();
630 j2
= tables
[t
].maxline
+1;
636 if (tables
[t
].usecount
== 0) abort();
637 if (tables
[t
].usecount
== 1) {
638 if (j2
!= j1
+1) abort();
639 for (j
= 8*j1
; j
< 8*j2
; j
++)
640 if (enc
->uni2charset
[j
] != 0) {
641 printf("if (wc == 0x%04x)\n c = 0x%02x;\n", j
, enc
->uni2charset
[j
]);
646 printf("if (wc < 0x%04x)", 8*j2
);
648 printf("if (wc >= 0x%04x && wc < 0x%04x)", 8*j1
, 8*j2
);
650 printf("\n c = %s_page%02x[wc", name
, j1
>> 5);
651 if (tables
[t
].minline
> 0)
652 printf("-0x%04x", 8*j1
);
658 printf(" if (c != 0) {\n");
659 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
660 printf(" return 2;\n");
662 printf(" return RET_ILUNI;\n");
664 printf(" return RET_TOOSMALL;\n");
669 * Outputs the unicode to charset table and function, using a packed array.
670 * (Suitable if the table is sparse.)
671 * The argument 'monotonic' may be set to true if the mapping is monotonically
672 * increasing with small gaps only.
674 static void output_uni2charset_sparse (const char* name
, Encoding
* enc
, bool monotonic
)
677 Block pageblocks
[0x300]; int npageblocks
;
678 int indx2charset
[0x30000];
679 int summary_indx
[0x3000];
680 int summary_used
[0x3000];
681 int i
, row
, col
, j
, p
, j1
, j2
, indx
;
684 int log2_stepsize
= (!strcmp(name
,"uhc_2") ? 6 : 7);
685 int stepsize
= 1 << log2_stepsize
;
688 /* Fill pages[0x300]. */
689 for (p
= 0; p
< 0x300; p
++)
691 for (row
= 0; row
< enc
->rows
; row
++)
692 for (col
= 0; col
< enc
->cols
; col
++) {
693 j
= enc
->charset2uni
[row
][col
];
698 /* Determine whether two or three bytes are needed for each character. */
700 for (j
= 0; j
< 0x30000; j
++)
701 if (enc
->uni2charset
[j
] >= 0x10000)
705 for (p
= 0; p
< 0x300; p
++)
707 printf("static const unsigned short %s_page%02x[256] = {\n", name
, p
);
708 for (j1
= 0; j1
< 32; j1
++) {
710 for (j2
= 0; j2
< 8; j2
++)
711 printf("0x%04x, ", enc
->uni2charset
[256*p
+8*j1
+j2
]);
712 printf("/""*0x%02x-0x%02x*""/\n", 8*j1
, 8*j1
+7);
719 /* Fill summary_indx[] and summary_used[]. */
721 for (j1
= 0; j1
< 0x3000; j1
++) {
722 summary_indx
[j1
] = indx
;
723 summary_used
[j1
] = 0;
724 for (j2
= 0; j2
< 16; j2
++) {
726 if (enc
->uni2charset
[j
] != 0) {
727 indx2charset
[indx
++] = enc
->uni2charset
[j
];
728 summary_used
[j1
] |= (1 << j2
);
733 /* Fill npageblocks and pageblocks[]. */
735 for (p
= 0; p
< 0x300; ) {
736 if (pages
[p
] && (p
== 0 || !pages
[p
-1])) {
737 pageblocks
[npageblocks
].start
= 16*p
;
738 do p
++; while (p
< 0x300 && pages
[p
]);
740 while (summary_used
[j1
-1] == 0) j1
--;
741 pageblocks
[npageblocks
].end
= j1
;
748 indxsteps
= (indx
+ stepsize
-1) / stepsize
;
749 printf("static const unsigned short %s_2charset_main[%d] = {\n", name
, indxsteps
);
750 for (i
= 0; i
< indxsteps
; ) {
751 if ((i
% 8) == 0) printf(" ");
752 printf(" 0x%04x,", indx2charset
[i
*stepsize
]);
754 if ((i
% 8) == 0 || i
== indxsteps
) printf("\n");
757 printf("static const unsigned char %s_2charset[%d] = {\n", name
, indx
);
758 for (i
= 0; i
< indx
; ) {
759 if ((i
% 8) == 0) printf(" ");
760 printf(" 0x%02x,", indx2charset
[i
] - indx2charset
[i
/stepsize
*stepsize
]);
762 if ((i
% 8) == 0 || i
== indx
) printf("\n");
767 printf("static const unsigned char %s_2charset[3*%d] = {\n", name
, indx
);
768 for (i
= 0; i
< indx
; ) {
769 if ((i
% 4) == 0) printf(" ");
770 printf(" 0x%1x,0x%02x,0x%02x,", indx2charset
[i
] >> 16,
771 (indx2charset
[i
] >> 8) & 0xff, indx2charset
[i
] & 0xff);
773 if ((i
% 4) == 0 || i
== indx
) printf("\n");
777 printf("static const unsigned short %s_2charset[%d] = {\n", name
, indx
);
778 for (i
= 0; i
< indx
; ) {
779 if ((i
% 8) == 0) printf(" ");
780 printf(" 0x%04x,", indx2charset
[i
]);
782 if ((i
% 8) == 0 || i
== indx
) printf("\n");
788 for (i
= 0; i
< npageblocks
; i
++) {
789 printf("static const Summary16 %s_uni2indx_page%02x[%d] = {\n", name
,
790 pageblocks
[i
].start
/16, pageblocks
[i
].end
-pageblocks
[i
].start
);
791 for (j1
= pageblocks
[i
].start
; j1
< pageblocks
[i
].end
; ) {
792 if (((16*j1
) % 0x100) == 0) printf(" /""* 0x%04x *""/\n", 16*j1
);
793 if ((j1
% 4) == 0) printf(" ");
794 printf(" { %4d, 0x%04x },", summary_indx
[j1
], summary_used
[j1
]);
796 if ((j1
% 4) == 0 || j1
== pageblocks
[i
].end
) printf("\n");
802 printf("static int\n");
803 printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name
);
805 printf(" if (n >= 2) {\n");
806 printf(" const Summary16 *summary = NULL;\n");
807 for (i
= 0; i
< npageblocks
; i
++) {
811 printf("if (wc >= 0x%04x && wc < 0x%04x)\n",
812 16*pageblocks
[i
].start
, 16*pageblocks
[i
].end
);
813 printf(" summary = &%s_uni2indx_page%02x[(wc>>4)", name
,
814 pageblocks
[i
].start
/16);
815 if (pageblocks
[i
].start
> 0)
816 printf("-0x%03x", pageblocks
[i
].start
);
819 printf(" if (summary) {\n");
820 printf(" unsigned short used = summary->used;\n");
821 printf(" unsigned int i = wc & 0x0f;\n");
822 printf(" if (used & ((unsigned short) 1 << i)) {\n");
823 if (monotonic
|| !is_large
)
824 printf(" unsigned short c;\n");
825 printf(" /* Keep in `used' only the bits 0..i-1. */\n");
826 printf(" used &= ((unsigned short) 1 << i) - 1;\n");
827 printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
828 printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
829 printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
830 printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
831 printf(" used = (used & 0x00ff) + (used >> 8);\n");
833 printf(" used += summary->indx;\n");
834 printf(" c = %s_2charset_main[used>>%d] + %s_2charset[used];\n", name
, log2_stepsize
, name
);
835 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
836 printf(" return 2;\n");
839 printf(" used += summary->indx;\n");
840 printf(" r[0] = %s_2charset[3*used];\n", name
);
841 printf(" r[1] = %s_2charset[3*used+1];\n", name
);
842 printf(" r[2] = %s_2charset[3*used+2];\n", name
);
843 printf(" return 3;\n");
845 printf(" c = %s_2charset[summary->indx + used];\n", name
);
846 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
847 printf(" return 2;\n");
852 printf(" return RET_ILUNI;\n");
854 printf(" return RET_TOOSMALL;\n");
858 /* ISO-2022/EUC specifics */
/* Returns the 1st byte value for a zero-based row: rows are numbered
   consecutively starting at byte 0x21. */
static int row_byte_normal (int row)
{
  const int first_row_byte = 0x21;

  return row + first_row_byte;
}
/* Returns the 2nd byte value for a zero-based column: columns are numbered
   consecutively starting at byte 0x21. */
static int col_byte_normal (int col)
{
  const int first_col_byte = 0x21;

  return col + first_col_byte;
}
/* Converts a 1st byte value to a zero-based row.
   Valid 1st bytes are 0x21..0x7e (the same range the generated
   "%1$s >= 0x21 && %1$s < 0x7f" check accepts); for those the result is
   byte - 0x21.  Any other value yields -1, so that callers testing
   "row < 0" reject it instead of indexing past the end of the table. */
static int byte_row_normal (int byte)
{
  if (byte >= 0x21 && byte < 0x7f)
    return byte - 0x21;
  return -1;
}
/* Converts a 2nd byte value to a zero-based column.
   Valid 2nd bytes are 0x21..0x7e (the same range the generated
   "%1$s >= 0x21 && %1$s < 0x7f" check accepts); for those the result is
   byte - 0x21.  Any other value yields -1, so that callers testing
   "col < 0" reject it instead of indexing past the end of a row. */
static int byte_col_normal (int byte)
{
  if (byte >= 0x21 && byte < 0x7f)
    return byte - 0x21;
  return -1;
}
865 static void do_normal (const char* name
)
871 enc
.row_byte
= row_byte_normal
;
872 enc
.col_byte
= col_byte_normal
;
873 enc
.byte_row
= byte_row_normal
;
874 enc
.byte_col
= byte_col_normal
;
875 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
876 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
877 enc
.byte_row_expr
= "%1$s - 0x21";
878 enc
.byte_col_expr
= "%1$s - 0x21";
881 output_charset2uni(name
,&enc
);
882 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
885 /* Note: On first sight, the jisx0212_2charset[] table seems to be in order,
886 starting from the charset=0x3021/uni=0x4e02 pair. But it's only mostly in
887 order. There are 75 out-of-order values, scattered all throughout the table.
890 static void do_normal_only_charset2uni (const char* name
)
896 enc
.row_byte
= row_byte_normal
;
897 enc
.col_byte
= col_byte_normal
;
898 enc
.byte_row
= byte_row_normal
;
899 enc
.byte_col
= byte_col_normal
;
900 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
901 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
902 enc
.byte_row_expr
= "%1$s - 0x21";
903 enc
.byte_col_expr
= "%1$s - 0x21";
906 output_charset2uni(name
,&enc
);
909 /* CNS 11643 specifics - trick to put two tables into one */
911 static int row_byte_cns11643 (int row
) {
912 return 0x100 * (row
/ 94) + (row
% 94) + 0x21;
914 static int byte_row_cns11643 (int byte
) {
915 return (byte
>> 8) * 94 + (byte
& 0xff) - 0x21;
918 static void do_cns11643_only_uni2charset (const char* name
)
924 enc
.row_byte
= row_byte_cns11643
;
925 enc
.col_byte
= col_byte_normal
;
926 enc
.byte_row
= byte_row_cns11643
;
927 enc
.byte_col
= byte_col_normal
;
928 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
929 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
930 enc
.byte_row_expr
= "%1$s - 0x21";
931 enc
.byte_col_expr
= "%1$s - 0x21";
935 output_uni2charset_sparse(name
,&enc
,false);
940 static int row_byte_gbk1 (int row
) {
943 static int col_byte_gbk1 (int col
) {
944 return (col
>= 0x3f ? 0x41 : 0x40) + col
;
946 static int byte_row_gbk1 (int byte
) {
947 if (byte
>= 0x81 && byte
< 0xff)
952 static int byte_col_gbk1 (int byte
) {
953 if (byte
>= 0x40 && byte
< 0x7f)
955 else if (byte
>= 0x80 && byte
< 0xff)
961 static void do_gbk1 (const char* name
)
967 enc
.row_byte
= row_byte_gbk1
;
968 enc
.col_byte
= col_byte_gbk1
;
969 enc
.byte_row
= byte_row_gbk1
;
970 enc
.byte_col
= byte_col_gbk1
;
971 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
972 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
973 enc
.byte_row_expr
= "%1$s - 0x81";
974 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
977 output_charset2uni(name
,&enc
);
978 invert(&enc
); output_uni2charset_dense(name
,&enc
);
981 static void do_gbk1_only_charset2uni (const char* name
)
987 enc
.row_byte
= row_byte_gbk1
;
988 enc
.col_byte
= col_byte_gbk1
;
989 enc
.byte_row
= byte_row_gbk1
;
990 enc
.byte_col
= byte_col_gbk1
;
991 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
992 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
993 enc
.byte_row_expr
= "%1$s - 0x81";
994 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
997 output_charset2uni(name
,&enc
);
1000 static int row_byte_gbk2 (int row
) {
1003 static int col_byte_gbk2 (int col
) {
1004 return (col
>= 0x3f ? 0x41 : 0x40) + col
;
1006 static int byte_row_gbk2 (int byte
) {
1007 if (byte
>= 0x81 && byte
< 0xff)
1012 static int byte_col_gbk2 (int byte
) {
1013 if (byte
>= 0x40 && byte
< 0x7f)
1015 else if (byte
>= 0x80 && byte
< 0xa1)
1021 static void do_gbk2_only_charset2uni (const char* name
)
1027 enc
.row_byte
= row_byte_gbk2
;
1028 enc
.col_byte
= col_byte_gbk2
;
1029 enc
.byte_row
= byte_row_gbk2
;
1030 enc
.byte_col
= byte_col_gbk2
;
1031 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
1032 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xa1)";
1033 enc
.byte_row_expr
= "%1$s - 0x81";
1034 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
1037 output_charset2uni(name
,&enc
);
1040 static void do_gbk1_only_uni2charset (const char* name
)
1046 enc
.row_byte
= row_byte_gbk1
;
1047 enc
.col_byte
= col_byte_gbk1
;
1048 enc
.byte_row
= byte_row_gbk1
;
1049 enc
.byte_col
= byte_col_gbk1
;
1050 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
1051 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
1052 enc
.byte_row_expr
= "%1$s - 0x81";
1053 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
1056 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1059 /* KSC 5601 specifics */
1062 * Reads the charset2uni table from standard input.
1064 static void read_table_ksc5601 (Encoding
* enc
)
1066 int row
, col
, i
, i1
, i2
, c
, j
;
1068 enc
->charset2uni
= (int**) malloc(enc
->rows
*sizeof(int*));
1069 for (row
= 0; row
< enc
->rows
; row
++)
1070 enc
->charset2uni
[row
] = (int*) malloc(enc
->cols
*sizeof(int));
1072 for (row
= 0; row
< enc
->rows
; row
++)
1073 for (col
= 0; col
< enc
->cols
; col
++)
1074 enc
->charset2uni
[row
][col
] = 0xfffd;
1079 /* Read a unicode.org style .TXT file. */
1084 if (c
== '\n' || c
== ' ' || c
== '\t')
1087 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
1091 if (scanf("0x%x", &j
) != 1)
1095 if (scanf(" 0x%x", &j
) != 1)
1097 /* Take only the range covered by KS C 5601.1987-0 = KS C 5601.1989-0
1098 = KS X 1001.1992, ignore the rest. */
1099 if (!(i1
>= 128+33 && i1
< 128+127 && i2
>= 128+33 && i2
< 128+127))
1100 continue; /* KSC5601 specific */
1101 i1
&= 0x7f; /* KSC5601 specific */
1102 i2
&= 0x7f; /* KSC5601 specific */
1103 row
= enc
->byte_row(i1
);
1104 col
= enc
->byte_col(i2
);
1105 if (row
< 0 || col
< 0) {
1106 fprintf(stderr
, "lost entry for %02x %02x\n", i1
, i2
);
1109 enc
->charset2uni
[row
][col
] = j
;
1112 /* Read a table of hexadecimal Unicode values. */
1113 for (i1
= 33; i1
< 127; i1
++)
1114 for (i2
= 33; i2
< 127; i2
++) {
1115 i
= scanf("%x", &j
);
1120 if (j
< 0 || j
== 0xffff)
1123 if (enc
->byte_row(i1
) < 0 || enc
->byte_col(i2
) < 0) {
1124 fprintf(stderr
, "lost entry at %02x %02x\n", i1
, i2
);
1127 enc
->charset2uni
[enc
->byte_row(i1
)][enc
->byte_col(i2
)] = j
;
1134 static void do_ksc5601 (const char* name
)
1140 enc
.row_byte
= row_byte_normal
;
1141 enc
.col_byte
= col_byte_normal
;
1142 enc
.byte_row
= byte_row_normal
;
1143 enc
.byte_col
= byte_col_normal
;
1144 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
1145 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
1146 enc
.byte_row_expr
= "%1$s - 0x21";
1147 enc
.byte_col_expr
= "%1$s - 0x21";
1149 read_table_ksc5601(&enc
);
1150 output_charset2uni(name
,&enc
);
1151 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1156 /* UHC part 1: 0x{81..A0}{41..5A,61..7A,81..FE} */
1158 static int row_byte_uhc_1 (int row
) {
1161 static int col_byte_uhc_1 (int col
) {
1162 return (col
>= 0x34 ? 0x4d : col
>= 0x1a ? 0x47 : 0x41) + col
;
1164 static int byte_row_uhc_1 (int byte
) {
1165 if (byte
>= 0x81 && byte
< 0xa1)
1170 static int byte_col_uhc_1 (int byte
) {
1171 if (byte
>= 0x41 && byte
< 0x5b)
1173 else if (byte
>= 0x61 && byte
< 0x7b)
1175 else if (byte
>= 0x81 && byte
< 0xff)
1181 static void do_uhc_1 (const char* name
)
1187 enc
.row_byte
= row_byte_uhc_1
;
1188 enc
.col_byte
= col_byte_uhc_1
;
1189 enc
.byte_row
= byte_row_uhc_1
;
1190 enc
.byte_col
= byte_col_uhc_1
;
1191 enc
.check_row_expr
= "(%1$s >= 0x81 && %1$s < 0xa1)";
1192 enc
.check_col_expr
= "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xff)";
1193 enc
.byte_row_expr
= "%1$s - 0x81";
1194 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
1197 output_charset2uni_noholes_monotonic(name
,&enc
);
1198 invert(&enc
); output_uni2charset_sparse(name
,&enc
,true);
1201 /* UHC part 2: 0x{A1..C6}{41..5A,61..7A,81..A0} */
1203 static int row_byte_uhc_2 (int row
) {
1206 static int col_byte_uhc_2 (int col
) {
1207 return (col
>= 0x34 ? 0x4d : col
>= 0x1a ? 0x47 : 0x41) + col
;
1209 static int byte_row_uhc_2 (int byte
) {
1210 if (byte
>= 0xa1 && byte
< 0xff)
1215 static int byte_col_uhc_2 (int byte
) {
1216 if (byte
>= 0x41 && byte
< 0x5b)
1218 else if (byte
>= 0x61 && byte
< 0x7b)
1220 else if (byte
>= 0x81 && byte
< 0xa1)
1226 static void do_uhc_2 (const char* name
)
1232 enc
.row_byte
= row_byte_uhc_2
;
1233 enc
.col_byte
= col_byte_uhc_2
;
1234 enc
.byte_row
= byte_row_uhc_2
;
1235 enc
.byte_col
= byte_col_uhc_2
;
1236 enc
.check_row_expr
= "(%1$s >= 0xa1 && %1$s < 0xff)";
1237 enc
.check_col_expr
= "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xa1)";
1238 enc
.byte_row_expr
= "%1$s - 0xa1";
1239 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
1242 output_charset2uni_noholes_monotonic(name
,&enc
);
1243 invert(&enc
); output_uni2charset_sparse(name
,&enc
,true);
1246 /* Big5 specifics */
1248 static int row_byte_big5 (int row
) {
1251 static int col_byte_big5 (int col
) {
1252 return (col
>= 0x3f ? 0x62 : 0x40) + col
;
1254 static int byte_row_big5 (int byte
) {
1255 if (byte
>= 0xa1 && byte
< 0xff)
1260 static int byte_col_big5 (int byte
) {
1261 if (byte
>= 0x40 && byte
< 0x7f)
1263 else if (byte
>= 0xa1 && byte
< 0xff)
1269 static void do_big5 (const char* name
)
1275 enc
.row_byte
= row_byte_big5
;
1276 enc
.col_byte
= col_byte_big5
;
1277 enc
.byte_row
= byte_row_big5
;
1278 enc
.byte_col
= byte_col_big5
;
1279 enc
.check_row_expr
= "%1$s >= 0xa1 && %1$s < 0xff";
1280 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
1281 enc
.byte_row_expr
= "%1$s - 0xa1";
1282 enc
.byte_col_expr
= "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
1285 output_charset2uni(name
,&enc
);
1286 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1289 /* HKSCS specifics */
1291 static int row_byte_hkscs (int row
) {
1294 static int byte_row_hkscs (int byte
) {
1295 if (byte
>= 0x80 && byte
< 0xff)
1301 static void do_hkscs (const char* name
)
1307 enc
.row_byte
= row_byte_hkscs
;
1308 enc
.col_byte
= col_byte_big5
;
1309 enc
.byte_row
= byte_row_hkscs
;
1310 enc
.byte_col
= byte_col_big5
;
1311 enc
.check_row_expr
= "%1$s >= 0x80 && %1$s < 0xff";
1312 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
1313 enc
.byte_row_expr
= "%1$s - 0x80";
1314 enc
.byte_col_expr
= "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
1317 output_charset2uni(name
,&enc
);
1318 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1321 /* Johab Hangul specifics */
1323 static int row_byte_johab_hangul (int row
) {
1326 static int col_byte_johab_hangul (int col
) {
1327 return (col
>= 0x3e ? 0x43 : 0x41) + col
;
1329 static int byte_row_johab_hangul (int byte
) {
1330 if (byte
>= 0x84 && byte
< 0xd4)
1335 static int byte_col_johab_hangul (int byte
) {
1336 if (byte
>= 0x41 && byte
< 0x7f)
1338 else if (byte
>= 0x81 && byte
< 0xff)
1344 static void do_johab_hangul (const char* name
)
1350 enc
.row_byte
= row_byte_johab_hangul
;
1351 enc
.col_byte
= col_byte_johab_hangul
;
1352 enc
.byte_row
= byte_row_johab_hangul
;
1353 enc
.byte_col
= byte_col_johab_hangul
;
1354 enc
.check_row_expr
= "%1$s >= 0x84 && %1$s < 0xd4";
1355 enc
.check_col_expr
= "(%1$s >= 0x41 && %1$s < 0x7f) || (%1$s >= 0x81 && %1$s < 0xff)";
1356 enc
.byte_row_expr
= "%1$s - 0x84";
1357 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x81 ? 0x43 : 0x41)";
1360 output_charset2uni(name
,&enc
);
1361 invert(&enc
); output_uni2charset_dense(name
,&enc
);
1364 /* SJIS specifics */
1366 static int row_byte_sjis (int row
) {
1367 return (row
>= 0x1f ? 0xc1 : 0x81) + row
;
1369 static int col_byte_sjis (int col
) {
1370 return (col
>= 0x3f ? 0x41 : 0x40) + col
;
1372 static int byte_row_sjis (int byte
) {
1373 if (byte
>= 0x81 && byte
< 0xa0)
1375 else if (byte
>= 0xe0)
1380 static int byte_col_sjis (int byte
) {
1381 if (byte
>= 0x40 && byte
< 0x7f)
1383 else if (byte
>= 0x80 && byte
< 0xfd)
1389 static void do_sjis (const char* name
)
1395 enc
.row_byte
= row_byte_sjis
;
1396 enc
.col_byte
= col_byte_sjis
;
1397 enc
.byte_row
= byte_row_sjis
;
1398 enc
.byte_col
= byte_col_sjis
;
1399 enc
.check_row_expr
= "(%1$s >= 0x81 && %1$s < 0xa0) || (%1$s >= 0xe0)";
1400 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xfd)";
1401 enc
.byte_row_expr
= "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)";
1402 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
1405 output_charset2uni(name
,&enc
);
1406 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1409 /* GB18030 Unicode specifics */
1411 static void do_gb18030uni (const char* name
)
1415 int i1
, i2
, i3
, i4
, i
, j
, k
;
1416 int charset2uni
[4*10*126*10];
1417 int uni2charset
[0x10000];
1418 struct { int low
; int high
; int diff
; int total
; } ranges
[256];
1419 int ranges_count
, ranges_total
;
1421 for (i
= 0; i
< 4*10*126*10; i
++)
1423 for (j
= 0; j
< 0x10000; j
++)
1426 /* Read a unicode.org style .TXT file. */
1431 if (c
== '\n' || c
== ' ' || c
== '\t')
1434 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
1438 if (scanf("0x%x", &bytes
) != 1)
1440 i1
= (bytes
>> 24) & 0xff;
1441 i2
= (bytes
>> 16) & 0xff;
1442 i3
= (bytes
>> 8) & 0xff;
1444 if (!(i1
>= 0x81 && i1
<= 0x84
1445 && i2
>= 0x30 && i2
<= 0x39
1446 && i3
>= 0x81 && i3
<= 0xfe
1447 && i4
>= 0x30 && i4
<= 0x39)) {
1448 fprintf(stderr
, "lost entry for %02x %02x %02x %02x\n", i1
, i2
, i3
, i4
);
1451 i
= (((i1
-0x81) * 10 + (i2
-0x30)) * 126 + (i3
-0x81)) * 10 + (i4
-0x30);
1452 if (scanf(" 0x%x", &j
) != 1)
1454 if (!(j
>= 0 && j
< 0x10000))
1460 /* Verify that the mapping i -> j is monotonically increasing and
1462 low[k] <= i <= high[k] => j = diff[k] + i
1463 with a set of disjoint intervals (low[k], high[k]). */
1465 for (i
= 0; i
< 4*10*126*10; i
++)
1466 if (charset2uni
[i
] != 0) {
1470 if (ranges_count
> 0) {
1471 if (!(i
> ranges
[ranges_count
-1].high
))
1473 if (!(j
> ranges
[ranges_count
-1].high
+ ranges
[ranges_count
-1].diff
))
1475 /* Additional property: The diffs are also increasing. */
1476 if (!(diff
>= ranges
[ranges_count
-1].diff
))
1479 if (ranges_count
> 0 && diff
== ranges
[ranges_count
-1].diff
)
1480 ranges
[ranges_count
-1].high
= i
;
1482 if (ranges_count
== 256)
1484 ranges
[ranges_count
].low
= i
;
1485 ranges
[ranges_count
].high
= i
;
1486 ranges
[ranges_count
].diff
= diff
;
1491 /* Determine size of bitmap. */
1493 for (k
= 0; k
< ranges_count
; k
++) {
1494 ranges
[k
].total
= ranges_total
;
1495 ranges_total
+= ranges
[k
].high
- ranges
[k
].low
+ 1;
1498 printf("static const unsigned short %s_charset2uni_ranges[%d] = {\n", name
, 2*ranges_count
);
1499 for (k
= 0; k
< ranges_count
; k
++) {
1500 printf(" 0x%04x, 0x%04x", ranges
[k
].low
, ranges
[k
].high
);
1501 if (k
+1 < ranges_count
) printf(",");
1502 if ((k
% 4) == 3 && k
+1 < ranges_count
) printf("\n");
1509 printf("static const unsigned short %s_uni2charset_ranges[%d] = {\n", name
, 2*ranges_count
);
1510 for (k
= 0; k
< ranges_count
; k
++) {
1511 printf(" 0x%04x, 0x%04x", ranges
[k
].low
+ ranges
[k
].diff
, ranges
[k
].high
+ ranges
[k
].diff
);
1512 if (k
+1 < ranges_count
) printf(",");
1513 if ((k
% 4) == 3 && k
+1 < ranges_count
) printf("\n");
1520 printf("static const struct { unsigned short diff; unsigned short bitmap_offset; } %s_ranges[%d] = {\n ", name
, ranges_count
);
1521 for (k
= 0; k
< ranges_count
; k
++) {
1522 printf(" { %5d, 0x%04x }", ranges
[k
].diff
, ranges
[k
].total
);
1523 if (k
+1 < ranges_count
) printf(",");
1524 if ((k
% 4) == 3 && k
+1 < ranges_count
) printf("\n ");
1531 printf("static const unsigned char %s_bitmap[%d] = {\n ", name
, (ranges_total
+ 7) / 8);
1534 for (k
= 0; k
< ranges_count
; k
++) {
1535 for (i
= ranges
[k
].total
; i
<= ranges
[k
].total
+ (ranges
[k
].high
- ranges
[k
].low
);) {
1536 if (charset2uni
[i
- ranges
[k
].total
+ ranges
[k
].low
] != 0)
1537 accu
|= (1 << (i
% 8));
1540 printf(" 0x%02x", accu
);
1541 if ((i
/ 8) < (ranges_total
+ 7) / 8) printf(",");
1542 if (((i
/ 8) % 12) == 0)
1547 if (i
!= (k
+1 < ranges_count
? ranges
[k
+1].total
: ranges_total
)) abort();
1549 if ((ranges_total
% 8) != 0)
1550 printf(" 0x%02x", accu
);
1557 printf("static int\n");
1558 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name
);
1560 printf(" unsigned char c1 = s[0];\n");
1561 printf(" if (c1 >= 0x81 && c1 <= 0x84) {\n");
1562 printf(" if (n >= 2) {\n");
1563 printf(" unsigned char c2 = s[1];\n");
1564 printf(" if (c2 >= 0x30 && c2 <= 0x39) {\n");
1565 printf(" if (n >= 3) {\n");
1566 printf(" unsigned char c3 = s[2];\n");
1567 printf(" if (c3 >= 0x81 && c3 <= 0xfe) {\n");
1568 printf(" if (n >= 4) {\n");
1569 printf(" unsigned char c4 = s[3];\n");
1570 printf(" if (c4 >= 0x30 && c4 <= 0x39) {\n");
1571 printf(" unsigned int i = (((c1 - 0x81) * 10 + (c2 - 0x30)) * 126 + (c3 - 0x81)) * 10 + (c4 - 0x30);\n");
1572 printf(" if (i >= %d && i <= %d) {\n", ranges
[0].low
, ranges
[ranges_count
-1].high
);
1573 printf(" unsigned int k1 = 0;\n");
1574 printf(" unsigned int k2 = %d;\n", ranges_count
-1);
1575 printf(" while (k1 < k2) {\n");
1576 printf(" unsigned int k = (k1 + k2) / 2;\n");
1577 printf(" if (i <= %s_charset2uni_ranges[2*k+1])\n", name
);
1578 printf(" k2 = k;\n");
1579 printf(" else if (i >= %s_charset2uni_ranges[2*k+2])\n", name
);
1580 printf(" k1 = k + 1;\n");
1582 printf(" return RET_ILSEQ;\n");
1585 printf(" unsigned int bitmap_index = i - %s_charset2uni_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name
, name
);
1586 printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name
);
1587 printf(" unsigned int diff = %s_ranges[k1].diff;\n", name
);
1588 printf(" *pwc = (ucs4_t) (i + diff);\n");
1589 printf(" return 4;\n");
1594 printf(" return RET_ILSEQ;\n");
1596 printf(" return RET_TOOFEW(0);\n");
1598 printf(" return RET_ILSEQ;\n");
1600 printf(" return RET_TOOFEW(0);\n");
1602 printf(" return RET_ILSEQ;\n");
1604 printf(" return RET_TOOFEW(0);\n");
1606 printf(" return RET_ILSEQ;\n");
1611 printf("static int\n");
1612 printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name
);
1614 printf(" if (n >= 4) {\n");
1615 printf(" unsigned int i = wc;\n");
1616 printf(" if (i >= 0x%04x && i <= 0x%04x) {\n", ranges
[0].low
+ ranges
[0].diff
, ranges
[ranges_count
-1].high
+ ranges
[ranges_count
-1].diff
);
1617 printf(" unsigned int k1 = 0;\n");
1618 printf(" unsigned int k2 = %d;\n", ranges_count
-1);
1619 printf(" while (k1 < k2) {\n");
1620 printf(" unsigned int k = (k1 + k2) / 2;\n");
1621 printf(" if (i <= %s_uni2charset_ranges[2*k+1])\n", name
);
1622 printf(" k2 = k;\n");
1623 printf(" else if (i >= %s_uni2charset_ranges[2*k+2])\n", name
);
1624 printf(" k1 = k + 1;\n");
1626 printf(" return RET_ILUNI;\n");
1629 printf(" unsigned int bitmap_index = i - %s_uni2charset_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name
, name
);
1630 printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name
);
1631 printf(" unsigned int diff = %s_ranges[k1].diff;\n", name
);
1632 printf(" i -= diff;\n");
1633 printf(" r[3] = (i %% 10) + 0x30; i = i / 10;\n");
1634 printf(" r[2] = (i %% 126) + 0x81; i = i / 126;\n");
1635 printf(" r[1] = (i %% 10) + 0x30; i = i / 10;\n");
1636 printf(" r[0] = i + 0x81;\n");
1637 printf(" return 4;\n");
1641 printf(" return RET_ILUNI;\n");
1643 printf(" return RET_TOOSMALL;\n");
1647 /* JISX0213 specifics */
1649 static void do_jisx0213 (const char* name
)
1651 printf("#ifndef _JISX0213_H\n");
1652 printf("#define _JISX0213_H\n");
1654 printf("/* JISX0213 plane 1 (= ISO-IR-233) characters are in the range\n");
1655 printf(" 0x{21..7E}{21..7E}.\n");
1656 printf(" JISX0213 plane 2 (= ISO-IR-229) characters are in the range\n");
1657 printf(" 0x{21,23..25,28,2C..2F,6E..7E}{21..7E}.\n");
1658 printf(" Together this makes 120 rows of 94 characters.\n");
1662 #define row_convert(row) \
1663 ((row) >= 0x121 && (row) <= 0x17E ? row-289 : /* 0..93 */ \
1664 (row) == 0x221 ? row-451 : /* 94 */ \
1665 (row) >= 0x223 && (row) <= 0x225 ? row-452 : /* 95..97 */ \
1666 (row) == 0x228 ? row-454 : /* 98 */ \
1667 (row) >= 0x22C && (row) <= 0x22F ? row-457 : /* 99..102 */ \
1668 (row) >= 0x26E && (row) <= 0x27E ? row-519 : /* 103..119 */ \
1670 unsigned int table
[120][94];
1671 int pagemin
[0x1100];
1672 int pagemax
[0x1100];
1673 int pageidx
[0x1100];
1674 unsigned int pagestart
[0x1100];
1675 unsigned int pagestart_len
= 0;
1677 unsigned int rowc
, colc
;
1678 for (rowc
= 0; rowc
< 120; rowc
++)
1679 for (colc
= 0; colc
< 94; colc
++)
1680 table
[rowc
][colc
] = 0;
1684 for (page
= 0; page
< 0x1100; page
++)
1686 for (page
= 0; page
< 0x1100; page
++)
1688 for (page
= 0; page
< 0x1100; page
++)
1691 printf("static const unsigned short jisx0213_to_ucs_combining[][2] = {\n");
1693 int private_use
= 0x0001;
1696 unsigned int row
, col
;
1698 memset(line
,0,sizeof(line
));
1699 if (scanf("%[^\n]\n",line
) < 1)
1701 assert(line
[0]=='0');
1702 assert(line
[1]=='x');
1703 assert(isxdigit(line
[2]));
1704 assert(isxdigit(line
[3]));
1705 assert(isxdigit(line
[4]));
1706 assert(isxdigit(line
[5]));
1707 assert(isxdigit(line
[6]));
1708 assert(line
[7]=='\t');
1710 col
= strtoul(&line
[5],NULL
,16);
1712 row
= strtoul(&line
[2],NULL
,16);
1713 if (line
[20] != '\0' && line
[21] == '\0') {
1714 unsigned int u1
, u2
;
1715 assert(line
[8]=='0');
1716 assert(line
[9]=='x');
1717 assert(isxdigit(line
[10]));
1718 assert(isxdigit(line
[11]));
1719 assert(isxdigit(line
[12]));
1720 assert(isxdigit(line
[13]));
1721 assert(line
[14]==' ');
1722 assert(line
[15]=='0');
1723 assert(line
[16]=='x');
1724 assert(isxdigit(line
[17]));
1725 assert(isxdigit(line
[18]));
1726 assert(isxdigit(line
[19]));
1727 assert(isxdigit(line
[20]));
1728 u2
= strtoul(&line
[17],NULL
,16);
1730 u1
= strtoul(&line
[10],NULL
,16);
1731 printf(" { 0x%04x, 0x%04x },\n", u1
, u2
);
1732 ucs
= private_use
++;
1734 assert(line
[8]=='0');
1735 assert(line
[9]=='x');
1736 assert(isxdigit(line
[10]));
1737 assert(isxdigit(line
[11]));
1738 assert(isxdigit(line
[12]));
1739 assert(isxdigit(line
[13]));
1740 ucs
= strtoul(&line
[10],NULL
,16);
1742 assert((unsigned int) row_convert(row
) < 120);
1743 assert((unsigned int) (col
-0x21) < 94);
1744 table
[row_convert(row
)][col
-0x21] = ucs
;
1750 unsigned int rowc
, colc
;
1751 for (rowc
= 0; rowc
< 120; rowc
++) {
1752 for (colc
= 0; colc
< 94; colc
++) {
1753 unsigned int value
= table
[rowc
][colc
];
1754 unsigned int page
= value
>> 8;
1755 unsigned int rest
= value
& 0xff;
1756 if (pagemin
[page
] < 0 || pagemin
[page
] > rest
) pagemin
[page
] = rest
;
1757 if (pagemax
[page
] < 0 || pagemax
[page
] < rest
) pagemax
[page
] = rest
;
1762 unsigned int index
= 0;
1764 for (i
= 0; i
< 0x1100; ) {
1765 if (pagemin
[i
] >= 0) {
1766 if (pagemin
[i
+1] >= 0 && pagemin
[i
] >= 0x80 && pagemax
[i
+1] < 0x80) {
1767 /* Combine two pages into a single one. */
1768 assert(pagestart_len
< sizeof(pagestart
)/sizeof(pagestart
[0]));
1769 pagestart
[pagestart_len
++] = (i
<<8)+0x80;
1771 pageidx
[i
+1] = index
;
1775 /* A single page. */
1776 assert(pagestart_len
< sizeof(pagestart
)/sizeof(pagestart
[0]));
1777 pagestart
[pagestart_len
++] = i
<<8;
1786 printf("static const unsigned short jisx0213_to_ucs_main[120 * 94] = {\n");
1789 for (row
= 0; row
< 0x300; row
++) {
1790 unsigned int rowc
= row_convert(row
);
1791 if (rowc
!= (unsigned int) (-1)) {
1792 printf(" /* 0x%X21..0x%X7E */\n",row
,row
);
1794 unsigned int count
= 0;
1796 for (colc
= 0; colc
< 94; colc
++) {
1797 if ((count
% 8) == 0) printf(" ");
1799 unsigned int value
= table
[rowc
][colc
];
1800 unsigned int page
= value
>> 8;
1801 unsigned int index
= pageidx
[page
];
1802 assert(value
-pagestart
[index
] < 0x100);
1803 printf(" 0x%04x,",(index
<<8)|(value
-pagestart
[index
]));
1806 if ((count
% 8) == 0) printf("\n");
1815 printf("static const ucs4_t jisx0213_to_ucs_pagestart[] = {\n");
1817 unsigned int count
= 0;
1819 for (i
= 0; i
< pagestart_len
; i
++) {
1821 if ((count
% 8) == 0) printf(" ");
1823 sprintf(buf
,"0x%04x",pagestart
[i
]);
1824 if (strlen(buf
) < 7) printf("%*s",(int)(7-strlen(buf
)),"");
1827 if ((count
% 8) == 0) printf("\n");
1837 int table
[0x110000];
1840 unsigned int combining_prefixes
[100];
1841 unsigned int combining_prefixes_len
= 0;
1844 for (i
= 0; i
< 0x110000; i
++)
1846 for (i
= 0; i
< 0x4400; i
++)
1851 unsigned int plane
, row
, col
;
1852 memset(line
,0,sizeof(line
));
1853 if (scanf("%[^\n]\n",line
) < 1)
1855 assert(line
[0]=='0');
1856 assert(line
[1]=='x');
1857 assert(isxdigit(line
[2]));
1858 assert(isxdigit(line
[3]));
1859 assert(isxdigit(line
[4]));
1860 assert(isxdigit(line
[5]));
1861 assert(isxdigit(line
[6]));
1862 assert(line
[7]=='\t');
1864 col
= strtoul(&line
[5],NULL
,16);
1866 row
= strtoul(&line
[3],NULL
,16);
1868 plane
= strtoul(&line
[2],NULL
,16) - 1;
1869 if (line
[20] != '\0' && line
[21] == '\0') {
1870 unsigned int u1
, u2
;
1871 assert(line
[8]=='0');
1872 assert(line
[9]=='x');
1873 assert(isxdigit(line
[10]));
1874 assert(isxdigit(line
[11]));
1875 assert(isxdigit(line
[12]));
1876 assert(isxdigit(line
[13]));
1877 assert(line
[14]==' ');
1878 assert(line
[15]=='0');
1879 assert(line
[16]=='x');
1880 assert(isxdigit(line
[17]));
1881 assert(isxdigit(line
[18]));
1882 assert(isxdigit(line
[19]));
1883 assert(isxdigit(line
[20]));
1884 u2
= strtoul(&line
[17],NULL
,16);
1886 u1
= strtoul(&line
[10],NULL
,16);
1887 assert(u2
== 0x02E5 || u2
== 0x02E9 || u2
== 0x0300 || u2
== 0x0301
1889 assert(combining_prefixes_len
< sizeof(combining_prefixes
)/sizeof(combining_prefixes
[0]));
1890 combining_prefixes
[combining_prefixes_len
++] = u1
;
1893 assert(line
[8]=='0');
1894 assert(line
[9]=='x');
1895 assert(isxdigit(line
[10]));
1896 assert(isxdigit(line
[11]));
1897 assert(isxdigit(line
[12]));
1898 assert(isxdigit(line
[13]));
1899 ucs
= strtoul(&line
[10],NULL
,16);
1902 assert(row
<= 0x7f);
1903 assert(col
<= 0x7f);
1904 table
[ucs
] = (plane
<< 15) | (row
<< 8) | col
;
1905 pages
[ucs
>>6] = true;
1906 if (maxpage
< 0 || (ucs
>>6) > maxpage
) maxpage
= ucs
>>6;
1911 for (i
= 0; i
< combining_prefixes_len
; i
++) {
1912 unsigned int u1
= combining_prefixes
[i
];
1913 assert(table
[u1
] >= 0);
1914 table
[u1
] |= 0x0080;
1917 printf("static const short jisx0213_from_ucs_level1[%d] = {\n",maxpage
+1);
1919 unsigned int index
= 0;
1921 for (i
= 0; i
<= maxpage
; i
++) {
1922 if ((i
% 8) == 0) printf(" ");
1924 printf(" %3u,",index
);
1929 if (((i
+1) % 8) == 0) printf("\n");
1935 #if 0 /* Dense array */
1936 printf("static const unsigned short jisx0213_from_ucs_level2[] = {\n");
1939 for (i
= 0; i
<= maxpage
; i
++) {
1941 printf(" /* 0x%04X */\n",i
<<6);
1944 for (j
= 0; j
< 0x40; ) {
1945 unsigned int ucs
= (i
<<6)+j
;
1946 int value
= table
[ucs
];
1947 if (value
< 0) value
= 0;
1948 if ((j
% 8) == 0) printf(" ");
1949 printf(" 0x%04x,",value
);
1951 if ((j
% 8) == 0) printf("\n");
1958 #else /* Sparse array */
1960 int summary_indx
[0x11000];
1961 int summary_used
[0x11000];
1962 unsigned int i
, k
, indx
;
1963 printf("static const unsigned short jisx0213_from_ucs_level2_data[] = {\n");
1964 /* Fill summary_indx[] and summary_used[]. */
1966 for (i
= 0, k
= 0; i
<= maxpage
; i
++) {
1968 unsigned int j1
, j2
;
1969 unsigned int count
= 0;
1970 printf(" /* 0x%04X */\n",i
<<6);
1971 for (j1
= 0; j1
< 4; j1
++) {
1972 summary_indx
[4*k
+j1
] = indx
;
1973 summary_used
[4*k
+j1
] = 0;
1974 for (j2
= 0; j2
< 16; j2
++) {
1975 unsigned int j
= 16*j1
+j2
;
1976 unsigned int ucs
= (i
<<6)+j
;
1977 int value
= table
[ucs
];
1978 if (value
< 0) value
= 0;
1980 summary_used
[4*k
+j1
] |= (1 << j2
);
1981 if ((count
% 8) == 0) printf(" ");
1982 printf(" 0x%04x,",value
);
1984 if ((count
% 8) == 0) printf("\n");
1989 if ((count
% 8) > 0)
1996 printf("static const Summary16 jisx0213_from_ucs_level2_2indx[] = {\n");
1997 for (i
= 0, k
= 0; i
<= maxpage
; i
++) {
2000 printf(" /* 0x%04X */\n",i
<<6);
2002 for (j1
= 0; j1
< 4; j1
++) {
2003 printf(" { %4d, 0x%04x },", summary_indx
[4*k
+j1
], summary_used
[4*k
+j1
]);
2014 printf("#ifdef __GNUC__\n");
2015 printf("__inline\n");
2017 printf("#ifdef __cplusplus\n");
2021 printf("static ucs4_t jisx0213_to_ucs4 (unsigned int row, unsigned int col)\n");
2023 printf(" ucs4_t val;\n");
2025 printf(" if (row >= 0x121 && row <= 0x17e)\n");
2026 printf(" row -= 289;\n");
2027 printf(" else if (row == 0x221)\n");
2028 printf(" row -= 451;\n");
2029 printf(" else if (row >= 0x223 && row <= 0x225)\n");
2030 printf(" row -= 452;\n");
2031 printf(" else if (row == 0x228)\n");
2032 printf(" row -= 454;\n");
2033 printf(" else if (row >= 0x22c && row <= 0x22f)\n");
2034 printf(" row -= 457;\n");
2035 printf(" else if (row >= 0x26e && row <= 0x27e)\n");
2036 printf(" row -= 519;\n");
2038 printf(" return 0x0000;\n");
2040 printf(" if (col >= 0x21 && col <= 0x7e)\n");
2041 printf(" col -= 0x21;\n");
2043 printf(" return 0x0000;\n");
2045 printf(" val = jisx0213_to_ucs_main[row * 94 + col];\n");
2046 printf(" val = jisx0213_to_ucs_pagestart[val >> 8] + (val & 0xff);\n");
2047 printf(" if (val == 0xfffd)\n");
2048 printf(" val = 0x0000;\n");
2049 printf(" return val;\n");
2052 printf("#ifdef __GNUC__\n");
2053 printf("__inline\n");
2055 printf("#ifdef __cplusplus\n");
2059 printf("static unsigned short ucs4_to_jisx0213 (ucs4_t ucs)\n");
2061 printf(" if (ucs < (sizeof(jisx0213_from_ucs_level1)/sizeof(jisx0213_from_ucs_level1[0])) << 6) {\n");
2062 printf(" int index1 = jisx0213_from_ucs_level1[ucs >> 6];\n");
2063 printf(" if (index1 >= 0)");
2064 #if 0 /* Dense array */
2066 printf(" return jisx0213_from_ucs_level2[(index1 << 6) + (ucs & 0x3f)];\n");
2067 #else /* Sparse array */
2069 printf(" const Summary16 *summary = &jisx0213_from_ucs_level2_2indx[((index1 << 6) + (ucs & 0x3f)) >> 4];\n");
2070 printf(" unsigned short used = summary->used;\n");
2071 printf(" unsigned int i = ucs & 0x0f;\n");
2072 printf(" if (used & ((unsigned short) 1 << i)) {\n");
2073 printf(" /* Keep in `used' only the bits 0..i-1. */\n");
2074 printf(" used &= ((unsigned short) 1 << i) - 1;\n");
2075 printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
2076 printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
2077 printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
2078 printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
2079 printf(" used = (used & 0x00ff) + (used >> 8);\n");
2080 printf(" return jisx0213_from_ucs_level2_data[summary->indx + used];\n");
2085 printf(" return 0x0000;\n");
2088 printf("#endif /* _JISX0213_H */\n");
2093 int main (int argc
, char *argv
[])
2095 const char* charsetname
;
2100 charsetname
= argv
[1];
2103 output_title(charsetname
);
2105 if (!strcmp(name
,"gb2312")
2106 || !strcmp(name
,"isoir165ext") || !strcmp(name
,"gb12345ext")
2107 || !strcmp(name
,"jisx0208") || !strcmp(name
,"jisx0212"))
2109 else if (!strcmp(name
,"cns11643_1") || !strcmp(name
,"cns11643_2")
2110 || !strcmp(name
,"cns11643_3") || !strcmp(name
,"cns11643_4a")
2111 || !strcmp(name
,"cns11643_4b") || !strcmp(name
,"cns11643_5")
2112 || !strcmp(name
,"cns11643_6") || !strcmp(name
,"cns11643_7")
2113 || !strcmp(name
,"cns11643_15"))
2114 do_normal_only_charset2uni(name
);
2115 else if (!strcmp(name
,"cns11643_inv"))
2116 do_cns11643_only_uni2charset(name
);
2117 else if (!strcmp(name
,"gbkext1"))
2118 do_gbk1_only_charset2uni(name
);
2119 else if (!strcmp(name
,"gbkext2"))
2120 do_gbk2_only_charset2uni(name
);
2121 else if (!strcmp(name
,"gbkext_inv"))
2122 do_gbk1_only_uni2charset(name
);
2123 else if (!strcmp(name
,"cp936ext") || !strcmp(name
,"gb18030ext"))
2125 else if (!strcmp(name
,"ksc5601"))
2127 else if (!strcmp(name
,"uhc_1"))
2129 else if (!strcmp(name
,"uhc_2"))
2131 else if (!strcmp(name
,"big5") || !strcmp(name
,"cp950ext"))
2133 else if (!strcmp(name
,"hkscs1999") || !strcmp(name
,"hkscs2001")
2134 || !strcmp(name
,"hkscs2004") || !strcmp(name
,"hkscs2008"))
2136 else if (!strcmp(name
,"johab_hangul"))
2137 do_johab_hangul(name
);
2138 else if (!strcmp(name
,"cp932ext"))
2140 else if (!strcmp(name
,"gb18030uni"))
2141 do_gb18030uni(name
);
2142 else if (!strcmp(name
,"jisx0213"))