1 /* Copyright (C) 1999-2001 Free Software Foundation, Inc.
2 This file is part of the GNU LIBICONV Tools.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
19 * Generates a CJK character set table from a .TXT table as found on
20 * ftp.unicode.org or in the X nls directory.
23 * ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < gb2312
24 * ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < jis0208
25 * ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < ksc5601
27 * ./cjk_tab_to_h GB2312.1980-0 gb2312 > gb2312.h < GB2312.TXT
28 * ./cjk_tab_to_h JISX0208.1983-0 jisx0208 > jisx0208.h < JIS0208.TXT
29 * ./cjk_tab_to_h JISX0212.1990-0 jisx0212 > jisx0212.h < JIS0212.TXT
30 * ./cjk_tab_to_h KSC5601.1987-0 ksc5601 > ksc5601.h < KSC5601.TXT
31 * ./cjk_tab_to_h KSX1001.1992-0 ksc5601 > ksc5601.h < KSX1001.TXT
33 * ./cjk_tab_to_h BIG5 big5 > big5.h < BIG5.TXT
35 * ./cjk_tab_to_h JOHAB johab > johab.h < JOHAB.TXT
49 int rows
; /* number of possible values for the 1st byte */
50 int cols
; /* number of possible values for the 2nd byte */
51 int (*row_byte
) (int row
); /* returns the 1st byte value for a given row */
52 int (*col_byte
) (int col
); /* returns the 2nd byte value for a given col */
53 int (*byte_row
) (int byte
); /* converts a 1st byte value to a row, else -1 */
54 int (*byte_col
) (int byte
); /* converts a 2nd byte value to a col, else -1 */
55 const char* check_row_expr
; /* format string for 1st byte value checking */
56 const char* check_col_expr
; /* format string for 2nd byte value checking */
57 const char* byte_row_expr
; /* format string for 1st byte value to row */
58 const char* byte_col_expr
; /* format string for 2nd byte value to col */
59 int** charset2uni
; /* charset2uni[0..rows-1][0..cols-1] is valid */
60 /* You'll understand the terms "row" and "col" when you buy Ken Lunde's book.
61 Once a row is fixed, choosing a "col" is the same as choosing a "cell". */
62 int* charsetpage
; /* charsetpage[0..rows]: how large is a page for a row */
64 Block
* charsetblocks
; /* blocks[0..nblocks-1] */
65 int* uni2charset
; /* uni2charset[0x0000..0xffff] */
69 * Outputs the file title.
71 static void output_title (const char *charsetname
)
74 printf(" * Copyright (C) 1999-2001 Free Software Foundation, Inc.\n");
75 printf(" * This file is part of the GNU LIBICONV Library.\n");
77 printf(" * The GNU LIBICONV Library is free software; you can redistribute it\n");
78 printf(" * and/or modify it under the terms of the GNU Library General Public\n");
79 printf(" * License as published by the Free Software Foundation; either version 2\n");
80 printf(" * of the License, or (at your option) any later version.\n");
82 printf(" * The GNU LIBICONV Library is distributed in the hope that it will be\n");
83 printf(" * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
84 printf(" * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n");
85 printf(" * Library General Public License for more details.\n");
87 printf(" * You should have received a copy of the GNU Library General Public\n");
88 printf(" * License along with the GNU LIBICONV Library; see the file COPYING.LIB.\n");
89 printf(" * If not, write to the Free Software Foundation, Inc., 59 Temple Place -\n");
90 printf(" * Suite 330, Boston, MA 02111-1307, USA.\n");
94 printf(" * %s\n", charsetname
);
100 * Reads the charset2uni table from standard input.
102 static void read_table (Encoding
* enc
)
104 int row
, col
, i
, i1
, i2
, c
, j
;
106 enc
->charset2uni
= (int**) malloc(enc
->rows
*sizeof(int*));
107 for (row
= 0; row
< enc
->rows
; row
++)
108 enc
->charset2uni
[row
] = (int*) malloc(enc
->cols
*sizeof(int));
110 for (row
= 0; row
< enc
->rows
; row
++)
111 for (col
= 0; col
< enc
->cols
; col
++)
112 enc
->charset2uni
[row
][col
] = 0xfffd;
117 /* Read a unicode.org style .TXT file. */
122 if (c
== '\n' || c
== ' ' || c
== '\t')
125 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
129 if (scanf("0x%x", &j
) != 1)
133 row
= enc
->byte_row(i1
);
134 col
= enc
->byte_col(i2
);
135 if (row
< 0 || col
< 0) {
136 fprintf(stderr
, "lost entry for %02x %02x\n", i1
, i2
);
139 if (scanf(" 0x%x", &enc
->charset2uni
[row
][col
]) != 1)
143 /* Read a table of hexadecimal Unicode values. */
144 for (i1
= 32; i1
< 132; i1
++)
145 for (i2
= 32; i2
< 132; i2
++) {
151 if (j
< 0 || j
== 0xffff)
154 if (enc
->byte_row(i1
) < 0 || enc
->byte_col(i2
) < 0) {
155 fprintf(stderr
, "lost entry at %02x %02x\n", i1
, i2
);
158 enc
->charset2uni
[enc
->byte_row(i1
)][enc
->byte_col(i2
)] = j
;
166 * Computes the charsetpage[0..rows] array.
168 static void find_charset2uni_pages (Encoding
* enc
)
172 enc
->charsetpage
= (int*) malloc((enc
->rows
+1)*sizeof(int));
174 for (row
= 0; row
<= enc
->rows
; row
++)
175 enc
->charsetpage
[row
] = 0;
177 for (row
= 0; row
< enc
->rows
; row
++) {
179 for (col
= 0; col
< enc
->cols
; col
++)
180 if (enc
->charset2uni
[row
][col
] != 0xfffd)
182 enc
->charsetpage
[row
] = used
;
187 * Fills in nblocks and blocks.
189 static void find_charset2uni_blocks (Encoding
* enc
)
193 enc
->charsetblocks
= (Block
*) malloc(enc
->rows
*sizeof(Block
));
196 for (row
= 0; row
< enc
->rows
; row
++)
197 if (enc
->charsetpage
[row
] > 0 && (row
== 0 || enc
->charsetpage
[row
-1] == 0)) {
198 for (lastrow
= row
; enc
->charsetpage
[lastrow
+1] > 0; lastrow
++);
199 enc
->charsetblocks
[n
].start
= row
* enc
->cols
;
200 enc
->charsetblocks
[n
].end
= lastrow
* enc
->cols
+ enc
->charsetpage
[lastrow
];
203 enc
->ncharsetblocks
= n
;
207 * Outputs the charset to unicode table and function.
209 static void output_charset2uni (const char* name
, Encoding
* enc
)
211 int row
, col
, lastrow
, col_max
, i
, i1_min
, i1_max
;
213 find_charset2uni_pages(enc
);
215 find_charset2uni_blocks(enc
);
217 for (row
= 0; row
< enc
->rows
; row
++)
218 if (enc
->charsetpage
[row
] > 0) {
219 if (row
== 0 || enc
->charsetpage
[row
-1] == 0) {
220 /* Start a new block. */
221 for (lastrow
= row
; enc
->charsetpage
[lastrow
+1] > 0; lastrow
++);
222 printf("static const unsigned short %s_2uni_page%02x[%d] = {\n",
223 name
, enc
->row_byte(row
),
224 (lastrow
-row
) * enc
->cols
+ enc
->charsetpage
[lastrow
]);
226 printf(" /""* 0x%02x *""/\n ", enc
->row_byte(row
));
227 col_max
= (enc
->charsetpage
[row
+1] > 0 ? enc
->cols
: enc
->charsetpage
[row
]);
228 for (col
= 0; col
< col_max
; col
++) {
229 printf(" 0x%04x,", enc
->charset2uni
[row
][col
]);
230 if ((col
% 8) == 7 && (col
+1 < col_max
)) printf("\n ");
233 if (enc
->charsetpage
[row
+1] == 0) {
240 printf("static int\n");
241 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name
);
243 printf(" unsigned char c1 = s[0];\n");
245 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
246 i1_min
= enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
);
247 i1_max
= enc
->row_byte((enc
->charsetblocks
[i
].end
-1) / enc
->cols
);
250 if (i1_min
== i1_max
)
251 printf("(c1 == 0x%02x)", i1_min
);
253 printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min
, i1_max
);
256 printf(" if (n >= 2) {\n");
257 printf(" unsigned char c2 = s[1];\n");
259 printf(enc
->check_col_expr
, "c2");
261 printf(" unsigned int i = %d * (", enc
->cols
);
262 printf(enc
->byte_row_expr
, "c1");
264 printf(enc
->byte_col_expr
, "c2");
266 printf(" unsigned short wc = 0xfffd;\n");
267 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
271 if (i
< enc
->ncharsetblocks
-1)
272 printf("if (i < %d) ", enc
->charsetblocks
[i
+1].start
);
274 printf(" if (i < %d)\n", enc
->charsetblocks
[i
].end
);
275 printf(" wc = %s_2uni_page%02x[i", name
, enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
));
276 if (enc
->charsetblocks
[i
].start
> 0)
277 printf("-%d", enc
->charsetblocks
[i
].start
);
281 printf(" if (wc != 0xfffd) {\n");
282 printf(" *pwc = (ucs4_t) wc;\n");
283 printf(" return 2;\n");
286 printf(" return RET_ILSEQ;\n");
288 printf(" return RET_TOOFEW(0);\n");
290 printf(" return RET_ILSEQ;\n");
296 * Outputs the charset to unicode table and function.
297 * (Suitable if the mapping function is well defined, i.e. has no holes, and
298 * is monotonically increasing with small gaps only.)
300 static void output_charset2uni_noholes_monotonic (const char* name
, Encoding
* enc
)
302 int row
, col
, lastrow
, r
, col_max
, i
, i1_min
, i1_max
;
304 /* Choose stepsize so that stepsize*steps_per_row >= enc->cols, and
305 enc->charset2uni[row][col] - enc->charset2uni[row][col/stepsize*stepsize]
306 is always < 0x100. */
307 int steps_per_row
= 2;
308 int stepsize
= (enc
->cols
+ steps_per_row
-1) / steps_per_row
;
310 find_charset2uni_pages(enc
);
312 find_charset2uni_blocks(enc
);
314 for (row
= 0; row
< enc
->rows
; row
++)
315 if (enc
->charsetpage
[row
] > 0) {
316 if (row
== 0 || enc
->charsetpage
[row
-1] == 0) {
317 /* Start a new block. */
318 for (lastrow
= row
; enc
->charsetpage
[lastrow
+1] > 0; lastrow
++);
319 printf("static const unsigned short %s_2uni_main_page%02x[%d] = {\n ",
320 name
, enc
->row_byte(row
),
321 steps_per_row
*(lastrow
-row
+1));
322 for (r
= row
; r
<= lastrow
; r
++) {
323 for (i
= 0; i
< steps_per_row
; i
++)
324 printf(" 0x%04x,", enc
->charset2uni
[r
][i
*stepsize
]);
325 if (((r
-row
) % 4) == 3 && (r
< lastrow
)) printf("\n ");
329 printf("static const unsigned char %s_2uni_page%02x[%d] = {\n",
330 name
, enc
->row_byte(row
),
331 (lastrow
-row
) * enc
->cols
+ enc
->charsetpage
[lastrow
]);
333 printf(" /""* 0x%02x *""/\n ", enc
->row_byte(row
));
334 col_max
= (enc
->charsetpage
[row
+1] > 0 ? enc
->cols
: enc
->charsetpage
[row
]);
335 for (col
= 0; col
< col_max
; col
++) {
336 printf(" 0x%02x,", enc
->charset2uni
[row
][col
] - enc
->charset2uni
[row
][col
/stepsize
*stepsize
]);
337 if ((col
% 8) == 7 && (col
+1 < col_max
)) printf("\n ");
340 if (enc
->charsetpage
[row
+1] == 0) {
347 printf("static int\n");
348 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name
);
350 printf(" unsigned char c1 = s[0];\n");
352 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
353 i1_min
= enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
);
354 i1_max
= enc
->row_byte((enc
->charsetblocks
[i
].end
-1) / enc
->cols
);
357 if (i1_min
== i1_max
)
358 printf("(c1 == 0x%02x)", i1_min
);
360 printf("(c1 >= 0x%02x && c1 <= 0x%02x)", i1_min
, i1_max
);
363 printf(" if (n >= 2) {\n");
364 printf(" unsigned char c2 = s[1];\n");
366 printf(enc
->check_col_expr
, "c2");
368 printf(" unsigned int row = ");
369 printf(enc
->byte_row_expr
, "c1");
371 printf(" unsigned int col = ");
372 printf(enc
->byte_col_expr
, "c2");
374 printf(" unsigned int i = %d * row + col;\n", enc
->cols
);
375 printf(" unsigned short wc = 0xfffd;\n");
376 for (i
= 0; i
< enc
->ncharsetblocks
; i
++) {
380 if (i
< enc
->ncharsetblocks
-1)
381 printf("if (i < %d) ", enc
->charsetblocks
[i
+1].start
);
383 printf(" if (i < %d)\n", enc
->charsetblocks
[i
].end
);
384 printf(" wc = %s_2uni_main_page%02x[%d*", name
, enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
), steps_per_row
);
385 if (enc
->charsetblocks
[i
].start
> 0)
386 printf("(row-%d)", enc
->charsetblocks
[i
].start
/ enc
->cols
);
390 if (steps_per_row
== 2)
391 printf("(col>=%d?1:0)", stepsize
);
393 printf("col/%d", stepsize
);
394 printf("] + %s_2uni_page%02x[i", name
, enc
->row_byte(enc
->charsetblocks
[i
].start
/ enc
->cols
));
395 if (enc
->charsetblocks
[i
].start
> 0)
396 printf("-%d", enc
->charsetblocks
[i
].start
);
400 printf(" if (wc != 0xfffd) {\n");
401 printf(" *pwc = (ucs4_t) wc;\n");
402 printf(" return 2;\n");
405 printf(" return RET_ILSEQ;\n");
407 printf(" return RET_TOOFEW(0);\n");
409 printf(" return RET_ILSEQ;\n");
415 * Computes the uni2charset[0x0000..0xffff] array.
417 static void invert (Encoding
* enc
)
421 enc
->uni2charset
= (int*) malloc(0x10000*sizeof(int));
423 for (j
= 0; j
< 0x10000; j
++)
424 enc
->uni2charset
[j
] = 0;
426 for (row
= 0; row
< enc
->rows
; row
++)
427 for (col
= 0; col
< enc
->cols
; col
++) {
428 j
= enc
->charset2uni
[row
][col
];
430 enc
->uni2charset
[j
] = 0x100 * enc
->row_byte(row
) + enc
->col_byte(col
);
435 * Outputs the unicode to charset table and function, using a linear array.
436 * (Suitable if the table is dense.)
438 static void output_uni2charset_dense (const char* name
, Encoding
* enc
)
440 /* Like in 8bit_tab_to_h.c */
444 struct { int minline
; int maxline
; int usecount
; } tables
[0x2000];
446 int row
, col
, j
, p
, j1
, j2
, t
;
448 for (p
= 0; p
< 0x100; p
++)
450 for (row
= 0; row
< enc
->rows
; row
++)
451 for (col
= 0; col
< enc
->cols
; col
++) {
452 j
= enc
->charset2uni
[row
][col
];
456 for (j1
= 0; j1
< 0x2000; j1
++) {
457 bool all_invalid
= true;
458 for (j2
= 0; j2
< 8; j2
++) {
460 if (enc
->uni2charset
[j
] != 0)
469 for (j1
= 0; j1
< 0x2000; j1
++) {
472 && ((j1
> 0 && line
[j1
-1] == tableno
-1)
473 || ((tables
[tableno
-1].maxline
>> 5) == (j1
>> 5)
474 && j1
- tables
[tableno
-1].maxline
<= 8))) {
475 line
[j1
] = tableno
-1;
476 tables
[tableno
-1].maxline
= j1
;
479 line
[j1
] = tableno
-1;
480 tables
[tableno
-1].minline
= tables
[tableno
-1].maxline
= j1
;
484 for (t
= 0; t
< tableno
; t
++) {
485 tables
[t
].usecount
= 0;
486 j1
= 8*tables
[t
].minline
;
487 j2
= 8*(tables
[t
].maxline
+1);
488 for (j
= j1
; j
< j2
; j
++)
489 if (enc
->uni2charset
[j
] != 0)
490 tables
[t
].usecount
++;
494 for (t
= 0; t
< tableno
; t
++)
495 if (tables
[t
].usecount
> 1) {
496 p
= tables
[t
].minline
>> 5;
497 printf("static const unsigned short %s_page%02x[%d] = {\n", name
, p
, 8*(tables
[t
].maxline
-tables
[t
].minline
+1));
498 for (j1
= tables
[t
].minline
; j1
<= tables
[t
].maxline
; j1
++) {
499 if ((j1
% 0x20) == 0 && j1
> tables
[t
].minline
)
500 printf(" /* 0x%04x */\n", 8*j1
);
502 for (j2
= 0; j2
< 8; j2
++) {
504 printf(" 0x%04x,", enc
->uni2charset
[j
]);
506 printf(" /*0x%02x-0x%02x*/\n", 8*(j1
% 0x20), 8*(j1
% 0x20)+7);
513 printf("static int\n%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name
);
515 printf(" if (n >= 2) {\n");
516 printf(" unsigned short c = 0;\n");
518 for (j1
= 0; j1
< 0x2000;) {
520 for (j2
= j1
; j2
< 0x2000 && line
[j2
] == t
; j2
++);
522 if (j1
!= tables
[t
].minline
) abort();
523 if (j2
> tables
[t
].maxline
+1) abort();
524 j2
= tables
[t
].maxline
+1;
530 if (tables
[t
].usecount
== 0) abort();
531 if (tables
[t
].usecount
== 1) {
532 if (j2
!= j1
+1) abort();
533 for (j
= 8*j1
; j
< 8*j2
; j
++)
534 if (enc
->uni2charset
[j
] != 0) {
535 printf("if (wc == 0x%04x)\n c = 0x%02x;\n", j
, enc
->uni2charset
[j
]);
540 printf("if (wc < 0x%04x)", 8*j2
);
542 printf("if (wc >= 0x%04x && wc < 0x%04x)", 8*j1
, 8*j2
);
544 printf("\n c = %s_page%02x[wc", name
, j1
>> 5);
545 if (tables
[t
].minline
> 0)
546 printf("-0x%04x", 8*j1
);
552 printf(" if (c != 0) {\n");
553 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
554 printf(" return 2;\n");
556 printf(" return RET_ILSEQ;\n");
558 printf(" return RET_TOOSMALL;\n");
563 * Outputs the unicode to charset table and function, using a packed array.
564 * (Suitable if the table is sparse.)
565 * The argument 'monotonic' may be set to true if the mapping is monotonically
566 * increasing with small gaps only.
568 static void output_uni2charset_sparse (const char* name
, Encoding
* enc
, bool monotonic
)
571 Block pageblocks
[0x100]; int npageblocks
;
572 int indx2charset
[0x10000];
573 int summary_indx
[0x1000];
574 int summary_used
[0x1000];
575 int i
, row
, col
, j
, p
, j1
, j2
, indx
;
577 int log2_stepsize
= (!strcmp(name
,"uhc_2") ? 6 : 7);
578 int stepsize
= 1 << log2_stepsize
;
581 /* Fill pages[0x100]. */
582 for (p
= 0; p
< 0x100; p
++)
584 for (row
= 0; row
< enc
->rows
; row
++)
585 for (col
= 0; col
< enc
->cols
; col
++) {
586 j
= enc
->charset2uni
[row
][col
];
592 for (p
= 0; p
< 0x100; p
++)
594 printf("static const unsigned short %s_page%02x[256] = {\n", name
, p
);
595 for (j1
= 0; j1
< 32; j1
++) {
597 for (j2
= 0; j2
< 8; j2
++)
598 printf("0x%04x, ", enc
->uni2charset
[256*p
+8*j1
+j2
]);
599 printf("/""*0x%02x-0x%02x*""/\n", 8*j1
, 8*j1
+7);
606 /* Fill summary_indx[] and summary_used[]. */
608 for (j1
= 0; j1
< 0x1000; j1
++) {
609 summary_indx
[j1
] = indx
;
610 summary_used
[j1
] = 0;
611 for (j2
= 0; j2
< 16; j2
++) {
613 if (enc
->uni2charset
[j
] != 0) {
614 indx2charset
[indx
++] = enc
->uni2charset
[j
];
615 summary_used
[j1
] |= (1 << j2
);
620 /* Fill npageblocks and pageblocks[]. */
622 for (p
= 0; p
< 0x100; ) {
623 if (pages
[p
] && (p
== 0 || !pages
[p
-1])) {
624 pageblocks
[npageblocks
].start
= 16*p
;
625 do p
++; while (p
< 0x100 && pages
[p
]);
627 while (summary_used
[j1
-1] == 0) j1
--;
628 pageblocks
[npageblocks
].end
= j1
;
635 indxsteps
= (indx
+ stepsize
-1) / stepsize
;
636 printf("static const unsigned short %s_2charset_main[%d] = {\n", name
, indxsteps
);
637 for (i
= 0; i
< indxsteps
; ) {
638 if ((i
% 8) == 0) printf(" ");
639 printf(" 0x%04x,", indx2charset
[i
*stepsize
]);
641 if ((i
% 8) == 0 || i
== indxsteps
) printf("\n");
644 printf("static const unsigned char %s_2charset[%d] = {\n", name
, indx
);
645 for (i
= 0; i
< indx
; ) {
646 if ((i
% 8) == 0) printf(" ");
647 printf(" 0x%02x,", indx2charset
[i
] - indx2charset
[i
/stepsize
*stepsize
]);
649 if ((i
% 8) == 0 || i
== indx
) printf("\n");
653 printf("static const unsigned short %s_2charset[%d] = {\n", name
, indx
);
654 for (i
= 0; i
< indx
; ) {
655 if ((i
% 8) == 0) printf(" ");
656 printf(" 0x%04x,", indx2charset
[i
]);
658 if ((i
% 8) == 0 || i
== indx
) printf("\n");
663 for (i
= 0; i
< npageblocks
; i
++) {
664 printf("static const Summary16 %s_uni2indx_page%02x[%d] = {\n", name
,
665 pageblocks
[i
].start
/16, pageblocks
[i
].end
-pageblocks
[i
].start
);
666 for (j1
= pageblocks
[i
].start
; j1
< pageblocks
[i
].end
; ) {
667 if (((16*j1
) % 0x100) == 0) printf(" /""* 0x%04x *""/\n", 16*j1
);
668 if ((j1
% 4) == 0) printf(" ");
669 printf(" { %4d, 0x%04x },", summary_indx
[j1
], summary_used
[j1
]);
671 if ((j1
% 4) == 0 || j1
== pageblocks
[i
].end
) printf("\n");
677 printf("static int\n");
678 printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name
);
680 printf(" if (n >= 2) {\n");
681 printf(" const Summary16 *summary = NULL;\n");
682 for (i
= 0; i
< npageblocks
; i
++) {
686 printf("if (wc >= 0x%04x && wc < 0x%04x)\n",
687 16*pageblocks
[i
].start
, 16*pageblocks
[i
].end
);
688 printf(" summary = &%s_uni2indx_page%02x[(wc>>4)", name
,
689 pageblocks
[i
].start
/16);
690 if (pageblocks
[i
].start
> 0)
691 printf("-0x%03x", pageblocks
[i
].start
);
694 printf(" if (summary) {\n");
695 printf(" unsigned short used = summary->used;\n");
696 printf(" unsigned int i = wc & 0x0f;\n");
697 printf(" if (used & ((unsigned short) 1 << i)) {\n");
698 printf(" unsigned short c;\n");
699 printf(" /* Keep in `used' only the bits 0..i-1. */\n");
700 printf(" used &= ((unsigned short) 1 << i) - 1;\n");
701 printf(" /* Add `summary->indx' and the number of bits set in `used'. */\n");
702 printf(" used = (used & 0x5555) + ((used & 0xaaaa) >> 1);\n");
703 printf(" used = (used & 0x3333) + ((used & 0xcccc) >> 2);\n");
704 printf(" used = (used & 0x0f0f) + ((used & 0xf0f0) >> 4);\n");
705 printf(" used = (used & 0x00ff) + (used >> 8);\n");
707 printf(" used += summary->indx;\n");
708 printf(" c = %s_2charset_main[used>>%d] + %s_2charset[used];\n", name
, log2_stepsize
, name
);
710 printf(" c = %s_2charset[summary->indx + used];\n", name
);
711 printf(" r[0] = (c >> 8); r[1] = (c & 0xff);\n");
712 printf(" return 2;\n");
715 printf(" return RET_ILSEQ;\n");
717 printf(" return RET_TOOSMALL;\n");
721 /* ISO-2022/EUC specifics */
/* Returns the 1st byte value for a given row of an ISO-2022/EUC style
   table: row 0 is byte 0x21, and each further row is the next byte. */
static int row_byte_normal (int row)
{
  const int first_byte = 0x21;

  return first_byte + row;
}
/* Returns the 2nd byte value for a given column of an ISO-2022/EUC style
   table: column 0 is byte 0x21, and each further column is the next byte. */
static int col_byte_normal (int col)
{
  const int first_byte = 0x21;

  return first_byte + col;
}
/* Converts a 1st byte value to a zero-based row index, else -1.

   Only bytes in 0x21..0x7e are valid, which is the range accepted by the
   check expression "%1$s >= 0x21 && %1$s < 0x7f" used with this function.
   Every byte outside that range yields -1.  Callers only test the result
   for `< 0' before using it as an index into charset2uni[], so a byte
   >= 0x7f must not be turned into a (too large) positive row.  */
static int byte_row_normal (int byte)
{
  if (byte < 0x21 || byte >= 0x7f)
    return -1;
  return byte - 0x21;
}
/* Converts a 2nd byte value to a zero-based column index, else -1.

   Only bytes in 0x21..0x7e are valid, which is the range accepted by the
   check expression "%1$s >= 0x21 && %1$s < 0x7f" used with this function.
   Every byte outside that range yields -1.  Callers only test the result
   for `< 0' before using it as an index into charset2uni[][], so a byte
   >= 0x7f must not be turned into a (too large) positive column.  */
static int byte_col_normal (int byte)
{
  if (byte < 0x21 || byte >= 0x7f)
    return -1;
  return byte - 0x21;
}
728 static void do_normal (const char* name
)
734 enc
.row_byte
= row_byte_normal
;
735 enc
.col_byte
= col_byte_normal
;
736 enc
.byte_row
= byte_row_normal
;
737 enc
.byte_col
= byte_col_normal
;
738 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
739 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
740 enc
.byte_row_expr
= "%1$s - 0x21";
741 enc
.byte_col_expr
= "%1$s - 0x21";
744 output_charset2uni(name
,&enc
);
745 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
748 /* Note: At first sight, the jisx0212_2charset[] table seems to be in order,
749 starting from the charset=0x3021/uni=0x4e02 pair. But it's only mostly in
750 order. There are 75 out-of-order values, scattered all throughout the table.
753 static void do_normal_only_charset2uni (const char* name
)
759 enc
.row_byte
= row_byte_normal
;
760 enc
.col_byte
= col_byte_normal
;
761 enc
.byte_row
= byte_row_normal
;
762 enc
.byte_col
= byte_col_normal
;
763 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
764 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
765 enc
.byte_row_expr
= "%1$s - 0x21";
766 enc
.byte_col_expr
= "%1$s - 0x21";
769 output_charset2uni(name
,&enc
);
772 /* CNS 11643 specifics - trick to put two tables into one */
/* Returns the pseudo "1st byte" value for a row of the combined CNS 11643
   table.  Rows are taken in groups of 94: the group number is stored in
   bits 8 and up, and the position inside the group, offset by 0x21, in
   the low byte.  */
static int row_byte_cns11643 (int row)
{
  int group = row / 94;
  int offset = row % 94;

  return 0x100 * group + offset + 0x21;
}
777 static int byte_row_cns11643 (int byte
) {
778 return (byte
>= 0x100 && byte
< 0x200 ? byte
-0x121 :
779 byte
>= 0x200 && byte
< 0x300 ? byte
-0x221+94 :
780 byte
>= 0x300 && byte
< 0x400 ? byte
-0x321+2*94 :
784 static void do_cns11643_only_uni2charset (const char* name
)
791 enc
.row_byte
= row_byte_cns11643
;
792 enc
.col_byte
= col_byte_normal
;
793 enc
.byte_row
= byte_row_cns11643
;
794 enc
.byte_col
= byte_col_normal
;
795 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
796 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
797 enc
.byte_row_expr
= "%1$s - 0x21";
798 enc
.byte_col_expr
= "%1$s - 0x21";
802 /* Move the 2 plane bits into the unused bits 15 and 7. */
803 for (j
= 0; j
< 0x10000; j
++) {
804 x
= enc
.uni2charset
[j
];
806 if (x
& 0x8080) abort();
808 case 0: /* plane 1 */ x
= (x
& 0xffff) | 0x0000; break;
809 case 1: /* plane 2 */ x
= (x
& 0xffff) | 0x0080; break;
810 case 2: /* plane 3 */ x
= (x
& 0xffff) | 0x8000; break;
813 enc
.uni2charset
[j
] = x
;
816 output_uni2charset_sparse(name
,&enc
,false);
821 static int row_byte_gbk1 (int row
) {
/* Returns the 2nd byte value for a given column.
   Columns 0..0x3e map to bytes 0x40..0x7e; columns from 0x3f on map to
   bytes 0x80 and up, so that byte 0x7f is never produced.  */
static int col_byte_gbk1 (int col)
{
  if (col >= 0x3f)
    return 0x41 + col;
  return 0x40 + col;
}
827 static int byte_row_gbk1 (int byte
) {
828 if (byte
>= 0x81 && byte
< 0xff)
833 static int byte_col_gbk1 (int byte
) {
834 if (byte
>= 0x40 && byte
< 0x7f)
836 else if (byte
>= 0x80 && byte
< 0xff)
842 static void do_gbk1 (const char* name
)
848 enc
.row_byte
= row_byte_gbk1
;
849 enc
.col_byte
= col_byte_gbk1
;
850 enc
.byte_row
= byte_row_gbk1
;
851 enc
.byte_col
= byte_col_gbk1
;
852 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
853 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
854 enc
.byte_row_expr
= "%1$s - 0x81";
855 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
858 output_charset2uni(name
,&enc
);
859 invert(&enc
); output_uni2charset_dense(name
,&enc
);
862 static void do_gbk1_only_charset2uni (const char* name
)
868 enc
.row_byte
= row_byte_gbk1
;
869 enc
.col_byte
= col_byte_gbk1
;
870 enc
.byte_row
= byte_row_gbk1
;
871 enc
.byte_col
= byte_col_gbk1
;
872 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
873 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
874 enc
.byte_row_expr
= "%1$s - 0x81";
875 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
878 output_charset2uni(name
,&enc
);
881 static int row_byte_gbk2 (int row
) {
/* Returns the 2nd byte value for a given column.
   Columns 0..0x3e map to bytes 0x40..0x7e; columns from 0x3f on map to
   bytes 0x80 and up, so that byte 0x7f is never produced.  */
static int col_byte_gbk2 (int col)
{
  if (col >= 0x3f)
    return 0x41 + col;
  return 0x40 + col;
}
887 static int byte_row_gbk2 (int byte
) {
888 if (byte
>= 0x81 && byte
< 0xff)
893 static int byte_col_gbk2 (int byte
) {
894 if (byte
>= 0x40 && byte
< 0x7f)
896 else if (byte
>= 0x80 && byte
< 0xa1)
902 static void do_gbk2_only_charset2uni (const char* name
)
908 enc
.row_byte
= row_byte_gbk2
;
909 enc
.col_byte
= col_byte_gbk2
;
910 enc
.byte_row
= byte_row_gbk2
;
911 enc
.byte_col
= byte_col_gbk2
;
912 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
913 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xa1)";
914 enc
.byte_row_expr
= "%1$s - 0x81";
915 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
918 output_charset2uni(name
,&enc
);
921 static void do_gbk1_only_uni2charset (const char* name
)
927 enc
.row_byte
= row_byte_gbk1
;
928 enc
.col_byte
= col_byte_gbk1
;
929 enc
.byte_row
= byte_row_gbk1
;
930 enc
.byte_col
= byte_col_gbk1
;
931 enc
.check_row_expr
= "%1$s >= 0x81 && %1$s < 0xff";
932 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xff)";
933 enc
.byte_row_expr
= "%1$s - 0x81";
934 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
937 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
940 /* KSC 5601 specifics */
943 * Reads the charset2uni table from standard input.
945 static void read_table_ksc5601 (Encoding
* enc
)
947 int row
, col
, i
, i1
, i2
, c
, j
;
949 enc
->charset2uni
= (int**) malloc(enc
->rows
*sizeof(int*));
950 for (row
= 0; row
< enc
->rows
; row
++)
951 enc
->charset2uni
[row
] = (int*) malloc(enc
->cols
*sizeof(int));
953 for (row
= 0; row
< enc
->rows
; row
++)
954 for (col
= 0; col
< enc
->cols
; col
++)
955 enc
->charset2uni
[row
][col
] = 0xfffd;
960 /* Read a unicode.org style .TXT file. */
965 if (c
== '\n' || c
== ' ' || c
== '\t')
968 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
972 if (scanf("0x%x", &j
) != 1)
976 if (scanf(" 0x%x", &j
) != 1)
978 /* Take only the range covered by KS C 5601.1987-0 = KS C 5601.1989-0
979 = KS X 1001.1992, ignore the rest. */
980 if (!(i1
>= 128+33 && i1
< 128+127 && i2
>= 128+33 && i2
< 128+127))
981 continue; /* KSC5601 specific */
982 i1
&= 0x7f; /* KSC5601 specific */
983 i2
&= 0x7f; /* KSC5601 specific */
984 row
= enc
->byte_row(i1
);
985 col
= enc
->byte_col(i2
);
986 if (row
< 0 || col
< 0) {
987 fprintf(stderr
, "lost entry for %02x %02x\n", i1
, i2
);
990 enc
->charset2uni
[row
][col
] = j
;
993 /* Read a table of hexadecimal Unicode values. */
994 for (i1
= 33; i1
< 127; i1
++)
995 for (i2
= 33; i2
< 127; i2
++) {
1001 if (j
< 0 || j
== 0xffff)
1004 if (enc
->byte_row(i1
) < 0 || enc
->byte_col(i2
) < 0) {
1005 fprintf(stderr
, "lost entry at %02x %02x\n", i1
, i2
);
1008 enc
->charset2uni
[enc
->byte_row(i1
)][enc
->byte_col(i2
)] = j
;
1015 static void do_ksc5601 (const char* name
)
1021 enc
.row_byte
= row_byte_normal
;
1022 enc
.col_byte
= col_byte_normal
;
1023 enc
.byte_row
= byte_row_normal
;
1024 enc
.byte_col
= byte_col_normal
;
1025 enc
.check_row_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
1026 enc
.check_col_expr
= "%1$s >= 0x21 && %1$s < 0x7f";
1027 enc
.byte_row_expr
= "%1$s - 0x21";
1028 enc
.byte_col_expr
= "%1$s - 0x21";
1030 read_table_ksc5601(&enc
);
1031 output_charset2uni(name
,&enc
);
1032 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1037 /* UHC part 1: 0x{81..A0}{41..5A,61..7A,81..FE} */
1039 static int row_byte_uhc_1 (int row
) {
/* Returns the 2nd byte value for a given column.
   Columns 0..0x19 map to bytes 0x41..0x5a, columns 0x1a..0x33 map to
   bytes 0x61..0x7a, and columns from 0x34 on map to bytes 0x81 and up.  */
static int col_byte_uhc_1 (int col)
{
  int base;

  if (col >= 0x34)
    base = 0x4d;
  else if (col >= 0x1a)
    base = 0x47;
  else
    base = 0x41;
  return base + col;
}
1045 static int byte_row_uhc_1 (int byte
) {
1046 if (byte
>= 0x81 && byte
< 0xa1)
1051 static int byte_col_uhc_1 (int byte
) {
1052 if (byte
>= 0x41 && byte
< 0x5b)
1054 else if (byte
>= 0x61 && byte
< 0x7b)
1056 else if (byte
>= 0x81 && byte
< 0xff)
1062 static void do_uhc_1 (const char* name
)
1068 enc
.row_byte
= row_byte_uhc_1
;
1069 enc
.col_byte
= col_byte_uhc_1
;
1070 enc
.byte_row
= byte_row_uhc_1
;
1071 enc
.byte_col
= byte_col_uhc_1
;
1072 enc
.check_row_expr
= "(%1$s >= 0x81 && %1$s < 0xa1)";
1073 enc
.check_col_expr
= "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xff)";
1074 enc
.byte_row_expr
= "%1$s - 0x81";
1075 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
1078 output_charset2uni_noholes_monotonic(name
,&enc
);
1079 invert(&enc
); output_uni2charset_sparse(name
,&enc
,true);
1082 /* UHC part 2: 0x{A1..C6}{41..5A,61..7A,81..A0} */
1084 static int row_byte_uhc_2 (int row
) {
/* Returns the 2nd byte value for a given column.
   Columns 0..0x19 map to bytes 0x41..0x5a, columns 0x1a..0x33 map to
   bytes 0x61..0x7a, and columns from 0x34 on map to bytes 0x81 and up.  */
static int col_byte_uhc_2 (int col)
{
  int base;

  if (col >= 0x34)
    base = 0x4d;
  else if (col >= 0x1a)
    base = 0x47;
  else
    base = 0x41;
  return base + col;
}
1090 static int byte_row_uhc_2 (int byte
) {
1091 if (byte
>= 0xa1 && byte
< 0xff)
1096 static int byte_col_uhc_2 (int byte
) {
1097 if (byte
>= 0x41 && byte
< 0x5b)
1099 else if (byte
>= 0x61 && byte
< 0x7b)
1101 else if (byte
>= 0x81 && byte
< 0xa1)
1107 static void do_uhc_2 (const char* name
)
1113 enc
.row_byte
= row_byte_uhc_2
;
1114 enc
.col_byte
= col_byte_uhc_2
;
1115 enc
.byte_row
= byte_row_uhc_2
;
1116 enc
.byte_col
= byte_col_uhc_2
;
1117 enc
.check_row_expr
= "(%1$s >= 0xa1 && %1$s < 0xff)";
1118 enc
.check_col_expr
= "(%1$s >= 0x41 && %1$s < 0x5b) || (%1$s >= 0x61 && %1$s < 0x7b) || (%1$s >= 0x81 && %1$s < 0xa1)";
1119 enc
.byte_row_expr
= "%1$s - 0xa1";
1120 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x81 ? 0x4d : %1$s >= 0x61 ? 0x47 : 0x41)";
1123 output_charset2uni_noholes_monotonic(name
,&enc
);
1124 invert(&enc
); output_uni2charset_sparse(name
,&enc
,true);
1127 /* Big5 specifics */
1129 static int row_byte_big5 (int row
) {
/* Returns the 2nd byte value for a given column.
   Columns 0..0x3e map to bytes 0x40..0x7e; columns from 0x3f on map to
   bytes 0xa1 and up.  */
static int col_byte_big5 (int col)
{
  if (col >= 0x3f)
    return 0x62 + col;
  return 0x40 + col;
}
1135 static int byte_row_big5 (int byte
) {
1136 if (byte
>= 0xa1 && byte
< 0xff)
1141 static int byte_col_big5 (int byte
) {
1142 if (byte
>= 0x40 && byte
< 0x7f)
1144 else if (byte
>= 0xa1 && byte
< 0xff)
1150 static void do_big5 (const char* name
)
1156 enc
.row_byte
= row_byte_big5
;
1157 enc
.col_byte
= col_byte_big5
;
1158 enc
.byte_row
= byte_row_big5
;
1159 enc
.byte_col
= byte_col_big5
;
1160 enc
.check_row_expr
= "%1$s >= 0xa1 && %1$s < 0xff";
1161 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
1162 enc
.byte_row_expr
= "%1$s - 0xa1";
1163 enc
.byte_col_expr
= "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
1166 output_charset2uni(name
,&enc
);
1167 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1170 /* HKSCS specifics */
1172 static int row_byte_hkscs (int row
) {
1175 static int byte_row_hkscs (int byte
) {
1176 if (byte
>= 0x80 && byte
< 0xff)
1182 static void do_hkscs (const char* name
)
1188 enc
.row_byte
= row_byte_hkscs
;
1189 enc
.col_byte
= col_byte_big5
;
1190 enc
.byte_row
= byte_row_hkscs
;
1191 enc
.byte_col
= byte_col_big5
;
1192 enc
.check_row_expr
= "%1$s >= 0x80 && %1$s < 0xff";
1193 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
1194 enc
.byte_row_expr
= "%1$s - 0x80";
1195 enc
.byte_col_expr
= "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";
1198 output_charset2uni(name
,&enc
);
1199 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1202 /* Johab Hangul specifics */
1204 static int row_byte_johab_hangul (int row
) {
/* Returns the 2nd byte value for a given column.
   Columns 0..0x3d map to bytes 0x41..0x7e; columns from 0x3e on map to
   bytes 0x81 and up.  */
static int col_byte_johab_hangul (int col)
{
  if (col >= 0x3e)
    return 0x43 + col;
  return 0x41 + col;
}
1210 static int byte_row_johab_hangul (int byte
) {
1211 if (byte
>= 0x84 && byte
< 0xd4)
1216 static int byte_col_johab_hangul (int byte
) {
1217 if (byte
>= 0x41 && byte
< 0x7f)
1219 else if (byte
>= 0x81 && byte
< 0xff)
/* Generates the Johab Hangul tables for the charset called `name`.
   The visible part installs the Johab Hangul callbacks and expression
   templates on `enc`, emits the charset->Unicode table, inverts the table
   and emits a dense (not sparse) Unicode->charset table.
   NOTE(review): the declaration of `enc`, its rows/cols setup and the
   table-reading step are not visible in this extract -- confirm against
   the full file. */
1225 static void do_johab_hangul (const char* name
)
/* Callbacks translating between (row, col) indices and byte values. */
1231 enc
.row_byte
= row_byte_johab_hangul
;
1232 enc
.col_byte
= col_byte_johab_hangul
;
1233 enc
.byte_row
= byte_row_johab_hangul
;
1234 enc
.byte_col
= byte_col_johab_hangul
;
/* Expression templates using the positional conversion %1$s; presumably
   expanded with the name of a byte variable when C code is emitted --
   TODO confirm.  They restate byte_row_johab_hangul /
   byte_col_johab_hangul. */
1235 enc
.check_row_expr
= "%1$s >= 0x84 && %1$s < 0xd4";
1236 enc
.check_col_expr
= "(%1$s >= 0x41 && %1$s < 0x7f) || (%1$s >= 0x81 && %1$s < 0xff)";
1237 enc
.byte_row_expr
= "%1$s - 0x84";
1238 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x81 ? 0x43 : 0x41)";
/* Emit the charset->Unicode direction, then invert and emit the
   Unicode->charset direction in dense form. */
1241 output_charset2uni(name
,&enc
);
1242 invert(&enc
); output_uni2charset_dense(name
,&enc
);
1245 /* SJIS specifics */
/* SJIS: returns the 1st byte value for a given row index.
   Rows 0..0x1e map to lead bytes 0x81..0x9f; rows from 0x1f upward are
   shifted by 0xc1 so that they continue at lead byte 0xe0.  This is the
   inverse of byte_row_sjis.  No range check is performed on row. */
static int row_byte_sjis (int row) {
  return (row >= 0x1f ? 0xc1 : 0x81) + row;
}
/* SJIS: returns the 2nd byte value for a given column index.
   Columns 0..0x3e map to bytes 0x40..0x7e; columns from 0x3f upward are
   shifted by 0x41 so that byte 0x7f is skipped and they continue at 0x80.
   This is the inverse of byte_col_sjis.  No range check is performed on
   col. */
static int col_byte_sjis (int col) {
  return (col >= 0x3f ? 0x41 : 0x40) + col;
}
/* SJIS: converts a 1st byte value to a row index, or -1 if the byte is not
   a valid lead byte.  Lead bytes come in two runs: 0x81..0x9f (rows
   0..0x1e) and 0xe0 upward (rows 0x1f onward), matching the
   "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)" expression emitted into the
   generated code.  Note that the second run has no upper bound here. */
static int byte_row_sjis (int byte) {
  if (byte >= 0x81 && byte < 0xa0)
    return byte - 0x81;
  else if (byte >= 0xe0)
    return byte - 0xc1;
  else
    return -1;
}
/* SJIS: converts a 2nd byte value to a column index, or -1 if the byte is
   not a valid trail byte.  Trail bytes come in two runs: 0x40..0x7e
   (columns 0..0x3e) and 0x80..0xfc (columns 0x3f onward), matching the
   "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)" expression emitted into the
   generated code. */
static int byte_col_sjis (int byte) {
  if (byte >= 0x40 && byte < 0x7f)
    return byte - 0x40;
  else if (byte >= 0x80 && byte < 0xfd)
    return byte - 0x41;
  else
    return -1;
}
/* Generates the Shift_JIS-style tables for the charset called `name`.
   The visible part installs the SJIS callbacks and expression templates on
   `enc`, emits the charset->Unicode table, inverts the table and emits a
   sparse Unicode->charset table.
   NOTE(review): the declaration of `enc`, its rows/cols setup and the
   table-reading step are not visible in this extract -- confirm against
   the full file. */
1270 static void do_sjis (const char* name
)
/* Callbacks translating between (row, col) indices and byte values. */
1276 enc
.row_byte
= row_byte_sjis
;
1277 enc
.col_byte
= col_byte_sjis
;
1278 enc
.byte_row
= byte_row_sjis
;
1279 enc
.byte_col
= byte_col_sjis
;
/* Expression templates using the positional conversion %1$s; presumably
   expanded with the name of a byte variable when C code is emitted --
   TODO confirm.  They restate byte_row_sjis / byte_col_sjis; note that the
   second lead-byte run (>= 0xe0) has no upper bound. */
1280 enc
.check_row_expr
= "(%1$s >= 0x81 && %1$s < 0xa0) || (%1$s >= 0xe0)";
1281 enc
.check_col_expr
= "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0x80 && %1$s < 0xfd)";
1282 enc
.byte_row_expr
= "%1$s - (%1$s >= 0xe0 ? 0xc1 : 0x81)";
1283 enc
.byte_col_expr
= "%1$s - (%1$s >= 0x80 ? 0x41 : 0x40)";
/* Emit the charset->Unicode direction, then invert and emit the
   Unicode->charset direction in sparse form. */
1286 output_charset2uni(name
,&enc
);
1287 invert(&enc
); output_uni2charset_sparse(name
,&enc
,false);
1290 /* GB18030 Unicode specifics */
/* Generator for the GB18030 four-byte <-> Unicode mapping called `name`.
   Visible steps:
     1. loop over both lookup arrays (presumably zero-filling them; the
        loop bodies are not visible),
     2. parse "0x<4 bytes> 0x<ucs>" pairs from stdin,
     3. compress the mapping into ranges sharing a constant offset `diff`
        (at most 256 slots are available in `ranges`),
     4. print the two range tables, the { diff, bitmap_offset } table and a
        presence bitmap,
     5. print the C source of <name>_mbtowc and <name>_wctomb, both of
        which binary-search the range tables and consult the bitmap.
   NOTE(review): many lines of this function are not visible in this
   extract (declarations of c, bytes, diff and accu, the assignment of i4,
   loop bodies, error handling, closing braces) -- confirm details against
   the full file. */
1292 static void do_gb18030uni (const char* name
)
1296 int i1
, i2
, i3
, i4
, i
, j
, k
;
/* charset2uni is indexed by the linearized four-byte code (4 lead bytes x
   10 x 126 x 10); uni2charset is indexed by a 16-bit Unicode value. */
1297 int charset2uni
[4*10*126*10];
1298 int uni2charset
[0x10000];
/* One entry per run of codes that share the same diff; `total` is the
   run's starting bit offset in the bitmap. */
1299 struct { int low
; int high
; int diff
; int total
; } ranges
[256];
1300 int ranges_count
, ranges_total
;
1302 for (i
= 0; i
< 4*10*126*10; i
++)
1304 for (j
= 0; j
< 0x10000; j
++)
1307 /* Read a unicode.org style .TXT file. */
1312 if (c
== '\n' || c
== ' ' || c
== '\t')
/* Discard the remainder of the current input line. */
1315 do { c
= getc(stdin
); } while (!(c
== EOF
|| c
== '\n'));
/* NOTE(review): the type of `bytes` is not visible here; %x expects an
   unsigned int pointer -- confirm. */
1319 if (scanf("0x%x", &bytes
) != 1)
/* Split the 32-bit code into its bytes, most significant first.  The
   assignment of i4 is not visible in this extract. */
1321 i1
= (bytes
>> 24) & 0xff;
1322 i2
= (bytes
>> 16) & 0xff;
1323 i3
= (bytes
>> 8) & 0xff;
/* Accept only codes of the form 0x81..0x84, 0x30..0x39, 0x81..0xfe,
   0x30..0x39; anything else is reported on stderr as a lost entry. */
1325 if (!(i1
>= 0x81 && i1
<= 0x84
1326 && i2
>= 0x30 && i2
<= 0x39
1327 && i3
>= 0x81 && i3
<= 0xfe
1328 && i4
>= 0x30 && i4
<= 0x39)) {
1329 fprintf(stderr
, "lost entry for %02x %02x %02x %02x\n", i1
, i2
, i3
, i4
);
/* Linearize the four bytes into a mixed-radix index (radices 10, 126, 10). */
1332 i
= (((i1
-0x81) * 10 + (i2
-0x30)) * 126 + (i3
-0x81)) * 10 + (i4
-0x30);
/* NOTE(review): j is declared int but is scanned with %x, which expects an
   unsigned int pointer. */
1333 if (scanf(" 0x%x", &j
) != 1)
1335 if (!(j
>= 0 && j
< 0x10000))
1341 /* Verify that the mapping i -> j is monotonically increasing and
1343 low[k] <= i <= high[k] => j = diff[k] + i
1344 with a set of disjoint intervals (low[k], high[k]). */
/* Only indices with a non-zero charset2uni entry take part. */
1346 for (i
= 0; i
< 4*10*126*10; i
++)
1347 if (charset2uni
[i
] != 0) {
1351 if (ranges_count
> 0) {
1352 if (!(i
> ranges
[ranges_count
-1].high
))
1354 if (!(j
> ranges
[ranges_count
-1].high
+ ranges
[ranges_count
-1].diff
))
1356 /* Additional property: The diffs are also increasing. */
1357 if (!(diff
>= ranges
[ranges_count
-1].diff
))
/* Same diff as the previous range: extend it.  Otherwise a new range is
   started below, provided a slot is still free. */
1360 if (ranges_count
> 0 && diff
== ranges
[ranges_count
-1].diff
)
1361 ranges
[ranges_count
-1].high
= i
;
1363 if (ranges_count
== 256)
1365 ranges
[ranges_count
].low
= i
;
1366 ranges
[ranges_count
].high
= i
;
1367 ranges
[ranges_count
].diff
= diff
;
1372 /* Determine size of bitmap. */
/* ranges[k].total becomes the running sum of the lengths of all earlier
   ranges, i.e. the bit offset of range k. */
1374 for (k
= 0; k
< ranges_count
; k
++) {
1375 ranges
[k
].total
= ranges_total
;
1376 ranges_total
+= ranges
[k
].high
- ranges
[k
].low
+ 1;
/* Table of (low, high) pairs in charset-index space, four pairs per
   output line. */
1379 printf("static const unsigned short %s_charset2uni_ranges[%d] = {\n", name
, 2*ranges_count
);
1380 for (k
= 0; k
< ranges_count
; k
++) {
1381 printf(" 0x%04x, 0x%04x", ranges
[k
].low
, ranges
[k
].high
);
1382 if (k
+1 < ranges_count
) printf(",");
1383 if ((k
% 4) == 3 && k
+1 < ranges_count
) printf("\n");
/* The same pairs shifted by diff, i.e. expressed in Unicode space. */
1390 printf("static const unsigned short %s_uni2charset_ranges[%d] = {\n", name
, 2*ranges_count
);
1391 for (k
= 0; k
< ranges_count
; k
++) {
1392 printf(" 0x%04x, 0x%04x", ranges
[k
].low
+ ranges
[k
].diff
, ranges
[k
].high
+ ranges
[k
].diff
);
1393 if (k
+1 < ranges_count
) printf(",");
1394 if ((k
% 4) == 3 && k
+1 < ranges_count
) printf("\n");
/* Per-range offset and bitmap start position. */
1401 printf("static const struct { unsigned short diff; unsigned short bitmap_offset; } %s_ranges[%d] = {\n ", name
, ranges_count
);
1402 for (k
= 0; k
< ranges_count
; k
++) {
1403 printf(" { %5d, 0x%04x }", ranges
[k
].diff
, ranges
[k
].total
);
1404 if (k
+1 < ranges_count
) printf(",");
1405 if ((k
% 4) == 3 && k
+1 < ranges_count
) printf("\n ");
/* Presence bitmap: one bit per index inside any range, least significant
   bit first within each byte. */
1412 printf("static const unsigned char %s_bitmap[%d] = {\n ", name
, (ranges_total
+ 7) / 8);
1415 for (k
= 0; k
< ranges_count
; k
++) {
1416 for (i
= ranges
[k
].total
; i
<= ranges
[k
].total
+ (ranges
[k
].high
- ranges
[k
].low
);) {
1417 if (charset2uni
[i
- ranges
[k
].total
+ ranges
[k
].low
] != 0)
1418 accu
|= (1 << (i
% 8));
1421 printf(" 0x%02x", accu
);
1422 if ((i
/ 8) < (ranges_total
+ 7) / 8) printf(",");
1423 if (((i
/ 8) % 12) == 0)
/* Consistency check: the bit position must now equal the start of the
   next range (or the bitmap length after the last range). */
1428 if (i
!= (k
+1 < ranges_count
? ranges
[k
+1].total
: ranges_total
)) abort();
/* Flush the final, partially filled byte. */
1430 if ((ranges_total
% 8) != 0)
1431 printf(" 0x%02x", accu
);
/* Emit <name>_mbtowc: validates the four bytes, linearizes them exactly as
   above, binary-searches <name>_charset2uni_ranges, checks the bitmap bit
   and adds the range's diff. */
1438 printf("static int\n");
1439 printf("%s_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)\n", name
);
1441 printf(" unsigned char c1 = s[0];\n");
1442 printf(" if (c1 >= 0x81 && c1 <= 0x84) {\n");
1443 printf(" if (n >= 2) {\n");
1444 printf(" unsigned char c2 = s[1];\n");
1445 printf(" if (c2 >= 0x30 && c2 <= 0x39) {\n");
1446 printf(" if (n >= 3) {\n");
1447 printf(" unsigned char c3 = s[2];\n");
1448 printf(" if (c3 >= 0x81 && c3 <= 0xfe) {\n");
1449 printf(" if (n >= 4) {\n");
1450 printf(" unsigned char c4 = s[3];\n");
1451 printf(" if (c4 >= 0x30 && c4 <= 0x39) {\n");
1452 printf(" unsigned int i = (((c1 - 0x81) * 10 + (c2 - 0x30)) * 126 + (c3 - 0x81)) * 10 + (c4 - 0x30);\n");
1453 printf(" if (i >= %d && i <= %d) {\n", ranges
[0].low
, ranges
[ranges_count
-1].high
);
1454 printf(" unsigned int k1 = 0;\n");
1455 printf(" unsigned int k2 = %d;\n", ranges_count
-1);
1456 printf(" while (k1 < k2) {\n");
1457 printf(" unsigned int k = (k1 + k2) / 2;\n");
1458 printf(" if (i <= %s_charset2uni_ranges[2*k+1])\n", name
);
1459 printf(" k2 = k;\n");
1460 printf(" else if (i >= %s_charset2uni_ranges[2*k+2])\n", name
);
1461 printf(" k1 = k + 1;\n");
1463 printf(" return RET_ILSEQ;\n");
1466 printf(" unsigned int bitmap_index = i - %s_charset2uni_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name
, name
);
1467 printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name
);
1468 printf(" unsigned int diff = %s_ranges[k1].diff;\n", name
);
1469 printf(" *pwc = (ucs4_t) (i + diff);\n");
1470 printf(" return 4;\n");
1475 printf(" return RET_ILSEQ;\n");
1477 printf(" return RET_TOOFEW(0);\n");
1479 printf(" return RET_ILSEQ;\n");
1481 printf(" return RET_TOOFEW(0);\n");
1483 printf(" return RET_ILSEQ;\n");
1485 printf(" return RET_TOOFEW(0);\n");
1487 printf(" return RET_ILSEQ;\n");
/* Emit <name>_wctomb: the mirror image, searching
   <name>_uni2charset_ranges, subtracting diff and splitting the index back
   into four bytes (radices 10, 126, 10). */
1492 printf("static int\n");
1493 printf("%s_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)\n", name
);
1495 printf(" if (n >= 4) {\n");
1496 printf(" unsigned int i = wc;\n");
1497 printf(" if (i >= 0x%04x && i <= 0x%04x) {\n", ranges
[0].low
+ ranges
[0].diff
, ranges
[ranges_count
-1].high
+ ranges
[ranges_count
-1].diff
);
1498 printf(" unsigned int k1 = 0;\n");
1499 printf(" unsigned int k2 = %d;\n", ranges_count
-1);
1500 printf(" while (k1 < k2) {\n");
1501 printf(" unsigned int k = (k1 + k2) / 2;\n");
1502 printf(" if (i <= %s_uni2charset_ranges[2*k+1])\n", name
);
1503 printf(" k2 = k;\n");
1504 printf(" else if (i >= %s_uni2charset_ranges[2*k+2])\n", name
);
1505 printf(" k1 = k + 1;\n");
1507 printf(" return RET_ILSEQ;\n");
1510 printf(" unsigned int bitmap_index = i - %s_uni2charset_ranges[2*k1] + %s_ranges[k1].bitmap_offset;\n", name
, name
);
1511 printf(" if ((%s_bitmap[bitmap_index >> 3] >> (bitmap_index & 7)) & 1) {\n", name
);
1512 printf(" unsigned int diff = %s_ranges[k1].diff;\n", name
);
1513 printf(" i -= diff;\n");
1514 printf(" r[3] = (i %% 10) + 0x30; i = i / 10;\n");
1515 printf(" r[2] = (i %% 126) + 0x81; i = i / 126;\n");
1516 printf(" r[1] = (i %% 10) + 0x30; i = i / 10;\n");
1517 printf(" r[0] = i + 0x81;\n");
1518 printf(" return 4;\n");
1522 printf(" return RET_ILSEQ;\n");
1524 printf(" return RET_TOOSMALL;\n");
1530 int main (int argc
, char *argv
[])
1532 const char* charsetname
;
1537 charsetname
= argv
[1];
1540 output_title(charsetname
);
1542 if (!strcmp(name
,"gb2312")
1543 || !strcmp(name
,"isoir165ext") || !strcmp(name
,"gb12345ext")
1544 || !strcmp(name
,"jisx0208") || !strcmp(name
,"jisx0212"))
1546 else if (!strcmp(name
,"cns11643_1") || !strcmp(name
,"cns11643_2")
1547 || !strcmp(name
,"cns11643_3"))
1548 do_normal_only_charset2uni(name
);
1549 else if (!strcmp(name
,"cns11643_inv"))
1550 do_cns11643_only_uni2charset(name
);
1551 else if (!strcmp(name
,"gbkext1"))
1552 do_gbk1_only_charset2uni(name
);
1553 else if (!strcmp(name
,"gbkext2"))
1554 do_gbk2_only_charset2uni(name
);
1555 else if (!strcmp(name
,"gbkext_inv"))
1556 do_gbk1_only_uni2charset(name
);
1557 else if (!strcmp(name
,"cp936ext") || !strcmp(name
,"gb18030ext"))
1559 else if (!strcmp(name
,"ksc5601"))
1561 else if (!strcmp(name
,"uhc_1"))
1563 else if (!strcmp(name
,"uhc_2"))
1565 else if (!strcmp(name
,"big5") || !strcmp(name
,"cp950ext"))
1567 else if (!strcmp(name
,"hkscs"))
1569 else if (!strcmp(name
,"johab_hangul"))
1570 do_johab_hangul(name
);
1571 else if (!strcmp(name
,"cp932ext"))
1573 else if (!strcmp(name
,"gb18030uni"))
1574 do_gb18030uni(name
);