swi-prolog: update to 9.2.9
[oi-userland.git] / components / x11 / libX11 / srcs / modules / lc / gb18030 / lcGB18030.c
blob1292e10a649333672c816f8f4e42af712541f380
1 /*
2 * Copyright © 1996, 2015, Oracle and/or its affiliates. All rights reserved.
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
24 #ifdef HAVE_CONFIG_H
25 #include <config.h>
26 #endif
28 #include <stdio.h>
29 #include "Xlibint.h"
30 #include "XlcGeneric.h"
/*
 * The codesets array is derived from XLC_XLOCALE.  The macros below map the
 * logical codeset numbers used in this file onto slots of that array.
 * NOTE(review): an earlier comment here said codesets[0] is gb18030.2000-1,
 * while the CS1 macro labels codesets[0] as gb18030.2000-0 -- confirm
 * against the locale database.
 */
#define CS0 codesets[2] /* Codeset 0 - 7-bit ASCII */
#define CS1 codesets[0] /* Codeset 1 - gb18030.2000-0 */
#define CS2 codesets[1] /* Codeset 2 - gb18030.2000-1 */

/* Tests on a variable named "codeset" that must be in scope at the use site. */
#define ascii   (codeset->cs_num == 0)
#define gb18030 (codeset->cs_num == 2)

/* Logical codeset numbers and their count. */
#define ASCII_CODESET  0
#define GBK2K_CODESET1 1
#define GBK2K_CODESET2 2
#define MAX_CODESETS   3

/* True when the value, truncated to one byte, has its top bit clear. */
#define ISASCII(ch) ((unsigned char)(ch) < 0x80)

#define GR 0x80 /* Begins right-side (non-ascii) region. */
#define GL 0x7f /* Ends left-side (ascii) region. */

/* 1 when bit 8 of c is clear (left side), 0 otherwise; and the converse. */
#define isleftside(c)  (((c) & GR) == 0)
#define isrightside(c) (!isleftside(c))

typedef unsigned char Uchar;
typedef unsigned long Ulong;
typedef unsigned int  Uint;

/* Clear / set bit 8 of c. */
#define BIT8OFF(c) ((c) & GL)
#define BIT8ON(c)  ((c) | GR)

/* The escape byte that introduces a compound-text designation. */
#define ESC 0x1b
/* One row of the compound-text designation table. */
typedef struct _CT_DATA
{
    char *charset;          /* charset name used as the lookup key */
    char *encode_string;    /* escape sequence that designates the charset */
} CT_DATA;

/*
 * Designation table.  The row order must stay in step with CharSet_Type
 * below, because get_charset_with_encode_string() returns the row index
 * as a CharSet_Type value.
 */
static CT_DATA default_ct_data[] =
{
    {"ASCII", "\033(B" },
    {"GB2312", "\033$(A" },
    {"GB18030-0", "\033%/2??SUN-GB18030-0" },
    {"GB18030-1", "\033%/2??SUN-GB18030-1" }
};

static int num_ct_data = sizeof(default_ct_data)/sizeof(CT_DATA);

/*
 * Return the escape sequence registered for the charset called `charset`,
 * or NULL when charset is NULL or no row carries exactly that name.
 *
 * The comparison is a full-string match.  Comparing only the first
 * strlen(charset) bytes would let "" or any prefix such as "GB" select a
 * row by accident.
 */
static char *
get_CT_encode_string(const char *charset)
{
    int i;

    if (charset == NULL)
        return NULL;

    for (i = 0; i < num_ct_data; ++i) {
        if (strcmp(charset, default_ct_data[i].charset) == 0)
            return default_ct_data[i].encode_string;
    }

    return NULL;
}

/* Index of a row in default_ct_data; t_NONE means "no row". */
typedef enum {t_ASCII, t_GB2312, t_GB18030_0, t_GB18030_1, t_NONE} CharSet_Type;

/*
 * Identify which designation sequence the text at `s` starts with.
 * Returns the matching row index, or t_NONE when s is NULL or does not
 * begin with any sequence in the table.  Only the leading bytes of s are
 * compared, so s may continue with payload after the sequence.
 */
static CharSet_Type
get_charset_with_encode_string(const char *s)
{
    int i;

    if (s == NULL)
        return t_NONE;

    for (i = 0; i < num_ct_data; ++i) {
        const char *seq = default_ct_data[i].encode_string;

        if (strncmp(s, seq, strlen(seq)) == 0)
            return (CharSet_Type)i;
    }

    return t_NONE;
}
/*
 * Expand the 16-bit value held big-endian in in_buf[0..1] into four bytes
 * written to out_buf[0..3].  The value is split into mixed-radix digits
 * (10, 126, 10, remainder), least significant first; the radix-10 digits
 * are offset by 0x30 and the others by 0x81.
 */
static void
twobyte_to_fourbyte(unsigned char *in_buf, unsigned char *out_buf)
{
    int value = (in_buf[0] << 8) + in_buf[1];
    int d3, d2, d1, d0;

    d3 = value % 10;
    value /= 10;
    d2 = value % 126;
    value /= 126;
    d1 = value % 10;
    value /= 10;
    d0 = value;

    out_buf[0] = 0x81 + d0;
    out_buf[1] = 0x30 + d1;
    out_buf[2] = 0x81 + d2;
    out_buf[3] = 0x30 + d3;
}
/*
 * Fold the four bytes at in_buf[0..3] into a linear index
 * (12600, 1260, 10 and 1 are the digit weights, after removing the 0x81 and
 * 0x30 offsets) and store its low 16 bits big-endian in out_buf[0..1].
 * Bits above the low 16 are discarded.
 */
static void
fourbyte_to_twobyte(unsigned char *in_buf, unsigned char *out_buf)
{
    int b0 = in_buf[0] - 0x81;
    int b1 = in_buf[1] - 0x30;
    int b2 = in_buf[2] - 0x81;
    int b3 = in_buf[3] - 0x30;
    int index = ((b0 * 10 + b1) * 126 + b2) * 10 + b3;

    out_buf[0] = (index & 0xff00) >> 8;
    out_buf[1] = index & 0xff;
}
/*
 * In GB2312 range or not.
 *
 * Returns 1 when both s[0] and s[1] lie in 0xA1..0xFE, otherwise 0.
 * s[1] is only examined when s[0] is in range.
 */
static int
isgb(unsigned char *s)
{
    if (s[0] < 0xA1 || s[0] > 0xFE)
        return 0;

    return (s[1] >= 0xA1 && s[1] <= 0xFE) ? 1 : 0;
}
/*
 * In GB18030 2 bytes range or not.
 *
 * Returns 1 when s[0] is in 0x81..0xFE and s[1] is in 0x40..0x7E or
 * 0x80..0xFE, otherwise 0.  s[1] is only examined when s[0] is in range.
 */
static int
isgb18030_2(unsigned char *s)
{
    unsigned char trail;

    if (s[0] < 0x81 || s[0] > 0xfe)
        return 0;

    trail = s[1];
    if (trail >= 0x40 && trail <= 0x7e)
        return 1;
    if (trail >= 0x80 && trail <= 0xfe)
        return 1;

    return 0;
}
/*
 * In GB18030 4 bytes range or not.
 *
 * Returns 1 when s[0] and s[2] are in 0x81..0xFE and s[1] and s[3] are in
 * 0x30..0x39, otherwise 0.  Bytes are examined left to right and checking
 * stops at the first byte that is out of range.
 */
static int isgb18030_4(unsigned char *s)
{
    if (s[0] < 0x81 || s[0] > 0xfe)
        return 0;
    if (s[1] < 0x30 || s[1] > 0x39)
        return 0;
    if (s[2] < 0x81 || s[2] > 0xfe)
        return 0;
    if (s[3] < 0x30 || s[3] > 0x39)
        return 0;

    return 1;
}
182 * multibyte -> charset: codesets[0], codesets[1], codeset[2]
184 static int
185 gb18030_mbtocs(
186 XlcConv conv,
187 XPointer *from,
188 int *from_left,
189 XPointer *to,
190 int *to_left,
191 XPointer *args,
192 int num_args)
194 unsigned char *src = (unsigned char *)*from;
195 unsigned char *dst = (unsigned char *)*to;
196 int unconv_num = 0;
197 int char_size = 0;
198 XLCd lcd = (XLCd)conv->state;
199 CodeSet *codesets = XLC_GENERIC(lcd, codeset_list);
200 int codeset_num = XLC_GENERIC(lcd, codeset_num);
201 XlcCharSet charset = NULL;
203 if (isgb18030_2(src)) {
204 if (GBK2K_CODESET1 >= codeset_num)
205 return -1;
206 charset = *CS1->charset_list;
207 char_size = charset->char_size;
209 if (*from_left >= 2 && *to_left >= char_size) {
210 *dst++ = *src++;
211 *dst++ = *src++;
212 } else {
213 return -1;
216 *from_left -= char_size;
217 *to_left -= char_size;
218 } else if (isgb18030_4(src)) {
219 unsigned char iconv_buf[6];
221 if (GBK2K_CODESET2 >= codeset_num)
222 return -1;
224 charset = *CS2->charset_list;
225 char_size = charset->char_size;
226 fourbyte_to_twobyte(src, iconv_buf);
227 if (*from_left >= 4 && *to_left >= char_size) {
228 *dst++ = iconv_buf[0];
229 *dst++ = iconv_buf[1];
230 src += 4;
231 } else {
232 return -1;
235 *from_left -= 4;
236 *to_left -= char_size;
237 } else if (ISASCII(*src)) {
238 if (ASCII_CODESET >= codeset_num)
239 return -1;
240 charset = *CS0->charset_list;
241 char_size = charset->char_size;
243 if (*from_left >= char_size && *to_left >= char_size)
244 *dst++ = *src++;
245 else
246 return -1;
248 *from_left -= char_size;
249 *to_left -= char_size;
250 } else { /* unknown */
251 unconv_num++;
252 src++;
253 *from_left -= 1;
256 *to = (XPointer)dst;
257 *from = (XPointer)src;
259 if (num_args > 0)
260 *((XlcCharSet *) args[0]) = charset;
262 return unconv_num;
265 static int
266 gb18030_mbstocs(
267 XlcConv conv,
268 XPointer *from,
269 int *from_left,
270 XPointer *to,
271 int *to_left,
272 XPointer *args,
273 int num_args)
275 unsigned char *src = (unsigned char *)*from;
276 unsigned char *dst = (unsigned char *)*to;
277 int char_size = 0;
278 int unconv_num = 0;
279 int flag = 0;
280 XLCd lcd = (XLCd)conv->state;
281 CodeSet *codesets = XLC_GENERIC(lcd, codeset_list);
282 int codeset_num = XLC_GENERIC(lcd, codeset_num);
283 XlcCharSet charset = NULL;
285 for (;;) {
286 if (isgb18030_2(src)) {
287 if (flag == 0)
288 flag = 1;
289 if (flag == 3 || flag == 2 )
290 break;
291 if (GBK2K_CODESET1 >= codeset_num)
292 return -1;
294 charset = *CS1->charset_list;
295 char_size = charset->char_size;
297 if (*from_left >= char_size && *to_left >= char_size) {
298 *dst++ = *src++;
299 *dst++ = *src++;
300 *to_left -= char_size;
301 *from_left -= char_size;
302 }else {
303 return -1;
305 } else if (isgb18030_4(src)) {
306 unsigned char iconv_buf[6];
308 if (flag == 0)
309 flag = 2;
310 if (flag == 3 || flag == 1)
311 break;
312 if (GBK2K_CODESET2 >= codeset_num)
313 return -1;
315 charset = *CS2->charset_list;
316 char_size = charset->char_size;
317 fourbyte_to_twobyte(src, iconv_buf);
319 if (*from_left >= 4 && *to_left >= char_size) {
320 *dst++ = iconv_buf[0];
321 *dst++ = iconv_buf[1];
322 src += 4;
323 *to_left -= char_size;
324 *from_left -= 4;
325 } else {
326 return -1;
328 } else if (ISASCII(*src)) {
329 if (flag == 0)
330 flag = 3;
331 if (flag == 1 || flag ==2 )
332 break;
333 if (ASCII_CODESET >= codeset_num)
334 return -1;
336 charset = *CS0->charset_list;
337 char_size = charset->char_size;
338 if (*from_left >= char_size && *to_left >= char_size) {
339 *dst++ = *src++;
340 *to_left -= char_size;
341 *from_left -= char_size;
342 } else {
343 break;
345 } else{ /* unknown */
346 unconv_num++;
347 src++;
348 *from_left -= 1;
351 if (*from_left <= 0)
352 break;
355 *from = (XPointer)src;
356 *to = (XPointer)dst;
358 if (num_args > 0)
359 *((XlcCharSet *) args[0]) = charset;
361 return unconv_num;
364 static int
365 gb18030_wcstocs(
366 XlcConv conv,
367 XPointer *from,
368 int *from_left,
369 XPointer *to,
370 int *to_left,
371 XPointer *args,
372 int num_args)
374 unsigned char *dst = (unsigned char *)*to;
375 wchar_t *src = (wchar_t *)*from;
376 int char_size = 0;
377 int unconv_num = 0;
378 int flag = 0;
379 XLCd lcd = (XLCd)conv->state;
380 XlcCharSet charset = NULL;
381 CodeSet *codesets = XLC_GENERIC(lcd, codeset_list);
382 int codeset_num = XLC_GENERIC(lcd, codeset_num);
384 charset = (XlcCharSet)args[0];
386 for (;;) {
387 wchar_t wch = *src;
388 char tmp[32];
390 wctomb(tmp, wch);
392 if (isgb18030_2((unsigned char*)tmp)) {
393 if (flag == 0)
394 flag = 1;
395 if (flag == 2 || flag == 3)
396 break;
397 if (GBK2K_CODESET1 >= codeset_num)
398 return -1;
400 charset = *CS1->charset_list;
401 char_size = charset->char_size;
403 if (*from_left > 0 && *to_left >= char_size) {
404 *dst++ = tmp[0];
405 *dst++ = tmp[1];
406 *to_left -= char_size;
407 } else {
408 return -1;
410 } else if (isgb18030_4((unsigned char*)tmp)) {
411 unsigned char iconv_buf[6];
413 if (flag == 0)
414 flag = 2;
415 if (flag == 1 || flag == 3)
416 break;
417 if (GBK2K_CODESET2 >= codeset_num)
418 return -1;
420 charset = *CS2->charset_list;
421 char_size = charset->char_size;
422 fourbyte_to_twobyte((unsigned char*)tmp, iconv_buf);
424 if (*from_left > 0 && *to_left >= char_size) {
425 *dst++ = iconv_buf[0];
426 *dst++ = iconv_buf[1];
427 *to_left -= char_size;
428 } else {
429 return -1;
431 } else if (ISASCII(tmp[0])) {
432 if (flag == 0)
433 flag = 3;
434 if (flag == 1 || flag == 2)
435 break;
436 if (ASCII_CODESET >= codeset_num)
437 return -1;
439 charset = *CS0->charset_list;
440 char_size = charset->char_size;
441 if (*from_left > 0 && *to_left >= char_size) {
442 *dst++ = tmp[0];
443 *to_left -= char_size;
444 } else {
445 return -1;
447 } else { /* unknown */
448 unconv_num++;
451 src++; /* advance one wchar_t */
452 (*from_left)--;
453 if (*from_left <= 0)
454 break;
457 *from = (XPointer)src;
458 *to = (XPointer)dst;
460 if (num_args > 0)
461 *((XlcCharSet *)args[0]) = charset;
463 return unconv_num;
466 static CodeSet
467 get_code_set_from_charset(
468 XLCd lcd,
469 XlcCharSet charset)
471 CodeSet *codeset = XLC_GENERIC(lcd, codeset_list);
472 XlcCharSet *charset_list;
473 int codeset_num, num_charsets;
475 codeset_num = XLC_GENERIC(lcd, codeset_num);
477 for (; codeset_num-- > 0; codeset++) {
478 num_charsets = (*codeset)->num_charsets;
479 charset_list = (*codeset)->charset_list;
481 for (; num_charsets-- > 0; charset_list++)
482 if (*charset_list == charset)
483 return *codeset;
486 return (CodeSet)NULL;
489 static int
490 gb18030_cstombs(
491 XlcConv conv,
492 XPointer *from,
493 int *from_left,
494 XPointer *to,
495 int *to_left,
496 XPointer *args,
497 int num_args)
499 unsigned char *src = (unsigned char *)*from;
500 unsigned char *dst = (unsigned char *)*to;
501 int char_size = 0;
502 int unconv_num = 0;
503 XLCd lcd = (XLCd)conv->state;
504 XlcCharSet charset = (XlcCharSet) args[0];
505 CodeSet codeset;
507 codeset = get_code_set_from_charset(lcd, charset);
508 if (codeset == NULL)
509 return -1;
511 for (;;) {
512 if (codeset->wc_encoding==0x5e84) { /* GB18030-1 */
513 char_size = 2;
515 if (*from_left >= 2 && *to_left >= 4) {
516 unsigned char iconv_buf[6];
518 twobyte_to_fourbyte(src, iconv_buf);
519 *dst++ = iconv_buf[0];
520 *dst++ = iconv_buf[1];
521 *dst++ = iconv_buf[2];
522 *dst++ = iconv_buf[3];
523 *to_left -= 4;
524 } else {
525 return -1;
527 } else if (isgb18030_2((unsigned char*)src)) { /*2 bytes character*/
528 char_size = 2;
530 if (*from_left >= 2 && *to_left >= 2) {
531 *dst++ = *src++;
532 *dst++ = *src++;
533 *to_left -= 2;
534 } else {
535 return -1;
537 } else if (ISASCII(*src)) { /*ASCII character*/
538 char_size = 1;
540 if (*from_left >= 1 && *to_left >= 1) {
541 *dst++ = *src++;
542 *to_left -= 1;
543 } else {
544 return -1;
546 } else { /* unknown */
547 unconv_num++;
548 src++;
549 char_size = 1;
552 *from_left -= char_size;;
553 if (*from_left <= 0)
554 break;
557 *from = (XPointer)src;
558 *to = (XPointer )dst;
560 if (num_args > 0)
561 *((XlcCharSet *) args[0]) = charset;
563 return unconv_num;
566 static int
567 gb18030_cstowcs(
568 XlcConv conv,
569 XPointer *from,
570 int *from_left,
571 XPointer *to,
572 int *to_left,
573 XPointer *args,
574 int num_args)
576 XPointer outbufptr, outbufptr_save;
577 int to_left_save = *to_left;
578 wchar_t *pwc = (wchar_t *) *to;
579 int rtn, rtn_1;
581 outbufptr = (XPointer) Xmalloc(*to_left * 4);
582 outbufptr_save = outbufptr;
584 rtn = gb18030_cstombs(conv, from, from_left,
585 &outbufptr, to_left,
586 args, num_args);
587 *outbufptr='\0';
589 rtn_1 = mbstowcs(pwc, outbufptr_save, (to_left_save - *to_left));
591 Xfree(outbufptr_save);
593 *to_left = to_left_save - rtn_1;
594 *to = *to+ rtn_1 * sizeof(wchar_t);
596 return rtn;
601 * In gb18030 locale, we only consider the following possibilities
602 * all other ct formats are ignored, keep looping until end of buffer
603 * ASCII
604 * GB2312
605 * GB18030-0
606 * GB18030-1
608 static int
609 gb18030_ctstombs(
610 XlcConv conv,
611 XPointer *from,
612 int *from_left,
613 XPointer *to,
614 int *to_left,
615 XPointer *args,
616 int num_args)
618 unsigned char *inbufptr = (unsigned char *)*from;
619 unsigned char *outbufptr = (unsigned char *)*to;
620 int unconv_num = 0;
621 int i, j;
622 int save_outbuf = True;
623 CharSet_Type charset_type;
626 * If outbufptr is NULL, doen't save output, but just counts
627 * a length to hold the output.
629 if (outbufptr == NULL)
630 save_outbuf = False;
632 i = 0;
633 j = 0;
635 while (i < *from_left) {
636 if (inbufptr[i] == ESC) { /* got an escape */
637 charset_type = get_charset_with_encode_string(
638 (const char *)inbufptr + i);
640 switch (charset_type) {
641 case t_ASCII:
642 i += strlen(default_ct_data[charset_type].encode_string);
644 for (;;) {
645 if (i >= *from_left) { /* end of from buffer */
646 goto FINISHED;
649 if ((inbufptr[i] == 0x0a || inbufptr[i] == 0x09 ||
650 inbufptr[i] >= 0x20) && ISASCII(inbufptr[i])) {
651 if (*to_left < 1) { /* end of to buffer */
652 goto FINISHED;
655 if (save_outbuf == True)
656 outbufptr[j++] = inbufptr[i];
658 (*to_left)--;
659 ++i;
660 } else {
661 break;
664 break;
666 case t_GB2312:
667 i += strlen(default_ct_data[charset_type].encode_string);
669 for (;;) {
670 unsigned char iconv_buf[3];
672 if (i >= *from_left) { /* end of from buffer */
673 goto FINISHED;
676 iconv_buf[0] = (inbufptr[i] & 0x7f) | 0x80;
677 iconv_buf[1] = (inbufptr[i + 1] & 0x7f) | 0x80;
679 if (isgb(iconv_buf)) {
680 if (*to_left < 2 || *from_left < 2) {
681 goto FINISHED;
684 if (save_outbuf == True) {
685 outbufptr[j++] = iconv_buf[0];
686 outbufptr[j++] = iconv_buf[1];
689 *to_left -= 2;
690 i = i + 2;
691 } else {
692 break;
695 break;
697 case t_GB18030_0:
698 i += strlen(default_ct_data[charset_type].encode_string);
700 for (;;) {
701 if (i >= *from_left) { /* end of from buffer */
702 goto FINISHED;
705 if (isgb18030_2(inbufptr + i)) {
706 if (*to_left < 2 || *from_left < 2) {
707 goto FINISHED;
710 if (save_outbuf == True) {
711 outbufptr[j++] = inbufptr[i];
712 outbufptr[j++] = inbufptr[i + 1];
715 *to_left -= 2;
716 i = i + 2;
717 } else {
718 break;
721 break;
723 case t_GB18030_1: /* gb18030.2000-1 character */
724 i += strlen(default_ct_data[charset_type].encode_string);
726 for (;;) {
727 if (i >= *from_left) { /* end of from buffer */
728 goto FINISHED;
731 if (isgb18030_4(inbufptr + i)) {
732 if (*to_left < 4 || *from_left < 4) {
733 goto FINISHED;
736 if (save_outbuf == True) {
737 outbufptr[j++] = inbufptr[i] & 0xff;
738 outbufptr[j++] = inbufptr[i+1] & 0xff;
739 outbufptr[j++] = inbufptr[i+2] & 0xff;
740 outbufptr[j++] = inbufptr[i+3] & 0xff;
743 *to_left -= 4;
744 i = i + 4;
745 } else {
746 break;
749 break;
751 case t_NONE:
752 i++; /* encounter unknown escape sequence */
753 unconv_num++;
754 break;
756 } else if ((inbufptr[i] == 0x0a || inbufptr[i] == 0x09 ||
757 inbufptr[i] >= 0x20) && ISASCII(inbufptr[i])) {
758 /* Process default CT G0 ascii character */
759 if (*to_left < 1) {
760 goto FINISHED;
763 if (save_outbuf == True)
764 outbufptr[j++] = inbufptr[i];
766 i++;
767 *to_left -= 1;
768 } else { /* unknown character */
769 i++;
770 unconv_num++;
774 FINISHED:
775 *from = (XPointer)(inbufptr + i);
776 *to = (XPointer)(outbufptr + j);
777 if (i >= *from_left)
778 *from_left = 0;
779 else
780 *from_left -= i;
782 return (unconv_num);
785 static int
786 gb18030_ctstowcs(
787 XlcConv conv,
788 XPointer *from,
789 int *from_left,
790 XPointer *to,
791 int *to_left,
792 XPointer *args,
793 int num_args)
795 XPointer outbufptr, outbufptr_end;
796 int to_left_save = *to_left;
797 wchar_t *pwc = (wchar_t *) *to;
798 int rtn, rtn_1;
801 outbufptr = (XPointer) Xmalloc(*to_left * 4); /* 100 safty tolerence */
802 outbufptr_end = outbufptr;
804 rtn = gb18030_ctstombs(conv,
805 from,
806 from_left,
807 &outbufptr_end,
808 to_left,
809 args,
810 num_args);
812 *outbufptr_end = '\0';
814 rtn_1 = mbstowcs(pwc, outbufptr, (to_left_save - *to_left));
816 Xfree(outbufptr);
818 *to_left = to_left_save - rtn_1;
819 *to = *to + rtn_1 * sizeof(wchar_t);
821 return rtn;
825 * The mbs is GB18030 code, must be converted to euc code,
826 * then pack to ct format.
828 static int
829 gb18030_mbstocts(
830 XlcConv conv,
831 XPointer *from,
832 int *from_left,
833 XPointer *to,
834 int *to_left,
835 XPointer *args,
836 int num_args)
838 unsigned char *inbufptr = (unsigned char *)*from;
839 unsigned char *outbufptr = (unsigned char *)*to;
840 int unconv_num = 0;
841 int flag = 0, j = 0, i = 0;
842 int encode_string_len;
843 char *encode_string;
845 while (i < *from_left && j < *to_left) {
846 if (isgb(&inbufptr[i])) { /* GB2312 character */
847 if (flag == 0 || flag != 2) {
848 encode_string = get_CT_encode_string("GB2312");
849 if (! encode_string )
850 return -1;
852 encode_string_len = strlen(encode_string);
853 if (j + encode_string_len + 2 >= *to_left)
854 break;
856 strncpy((char *)outbufptr+j, encode_string, encode_string_len);
857 j += encode_string_len;
858 flag = 2;
861 if (j + 2 >= *to_left)
862 break;
864 outbufptr[j++] = inbufptr[i++] & 0x7f;
865 outbufptr[j++] = inbufptr[i++] & 0x7f;
866 } else if (isgb18030_2(&inbufptr[i])) { /* 2 bytes GB 18030 */
867 if (flag == 0 || flag != 4) {
868 encode_string = get_CT_encode_string("GB18030-0");
869 if (! encode_string )
870 return -1;
872 encode_string_len = strlen(encode_string);
873 if (j + encode_string_len + 2 >= *to_left)
874 break;
876 strncpy((char *)outbufptr+j, encode_string, encode_string_len);
877 j += encode_string_len;
878 flag = 4;
881 if (j + 2 >= *to_left)
882 break;
884 outbufptr[j++] = inbufptr[i++] & 0xff;
885 outbufptr[j++] = inbufptr[i++] & 0xff;
886 } else if (isgb18030_4(&inbufptr[i])) { /* 4 bytes GB18030 */
887 if (flag == 0 || flag != 5) {
888 encode_string = get_CT_encode_string("GB18030-1");
889 if (!encode_string)
890 return -1;
891 encode_string_len = strlen(encode_string);
893 if (j + encode_string_len + 4 >= *to_left)
894 break;
896 strncpy((char *)outbufptr+j, encode_string, encode_string_len);
897 j += encode_string_len;
898 flag = 5;
901 if (j + 4 >= *to_left)
902 break;
904 outbufptr[j++] = inbufptr[i++];
905 outbufptr[j++] = inbufptr[i++];
906 outbufptr[j++] = inbufptr[i++];
907 outbufptr[j++] = inbufptr[i++];
908 } else if (ISASCII(inbufptr[i])) { /* ASCII */
909 if (flag == 0 || flag != 3) {
910 encode_string = get_CT_encode_string("ASCII");
911 if (!encode_string )
912 return -1;
914 encode_string_len = strlen(encode_string);
915 if (j + encode_string_len + 1 >= *to_left)
916 break;
918 strncpy((char *)outbufptr+j, encode_string, encode_string_len);
919 j += encode_string_len;
920 flag = 3;
923 if (j + 1 >= *to_left)
924 break;
926 outbufptr[j++] = inbufptr[i++];
927 } else{
928 i++; /* Skip this byte */
929 unconv_num++; /* Count this as an unconverted byte */
933 *from = (XPointer)&inbufptr[i];
934 *to = (XPointer)&outbufptr[j];
935 *from_left = *from_left - i;
936 *to_left = *to_left - j;
938 return unconv_num;
941 static int
942 gb18030_wcstocts(
943 XlcConv conv,
944 XPointer *from,
945 int *from_left,
946 XPointer *to,
947 int *to_left,
948 XPointer *args,
949 int num_args)
952 unsigned char inbufptr[10];
953 unsigned char *outbufptr = (unsigned char *)*to;
954 wchar_t *pwc = (wchar_t *)*from;
955 int unconv_num = 0;
956 int flag,j,i,k;
957 char *encode_string;
958 int encode_string_len;
960 i = k = j = 0;
961 flag = 0;
963 while (k < *from_left) {
964 if (wctomb((char *)inbufptr, pwc[k++]) == -1)
965 return (-1);
967 i=0;
969 if (isgb(&inbufptr[i])) { /* GB2312 */
970 if (flag == 0 || flag != 1) {
971 encode_string = get_CT_encode_string("GB2312");
972 if (! encode_string)
973 return (-1);
975 encode_string_len = strlen(encode_string);
976 if (j + encode_string_len + 2 >= *to_left)
977 break;
979 strncpy((char *)outbufptr+j, encode_string, encode_string_len);
980 j += encode_string_len;
981 flag = 1;
984 if (j + 2 >= *to_left)
985 break;
987 outbufptr[j++] = inbufptr[i++];
988 outbufptr[j++] = inbufptr[i++];
989 } else if (isgb18030_2(&inbufptr[i])) {
990 if (flag == 0 || flag != 2) {
991 encode_string = get_CT_encode_string("GB18030-0");
992 if (! encode_string)
993 return -1;
995 encode_string_len = strlen(encode_string);
996 if (j + encode_string_len + 2 >= *to_left)
997 break;
999 strncpy((char *)outbufptr+j, encode_string, encode_string_len);
1000 j += encode_string_len;
1001 flag = 2;
1004 if (j + 2 >= *to_left)
1005 break;
1007 outbufptr[j++] = inbufptr[i++];
1008 outbufptr[j++] = inbufptr[i++];
1009 } else if (isgb18030_4(&inbufptr[i])) {
1010 if (flag == 0 || flag != 6) {
1011 encode_string = get_CT_encode_string("GB18030-1");
1012 if (! encode_string)
1013 return -1;
1015 encode_string_len = strlen(encode_string);
1016 if (j + encode_string_len + 4 >= *to_left)
1017 break;
1019 strncpy((char *)outbufptr+j, encode_string, encode_string_len);
1020 j += encode_string_len;
1021 flag = 6;
1024 if (j + 4 >= *to_left)
1025 break;
1027 outbufptr[j++] = inbufptr[i++];
1028 outbufptr[j++] = inbufptr[i++];
1029 outbufptr[j++] = inbufptr[i++];
1030 outbufptr[j++] = inbufptr[i++];
1031 } else if (ISASCII(inbufptr[i])) {
1032 if (flag == 0 || flag != 3) {
1033 encode_string = get_CT_encode_string("ASCII");
1034 if (! encode_string)
1035 return -1;
1037 encode_string_len = strlen(encode_string);
1038 if (j + encode_string_len + 1 >= *to_left)
1039 break;
1041 strncpy((char *)outbufptr+j, encode_string, encode_string_len);
1042 j += encode_string_len;
1044 flag = 3;
1047 outbufptr[j++] = inbufptr[i++];
1048 } else {
1049 unconv_num++;
1053 *from = (XPointer)&pwc[k];
1054 *to = (XPointer)&outbufptr[j];
1055 *from_left = *from_left - k;
1056 *to_left = *to_left - j;
1058 return unconv_num;
1061 static int
1062 gb18030_mbstowcs(
1063 XlcConv conv,
1064 XPointer *from,
1065 int *from_left,
1066 XPointer *to,
1067 int *to_left,
1068 XPointer *args,
1069 int num_args)
1071 char *src = *((char **) from);
1072 wchar_t *dst = *((wchar_t **) to);
1073 int src_left = *from_left;
1074 int dst_left = *to_left;
1075 int mblen, unconv_num = 0;
1077 while (src_left > 0 && dst_left > 0) {
1078 mblen = mbtowc(dst, src, src_left);
1080 if (mblen > 0) {
1081 src += mblen;
1082 src_left -= mblen;
1083 dst++;
1084 dst_left--;
1085 } else {
1086 src++;
1087 src_left--;
1088 if (mblen < 0) {
1089 unconv_num++;
1090 } else {
1091 *dst++ = L'\0';
1092 dst_left--;
1097 *from = (XPointer) src;
1098 *to = (XPointer) dst;
1099 *from_left = src_left;
1100 *to_left = dst_left;
1101 return unconv_num;
1104 static int
1105 gb18030_wcstombs(
1106 XlcConv conv,
1107 XPointer *from,
1108 int *from_left,
1109 XPointer *to,
1110 int *to_left,
1111 XPointer *args,
1112 int num_args)
1114 wchar_t *src = *((wchar_t **)from);
1115 char *dst = *((char **) to);
1116 char buf[MB_CUR_MAX];
1117 int src_left = *from_left;
1118 int dst_left = *to_left;
1119 int mblen, unconv_num = 0;
1121 while (src_left > 0) {
1122 mblen = wctomb(buf, *src);
1124 if (dst_left < mblen) {
1125 break;
1128 src++;
1129 src_left--;
1131 if (mblen < 0) {
1132 unconv_num++;
1133 continue;
1136 dst_left -= mblen;
1137 for (int i = 0; i < mblen; i++) {
1138 *dst++ = buf[i];
1142 *from = (XPointer) src;
1143 *to = (XPointer) dst;
1144 *from_left = src_left;
1145 *to_left = dst_left;
1146 return unconv_num;
1150 static void
1151 close_converter(XlcConv conv)
1153 Xfree((char *) conv);
1156 static XlcConv
1157 create_conv(
1158 XLCd lcd,
1159 XlcConvMethods methods)
1161 XlcConv conv;
1163 conv = (XlcConv) Xmalloc(sizeof(XlcConvRec));
1164 if (conv == NULL)
1165 return (XlcConv) NULL;
1167 conv->methods = methods;
1168 conv->state = (XPointer) lcd;
1169 return conv;
1173 enum { MBSTOCS, WCSTOCS, MBTOCS, CSTOMBS, CSTOWCS, MBSTOCTS, CTSTOMBS,
1174 CTSTOWCS, WCSTOCTS, MBSTOWCS, WCSTOMBS, STRTOMBS };
1176 static XlcConvMethodsRec conv_methods[] = {
1177 {close_converter, gb18030_mbstocs, NULL },
1178 {close_converter, gb18030_wcstocs, NULL },
1179 {close_converter, gb18030_mbtocs, NULL },
1180 {close_converter, gb18030_cstombs, NULL },
1181 {close_converter, gb18030_cstowcs, NULL },
1182 {close_converter, gb18030_mbstocts, NULL },
1183 {close_converter, gb18030_ctstombs, NULL },
1184 {close_converter, gb18030_ctstowcs, NULL },
1185 {close_converter, gb18030_wcstocts, NULL },
1186 {close_converter, gb18030_mbstowcs, NULL },
1187 {close_converter, gb18030_wcstombs, NULL },
1192 static XlcConv
1193 open_mbstocs(
1194 XLCd from_lcd,
1195 const char *from_type,
1196 XLCd to_lcd,
1197 const char *to_type)
1199 return create_conv(from_lcd, &conv_methods[MBSTOCS]);
1202 static XlcConv
1203 open_wcstocs(
1204 XLCd from_lcd,
1205 const char *from_type,
1206 XLCd to_lcd,
1207 const char *to_type)
1209 return create_conv(from_lcd, &conv_methods[WCSTOCS]);
1212 static XlcConv
1213 open_mbtocs(
1214 XLCd from_lcd,
1215 const char *from_type,
1216 XLCd to_lcd,
1217 const char *to_type)
1219 return create_conv(from_lcd, &conv_methods[MBTOCS]);
1222 static XlcConv
1223 open_cstombs(
1224 XLCd from_lcd,
1225 const char *from_type,
1226 XLCd to_lcd,
1227 const char *to_type)
1229 return create_conv(from_lcd, &conv_methods[CSTOMBS]);
1232 static XlcConv
1233 open_cstowcs(
1234 XLCd from_lcd,
1235 const char *from_type,
1236 XLCd to_lcd,
1237 const char *to_type)
1239 return create_conv(from_lcd, &conv_methods[CSTOWCS]);
1242 static XlcConv
1243 open_wcstocts(
1244 XLCd from_lcd,
1245 const char *from_type,
1246 XLCd to_lcd,
1247 const char *to_type)
1249 return create_conv(from_lcd, &conv_methods[WCSTOCTS]);
1252 static XlcConv
1253 open_mbstocts(
1254 XLCd from_lcd,
1255 const char *from_type,
1256 XLCd to_lcd,
1257 const char *to_type)
1259 return create_conv(from_lcd, &conv_methods[MBSTOCTS]);
1262 static XlcConv
1263 open_ctstombs(
1264 XLCd from_lcd,
1265 const char *from_type,
1266 XLCd to_lcd,
1267 const char *to_type)
1269 return create_conv(from_lcd, &conv_methods[CTSTOMBS]);
1272 static XlcConv
1273 open_ctstowcs(
1274 XLCd from_lcd,
1275 const char *from_type,
1276 XLCd to_lcd,
1277 const char *to_type)
1279 return create_conv(from_lcd, &conv_methods[CTSTOWCS]);
1282 static XlcConv
1283 open_mbstowcs(
1284 XLCd from_lcd,
1285 const char *from_type,
1286 XLCd to_lcd,
1287 const char *to_type)
1289 return create_conv(from_lcd, &conv_methods[MBSTOWCS]);
1292 static XlcConv
1293 open_wcstombs(
1294 XLCd from_lcd,
1295 const char *from_type,
1296 XLCd to_lcd,
1297 const char *to_type)
1299 return create_conv(from_lcd, &conv_methods[WCSTOMBS]);
1302 XLCd
1303 _XlcGb18030Loader(const char *name)
1305 XLCd lcd;
1307 lcd = _XlcCreateLC(name, _XlcGenericMethods);
1308 if (lcd == NULL)
1309 return lcd;
1311 if ((_XlcNCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "gb18030", 7))) {
1312 _XlcDestroyLC(lcd);
1313 return (XLCd) NULL;
1316 /* MB/WC <-> CS */
1317 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_mbstocs);
1318 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
1319 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_cstombs);
1320 _XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
1322 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_mbtocs);
1324 /* MB/WC <-> CT */
1325 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCompoundText, open_mbstocts);
1326 _XlcSetConverter(lcd, XlcNCompoundText, lcd, XlcNMultiByte, open_ctstombs);
1327 _XlcSetConverter(lcd, XlcNCompoundText, lcd, XlcNWideChar, open_ctstowcs);
1328 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCompoundText, open_wcstocts);
1330 /* MB <-> WC */
1331 _XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_mbstowcs);
1332 _XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_wcstombs);
1335 _XlcAddUtf8Converters(lcd);
1337 return lcd;