3 * Author: Jean-Marc Lienher ( http://oksid.ch )
4 * Copyright 2000-2003 by O'ksi'D.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
21 * Please report all bugs and problems on the following page:
23 * http://www.fltk.org/str.php
26 #if !defined(WIN32) && !defined(__APPLE__)
29 #include "../../FL/Xutf8.h"
32 #include <X11/Xutil.h>
40 I haven't found much doc on the web about EUC encodings, so I've used
41 GNU libiconv source code as a reference.
42 http://clisp.cons.org/~haible/packages-libiconv.html
46 #define RET_TOOFEW(x) (-10 - x)
47 #define RET_TOOSMALL -2
49 #define ucs4_t unsigned int
55 #define NEED_TOWC /* indicates what part of these include files is needed here (avoid compilation warnings) */
56 #include "lcUniConv/big5.h"
57 #include "lcUniConv/gb2312.h"
58 #include "lcUniConv/cp936ext.h"
59 #include "lcUniConv/jisx0201.h"
60 #include "lcUniConv/jisx0208.h"
61 #include "lcUniConv/jisx0212.h"
62 #include "lcUniConv/ksc5601.h"
65 XConvertEucTwToUtf8(char* buffer_return
, int len
) {
76 if (len
< 1) return 0;
77 b
= buf
= (char*) malloc((unsigned)len
);
78 memcpy(buf
, buffer_return
, (unsigned) len
);
82 cd
= iconv_open("EUC-TW", "UTF-8");
83 iconv(cd
, &b
, &len
, &buffer_return
, &cdl
);
90 c
= (unsigned char) buf
[i
];
94 } else if (c
>= 0xa1 && c
< 0xff && len
- i
> 1 ) {
96 b
[0] = (unsigned char) c
- 0x80;
97 b
[1] = (unsigned char) buf
[i
+ 1] - 0x80;
99 } else if (c
== 0x8e && len
- i
> 3) {
101 unsigned char c1
= buf
[i
+ 1];
102 unsigned char c2
= buf
[i
+ 2];
103 unsigned char c3
= buf
[i
+ 3];
104 b
[0] = (unsigned char) buf
[i
+ 2] - 0x80;
105 b
[1] = (unsigned char) buf
[i
+ 3] - 0x80;
106 if (c1
>= 0xa1 && c1
<= 0xb0) {
107 if (c2
>= 0xa1 && c2
< 0xff && c3
>= 0xa1 && c3
< 0xff) {
119 l
+= XConvertUcsToUtf8(ucs
, buffer_return
+ l
);
127 XConvertEucKrToUtf8(char* buffer_return
, int len
) {
131 if (len
< 1) return 0;
133 buf
= (char*) malloc((unsigned)len
);
134 memcpy(buf
, buffer_return
, (unsigned)len
);
139 c
= (unsigned char) buf
[i
];
143 } else if (c
>= 0xA1 && c
< 0xFF && len
- i
> 1) {
144 c1
= (unsigned char) buf
[i
+ 1];
145 if (c1
>= 0xa1 && c1
< 0xff) {
149 if (ksc5601_mbtowc(NULL
, &ucs
, b
, 2) < 1) {
160 l
+= XConvertUcsToUtf8(ucs
, buffer_return
+ l
);
167 XConvertBig5ToUtf8(char* buffer_return
, int len
) {
171 if (len
< 1) return 0;
172 buf
= (char*) malloc((unsigned)len
);
173 memcpy(buf
, buffer_return
, (unsigned)len
);
176 l
+= XConvertUcsToUtf8((unsigned int)buf
[i
], buffer_return
+ l
);
178 while (i
+ 1 < len
) {
181 b
[0] = (unsigned char) buf
[i
];
182 b
[1] = (unsigned char) buf
[i
+ 1];
183 if (big5_mbtowc(NULL
, &ucs
, b
, 2) == 2) {
189 l
+= XConvertUcsToUtf8(ucs
, buffer_return
+ l
);
196 XConvertCp936extToUtf8(char* buffer_return
, int len
)
201 if (len
< 1) return 0;
202 buf
= (char*) malloc((unsigned)len
);
203 memcpy(buf
, buffer_return
, (unsigned)len
);
206 l
+= XConvertUcsToUtf8((unsigned int)buf
[i
], buffer_return
+ l
);
208 while (i
+ 1 < len
) {
211 b
[0] = (unsigned char) buf
[i
];
212 b
[1] = (unsigned char) buf
[i
+ 1];
213 if (cp936ext_mbtowc(NULL
, &ucs
, b
, 2) == 2) {
223 l
+= XConvertUcsToUtf8(ucs
, buffer_return
+ l
);
226 l
+= XConvertUcsToUtf8((unsigned int)buf
[i
], buffer_return
+ l
);
233 XConvertGb2312ToUtf8(char* buffer_return
, int len
) {
237 if (len
< 1) return 0;
238 buf
= (char*) malloc((unsigned)len
);
239 memcpy(buf
, buffer_return
, (unsigned)len
);
242 l
+= XConvertUcsToUtf8((unsigned int)buf
[i
], buffer_return
+ l
);
244 while (i
+ 1 < len
) {
247 b
[0] = (unsigned char) buf
[i
];
248 b
[1] = (unsigned char) buf
[i
+ 1];
252 } else if (gb2312_mbtowc(NULL
, &ucs
, b
, 2) == 2) {
258 l
+= XConvertUcsToUtf8(ucs
, buffer_return
+ l
);
261 l
+= XConvertUcsToUtf8((unsigned int)buf
[i
], buffer_return
+ l
);
268 XConvertEucCnToUtf8(char* buffer_return
, int len
) {
272 if (len
< 1) return 0;
273 buf
= (char*) malloc((unsigned)len
);
274 memcpy(buf
, buffer_return
, (unsigned)len
);
279 c
= (unsigned char) buf
[i
];
283 } else if (c
>= 0xA1 && c
< 0xFF && len
- i
> 1) {
284 c1
= (unsigned char) buf
[i
+ 1];
285 if (c1
>= 0xa1 && c1
< 0xff) {
287 b
[0] = (unsigned char) c
;
288 b
[1] = (unsigned char) c1
;
289 if (gb2312_mbtowc(NULL
, &ucs
, b
, 2) < 1) {
300 l
+= XConvertUcsToUtf8(ucs
, buffer_return
+ l
);
307 XConvertEucJpToUtf8(char* buffer_return
, int len
) {
311 if (len
< 1) return 0;
312 buf
= (char*) malloc((unsigned)len
);
313 memcpy(buf
, buffer_return
, (unsigned)len
);
318 c
= (unsigned char) buf
[i
];
322 } else if (c
>= 0xA1 && c
< 0xFF && len
- i
> 1) {
323 c1
= (unsigned char) buf
[i
+ 1];
324 if (c
< 0xF5 && c1
>= 0xa1) {
328 if (jisx0208_mbtowc(NULL
, &ucs
, b
, 2) < 1) {
331 } else if (c1
>= 0xA1 && c1
< 0xFF) {
332 ucs
= 0xE000 + 94 * (c
- 0xF5) + (c1
- 0xA1);
337 } else if (c
== 0x8E && len
- i
> 1) {
338 c1
= (unsigned char) buf
[i
+ 1];
339 if (c1
>= 0xa1 && c1
<= 0xe0) {
340 if (jisx0201_mbtowc(NULL
, &ucs
, &c1
, 1) != 1) {
347 } else if (c
== 0x8F && len
- i
> 2) {
348 c
= (unsigned char) buf
[i
+ 1];
349 c1
= (unsigned char) buf
[i
+ 2];
350 if (c
>= 0xa1 && c
< 0xff) {
351 if (c
< 0xf5 && c1
>= 0xa1 && c1
< 0xff) {
355 if (jisx0212_mbtowc(NULL
, &ucs
, b
, 2) < 1) {
362 if (c1
>= 0xa1 && c1
< 0xff) {
363 ucs
= 0xe3ac + 94 * (c
- 0xF5) + (c1
- 0xA1);
373 l
+= XConvertUcsToUtf8(ucs
, buffer_return
+ l
);
380 XConvertEucToUtf8(const char* locale
,
386 /* if (!locale || strstr(locale, "UTF") || strstr(locale, "utf")) { */
387 if (!locale
|| strstr(locale
, "UTF") || strstr(locale
, "utf")) {
391 if (strstr(locale
, "ja")) {
392 return XConvertEucJpToUtf8(buffer_return
, len
);
393 } else if (strstr(locale
, "Big5") || strstr(locale
, "big5")) { /* BIG5 */
394 return XConvertBig5ToUtf8(buffer_return
, len
);
395 } else if (strstr(locale
, "GBK") || strstr(locale
, "gbk")) {
396 return XConvertCp936extToUtf8(buffer_return
, len
);
397 } else if (strstr(locale
, "zh") || strstr(locale
, "chinese-")) {
398 if (strstr(locale
, "TW") || strstr(locale
, "chinese-t")) {
399 if (strstr(locale
, "EUC") || strstr(locale
, "euc") || strstr(locale
, "chinese-t")) {
400 return XConvertEucTwToUtf8(buffer_return
, len
);
402 return XConvertBig5ToUtf8(buffer_return
, len
);
404 if (strstr(locale
, "EUC") || strstr(locale
, "euc")) {
405 return XConvertEucCnToUtf8(buffer_return
, len
);
407 return XConvertGb2312ToUtf8(buffer_return
, len
);
408 } else if (strstr(locale
, "ko")) {
409 return XConvertEucKrToUtf8(buffer_return
, len
);
415 XUtf8LookupString(XIC ic
,
416 XKeyPressedEvent
* event
,
420 Status
* status_return
) {
424 len
= XmbLookupString(ic
, event
, buffer_return
, bytes_buffer
/ 5,
425 keysym
, status_return
);
426 if (*status_return
== XBufferOverflow
) {
429 if (*keysym
> 0 && *keysym
< 0x100 && len
== 1) {
430 if (*keysym
< 0x80) {
431 ucs
= (unsigned char)buffer_return
[0];
435 } else if (((*keysym
>= 0x100 && *keysym
<= 0xf000) ||
436 (*keysym
& 0xff000000U
) == 0x01000000))
438 ucs
= XKeysymToUcs(*keysym
);
444 len
= XConvertUcsToUtf8((unsigned)ucs
, (char *)buffer_return
);
445 } else if (len
> 0) {
450 len
= XConvertEucToUtf8(XLocaleOfIM(im
), buffer_return
, len
, bytes_buffer
);
455 #endif /* X11 only */