2 * Claws Mail -- a GTK based, lightweight, and fast e-mail client
3 * Copyright (C) 1999-2012 Hiroyuki Yamamoto and the Claws Mail team
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 3 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 #include "claws-features.h"
28 #include <glib/gi18n.h>
40 #include "quoted-printable.h"
/* For unknown reasons the iconv.m4 macro undefines that macro
   (ICONV_CONST) if no const is needed.  This would break the code
   below, so we define it. */
/* Replacement byte ('_', 0x5f) written in place of bytes that cannot be
 * displayed or converted.  Deliberately defined WITHOUT a trailing
 * semicolon so that it can be used inside any expression. */
#define SUBST_CHAR	0x5f
/*
 * Byte classification helpers for the Japanese encodings handled in this
 * file.  Every macro masks its argument with 0xff first, so it gives the
 * same answer whether the caller passes a signed or an unsigned char.
 * Note that the argument is evaluated more than once.
 */

/* EUC-JP: byte in the range 0xa1..0xfe */
#define iseuckanji(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xfe)
/* EUC-JP: 0x8e, the byte that introduces a half-width kana */
#define iseuchwkana1(c) \
	(((c) & 0xff) == 0x8e)
/* EUC-JP: second byte of a half-width kana, 0xa1..0xdf */
#define iseuchwkana2(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
/* EUC-JP: 0x8f, the byte that introduces an auxiliary kanji */
#define iseucaux(c) \
	(((c) & 0xff) == 0x8f)
/* Shift_JIS: first byte of a kanji, 0x81..0x9f or 0xe0..0xfc */
#define issjiskanji1(c) \
	((((c) & 0xff) >= 0x81 && ((c) & 0xff) <= 0x9f) || \
	 (((c) & 0xff) >= 0xe0 && ((c) & 0xff) <= 0xfc))
/* Shift_JIS: second byte of a kanji, 0x40..0x7e or 0x80..0xfc */
#define issjiskanji2(c) \
	((((c) & 0xff) >= 0x40 && ((c) & 0xff) <= 0x7e) || \
	 (((c) & 0xff) >= 0x80 && ((c) & 0xff) <= 0xfc))
/* Shift_JIS: single-byte half-width kana, 0xa1..0xdf */
#define issjishwkana(c) \
	(((c) & 0xff) >= 0xa1 && ((c) & 0xff) <= 0xdf)
78 if (state != JIS_KANJI) { \
86 if (state != JIS_ASCII) { \
94 if (state != JIS_HWKANA) { \
102 if (state != JIS_AUXKANJI) { \
107 state = JIS_AUXKANJI; \
110 static CodeConvFunc
conv_get_code_conv_func (const gchar
*src_charset_str
,
111 const gchar
*dest_charset_str
);
113 static gchar
*conv_iconv_strdup_with_cd (const gchar
*inbuf
,
116 static gchar
*conv_iconv_strdup (const gchar
*inbuf
,
117 const gchar
*src_code
,
118 const gchar
*dest_code
);
120 static CharSet
conv_get_locale_charset (void);
121 static CharSet
conv_get_outgoing_charset (void);
122 static CharSet
conv_guess_ja_encoding(const gchar
*str
);
123 static gboolean
conv_is_ja_locale (void);
125 static gint
conv_jistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
126 static gint
conv_euctojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
127 static gint
conv_sjistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
129 static gint
conv_jistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
130 static gint
conv_sjistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
131 static gint
conv_euctoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
132 static gint
conv_anytoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
134 static gint
conv_utf8toeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
135 static gint
conv_utf8tojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
137 static void conv_unreadable_8bit(gchar
*str
);
139 static gint
conv_jistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
140 static gint
conv_sjistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
141 static gint
conv_euctodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
143 static gint
conv_anytodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
144 static gint
conv_ustodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
145 static gint
conv_noconv(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
);
147 static gboolean codeconv_strict_mode
= FALSE
;
148 static gboolean codeconv_allow_jisx0201_kana
= FALSE
;
149 static gboolean codeconv_broken_are_utf8
= FALSE
;
151 void codeconv_set_strict(gboolean mode
)
153 codeconv_strict_mode
= mode
;
156 void codeconv_set_allow_jisx0201_kana(gboolean allow
)
158 codeconv_allow_jisx0201_kana
= allow
;
161 void codeconv_set_broken_are_utf8(gboolean are
)
163 codeconv_broken_are_utf8
= are
;
166 static gint
conv_jistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
168 const guchar
*in
= inbuf
;
170 JISState state
= JIS_ASCII
;
172 cm_return_val_if_fail(outbuf
!= NULL
, 0);
175 * Loop outputs up to 3 bytes in each pass (aux kanji) and we
176 * need 1 byte to terminate the output
178 while (*in
!= '\0' && (out
- outbuf
) < outlen
- 4) {
182 if (*(in
+ 1) == '@' || *(in
+ 1) == 'B') {
185 } else if (*(in
+ 1) == '(' &&
187 state
= JIS_AUXKANJI
;
190 /* unknown escape sequence */
193 } else if (*in
== '(') {
194 if (*(in
+ 1) == 'B' || *(in
+ 1) == 'J') {
197 } else if (*(in
+ 1) == 'I') {
201 /* unknown escape sequence */
205 /* unknown escape sequence */
208 } else if (*in
== 0x0e) {
211 } else if (*in
== 0x0f) {
220 *out
++ = *in
++ | 0x80;
221 if (*in
== '\0') break;
222 *out
++ = *in
++ | 0x80;
226 *out
++ = *in
++ | 0x80;
230 *out
++ = *in
++ | 0x80;
231 if (*in
== '\0') break;
232 *out
++ = *in
++ | 0x80;
242 #define JIS_HWDAKUTEN 0x5e
243 #define JIS_HWHANDAKUTEN 0x5f
245 static gint
conv_jis_hantozen(guchar
*outbuf
, guchar jis_code
, guchar sound_sym
)
247 static guint16 h2z_tbl
[] = {
249 0x0000, 0x2123, 0x2156, 0x2157, 0x2122, 0x2126, 0x2572, 0x2521,
250 0x2523, 0x2525, 0x2527, 0x2529, 0x2563, 0x2565, 0x2567, 0x2543,
252 0x213c, 0x2522, 0x2524, 0x2526, 0x2528, 0x252a, 0x252b, 0x252d,
253 0x252f, 0x2531, 0x2533, 0x2535, 0x2537, 0x2539, 0x253b, 0x253d,
255 0x253f, 0x2541, 0x2544, 0x2546, 0x2548, 0x254a, 0x254b, 0x254c,
256 0x254d, 0x254e, 0x254f, 0x2552, 0x2555, 0x2558, 0x255b, 0x255e,
258 0x255f, 0x2560, 0x2561, 0x2562, 0x2564, 0x2566, 0x2568, 0x2569,
259 0x256a, 0x256b, 0x256c, 0x256d, 0x256f, 0x2573, 0x212b, 0x212c
262 static guint16 dakuten_tbl
[] = {
264 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x252c, 0x252e,
265 0x2530, 0x2532, 0x2534, 0x2536, 0x2538, 0x253a, 0x253c, 0x253e,
267 0x2540, 0x2542, 0x2545, 0x2547, 0x2549, 0x0000, 0x0000, 0x0000,
268 0x0000, 0x0000, 0x2550, 0x2553, 0x2556, 0x2559, 0x255c, 0x0000
271 static guint16 handakuten_tbl
[] = {
273 0x2551, 0x2554, 0x2557, 0x255a, 0x255d
278 cm_return_val_if_fail(outbuf
!= NULL
, 0);
283 if (jis_code
< 0x21 || jis_code
> 0x5f)
286 if (sound_sym
== JIS_HWDAKUTEN
&&
287 jis_code
>= 0x36 && jis_code
<= 0x4e) {
288 out_code
= dakuten_tbl
[jis_code
- 0x30];
290 *outbuf
= out_code
>> 8;
291 *(outbuf
+ 1) = out_code
& 0xff;
296 if (sound_sym
== JIS_HWHANDAKUTEN
&&
297 jis_code
>= 0x4a && jis_code
<= 0x4e) {
298 out_code
= handakuten_tbl
[jis_code
- 0x4a];
299 *outbuf
= out_code
>> 8;
300 *(outbuf
+ 1) = out_code
& 0xff;
304 out_code
= h2z_tbl
[jis_code
- 0x20];
305 *outbuf
= out_code
>> 8;
306 *(outbuf
+ 1) = out_code
& 0xff;
310 static gint
conv_euctojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
312 const guchar
*in
= inbuf
;
314 JISState state
= JIS_ASCII
;
316 cm_return_val_if_fail(outbuf
!= NULL
, 0);
319 * Loop outputs up to 6 bytes in each pass (aux shift + aux
320 * kanji) and we need up to 4 bytes to terminate the output
321 * (ASCII shift + null)
323 while (*in
!= '\0' && (out
- outbuf
) < outlen
- 10) {
327 } else if (iseuckanji(*in
)) {
328 if (iseuckanji(*(in
+ 1))) {
330 *out
++ = *in
++ & 0x7f;
331 *out
++ = *in
++ & 0x7f;
336 if (*in
!= '\0' && !IS_ASCII(*in
)) {
341 } else if (iseuchwkana1(*in
)) {
342 if (iseuchwkana2(*(in
+ 1))) {
343 if (codeconv_allow_jisx0201_kana
) {
346 *out
++ = *in
++ & 0x7f;
351 if (iseuchwkana1(*(in
+ 2)) &&
352 iseuchwkana2(*(in
+ 3)))
353 len
= conv_jis_hantozen
355 *(in
+ 1), *(in
+ 3));
357 len
= conv_jis_hantozen
372 if (*in
!= '\0' && !IS_ASCII(*in
)) {
377 } else if (iseucaux(*in
)) {
379 if (iseuckanji(*in
) && iseuckanji(*(in
+ 1))) {
381 *out
++ = *in
++ & 0x7f;
382 *out
++ = *in
++ & 0x7f;
385 if (*in
!= '\0' && !IS_ASCII(*in
)) {
388 if (*in
!= '\0' && !IS_ASCII(*in
)) {
406 static gint
conv_sjistoeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
408 const guchar
*in
= inbuf
;
411 cm_return_val_if_fail(outbuf
!= NULL
, 0);
414 * Loop outputs up to 2 bytes in each pass and we need 1 byte
415 * to terminate the output
417 while (*in
!= '\0' && (out
- outbuf
) < outlen
- 3) {
420 } else if (issjiskanji1(*in
)) {
421 if (issjiskanji2(*(in
+ 1))) {
423 guchar out2
= *(in
+ 1);
426 row
= out1
< 0xa0 ? 0x70 : 0xb0;
428 out1
= (out1
- row
) * 2 - 1;
429 out2
-= out2
> 0x7f ? 0x20 : 0x1f;
431 out1
= (out1
- row
) * 2;
435 *out
++ = out1
| 0x80;
436 *out
++ = out2
| 0x80;
441 if (*in
!= '\0' && !IS_ASCII(*in
)) {
446 } else if (issjishwkana(*in
)) {
459 static gint
conv_jistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
463 cm_return_val_if_fail(inbuf
!= NULL
, 0);
464 cm_return_val_if_fail(outbuf
!= NULL
, 0);
466 Xalloca(eucstr
, outlen
, return -1);
468 if (conv_jistoeuc(eucstr
, outlen
, inbuf
) <0)
470 if (conv_euctoutf8(outbuf
, outlen
, eucstr
) < 0)
475 static gint
conv_sjistoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
479 cm_return_val_if_fail(inbuf
!= NULL
, 0);
480 cm_return_val_if_fail(outbuf
!= NULL
, 0);
482 tmpstr
= conv_iconv_strdup(inbuf
, CS_SHIFT_JIS
, CS_UTF_8
);
484 strncpy2(outbuf
, tmpstr
, outlen
);
488 strncpy2(outbuf
, inbuf
, outlen
);
493 static gint
conv_euctoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
495 static iconv_t cd
= (iconv_t
)-1;
496 static gboolean iconv_ok
= TRUE
;
499 cm_return_val_if_fail(inbuf
!= NULL
, 0);
500 cm_return_val_if_fail(outbuf
!= NULL
, 0);
502 if (cd
== (iconv_t
)-1) {
504 strncpy2(outbuf
, inbuf
, outlen
);
507 cd
= iconv_open(CS_UTF_8
, CS_EUC_JP_MS
);
508 if (cd
== (iconv_t
)-1) {
509 cd
= iconv_open(CS_UTF_8
, CS_EUC_JP
);
510 if (cd
== (iconv_t
)-1) {
511 g_warning("conv_euctoutf8(): %s",
514 strncpy2(outbuf
, inbuf
, outlen
);
520 tmpstr
= conv_iconv_strdup_with_cd(inbuf
, cd
);
522 strncpy2(outbuf
, tmpstr
, outlen
);
526 strncpy2(outbuf
, inbuf
, outlen
);
531 static gint
conv_anytoutf8(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
535 cm_return_val_if_fail(inbuf
!= NULL
, 0);
536 cm_return_val_if_fail(outbuf
!= NULL
, 0);
538 switch (conv_guess_ja_encoding(inbuf
)) {
540 r
= conv_jistoutf8(outbuf
, outlen
, inbuf
);
543 r
= conv_sjistoutf8(outbuf
, outlen
, inbuf
);
546 r
= conv_euctoutf8(outbuf
, outlen
, inbuf
);
550 strncpy2(outbuf
, inbuf
, outlen
);
557 static gint
conv_utf8toeuc(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
559 static iconv_t cd
= (iconv_t
)-1;
560 static gboolean iconv_ok
= TRUE
;
563 cm_return_val_if_fail(inbuf
!= NULL
, 0);
564 cm_return_val_if_fail(outbuf
!= NULL
, 0);
566 if (cd
== (iconv_t
)-1) {
568 strncpy2(outbuf
, inbuf
, outlen
);
571 cd
= iconv_open(CS_EUC_JP_MS
, CS_UTF_8
);
572 if (cd
== (iconv_t
)-1) {
573 cd
= iconv_open(CS_EUC_JP
, CS_UTF_8
);
574 if (cd
== (iconv_t
)-1) {
575 g_warning("conv_utf8toeuc(): %s",
578 strncpy2(outbuf
, inbuf
, outlen
);
584 tmpstr
= conv_iconv_strdup_with_cd(inbuf
, cd
);
586 strncpy2(outbuf
, tmpstr
, outlen
);
590 strncpy2(outbuf
, inbuf
, outlen
);
595 static gint
conv_utf8tojis(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
599 cm_return_val_if_fail(inbuf
!= NULL
, 0);
600 cm_return_val_if_fail(outbuf
!= NULL
, 0);
602 Xalloca(eucstr
, outlen
, return -1);
604 if (conv_utf8toeuc(eucstr
, outlen
, inbuf
) < 0)
606 if (conv_euctojis(outbuf
, outlen
, eucstr
) < 0)
612 static void conv_unreadable_8bit(gchar
*str
)
614 register guchar
*p
= str
;
617 /* convert CR+LF -> LF */
618 if (*p
== '\r' && *(p
+ 1) == '\n')
619 memmove(p
, p
+ 1, strlen(p
));
620 else if (!IS_ASCII(*p
)) *p
= SUBST_CHAR
;
625 static CharSet
conv_guess_ja_encoding(const gchar
*str
)
627 const guchar
*p
= str
;
628 CharSet guessed
= C_US_ASCII
;
631 if (*p
== ESC
&& (*(p
+ 1) == '$' || *(p
+ 1) == '(')) {
632 if (guessed
== C_US_ASCII
)
633 return C_ISO_2022_JP
;
635 } else if (IS_ASCII(*p
)) {
637 } else if (iseuckanji(*p
) && iseuckanji(*(p
+ 1))) {
638 if (*p
>= 0xfd && *p
<= 0xfe)
640 else if (guessed
== C_SHIFT_JIS
) {
641 if ((issjiskanji1(*p
) &&
642 issjiskanji2(*(p
+ 1))) ||
644 guessed
= C_SHIFT_JIS
;
650 } else if (issjiskanji1(*p
) && issjiskanji2(*(p
+ 1))) {
651 if (iseuchwkana1(*p
) && iseuchwkana2(*(p
+ 1)))
652 guessed
= C_SHIFT_JIS
;
656 } else if (issjishwkana(*p
)) {
657 guessed
= C_SHIFT_JIS
;
667 static gint
conv_jistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
669 cm_return_val_if_fail(inbuf
!= NULL
, 0);
670 cm_return_val_if_fail(outbuf
!= NULL
, 0);
672 return conv_jistoutf8(outbuf
, outlen
, inbuf
);
675 static gint
conv_sjistodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
677 cm_return_val_if_fail(inbuf
!= NULL
, 0);
678 cm_return_val_if_fail(outbuf
!= NULL
, 0);
680 return conv_sjistoutf8(outbuf
, outlen
, inbuf
);
683 static gint
conv_euctodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
685 cm_return_val_if_fail(inbuf
!= NULL
, 0);
686 cm_return_val_if_fail(outbuf
!= NULL
, 0);
688 return conv_euctoutf8(outbuf
, outlen
, inbuf
);
691 void conv_utf8todisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
693 cm_return_if_fail(inbuf
!= NULL
);
694 cm_return_if_fail(outbuf
!= NULL
);
696 if (g_utf8_validate(inbuf
, -1, NULL
) == TRUE
)
697 strncpy2(outbuf
, inbuf
, outlen
);
699 conv_ustodisp(outbuf
, outlen
, inbuf
);
702 static gint
conv_anytodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
706 cm_return_val_if_fail(inbuf
!= NULL
, 0);
707 cm_return_val_if_fail(outbuf
!= NULL
, 0);
709 if (conv_anytoutf8(outbuf
, outlen
, inbuf
) < 0)
711 if (g_utf8_validate(outbuf
, -1, NULL
) != TRUE
)
712 conv_unreadable_8bit(outbuf
);
716 static gint
conv_ustodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
718 cm_return_val_if_fail(inbuf
!= NULL
, 0);
719 cm_return_val_if_fail(outbuf
!= NULL
, 0);
721 strncpy2(outbuf
, inbuf
, outlen
);
722 conv_unreadable_8bit(outbuf
);
727 void conv_localetodisp(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
731 cm_return_if_fail(inbuf
!= NULL
);
732 cm_return_if_fail(outbuf
!= NULL
);
734 codeconv_set_strict(TRUE
);
735 tmpstr
= conv_iconv_strdup(inbuf
, conv_get_locale_charset_str(),
737 codeconv_set_strict(FALSE
);
738 if (tmpstr
&& g_utf8_validate(tmpstr
, -1, NULL
)) {
739 strncpy2(outbuf
, tmpstr
, outlen
);
742 } else if (tmpstr
&& !g_utf8_validate(tmpstr
, -1, NULL
)) {
744 codeconv_set_strict(TRUE
);
745 tmpstr
= conv_iconv_strdup(inbuf
,
746 conv_get_locale_charset_str_no_utf8(),
748 codeconv_set_strict(FALSE
);
750 if (tmpstr
&& g_utf8_validate(tmpstr
, -1, NULL
)) {
751 strncpy2(outbuf
, tmpstr
, outlen
);
756 conv_utf8todisp(outbuf
, outlen
, inbuf
);
760 static gint
conv_noconv(gchar
*outbuf
, gint outlen
, const gchar
*inbuf
)
762 cm_return_val_if_fail(inbuf
!= NULL
, 0);
763 cm_return_val_if_fail(outbuf
!= NULL
, 0);
765 strncpy2(outbuf
, inbuf
, outlen
);
770 conv_get_fallback_for_private_encoding(const gchar
*encoding
)
773 if ((encoding
[0] == 'X' || encoding
[0] == 'x') &&
774 encoding
[1] == '-') {
775 if (!g_ascii_strcasecmp(encoding
, CS_X_MACCYR
))
777 if (!g_ascii_strcasecmp(encoding
, CS_X_GBK
))
780 else if(!g_ascii_strcasecmp(encoding
, CS_ISO_8859_8_I
)) {
782 * ISO-8859-8-I is a variant which fully
783 * agrees with ISO-8859-8 on character
784 * codings, and differs only in directionality
785 * implications, which are ignored here
786 * anyway; and is not recognized by iconv
788 return CS_ISO_8859_8
;
795 CodeConverter
*conv_code_converter_new(const gchar
*src_charset
)
799 src_charset
= conv_get_fallback_for_private_encoding(src_charset
);
801 conv
= g_new0(CodeConverter
, 1);
802 conv
->code_conv_func
= conv_get_code_conv_func(src_charset
, NULL
);
803 conv
->charset_str
= g_strdup(src_charset
);
804 conv
->charset
= conv_get_charset_from_str(src_charset
);
809 void conv_code_converter_destroy(CodeConverter
*conv
)
811 g_free(conv
->charset_str
);
815 gint
conv_convert(CodeConverter
*conv
, gchar
*outbuf
, gint outlen
,
818 cm_return_val_if_fail(inbuf
!= NULL
, -1);
819 cm_return_val_if_fail(outbuf
!= NULL
, -1);
821 if (conv
->code_conv_func
!= conv_noconv
)
822 return conv
->code_conv_func(outbuf
, outlen
, inbuf
);
826 str
= conv_iconv_strdup(inbuf
, conv
->charset_str
, NULL
);
830 strncpy2(outbuf
, str
, outlen
);
838 gchar
*conv_codeset_strdup(const gchar
*inbuf
,
839 const gchar
*src_code
, const gchar
*dest_code
)
843 CodeConvFunc conv_func
;
845 cm_return_val_if_fail(inbuf
!= NULL
, NULL
);
847 if (!g_strcmp0(src_code
, dest_code
)) {
848 CharSet dest_charset
= conv_get_charset_from_str(dest_code
);
849 if (codeconv_strict_mode
&& dest_charset
== C_UTF_8
) {
850 /* ensure valid UTF-8 if target is UTF-8 */
851 if (!g_utf8_validate(inbuf
, -1, NULL
)) {
855 /* otherwise, try for a lucky day */
856 return g_strdup(inbuf
);
859 src_code
= conv_get_fallback_for_private_encoding(src_code
);
860 conv_func
= conv_get_code_conv_func(src_code
, dest_code
);
861 if (conv_func
== conv_ustodisp
862 && codeconv_strict_mode
863 && !is_ascii_str(inbuf
))
866 if (conv_func
!= conv_noconv
) {
867 len
= (strlen(inbuf
) + 1) * 3;
870 if (conv_func(buf
, len
, inbuf
) == 0 || !codeconv_strict_mode
)
871 return g_realloc(buf
, strlen(buf
) + 1);
878 return conv_iconv_strdup(inbuf
, src_code
, dest_code
);
881 static CodeConvFunc
conv_get_code_conv_func(const gchar
*src_charset_str
,
882 const gchar
*dest_charset_str
)
884 CodeConvFunc code_conv
= conv_noconv
;
886 CharSet dest_charset
;
888 if (!src_charset_str
)
889 src_charset
= conv_get_locale_charset();
891 src_charset
= conv_get_charset_from_str(src_charset_str
);
893 /* auto detection mode */
894 if (!src_charset_str
&& !dest_charset_str
) {
895 if (conv_is_ja_locale())
896 return conv_anytodisp
;
901 dest_charset
= conv_get_charset_from_str(dest_charset_str
);
903 if (dest_charset
== C_US_ASCII
)
904 return conv_ustodisp
;
906 switch (src_charset
) {
924 case C_ISO_2022_JP_2
:
925 case C_ISO_2022_JP_3
:
926 if (dest_charset
== C_AUTO
)
927 code_conv
= conv_jistodisp
;
928 else if (dest_charset
== C_EUC_JP
)
929 code_conv
= conv_jistoeuc
;
930 else if (dest_charset
== C_UTF_8
)
931 code_conv
= conv_jistoutf8
;
934 if (dest_charset
== C_AUTO
)
935 code_conv
= conv_sjistodisp
;
936 else if (dest_charset
== C_EUC_JP
)
937 code_conv
= conv_sjistoeuc
;
938 else if (dest_charset
== C_UTF_8
)
939 code_conv
= conv_sjistoutf8
;
942 if (dest_charset
== C_AUTO
)
943 code_conv
= conv_euctodisp
;
944 else if (dest_charset
== C_ISO_2022_JP
||
945 dest_charset
== C_ISO_2022_JP_2
||
946 dest_charset
== C_ISO_2022_JP_3
)
947 code_conv
= conv_euctojis
;
948 else if (dest_charset
== C_UTF_8
)
949 code_conv
= conv_euctoutf8
;
952 if (dest_charset
== C_EUC_JP
)
953 code_conv
= conv_utf8toeuc
;
954 else if (dest_charset
== C_ISO_2022_JP
||
955 dest_charset
== C_ISO_2022_JP_2
||
956 dest_charset
== C_ISO_2022_JP_3
)
957 code_conv
= conv_utf8tojis
;
966 static gchar
*conv_iconv_strdup(const gchar
*inbuf
,
967 const gchar
*src_code
, const gchar
*dest_code
)
972 cm_return_val_if_fail(inbuf
!= NULL
, NULL
);
974 if (!src_code
&& !dest_code
&&
975 g_utf8_validate(inbuf
, -1, NULL
))
976 return g_strdup(inbuf
);
979 src_code
= conv_get_outgoing_charset_str();
981 dest_code
= CS_INTERNAL
;
983 /* don't convert if src and dest codeset are identical */
984 if (!strcasecmp(src_code
, dest_code
))
985 return g_strdup(inbuf
);
/* don't convert if src codeset is US-ASCII */
988 if (!strcasecmp(src_code
, CS_US_ASCII
))
989 return g_strdup(inbuf
);
991 /* don't convert if dest codeset is US-ASCII */
992 if (!strcasecmp(dest_code
, CS_US_ASCII
))
993 return g_strdup(inbuf
);
995 cd
= iconv_open(dest_code
, src_code
);
996 if (cd
== (iconv_t
)-1)
999 outbuf
= conv_iconv_strdup_with_cd(inbuf
, cd
);
1006 gchar
*conv_iconv_strdup_with_cd(const gchar
*inbuf
, iconv_t cd
)
1008 const gchar
*inbuf_p
;
1018 cm_return_val_if_fail(inbuf
!= NULL
, NULL
);
1021 in_size
= strlen(inbuf
);
1023 out_size
= (in_size
+ 1) * 2;
1024 outbuf
= g_malloc(out_size
);
1026 out_left
= out_size
;
1028 #define EXPAND_BUF() \
1030 len = outbuf_p - outbuf; \
1032 outbuf = g_realloc(outbuf, out_size); \
1033 outbuf_p = outbuf + len; \
1034 out_left = out_size - len; \
1037 while ((n_conv
= iconv(cd
, (ICONV_CONST gchar
**)&inbuf_p
, &in_left
,
1038 &outbuf_p
, &out_left
)) == (size_t)-1) {
1039 if (EILSEQ
== errno
) {
1040 if (codeconv_strict_mode
) {
1044 //g_print("iconv(): at %d: %s\n", in_size - in_left, g_strerror(errno));
1047 if (out_left
== 0) {
1050 *outbuf_p
++ = SUBST_CHAR
;
1052 } else if (EINVAL
== errno
) {
1054 } else if (E2BIG
== errno
) {
1057 g_warning("conv_iconv_strdup(): %s",
1063 while ((n_conv
= iconv(cd
, NULL
, NULL
, &outbuf_p
, &out_left
)) ==
1065 if (E2BIG
== errno
) {
1068 g_warning("conv_iconv_strdup(): %s",
1076 len
= outbuf_p
- outbuf
;
1077 outbuf
= g_realloc(outbuf
, len
+ 1);
1083 static const struct {
1087 {C_US_ASCII
, CS_US_ASCII
},
1088 {C_US_ASCII
, CS_ANSI_X3_4_1968
},
1089 {C_UTF_8
, CS_UTF_8
},
1090 {C_UTF_7
, CS_UTF_7
},
1091 {C_ISO_8859_1
, CS_ISO_8859_1
},
1092 {C_ISO_8859_2
, CS_ISO_8859_2
},
1093 {C_ISO_8859_3
, CS_ISO_8859_3
},
1094 {C_ISO_8859_4
, CS_ISO_8859_4
},
1095 {C_ISO_8859_5
, CS_ISO_8859_5
},
1096 {C_ISO_8859_6
, CS_ISO_8859_6
},
1097 {C_ISO_8859_7
, CS_ISO_8859_7
},
1098 {C_ISO_8859_8
, CS_ISO_8859_8
},
1099 {C_ISO_8859_9
, CS_ISO_8859_9
},
1100 {C_ISO_8859_10
, CS_ISO_8859_10
},
1101 {C_ISO_8859_11
, CS_ISO_8859_11
},
1102 {C_ISO_8859_13
, CS_ISO_8859_13
},
1103 {C_ISO_8859_14
, CS_ISO_8859_14
},
1104 {C_ISO_8859_15
, CS_ISO_8859_15
},
1105 {C_BALTIC
, CS_BALTIC
},
1106 {C_CP1250
, CS_CP1250
},
1107 {C_CP1251
, CS_CP1251
},
1108 {C_CP1252
, CS_CP1252
},
1109 {C_CP1253
, CS_CP1253
},
1110 {C_CP1254
, CS_CP1254
},
1111 {C_CP1255
, CS_CP1255
},
1112 {C_CP1256
, CS_CP1256
},
1113 {C_CP1257
, CS_CP1257
},
1114 {C_CP1258
, CS_CP1258
},
1115 {C_WINDOWS_1250
, CS_WINDOWS_1250
},
1116 {C_WINDOWS_1251
, CS_WINDOWS_1251
},
1117 {C_WINDOWS_1252
, CS_WINDOWS_1252
},
1118 {C_WINDOWS_1253
, CS_WINDOWS_1253
},
1119 {C_WINDOWS_1254
, CS_WINDOWS_1254
},
1120 {C_WINDOWS_1255
, CS_WINDOWS_1255
},
1121 {C_WINDOWS_1256
, CS_WINDOWS_1256
},
1122 {C_WINDOWS_1257
, CS_WINDOWS_1257
},
1123 {C_WINDOWS_1258
, CS_WINDOWS_1258
},
1124 {C_KOI8_R
, CS_KOI8_R
},
1125 {C_MACCYR
, CS_MACCYR
},
1126 {C_KOI8_T
, CS_KOI8_T
},
1127 {C_KOI8_U
, CS_KOI8_U
},
1128 {C_ISO_2022_JP
, CS_ISO_2022_JP
},
1129 {C_ISO_2022_JP_2
, CS_ISO_2022_JP_2
},
1130 {C_ISO_2022_JP_3
, CS_ISO_2022_JP_3
},
1131 {C_EUC_JP
, CS_EUC_JP
},
1132 {C_EUC_JP
, CS_EUCJP
},
1133 {C_EUC_JP_MS
, CS_EUC_JP_MS
},
1134 {C_SHIFT_JIS
, CS_SHIFT_JIS
},
1135 {C_SHIFT_JIS
, CS_SHIFT__JIS
},
1136 {C_SHIFT_JIS
, CS_SJIS
},
1137 {C_ISO_2022_KR
, CS_ISO_2022_KR
},
1138 {C_EUC_KR
, CS_EUC_KR
},
1139 {C_ISO_2022_CN
, CS_ISO_2022_CN
},
1140 {C_EUC_CN
, CS_EUC_CN
},
1141 {C_GB18030
, CS_GB18030
},
1142 {C_GB2312
, CS_GB2312
},
1144 {C_EUC_TW
, CS_EUC_TW
},
1146 {C_BIG5_HKSCS
, CS_BIG5_HKSCS
},
1147 {C_TIS_620
, CS_TIS_620
},
1148 {C_WINDOWS_874
, CS_WINDOWS_874
},
1149 {C_GEORGIAN_PS
, CS_GEORGIAN_PS
},
1150 {C_TCVN5712_1
, CS_TCVN5712_1
},
1153 static const struct {
1154 gchar
*const locale
;
1156 CharSet out_charset
;
1157 } locale_table
[] = {
1158 {"ja_JP.eucJP" , C_EUC_JP
, C_ISO_2022_JP
},
1159 {"ja_JP.EUC-JP" , C_EUC_JP
, C_ISO_2022_JP
},
1160 {"ja_JP.EUC" , C_EUC_JP
, C_ISO_2022_JP
},
1161 {"ja_JP.ujis" , C_EUC_JP
, C_ISO_2022_JP
},
1162 {"ja_JP.SJIS" , C_SHIFT_JIS
, C_ISO_2022_JP
},
1163 {"ja_JP.JIS" , C_ISO_2022_JP
, C_ISO_2022_JP
},
1165 {"ja_JP" , C_SHIFT_JIS
, C_ISO_2022_JP
},
1167 {"ja_JP" , C_EUC_JP
, C_ISO_2022_JP
},
1169 {"ko_KR.EUC-KR" , C_EUC_KR
, C_EUC_KR
},
1170 {"ko_KR" , C_EUC_KR
, C_EUC_KR
},
1171 {"zh_CN.GB18030" , C_GB18030
, C_GB18030
},
1172 {"zh_CN.GB2312" , C_GB2312
, C_GB2312
},
1173 {"zh_CN.GBK" , C_GBK
, C_GBK
},
1174 {"zh_CN" , C_GB18030
, C_GB18030
},
1175 {"zh_HK" , C_BIG5_HKSCS
, C_BIG5_HKSCS
},
1176 {"zh_TW.eucTW" , C_EUC_TW
, C_BIG5
},
1177 {"zh_TW.EUC-TW" , C_EUC_TW
, C_BIG5
},
1178 {"zh_TW.Big5" , C_BIG5
, C_BIG5
},
1179 {"zh_TW" , C_BIG5
, C_BIG5
},
1181 {"ru_RU.KOI8-R" , C_KOI8_R
, C_KOI8_R
},
1182 {"ru_RU.KOI8R" , C_KOI8_R
, C_KOI8_R
},
1183 {"ru_RU.CP1251" , C_WINDOWS_1251
, C_KOI8_R
},
1185 {"ru_RU" , C_WINDOWS_1251
, C_KOI8_R
},
1187 {"ru_RU" , C_ISO_8859_5
, C_KOI8_R
},
1189 {"tg_TJ" , C_KOI8_T
, C_KOI8_T
},
1190 {"ru_UA" , C_KOI8_U
, C_KOI8_U
},
1191 {"uk_UA.CP1251" , C_WINDOWS_1251
, C_KOI8_U
},
1192 {"uk_UA" , C_KOI8_U
, C_KOI8_U
},
1194 {"be_BY" , C_WINDOWS_1251
, C_WINDOWS_1251
},
1195 {"bg_BG" , C_WINDOWS_1251
, C_WINDOWS_1251
},
1197 {"yi_US" , C_WINDOWS_1255
, C_WINDOWS_1255
},
1199 {"af_ZA" , C_ISO_8859_1
, C_ISO_8859_1
},
1200 {"br_FR" , C_ISO_8859_1
, C_ISO_8859_1
},
1201 {"ca_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1202 {"da_DK" , C_ISO_8859_1
, C_ISO_8859_1
},
1203 {"de_AT" , C_ISO_8859_1
, C_ISO_8859_1
},
1204 {"de_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1205 {"de_CH" , C_ISO_8859_1
, C_ISO_8859_1
},
1206 {"de_DE" , C_ISO_8859_1
, C_ISO_8859_1
},
1207 {"de_LU" , C_ISO_8859_1
, C_ISO_8859_1
},
1208 {"en_AU" , C_ISO_8859_1
, C_ISO_8859_1
},
1209 {"en_BW" , C_ISO_8859_1
, C_ISO_8859_1
},
1210 {"en_CA" , C_ISO_8859_1
, C_ISO_8859_1
},
1211 {"en_DK" , C_ISO_8859_1
, C_ISO_8859_1
},
1212 {"en_GB" , C_ISO_8859_1
, C_ISO_8859_1
},
1213 {"en_HK" , C_ISO_8859_1
, C_ISO_8859_1
},
1214 {"en_IE" , C_ISO_8859_1
, C_ISO_8859_1
},
1215 {"en_NZ" , C_ISO_8859_1
, C_ISO_8859_1
},
1216 {"en_PH" , C_ISO_8859_1
, C_ISO_8859_1
},
1217 {"en_SG" , C_ISO_8859_1
, C_ISO_8859_1
},
1218 {"en_US" , C_ISO_8859_1
, C_ISO_8859_1
},
1219 {"en_ZA" , C_ISO_8859_1
, C_ISO_8859_1
},
1220 {"en_ZW" , C_ISO_8859_1
, C_ISO_8859_1
},
1221 {"es_AR" , C_ISO_8859_1
, C_ISO_8859_1
},
1222 {"es_BO" , C_ISO_8859_1
, C_ISO_8859_1
},
1223 {"es_CL" , C_ISO_8859_1
, C_ISO_8859_1
},
1224 {"es_CO" , C_ISO_8859_1
, C_ISO_8859_1
},
1225 {"es_CR" , C_ISO_8859_1
, C_ISO_8859_1
},
1226 {"es_DO" , C_ISO_8859_1
, C_ISO_8859_1
},
1227 {"es_EC" , C_ISO_8859_1
, C_ISO_8859_1
},
1228 {"es_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1229 {"es_GT" , C_ISO_8859_1
, C_ISO_8859_1
},
1230 {"es_HN" , C_ISO_8859_1
, C_ISO_8859_1
},
1231 {"es_MX" , C_ISO_8859_1
, C_ISO_8859_1
},
1232 {"es_NI" , C_ISO_8859_1
, C_ISO_8859_1
},
1233 {"es_PA" , C_ISO_8859_1
, C_ISO_8859_1
},
1234 {"es_PE" , C_ISO_8859_1
, C_ISO_8859_1
},
1235 {"es_PR" , C_ISO_8859_1
, C_ISO_8859_1
},
1236 {"es_PY" , C_ISO_8859_1
, C_ISO_8859_1
},
1237 {"es_SV" , C_ISO_8859_1
, C_ISO_8859_1
},
1238 {"es_US" , C_ISO_8859_1
, C_ISO_8859_1
},
1239 {"es_UY" , C_ISO_8859_1
, C_ISO_8859_1
},
1240 {"es_VE" , C_ISO_8859_1
, C_ISO_8859_1
},
1241 {"et_EE" , C_ISO_8859_1
, C_ISO_8859_1
},
1242 {"eu_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1243 {"fi_FI" , C_ISO_8859_1
, C_ISO_8859_1
},
1244 {"fo_FO" , C_ISO_8859_1
, C_ISO_8859_1
},
1245 {"fr_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1246 {"fr_CA" , C_ISO_8859_1
, C_ISO_8859_1
},
1247 {"fr_CH" , C_ISO_8859_1
, C_ISO_8859_1
},
1248 {"fr_FR" , C_ISO_8859_1
, C_ISO_8859_1
},
1249 {"fr_LU" , C_ISO_8859_1
, C_ISO_8859_1
},
1250 {"ga_IE" , C_ISO_8859_1
, C_ISO_8859_1
},
1251 {"gl_ES" , C_ISO_8859_1
, C_ISO_8859_1
},
1252 {"gv_GB" , C_ISO_8859_1
, C_ISO_8859_1
},
1253 {"id_ID" , C_ISO_8859_1
, C_ISO_8859_1
},
1254 {"is_IS" , C_ISO_8859_1
, C_ISO_8859_1
},
1255 {"it_CH" , C_ISO_8859_1
, C_ISO_8859_1
},
1256 {"it_IT" , C_ISO_8859_1
, C_ISO_8859_1
},
1257 {"kl_GL" , C_ISO_8859_1
, C_ISO_8859_1
},
1258 {"kw_GB" , C_ISO_8859_1
, C_ISO_8859_1
},
1259 {"ms_MY" , C_ISO_8859_1
, C_ISO_8859_1
},
1260 {"nl_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1261 {"nl_NL" , C_ISO_8859_1
, C_ISO_8859_1
},
1262 {"nb_NO" , C_ISO_8859_1
, C_ISO_8859_1
},
1263 {"nn_NO" , C_ISO_8859_1
, C_ISO_8859_1
},
1264 {"no_NO" , C_ISO_8859_1
, C_ISO_8859_1
},
1265 {"oc_FR" , C_ISO_8859_1
, C_ISO_8859_1
},
1266 {"pt_BR" , C_ISO_8859_1
, C_ISO_8859_1
},
1267 {"pt_PT" , C_ISO_8859_1
, C_ISO_8859_1
},
1268 {"sq_AL" , C_ISO_8859_1
, C_ISO_8859_1
},
1269 {"sv_FI" , C_ISO_8859_1
, C_ISO_8859_1
},
1270 {"sv_SE" , C_ISO_8859_1
, C_ISO_8859_1
},
1271 {"tl_PH" , C_ISO_8859_1
, C_ISO_8859_1
},
1272 {"uz_UZ" , C_ISO_8859_1
, C_ISO_8859_1
},
1273 {"wa_BE" , C_ISO_8859_1
, C_ISO_8859_1
},
1275 {"bs_BA" , C_ISO_8859_2
, C_ISO_8859_2
},
1276 {"cs_CZ" , C_ISO_8859_2
, C_ISO_8859_2
},
1277 {"hr_HR" , C_ISO_8859_2
, C_ISO_8859_2
},
1278 {"hu_HU" , C_ISO_8859_2
, C_ISO_8859_2
},
1279 {"pl_PL" , C_ISO_8859_2
, C_ISO_8859_2
},
1280 {"ro_RO" , C_ISO_8859_2
, C_ISO_8859_2
},
1281 {"sk_SK" , C_ISO_8859_2
, C_ISO_8859_2
},
1282 {"sl_SI" , C_ISO_8859_2
, C_ISO_8859_2
},
1284 {"sr_YU@cyrillic" , C_ISO_8859_5
, C_ISO_8859_5
},
1285 {"sr_YU" , C_ISO_8859_2
, C_ISO_8859_2
},
1287 {"mt_MT" , C_ISO_8859_3
, C_ISO_8859_3
},
1289 {"lt_LT.iso88594" , C_ISO_8859_4
, C_ISO_8859_4
},
1290 {"lt_LT.ISO8859-4" , C_ISO_8859_4
, C_ISO_8859_4
},
1291 {"lt_LT.ISO_8859-4" , C_ISO_8859_4
, C_ISO_8859_4
},
1292 {"lt_LT" , C_ISO_8859_13
, C_ISO_8859_13
},
1294 {"mk_MK" , C_ISO_8859_5
, C_ISO_8859_5
},
1296 {"ar_AE" , C_ISO_8859_6
, C_ISO_8859_6
},
1297 {"ar_BH" , C_ISO_8859_6
, C_ISO_8859_6
},
1298 {"ar_DZ" , C_ISO_8859_6
, C_ISO_8859_6
},
1299 {"ar_EG" , C_ISO_8859_6
, C_ISO_8859_6
},
1300 {"ar_IQ" , C_ISO_8859_6
, C_ISO_8859_6
},
1301 {"ar_JO" , C_ISO_8859_6
, C_ISO_8859_6
},
1302 {"ar_KW" , C_ISO_8859_6
, C_ISO_8859_6
},
1303 {"ar_LB" , C_ISO_8859_6
, C_ISO_8859_6
},
1304 {"ar_LY" , C_ISO_8859_6
, C_ISO_8859_6
},
1305 {"ar_MA" , C_ISO_8859_6
, C_ISO_8859_6
},
1306 {"ar_OM" , C_ISO_8859_6
, C_ISO_8859_6
},
1307 {"ar_QA" , C_ISO_8859_6
, C_ISO_8859_6
},
1308 {"ar_SA" , C_ISO_8859_6
, C_ISO_8859_6
},
1309 {"ar_SD" , C_ISO_8859_6
, C_ISO_8859_6
},
1310 {"ar_SY" , C_ISO_8859_6
, C_ISO_8859_6
},
1311 {"ar_TN" , C_ISO_8859_6
, C_ISO_8859_6
},
1312 {"ar_YE" , C_ISO_8859_6
, C_ISO_8859_6
},
1314 {"el_GR" , C_ISO_8859_7
, C_ISO_8859_7
},
1315 {"he_IL" , C_ISO_8859_8
, C_ISO_8859_8
},
1316 {"iw_IL" , C_ISO_8859_8
, C_ISO_8859_8
},
1317 {"tr_TR" , C_ISO_8859_9
, C_ISO_8859_9
},
1319 {"lv_LV" , C_ISO_8859_13
, C_ISO_8859_13
},
1320 {"mi_NZ" , C_ISO_8859_13
, C_ISO_8859_13
},
1322 {"cy_GB" , C_ISO_8859_14
, C_ISO_8859_14
},
1324 {"ar_IN" , C_UTF_8
, C_UTF_8
},
1325 {"en_IN" , C_UTF_8
, C_UTF_8
},
1326 {"se_NO" , C_UTF_8
, C_UTF_8
},
1327 {"ta_IN" , C_UTF_8
, C_UTF_8
},
1328 {"te_IN" , C_UTF_8
, C_UTF_8
},
1329 {"ur_PK" , C_UTF_8
, C_UTF_8
},
1331 {"th_TH" , C_TIS_620
, C_TIS_620
},
1332 /* {"th_TH" , C_WINDOWS_874}, */
1333 /* {"th_TH" , C_ISO_8859_11}, */
1335 {"ka_GE" , C_GEORGIAN_PS
, C_GEORGIAN_PS
},
1336 {"vi_VN.TCVN" , C_TCVN5712_1
, C_TCVN5712_1
},
1338 {"C" , C_US_ASCII
, C_US_ASCII
},
1339 {"POSIX" , C_US_ASCII
, C_US_ASCII
},
1340 {"ANSI_X3.4-1968" , C_US_ASCII
, C_US_ASCII
},
1343 static GHashTable
*conv_get_charset_to_str_table(void)
1345 static GHashTable
*table
;
1351 table
= g_hash_table_new(NULL
, g_direct_equal
);
1353 for (i
= 0; i
< sizeof(charsets
) / sizeof(charsets
[0]); i
++) {
1354 if (g_hash_table_lookup(table
, GUINT_TO_POINTER(charsets
[i
].charset
))
1357 (table
, GUINT_TO_POINTER(charsets
[i
].charset
),
1365 static GHashTable
*conv_get_charset_from_str_table(void)
1367 static GHashTable
*table
;
1373 table
= g_hash_table_new(str_case_hash
, str_case_equal
);
1375 for (i
= 0; i
< sizeof(charsets
) / sizeof(charsets
[0]); i
++) {
1376 g_hash_table_insert(table
, charsets
[i
].name
,
1377 GUINT_TO_POINTER(charsets
[i
].charset
));
1383 const gchar
*conv_get_charset_str(CharSet charset
)
1387 table
= conv_get_charset_to_str_table();
1388 return g_hash_table_lookup(table
, GUINT_TO_POINTER(charset
));
1391 CharSet
conv_get_charset_from_str(const gchar
*charset
)
1395 if (!charset
) return C_AUTO
;
1397 table
= conv_get_charset_from_str_table();
1398 return GPOINTER_TO_UINT(g_hash_table_lookup(table
, charset
));
/*
 * Determines the CharSet that corresponds to the current locale string
 * (as reported by conv_get_current_locale()). The result is stored in a
 * function-local static that starts out as C_UNINITIALIZED.
 *
 * Visible detection steps, in order:
 *   - the locale contains "UTF-8" or "utf8" (any case)  -> C_UTF_8
 *   - the locale ends with "@euro" (any case)           -> C_ISO_8859_15
 *   - a locale_table[] entry matches                    -> that entry's charset
 * NOTE(review): the return/break statements and some conditions are not
 * visible in this view, so the exact fall-through order is unconfirmed.
 */
1401 static CharSet
conv_get_locale_charset(void)
1403 static CharSet cur_charset
= C_UNINITIALIZED
;
1404 const gchar
*cur_locale
;
/* A value other than C_UNINITIALIZED means an earlier call stored a result. */
1408 if (cur_charset
!= C_UNINITIALIZED
)
1411 cur_locale
= conv_get_current_locale();
/* NOTE(review): presumably only reached when no locale string is available -- confirm. */
1413 cur_charset
= C_US_ASCII
;
1417 if (strcasestr(cur_locale
, "UTF-8") ||
1418 strcasestr(cur_locale
, "utf8")) {
1419 cur_charset
= C_UTF_8
;
/* "@euro" only counts when it is the very end of the locale string. */
1423 if ((p
= strcasestr(cur_locale
, "@euro")) && p
[5] == '\0') {
1424 cur_charset
= C_ISO_8859_15
;
1428 for (i
= 0; i
< sizeof(locale_table
) / sizeof(locale_table
[0]); i
++) {
1431 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1432 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
/* First test: the table entry is a case-insensitive prefix of the current locale. */
1433 if (!g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
,
1434 strlen(locale_table
[i
].locale
))) {
1435 cur_charset
= locale_table
[i
].charset
;
/*
 * Second test: the table entry has a '_' with no '.' after it, and the
 * current locale is exactly two characters long and equals the first two
 * characters of the entry (case-insensitively).
 */
1437 } else if ((p
= strchr(locale_table
[i
].locale
, '_')) &&
1438 !strchr(p
+ 1, '.')) {
1439 if (strlen(cur_locale
) == 2 &&
1440 !g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
, 2)) {
1441 cur_charset
= locale_table
[i
].charset
;
/* NOTE(review): presumably the fallback when no table entry matched -- confirm. */
1447 cur_charset
= C_AUTO
;
/*
 * Variant of the locale charset detection that first consults
 * codeconv_broken_are_utf8: when that flag is set the charset is C_UTF_8.
 * Otherwise the current locale string is examined with the same visible
 * steps as conv_get_locale_charset(): "UTF-8"/"utf8" substring, trailing
 * "@euro", then a scan of locale_table[].
 * NOTE(review): the return/break statements and some conditions are not
 * visible in this view; no check of the cached static is visible either,
 * so whether the result is reused between calls is unconfirmed.
 */
1451 static CharSet
conv_get_locale_charset_no_utf8(void)
1453 static CharSet cur_charset
= C_UNINITIALIZED
;
1454 const gchar
*cur_locale
;
1458 if (codeconv_broken_are_utf8
) {
1459 cur_charset
= C_UTF_8
;
1463 cur_locale
= conv_get_current_locale();
/* NOTE(review): presumably only reached when no locale string is available -- confirm. */
1465 cur_charset
= C_US_ASCII
;
1469 if (strcasestr(cur_locale
, "UTF-8") ||
1470 strcasestr(cur_locale
, "utf8")) {
1471 cur_charset
= C_UTF_8
;
/* "@euro" only counts when it is the very end of the locale string. */
1475 if ((p
= strcasestr(cur_locale
, "@euro")) && p
[5] == '\0') {
1476 cur_charset
= C_ISO_8859_15
;
1480 for (i
= 0; i
< sizeof(locale_table
) / sizeof(locale_table
[0]); i
++) {
1483 /* "ja_JP.EUC" matches with "ja_JP.eucJP", "ja_JP.EUC" and
1484 "ja_JP". "ja_JP" matches with "ja_JP.xxxx" and "ja" */
/* First test: the table entry is a case-insensitive prefix of the current locale. */
1485 if (!g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
,
1486 strlen(locale_table
[i
].locale
))) {
1487 cur_charset
= locale_table
[i
].charset
;
/*
 * Second test: the table entry has a '_' with no '.' after it, and the
 * current locale is exactly two characters long and equals the first two
 * characters of the entry (case-insensitively).
 */
1489 } else if ((p
= strchr(locale_table
[i
].locale
, '_')) &&
1490 !strchr(p
+ 1, '.')) {
1491 if (strlen(cur_locale
) == 2 &&
1492 !g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
, 2)) {
1493 cur_charset
= locale_table
[i
].charset
;
/* NOTE(review): presumably the fallback when no table entry matched -- confirm. */
1499 cur_charset
= C_AUTO
;
/*
 * Returns the name of the locale charset, i.e. the string registered for
 * conv_get_locale_charset(). When no name is available the function
 * returns CS_INTERNAL instead of NULL. The looked-up name is kept in a
 * function-local static.
 * NOTE(review): the condition guarding the lookup is not visible here;
 * presumably it is only performed while the static is still NULL.
 */
1503 const gchar
*conv_get_locale_charset_str(void)
1505 static const gchar
*codeset
= NULL
;
1508 codeset
= conv_get_charset_str(conv_get_locale_charset());
1510 return codeset
? codeset
: CS_INTERNAL
;
/*
 * Returns the name of the charset reported by
 * conv_get_locale_charset_no_utf8(). When no name is available the
 * function returns CS_INTERNAL instead of NULL. The looked-up name is kept
 * in a function-local static.
 * NOTE(review): the condition guarding the lookup is not visible here;
 * presumably it is only performed while the static is still NULL.
 */
1513 const gchar
*conv_get_locale_charset_str_no_utf8(void)
1515 static const gchar
*codeset
= NULL
;
1518 codeset
= conv_get_charset_str(conv_get_locale_charset_no_utf8());
1520 return codeset
? codeset
: CS_INTERNAL
;
/*
 * Determines the CharSet to use for outgoing text from the current locale
 * string. The result is stored in a function-local static that starts out
 * as C_UNINITIALIZED.
 *
 * Visible detection steps, in order:
 *   - the locale contains "UTF-8" or "utf8" (any case)  -> C_UTF_8
 *   - the locale ends with "@euro" (any case)           -> C_ISO_8859_15
 *   - a locale_table[] entry matches                    -> that entry's
 *                                                          out_charset field
 * The matching rules against locale_table[] are the same two tests used
 * for the locale charset: prefix match, or a two-letter locale matching an
 * "ll_CC" entry without a codeset suffix.
 * NOTE(review): the return/break statements and some conditions are not
 * visible in this view, so the exact fall-through order is unconfirmed.
 */
1523 static CharSet
conv_get_outgoing_charset(void)
1525 static CharSet out_charset
= C_UNINITIALIZED
;
1526 const gchar
*cur_locale
;
/* A value other than C_UNINITIALIZED means an earlier call stored a result. */
1530 if (out_charset
!= C_UNINITIALIZED
)
1533 cur_locale
= conv_get_current_locale();
/* NOTE(review): presumably only reached when no locale string is available -- confirm. */
1535 out_charset
= C_AUTO
;
1539 if (strcasestr(cur_locale
, "UTF-8") ||
1540 strcasestr(cur_locale
, "utf8")) {
1541 out_charset
= C_UTF_8
;
/* "@euro" only counts when it is the very end of the locale string. */
1545 if ((p
= strcasestr(cur_locale
, "@euro")) && p
[5] == '\0') {
1546 out_charset
= C_ISO_8859_15
;
1550 for (i
= 0; i
< sizeof(locale_table
) / sizeof(locale_table
[0]); i
++) {
/* First test: the table entry is a case-insensitive prefix of the current locale. */
1553 if (!g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
,
1554 strlen(locale_table
[i
].locale
))) {
1555 out_charset
= locale_table
[i
].out_charset
;
/* Second test: two-letter locale against an "ll_CC" entry with no '.' suffix. */
1557 } else if ((p
= strchr(locale_table
[i
].locale
, '_')) &&
1558 !strchr(p
+ 1, '.')) {
1559 if (strlen(cur_locale
) == 2 &&
1560 !g_ascii_strncasecmp(cur_locale
, locale_table
[i
].locale
, 2)) {
1561 out_charset
= locale_table
[i
].out_charset
;
/*
 * Returns the name of the outgoing charset: the string registered for
 * conv_get_outgoing_charset(), or CS_UTF_8 when that CharSet has no
 * registered name.
 */
1570 const gchar
*conv_get_outgoing_charset_str(void)
1572 CharSet out_charset
;
1575 out_charset
= conv_get_outgoing_charset();
1576 str
= conv_get_charset_str(out_charset
);
1578 return str
? str
: CS_UTF_8
;
/*
 * Returns the current locale string and logs it with debug_print().
 *
 * Two sources are visible: g_win32_getlocale(), and a lookup chain that
 * tries the environment variables LC_ALL, LC_CTYPE and LANG in that order
 * and finally asks setlocale(LC_CTYPE, NULL) for the current setting.
 * NOTE(review): the conditional-compilation lines that choose between the
 * two are not visible here; the trailing "#endif" comment suggests the
 * first is the G_OS_WIN32 branch -- confirm.
 * The debug output prints "(none)" when no locale string was found, so the
 * result may be NULL.
 */
1581 const gchar
*conv_get_current_locale(void)
1583 const gchar
*cur_locale
;
1586 cur_locale
= g_win32_getlocale();
/* Each later source is consulted only if the earlier ones produced nothing. */
1588 cur_locale
= g_getenv("LC_ALL");
1589 if (!cur_locale
) cur_locale
= g_getenv("LC_CTYPE");
1590 if (!cur_locale
) cur_locale
= g_getenv("LANG");
1591 if (!cur_locale
) cur_locale
= setlocale(LC_CTYPE
, NULL
);
1592 #endif /* G_OS_WIN32 */
1594 debug_print("current locale: %s\n",
1595 cur_locale
? cur_locale
: "(none)");
/*
 * Reports whether the current locale string starts with "ja"
 * (case-insensitive comparison of the first two characters).
 * The answer is cached in a function-local static: -1 means "not yet
 * determined", any other value is returned as a boolean (non-zero = TRUE).
 * NOTE(review): the assignments to is_ja_locale are not visible in this
 * view.
 */
1600 static gboolean
conv_is_ja_locale(void)
1602 static gint is_ja_locale
= -1;
1603 const gchar
*cur_locale
;
/* Fast path: reuse the cached answer. */
1605 if (is_ja_locale
!= -1)
1606 return is_ja_locale
!= 0;
1609 cur_locale
= conv_get_current_locale();
1611 if (g_ascii_strncasecmp(cur_locale
, "ja", 2) == 0)
1615 return is_ja_locale
!= 0;
/*
 * Decodes a MIME-encoded header value by passing it to unmime_header().
 *
 *  str               header text; a NULL value fails the cm_return_val_if_fail
 *                    check, which names NULL as the value to return
 *  default_encoding  charset assumed for raw non-ASCII bytes, or NULL
 *  addr_field        forwarded unchanged to unmime_header()
 *
 * Visible paths:
 *   - pure ASCII input is handed to unmime_header() directly;
 *   - with a default_encoding, the input is first converted from that
 *     encoding to CS_INTERNAL and the converted text is decoded;
 *   - otherwise the input is converted into a local BUFFSIZE buffer with
 *     conv_anytodisp() (Japanese locale) or conv_localetodisp() and the
 *     buffer is decoded.
 * NOTE(review): the handling of a failed conversion and the release of the
 * intermediate string are not visible in this view.
 */
1618 gchar
*conv_unmime_header(const gchar
*str
, const gchar
*default_encoding
,
1619 gboolean addr_field
)
1621 gchar buf
[BUFFSIZE
];
1623 cm_return_val_if_fail(str
!= NULL
, NULL
);
/* ASCII-only input needs no charset conversion before decoding. */
1625 if (is_ascii_str(str
))
1626 return unmime_header(str
, addr_field
);
1628 if (default_encoding
) {
1631 utf8_buf
= conv_codeset_strdup
1632 (str
, default_encoding
, CS_INTERNAL
);
1636 decoded_str
= unmime_header(utf8_buf
, addr_field
);
/* No usable default encoding: fall back to locale-based conversion into buf. */
1642 if (conv_is_ja_locale())
1643 conv_anytodisp(buf
, sizeof(buf
), str
);
1645 conv_localetodisp(buf
, sizeof(buf
), str
);
1647 return unmime_header(buf
, addr_field
);
/*
 * Constants and helper macros for conv_encode_header_full().
 *
 *  MAX_LINELEN       soft line length: the per-line budget ("left") starts
 *                    from it and is reset to MAX_LINELEN - 1 after a break
 *  MAX_HARD_LINELEN  hard line length: the encoder adds the difference
 *                    (MAX_HARD_LINELEN - MAX_LINELEN) to the budget when
 *                    deciding whether to break inside a plain ASCII word
 *  MIMESEP_BEGIN/END opening and closing delimiters of a MIME encoded-word
 *  B64LEN(len)       length of the base64 text for len input bytes: four
 *                    characters per three bytes, a partial trailing group
 *                    counting as a full four characters
 */
1650 #define MAX_LINELEN 76
1651 #define MAX_HARD_LINELEN 996
1652 #define MIMESEP_BEGIN "=?"
1653 #define MIMESEP_END "?="
/*
 * LBREAK_IF_REQUIRED(cond, is_plain_text)
 * Not self-contained: it reads and updates the locals len, dest, destp,
 * srcp and left of the function that expands it.
 * Visible logic: it first checks that at least MAX_LINELEN + 2 bytes remain
 * in the output buffer; then, when cond holds and input remains (*srcp is
 * not NUL), it handles two cases -- output already written and the budget
 * below MAX_LINELEN - 1, or nothing written yet and the budget below 7 --
 * and in both resets left to MAX_LINELEN - 1. The space tests look at the
 * last written byte and, for plain text, at the next input byte.
 * NOTE(review): the statements that emit the line break are not visible in
 * this view.
 */
1655 #define LBREAK_IF_REQUIRED(cond, is_plain_text) \
1657 if (len - (destp - (guchar *)dest) < MAX_LINELEN + 2) { \
1662 if ((cond) && *srcp) { \
1663 if (destp > (guchar *)dest && left < MAX_LINELEN - 1) { \
1664 if (isspace(*(destp - 1))) \
1666 else if (is_plain_text && isspace(*srcp)) \
1671 left = MAX_LINELEN - 1; \
1673 } else if (destp == (guchar *)dest && left < 7) { \
1674 if (is_plain_text && isspace(*srcp)) \
1679 left = MAX_LINELEN - 1; \
1685 #define B64LEN(len) ((len) / 3 * 4 + ((len) % 3 ? 4 : 0))
/*
 * Encodes the header text `src` into `dest`, writing non-ASCII runs as MIME
 * encoded-words (MIMESEP_BEGIN charset "?B?" or "?Q?" payload MIMESEP_END)
 * and copying ASCII words unchanged, inserting line breaks through
 * LBREAK_IF_REQUIRED as the per-line budget runs out.
 *
 *  dest, len      output buffer and its size in bytes
 *  src            header text; must be valid UTF-8 (checked on entry)
 *  header_len     subtracted from MAX_LINELEN to obtain the budget of the
 *                 first line
 *  addr_field     when TRUE, the characters ( ) and " are kept outside the
 *                 encoded-words
 *  out_encoding_  name of the target charset. NOTE(review): the test that
 *                 chooses between this argument and
 *                 conv_get_outgoing_charset_str() is not visible here;
 *                 presumably NULL selects the latter -- confirm.
 *
 * Text to be encoded is converted from CS_INTERNAL to the target charset
 * with conv_codeset_strdup(); the visible failure handling logs a warning,
 * passes the unconverted text through conv_unreadable_8bit() and encodes
 * that instead. The payload is produced by g_base64_encode() or
 * qp_q_encode().
 * NOTE(review): many lines of this function are missing from this view, so
 * the comments describe only the statements that are visible.
 */
1687 void conv_encode_header_full(gchar
*dest
, gint len
, const gchar
*src
,
1688 gint header_len
, gboolean addr_field
,
1689 const gchar
*out_encoding_
)
1691 const gchar
*cur_encoding
;
1692 const gchar
*out_encoding
;
1696 const guchar
*srcp
= src
;
1697 guchar
*destp
= dest
;
1698 gboolean use_base64
;
/* Refuse input that is not valid UTF-8, and a NULL destination. */
1700 cm_return_if_fail(g_utf8_validate(src
, -1, NULL
) == TRUE
);
1701 cm_return_if_fail(destp
!= NULL
);
/* Multibyte locales (MB_CUR_MAX > 1) select the "B" marker; the "Q" marker is presumably the other branch. */
1703 if (MB_CUR_MAX
> 1) {
1705 mimesep_enc
= "?B?";
1708 mimesep_enc
= "?Q?";
1711 cur_encoding
= CS_INTERNAL
;
1714 out_encoding
= out_encoding_
;
1716 out_encoding
= conv_get_outgoing_charset_str();
/* A US-ASCII target charset is replaced by ISO-8859-1. */
1718 if (!strcmp(out_encoding
, CS_US_ASCII
))
1719 out_encoding
= CS_ISO_8859_1
;
/* Fixed overhead of one encoded-word: both delimiters, the charset name and the encoding marker. */
1721 mimestr_len
= strlen(MIMESEP_BEGIN
) + strlen(out_encoding
) +
1722 strlen(mimesep_enc
) + strlen(MIMESEP_END
);
/* Budget of the first line after accounting for header_len. */
1724 left
= MAX_LINELEN
- header_len
;
1727 LBREAK_IF_REQUIRED(left
<= 0, TRUE
);
1729 while (isspace(*srcp
)) {
1732 LBREAK_IF_REQUIRED(left
<= 0, TRUE
);
1735 /* output as it is if the next word is ASCII string */
1736 if (!is_next_nonascii(srcp
)) {
1739 word_len
= get_next_word_len(srcp
);
1740 LBREAK_IF_REQUIRED(left
< word_len
, TRUE
);
1741 while (word_len
> 0) {
1742 LBREAK_IF_REQUIRED(left
+ (MAX_HARD_LINELEN
- MAX_LINELEN
) <= 0, TRUE
)
1751 /* don't include parentheses and quotes in encoded strings */
1752 if (addr_field
&& (*srcp
== '(' || *srcp
== ')' || *srcp
== '"')) {
1753 LBREAK_IF_REQUIRED(left
< 2, FALSE
);
1764 const guchar
*p
= srcp
;
1766 gint out_enc_str_len
;
1767 gint mime_block_len
;
1768 gboolean cont
= FALSE
;
1770 while (*p
!= '\0') {
1771 if (isspace(*p
) && !is_next_nonascii(p
+ 1))
1773 /* don't include parentheses in encoded
1775 if (addr_field
&& (*p
== '(' || *p
== ')' || *p
== '"'))
1778 mb_len
= g_utf8_skip
[*p
];
1780 Xstrndup_a(part_str
, srcp
, cur_len
+ mb_len
, );
1781 out_str
= conv_codeset_strdup
1782 (part_str
, cur_encoding
, out_encoding
);
1784 if (codeconv_strict_mode
) {
1788 g_warning("conv_encode_header_full(): code conversion failed");
1789 conv_unreadable_8bit(part_str
);
1790 out_str
= g_strdup(part_str
);
1793 out_str_len
= strlen(out_str
);
1796 out_enc_str_len
= B64LEN(out_str_len
);
1799 qp_get_q_encoding_len(out_str
);
1803 if (mimestr_len
+ out_enc_str_len
<= left
) {
1806 } else if (cur_len
== 0) {
1808 LBREAK_IF_REQUIRED(1, FALSE
);
1817 Xstrndup_a(part_str
, srcp
, cur_len
, );
1818 out_str
= conv_codeset_strdup
1819 (part_str
, cur_encoding
, out_encoding
);
1821 g_warning("conv_encode_header_full(): code conversion failed");
1822 conv_unreadable_8bit(part_str
);
1823 out_str
= g_strdup(part_str
);
1825 out_str_len
= strlen(out_str
);
1828 out_enc_str_len
= B64LEN(out_str_len
);
1831 qp_get_q_encoding_len(out_str
);
1834 enc_str
= g_base64_encode(out_str
, out_str_len
);
1836 Xalloca(enc_str
, out_enc_str_len
+ 1, );
1837 qp_q_encode(enc_str
, out_str
);
1842 /* output MIME-encoded string block */
1843 mime_block_len
= mimestr_len
+ strlen(enc_str
);
1844 g_snprintf(destp
, mime_block_len
+ 1,
1845 MIMESEP_BEGIN
"%s%s%s" MIMESEP_END
,
1846 out_encoding
, mimesep_enc
, enc_str
);
/* Advance past the block just written and charge it against the line budget. */
1851 destp
+= mime_block_len
;
1854 left
-= mime_block_len
;
1857 LBREAK_IF_REQUIRED(cont
, FALSE
);
/*
 * Convenience wrapper around conv_encode_header_full() that passes NULL as
 * the output encoding; all other arguments are forwarded unchanged.
 */
1867 void conv_encode_header(gchar
*dest
, gint len
, const gchar
*src
,
1868 gint header_len
, gboolean addr_field
)
1870 conv_encode_header_full(dest
,len
,src
,header_len
,addr_field
,NULL
);
1873 #undef LBREAK_IF_REQUIRED
/*
 * Converts a UTF-8 file name to the encoding GLib uses for file names,
 * using g_filename_from_utf8().
 *
 *  utf8_file  file name in UTF-8; a NULL value fails the
 *             cm_return_val_if_fail check, which names NULL as the value
 *             to return
 *
 * When the conversion reports an error, the error is logged with
 * debug_print() and released. The visible fallback duplicates the input
 * unchanged with g_strdup().
 * NOTE(review): the conditions around the fallback and the return statement
 * are not visible in this view; presumably the result is a newly allocated
 * string owned by the caller -- confirm.
 */
1876 gchar
*conv_filename_from_utf8(const gchar
*utf8_file
)
1879 GError
*error
= NULL
;
1881 cm_return_val_if_fail(utf8_file
!= NULL
, NULL
);
1883 fs_file
= g_filename_from_utf8(utf8_file
, -1, NULL
, NULL
, &error
);
1885 debug_print("failed to convert encoding of file name: %s\n",
1887 g_error_free(error
);
/* Fallback: use the UTF-8 name as-is. */
1890 fs_file
= g_strdup(utf8_file
);
1895 gchar
*conv_filename_to_utf8(const gchar
*fs_file
)
1897 gchar
*utf8_file
= NULL
;
1898 GError
*error
= NULL
;
1900 cm_return_val_if_fail(fs_file
!= NULL
, NULL
);
1902 utf8_file
= g_filename_to_utf8(fs_file
, -1, NULL
, NULL
, &error
);
1904 g_warning("failed to convert encoding of file name: %s",
1906 g_error_free(error
);
1909 if (!utf8_file
|| !g_utf8_validate(utf8_file
, -1, NULL
)) {
1911 utf8_file
= g_strdup(fs_file
);
1912 conv_unreadable_8bit(utf8_file
);