/*
 * Purple's oscar protocol plugin
 * This file is the legal property of its developers.
 * Please see the AUTHORS file distributed alongside this file.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
 */
24 encoding_multi_convert_to_utf8(const gchar
*text
, gssize textlen
, const gchar
*encodings
, GError
**error
, gboolean fallback
)
27 const gchar
*begin
= encodings
;
28 const gchar
*end
= NULL
;
29 gchar
*curr_encoding
= NULL
; /* allocated buffer for encoding name */
30 const gchar
*curr_encoding_ro
= NULL
; /* read-only encoding name */
33 purple_debug_error("oscar", "encodings is NULL");
39 /* extract next encoding */
40 end
= strchr(begin
, ',');
42 curr_encoding_ro
= begin
;
43 } else { /* allocate buffer for encoding */
44 curr_encoding
= g_strndup(begin
, end
- begin
);
46 purple_debug_error("oscar", "Error allocating memory for encoding");
49 curr_encoding_ro
= curr_encoding
;
52 if (!g_ascii_strcasecmp(curr_encoding_ro
, "utf-8") && g_utf8_validate(text
, textlen
, NULL
)) {
56 utf8
= g_convert(text
, textlen
, "UTF-8", curr_encoding_ro
, NULL
, NULL
, NULL
);
58 if (!end
) /* last occurence. do not free curr_encoding: buffer was'nt allocated */
61 g_free(curr_encoding
); /* free allocated buffer for encoding here */
63 if (utf8
) /* text was successfully converted */
69 if (!utf8
&& fallback
)
70 { /* "begin" points to last encoding */
71 utf8
= g_convert_with_fallback(text
, textlen
, "UTF-8", begin
, "?", NULL
, NULL
, error
);
78 encoding_extract(const char *encoding
)
82 if (encoding
== NULL
) {
86 if (!g_str_has_prefix(encoding
, "text/aolrtf; charset=") &&
87 !g_str_has_prefix(encoding
, "text/x-aolrtf; charset=") &&
88 !g_str_has_prefix(encoding
, "text/plain; charset=")) {
89 return g_strdup(encoding
);
92 begin
= strchr(encoding
, '"');
93 end
= strrchr(encoding
, '"');
95 if ((begin
== NULL
) || (end
== NULL
) || (begin
>= end
)) {
96 return g_strdup(encoding
);
99 return g_strndup(begin
+1, (end
-1) - begin
);
103 oscar_encoding_to_utf8(const char *encoding
, const char *text
, int textlen
)
106 const gchar
*glib_encoding
= NULL
;
107 gchar
*extracted_encoding
= encoding_extract(encoding
);
109 if (extracted_encoding
== NULL
|| *extracted_encoding
== '\0') {
110 purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
111 } else if (!g_ascii_strcasecmp(extracted_encoding
, "iso-8859-1")) {
112 glib_encoding
= "iso-8859-1";
113 } else if (!g_ascii_strcasecmp(extracted_encoding
, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding
, "us-ascii")) {
114 glib_encoding
= "Windows-1252";
115 } else if (!g_ascii_strcasecmp(extracted_encoding
, "unicode-2-0")) {
116 glib_encoding
= "UTF-16BE";
117 } else if (g_ascii_strcasecmp(extracted_encoding
, "utf-8")) {
118 glib_encoding
= extracted_encoding
;
121 if (glib_encoding
!= NULL
) {
122 utf8
= encoding_multi_convert_to_utf8(text
, textlen
, glib_encoding
, NULL
, FALSE
);
126 * If utf8 is still NULL then either the encoding is utf-8 or
127 * we have been unable to convert the text to utf-8 from the encoding
128 * that was specified. So we check if the text is valid utf-8 then
132 if (textlen
!= 0 && *text
!= '\0' && !g_utf8_validate(text
, textlen
, NULL
))
133 utf8
= g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
135 utf8
= g_strndup(text
, textlen
);
138 g_free(extracted_encoding
);
143 oscar_utf8_try_convert(PurpleAccount
*account
, OscarData
*od
, const gchar
*msg
)
145 const char *charset
= NULL
;
151 if (g_utf8_validate(msg
, -1, NULL
))
152 return g_strdup(msg
);
155 charset
= purple_account_get_string(account
, "encoding", NULL
);
157 if(charset
&& *charset
)
158 ret
= encoding_multi_convert_to_utf8(msg
, -1, charset
, NULL
, FALSE
);
161 ret
= purple_utf8_try_convert(msg
);
167 oscar_convert_to_utf8(const gchar
*data
, gsize datalen
, const char *charsetstr
, gboolean fallback
)
172 if ((charsetstr
== NULL
) || (*charsetstr
== '\0'))
175 if (g_ascii_strcasecmp("UTF-8", charsetstr
)) {
176 ret
= encoding_multi_convert_to_utf8(data
, datalen
, charsetstr
, &err
, fallback
);
178 purple_debug_warning("oscar", "Conversion from %s failed: %s.\n",
179 charsetstr
, err
->message
);
183 if (g_utf8_validate(data
, datalen
, NULL
))
184 ret
= g_strndup(data
, datalen
);
186 purple_debug_warning("oscar", "String is not valid UTF-8.\n");
193 oscar_decode_im(PurpleAccount
*account
, const char *sourcebn
, guint16 charset
, const gchar
*data
, gsize datalen
)
196 /* charsetstr1 is always set to what the correct encoding should be. */
197 const gchar
*charsetstr1
, *charsetstr2
, *charsetstr3
= NULL
;
199 if ((datalen
== 0) || (data
== NULL
))
202 if (charset
== AIM_CHARSET_UNICODE
) {
203 charsetstr1
= "UTF-16BE";
204 charsetstr2
= "UTF-8";
205 } else if (charset
== AIM_CHARSET_LATIN_1
) {
206 if ((sourcebn
!= NULL
) && oscar_util_valid_name_icq(sourcebn
))
207 charsetstr1
= purple_account_get_string(account
, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING
);
209 charsetstr1
= "ISO-8859-1";
210 charsetstr2
= "UTF-8";
211 } else if (charset
== AIM_CHARSET_ASCII
) {
212 /* Should just be "ASCII" */
213 charsetstr1
= "ASCII";
214 charsetstr2
= purple_account_get_string(account
, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING
);
215 } else if (charset
== 0x000d) {
216 /* iChat sending unicode over a Direct IM connection = UTF-8 */
217 /* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */
218 charsetstr1
= "UTF-8";
219 charsetstr2
= "ISO-8859-1";
220 charsetstr3
= purple_account_get_string(account
, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING
);
222 /* Unknown, hope for valid UTF-8... */
223 charsetstr1
= "UTF-8";
224 charsetstr2
= purple_account_get_string(account
, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING
);
227 purple_debug_info("oscar", "Parsing IM, charset=0x%04hx, datalen=%" G_GSIZE_FORMAT
", choice1=%s, choice2=%s, choice3=%s\n",
228 charset
, datalen
, charsetstr1
, charsetstr2
, (charsetstr3
? charsetstr3
: ""));
230 ret
= oscar_convert_to_utf8(data
, datalen
, charsetstr1
, FALSE
);
232 if (charsetstr3
!= NULL
) {
233 /* Try charsetstr2 without allowing substitutions, then fall through to charsetstr3 if needed */
234 ret
= oscar_convert_to_utf8(data
, datalen
, charsetstr2
, FALSE
);
236 ret
= oscar_convert_to_utf8(data
, datalen
, charsetstr3
, TRUE
);
238 /* Try charsetstr2, allowing substitutions */
239 ret
= oscar_convert_to_utf8(data
, datalen
, charsetstr2
, TRUE
);
243 char *str
, *salvage
, *tmp
;
245 str
= g_malloc(datalen
+ 1);
246 strncpy(str
, data
, datalen
);
248 salvage
= purple_utf8_salvage(str
);
249 tmp
= g_strdup_printf(_("(There was an error receiving this message. Either you and %s have different encodings selected, or %s has a buggy client.)"),
251 ret
= g_strdup_printf("%s %s", salvage
, tmp
);
261 get_simplest_charset(const char *utf8
)
265 if ((unsigned char)(*utf8
) > 0x7f) {
267 return AIM_CHARSET_UNICODE
;
271 return AIM_CHARSET_ASCII
;
275 oscar_encode_im(const gchar
*msg
, gsize
*result_len
, guint16
*charset
, gchar
**charsetstr
)
277 guint16 msg_charset
= get_simplest_charset(msg
);
278 if (charset
!= NULL
) {
279 *charset
= msg_charset
;
281 if (charsetstr
!= NULL
) {
282 *charsetstr
= msg_charset
== AIM_CHARSET_ASCII
? "us-ascii" : "unicode-2-0";
284 return g_convert(msg
, -1, msg_charset
== AIM_CHARSET_ASCII
? "ASCII" : "UTF-16BE", "UTF-8", NULL
, result_len
, NULL
);