mark PurpleImageClass as private
[pidgin-git.git] / libpurple / protocols / oscar / encoding.c
blobafb8dcd1ab1bc42e80477934c96665718979fd05
1 /*
2 * Purple's oscar protocol plugin
3 * This file is the legal property of its developers.
4 * Please see the AUTHORS file distributed alongside this file.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111-1301 USA
21 #include "encoding.h"
23 static gchar *
24 encoding_multi_convert_to_utf8(const gchar *text, gssize textlen, const gchar *encodings, GError **error, gboolean fallback)
26 gchar *utf8 = NULL;
27 const gchar *begin = encodings;
28 const gchar *end = NULL;
29 gchar *curr_encoding = NULL; /* allocated buffer for encoding name */
30 const gchar *curr_encoding_ro = NULL; /* read-only encoding name */
32 if (!encodings) {
33 purple_debug_error("oscar", "encodings is NULL");
34 return NULL;
37 for (;;)
39 /* extract next encoding */
40 end = strchr(begin, ',');
41 if (!end) {
42 curr_encoding_ro = begin;
43 } else { /* allocate buffer for encoding */
44 curr_encoding = g_strndup(begin, end - begin);
45 if (!curr_encoding) {
46 purple_debug_error("oscar", "Error allocating memory for encoding");
47 break;
49 curr_encoding_ro = curr_encoding;
52 if (!g_ascii_strcasecmp(curr_encoding_ro, "utf-8") && g_utf8_validate(text, textlen, NULL)) {
53 break;
56 utf8 = g_convert(text, textlen, "UTF-8", curr_encoding_ro, NULL, NULL, NULL);
58 if (!end) /* last occurence. do not free curr_encoding: buffer was'nt allocated */
59 break;
61 g_free(curr_encoding); /* free allocated buffer for encoding here */
63 if (utf8) /* text was successfully converted */
64 break;
66 begin = end + 1;
69 if (!utf8 && fallback)
70 { /* "begin" points to last encoding */
71 utf8 = g_convert_with_fallback(text, textlen, "UTF-8", begin, "?", NULL, NULL, error);
74 return utf8;
77 static gchar *
78 encoding_extract(const char *encoding)
80 char *begin, *end;
82 if (encoding == NULL) {
83 return NULL;
86 if (!g_str_has_prefix(encoding, "text/aolrtf; charset=") &&
87 !g_str_has_prefix(encoding, "text/x-aolrtf; charset=") &&
88 !g_str_has_prefix(encoding, "text/plain; charset=")) {
89 return g_strdup(encoding);
92 begin = strchr(encoding, '"');
93 end = strrchr(encoding, '"');
95 if ((begin == NULL) || (end == NULL) || (begin >= end)) {
96 return g_strdup(encoding);
99 return g_strndup(begin+1, (end-1) - begin);
102 gchar *
103 oscar_encoding_to_utf8(const char *encoding, const char *text, int textlen)
105 gchar *utf8 = NULL;
106 const gchar *glib_encoding = NULL;
107 gchar *extracted_encoding = encoding_extract(encoding);
109 if (extracted_encoding == NULL || *extracted_encoding == '\0') {
110 purple_debug_info("oscar", "Empty encoding, assuming UTF-8\n");
111 } else if (!g_ascii_strcasecmp(extracted_encoding, "iso-8859-1")) {
112 glib_encoding = "iso-8859-1";
113 } else if (!g_ascii_strcasecmp(extracted_encoding, "ISO-8859-1-Windows-3.1-Latin-1") || !g_ascii_strcasecmp(extracted_encoding, "us-ascii")) {
114 glib_encoding = "Windows-1252";
115 } else if (!g_ascii_strcasecmp(extracted_encoding, "unicode-2-0")) {
116 glib_encoding = "UTF-16BE";
117 } else if (g_ascii_strcasecmp(extracted_encoding, "utf-8")) {
118 glib_encoding = extracted_encoding;
121 if (glib_encoding != NULL) {
122 utf8 = encoding_multi_convert_to_utf8(text, textlen, glib_encoding, NULL, FALSE);
126 * If utf8 is still NULL then either the encoding is utf-8 or
127 * we have been unable to convert the text to utf-8 from the encoding
128 * that was specified. So we check if the text is valid utf-8 then
129 * just copy it.
131 if (utf8 == NULL) {
132 if (textlen != 0 && *text != '\0' && !g_utf8_validate(text, textlen, NULL))
133 utf8 = g_strdup(_("(There was an error receiving this message. The buddy you are speaking with is probably using a different encoding than expected. If you know what encoding he is using, you can specify it in the advanced account options for your AIM/ICQ account.)"));
134 else
135 utf8 = g_strndup(text, textlen);
138 g_free(extracted_encoding);
139 return utf8;
142 gchar *
143 oscar_utf8_try_convert(PurpleAccount *account, OscarData *od, const gchar *msg)
145 const char *charset = NULL;
146 char *ret = NULL;
148 if (msg == NULL)
149 return NULL;
151 if (g_utf8_validate(msg, -1, NULL))
152 return g_strdup(msg);
154 if (od->icq)
155 charset = purple_account_get_string(account, "encoding", NULL);
157 if(charset && *charset)
158 ret = encoding_multi_convert_to_utf8(msg, -1, charset, NULL, FALSE);
160 if(!ret)
161 ret = purple_utf8_try_convert(msg);
163 return ret;
166 static gchar *
167 oscar_convert_to_utf8(const gchar *data, gsize datalen, const char *charsetstr, gboolean fallback)
169 gchar *ret = NULL;
170 GError *err = NULL;
172 if ((charsetstr == NULL) || (*charsetstr == '\0'))
173 return NULL;
175 if (g_ascii_strcasecmp("UTF-8", charsetstr)) {
176 ret = encoding_multi_convert_to_utf8(data, datalen, charsetstr, &err, fallback);
177 if (err != NULL) {
178 purple_debug_warning("oscar", "Conversion from %s failed: %s.\n",
179 charsetstr, err->message);
180 g_error_free(err);
182 } else {
183 if (g_utf8_validate(data, datalen, NULL))
184 ret = g_strndup(data, datalen);
185 else
186 purple_debug_warning("oscar", "String is not valid UTF-8.\n");
189 return ret;
192 gchar *
193 oscar_decode_im(PurpleAccount *account, const char *sourcebn, guint16 charset, const gchar *data, gsize datalen)
195 gchar *ret = NULL;
196 /* charsetstr1 is always set to what the correct encoding should be. */
197 const gchar *charsetstr1, *charsetstr2, *charsetstr3 = NULL;
199 if ((datalen == 0) || (data == NULL))
200 return NULL;
202 if (charset == AIM_CHARSET_UNICODE) {
203 charsetstr1 = "UTF-16BE";
204 charsetstr2 = "UTF-8";
205 } else if (charset == AIM_CHARSET_LATIN_1) {
206 if ((sourcebn != NULL) && oscar_util_valid_name_icq(sourcebn))
207 charsetstr1 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
208 else
209 charsetstr1 = "ISO-8859-1";
210 charsetstr2 = "UTF-8";
211 } else if (charset == AIM_CHARSET_ASCII) {
212 /* Should just be "ASCII" */
213 charsetstr1 = "ASCII";
214 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
215 } else if (charset == 0x000d) {
216 /* iChat sending unicode over a Direct IM connection = UTF-8 */
217 /* Mobile AIM client on multiple devices (including Blackberry Tour, Nokia 3100, and LG VX6000) = ISO-8859-1 */
218 charsetstr1 = "UTF-8";
219 charsetstr2 = "ISO-8859-1";
220 charsetstr3 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
221 } else {
222 /* Unknown, hope for valid UTF-8... */
223 charsetstr1 = "UTF-8";
224 charsetstr2 = purple_account_get_string(account, "encoding", OSCAR_DEFAULT_CUSTOM_ENCODING);
227 purple_debug_info("oscar", "Parsing IM, charset=0x%04hx, datalen=%" G_GSIZE_FORMAT ", choice1=%s, choice2=%s, choice3=%s\n",
228 charset, datalen, charsetstr1, charsetstr2, (charsetstr3 ? charsetstr3 : ""));
230 ret = oscar_convert_to_utf8(data, datalen, charsetstr1, FALSE);
231 if (ret == NULL) {
232 if (charsetstr3 != NULL) {
233 /* Try charsetstr2 without allowing substitutions, then fall through to charsetstr3 if needed */
234 ret = oscar_convert_to_utf8(data, datalen, charsetstr2, FALSE);
235 if (ret == NULL)
236 ret = oscar_convert_to_utf8(data, datalen, charsetstr3, TRUE);
237 } else {
238 /* Try charsetstr2, allowing substitutions */
239 ret = oscar_convert_to_utf8(data, datalen, charsetstr2, TRUE);
242 if (ret == NULL) {
243 char *str, *salvage, *tmp;
245 str = g_malloc(datalen + 1);
246 strncpy(str, data, datalen);
247 str[datalen] = '\0';
248 salvage = purple_utf8_salvage(str);
249 tmp = g_strdup_printf(_("(There was an error receiving this message. Either you and %s have different encodings selected, or %s has a buggy client.)"),
250 sourcebn, sourcebn);
251 ret = g_strdup_printf("%s %s", salvage, tmp);
252 g_free(tmp);
253 g_free(str);
254 g_free(salvage);
257 return ret;
260 static guint16
261 get_simplest_charset(const char *utf8)
263 while (*utf8)
265 if ((unsigned char)(*utf8) > 0x7f) {
266 /* not ASCII! */
267 return AIM_CHARSET_UNICODE;
269 utf8++;
271 return AIM_CHARSET_ASCII;
274 gchar *
275 oscar_encode_im(const gchar *msg, gsize *result_len, guint16 *charset, gchar **charsetstr)
277 guint16 msg_charset = get_simplest_charset(msg);
278 if (charset != NULL) {
279 *charset = msg_charset;
281 if (charsetstr != NULL) {
282 *charsetstr = msg_charset == AIM_CHARSET_ASCII ? "us-ascii" : "unicode-2-0";
284 return g_convert(msg, -1, msg_charset == AIM_CHARSET_ASCII ? "ASCII" : "UTF-16BE", "UTF-8", NULL, result_len, NULL);