libpurple/util.c

/*
 * @file util.c Utility Functions
 * @ingroup core
 */
   5
   6 /* Purple is the legal property of its developers, whose names are too numerous
   7  * to list here.  Please refer to the COPYRIGHT file distributed with this
   8  * source distribution.
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111-1301  USA
  23  */
  24 #include "internal.h"
  25
  26 #include "cipher.h"
  27 #include "conversation.h"
  28 #include "core.h"
  29 #include "debug.h"
  30 #include "notify.h"
  31 #include "ntlm.h"
  32 #include "prpl.h"
  33 #include "prefs.h"
  34 #include "util.h"
  35
/* 512KiB Default value for maximum HTTP download size (when the client hasn't
   specified a length) */
#define DEFAULT_MAX_HTTP_DOWNLOAD (512 * 1024)

/* 10MiB.  NOTE(review): presumably the largest single HTTP chunk that will be
   accepted; the code that uses this limit is outside this section of the
   file -- confirm. */
#define MAX_HTTP_CHUNK_SIZE (10 * 1024 * 1024)
  41
/*
 * Bookkeeping for a single URL fetch.
 *
 * NOTE(review): the functions that fill in and consume this struct are
 * outside this section of the file, so the field notes below are inferred
 * from names and types only -- confirm them against that code.
 */
struct _PurpleUtilFetchUrlData
{
        /* Callback and the opaque pointer kept alongside it. */
        PurpleUtilFetchUrlCallback callback;
        void *user_data;

        /* presumably the individual components of the target URL -- TODO confirm */
        struct
        {
                char *user;
                char *passwd;
                char *address;
                int port;
                char *page;

        } website;

        /* presumably request parameters and the outgoing request buffer with
         * its total and already-written byte counts -- TODO confirm */
        char *url;
        int num_times_redirected;
        gboolean full;
        char *user_agent;
        gboolean http11;
        char *request;
        gsize request_len;
        gsize request_written;
        gboolean include_headers;

        /* presumably connection state: SSL vs. plain, the pending connect
         * attempt, the socket and its input watcher id -- TODO confirm */
        gboolean is_ssl;
        PurpleSslConnection *ssl_connection;
        PurpleProxyConnectData *connect_data;
        int fd;
        guint inpa;

        /* presumably response state: header progress, the received data and
         * the lengths/limits that apply to it -- TODO confirm */
        gboolean got_headers;
        gboolean has_explicit_data_len;
        char *webdata;
        gsize len;
        unsigned long data_len;
        gsize max_len;
        gboolean chunked;
        PurpleAccount *account;
};
  82
/* Heap-allocated strings owned by this file; purple_util_uninit() frees both
 * and resets them to NULL.  NOTE(review): the code that assigns them is
 * outside this section of the file. */
static char *custom_user_dir = NULL;
static char *user_dir = NULL;
  85
  86
  87 PurpleMenuAction *
  88 purple_menu_action_new(const char *label, PurpleCallback callback, gpointer data,
  89                      GList *children)
  90 {
  91         PurpleMenuAction *act = g_new0(PurpleMenuAction, 1);
  92         act->label = g_strdup(label);
  93         act->callback = callback;
  94         act->data = data;
  95         act->children = children;
  96         return act;
  97 }
  98
  99 void
 100 purple_menu_action_free(PurpleMenuAction *act)
 101 {
 102         g_return_if_fail(act != NULL);
 103
 104         g_free(act->label);
 105         g_free(act);
 106 }
 107
 108 void
 109 purple_util_init(void)
 110 {
 111         /* This does nothing right now.  It exists for symmetry with
 112          * purple_util_uninit() and forwards compatibility. */
 113 }
 114
 115 void
 116 purple_util_uninit(void)
 117 {
 118         /* Free these so we don't have leaks at shutdown. */
 119
 120         g_free(custom_user_dir);
 121         custom_user_dir = NULL;
 122
 123         g_free(user_dir);
 124         user_dir = NULL;
 125 }
 126
 127 /**************************************************************************
 128  * Base16 Functions
 129  **************************************************************************/
 130 gchar *
 131 purple_base16_encode(const guchar *data, gsize len)
 132 {
 133         gsize i;
 134         gchar *ascii = NULL;
 135
 136         g_return_val_if_fail(data != NULL, NULL);
 137         g_return_val_if_fail(len > 0,   NULL);
 138
 139         ascii = g_malloc(len * 2 + 1);
 140
 141         for (i = 0; i < len; i++)
 142                 g_snprintf(&ascii[i * 2], 3, "%02hhx", data[i]);
 143
 144         return ascii;
 145 }
 146
 147 guchar *
 148 purple_base16_decode(const char *str, gsize *ret_len)
 149 {
 150         gsize len, i, accumulator = 0;
 151         guchar *data;
 152
 153         g_return_val_if_fail(str != NULL, NULL);
 154
 155         len = strlen(str);
 156
 157         g_return_val_if_fail(strlen(str) > 0, 0);
 158         g_return_val_if_fail(len % 2 == 0,    0);
 159
 160         data = g_malloc(len / 2);
 161
 162         for (i = 0; i < len; i++)
 163         {
 164                 if ((i % 2) == 0)
 165                         accumulator = 0;
 166                 else
 167                         accumulator <<= 4;
 168
 169                 if (isdigit(str[i]))
 170                         accumulator |= str[i] - 48;
 171                 else
 172                 {
 173                         switch(tolower(str[i]))
 174                         {
 175                                 case 'a':  accumulator |= 10;  break;
 176                                 case 'b':  accumulator |= 11;  break;
 177                                 case 'c':  accumulator |= 12;  break;
 178                                 case 'd':  accumulator |= 13;  break;
 179                                 case 'e':  accumulator |= 14;  break;
 180                                 case 'f':  accumulator |= 15;  break;
 181                         }
 182                 }
 183
 184                 if (i % 2)
 185                         data[(i - 1) / 2] = accumulator;
 186         }
 187
 188         if (ret_len != NULL)
 189                 *ret_len = len / 2;
 190
 191         return data;
 192 }
 193
 194 gchar *
 195 purple_base16_encode_chunked(const guchar *data, gsize len)
 196 {
 197         gsize i;
 198         gchar *ascii = NULL;
 199
 200         g_return_val_if_fail(data != NULL, NULL);
 201         g_return_val_if_fail(len > 0,   NULL);
 202
 203         /* For each byte of input, we need 2 bytes for the hex representation
 204          * and 1 for the colon.
 205          * The final colon will be replaced by a terminating NULL
 206          */
 207         ascii = g_malloc(len * 3 + 1);
 208
 209         for (i = 0; i < len; i++)
 210                 g_snprintf(&ascii[i * 3], 4, "%02hhx:", data[i]);
 211
 212         /* Replace the final colon with NULL */
 213         ascii[len * 3 - 1] = 0;
 214
 215         return ascii;
 216 }
 217
 218
 219 /**************************************************************************
 220  * Base64 Functions
 221  **************************************************************************/
/* Lowercase hexadecimal digits.  purple_quotedp_decode() searches this table
 * with strchr(); the offset of a match is the numeric value of the digit. */
static const char xdigits[] =
        "0123456789abcdef";
 224
 225 gchar *
 226 purple_base64_encode(const guchar *data, gsize len)
 227 {
 228         return g_base64_encode(data, len);
 229 }
 230
 231 guchar *
 232 purple_base64_decode(const char *str, gsize *ret_len)
 233 {
 234         /*
 235          * We want to allow ret_len to be NULL for backward compatibility,
 236          * but g_base64_decode() requires a valid length variable.  So if
 237          * ret_len is NULL then pass in a dummy variable.
 238          */
 239         gsize unused;
 240         return g_base64_decode(str, ret_len != NULL ? ret_len : &unused);
 241 }
 242
 243 /**************************************************************************
 244  * Quoted Printable Functions (see RFC 2045).
 245  **************************************************************************/
 246 guchar *
 247 purple_quotedp_decode(const char *str, gsize *ret_len)
 248 {
 249         char *n, *new;
 250         const char *end, *p;
 251
 252         n = new = g_malloc(strlen (str) + 1);
 253         end = str + strlen(str);
 254
 255         for (p = str; p < end; p++, n++) {
 256                 if (*p == '=') {
 257                         if (p[1] == '\r' && p[2] == '\n') { /* 5.1 #5 */
 258                                 n -= 1;
 259                                 p += 2;
 260                         } else if (p[1] == '\n') { /* fuzzy case for 5.1 #5 */
 261                                 n -= 1;
 262                                 p += 1;
 263                         } else if (p[1] && p[2]) {
 264                                 char *nibble1 = strchr(xdigits, tolower(p[1]));
 265                                 char *nibble2 = strchr(xdigits, tolower(p[2]));
 266                                 if (nibble1 && nibble2) { /* 5.1 #1 */
 267                                         *n = ((nibble1 - xdigits) << 4) | (nibble2 - xdigits);
 268                                         p += 2;
 269                                 } else { /* This should never happen */
 270                                         *n = *p;
 271                                 }
 272                         } else { /* This should never happen */
 273                                 *n = *p;
 274                         }
 275                 }
 276                 else if (*p == '_')
 277                         *n = ' ';
 278                 else
 279                         *n = *p;
 280         }
 281
 282         *n = '\0';
 283
 284         if (ret_len != NULL)
 285                 *ret_len = n - new;
 286
 287         /* Resize to take less space */
 288         /* new = realloc(new, n - new); */
 289
 290         return (guchar *)new;
 291 }
 292
 293 /**************************************************************************
 294  * MIME Functions
 295  **************************************************************************/
 296 char *
 297 purple_mime_decode_field(const char *str)
 298 {
 299         /*
 300          * This is wing's version, partially based on revo/shx's version
 301          * See RFC2047 [which apparently obsoletes RFC1342]
 302          */
 303         typedef enum {
 304                 state_start, state_equal1, state_question1,
 305                 state_charset, state_question2,
 306                 state_encoding, state_question3,
 307                 state_encoded_text, state_question4, state_equal2 = state_start
 308         } encoded_word_state_t;
 309         encoded_word_state_t state = state_start;
 310         const char *cur, *mark;
 311         const char *charset0 = NULL, *encoding0 = NULL, *encoded_text0 = NULL;
 312         GString *new;
 313
 314         /* token can be any CHAR (supposedly ISO8859-1/ISO2022), not just ASCII */
 315         #define token_char_p(c) \
 316                 (c != ' ' && !iscntrl(c) && !strchr("()<>@,;:\"/[]?.=", c))
 317
 318         /* But encoded-text must be ASCII; alas, isascii() may not exist */
 319         #define encoded_text_char_p(c) \
 320                 ((c & 0x80) == 0 && c != '?' && c != ' ' && isgraph(c))
 321
 322         g_return_val_if_fail(str != NULL, NULL);
 323
 324         new = g_string_new(NULL);
 325
 326         /* Here we will be looking for encoded words and if they seem to be
 327          * valid then decode them.
 328          * They are of this form: =?charset?encoding?text?=
 329          */
 330
 331         for (cur = str, mark = NULL; *cur; cur += 1) {
 332                 switch (state) {
 333                 case state_equal1:
 334                         if (*cur == '?') {
 335                                 state = state_question1;
 336                         } else {
 337                                 g_string_append_len(new, mark, cur - mark + 1);
 338                                 state = state_start;
 339                         }
 340                         break;
 341                 case state_question1:
 342                         if (token_char_p(*cur)) {
 343                                 charset0 = cur;
 344                                 state = state_charset;
 345                         } else { /* This should never happen */
 346                                 g_string_append_len(new, mark, cur - mark + 1);
 347                                 state = state_start;
 348                         }
 349                         break;
 350                 case state_charset:
 351                         if (*cur == '?') {
 352                                 state = state_question2;
 353                         } else if (!token_char_p(*cur)) { /* This should never happen */
 354                                 g_string_append_len(new, mark, cur - mark + 1);
 355                                 state = state_start;
 356                         }
 357                         break;
 358                 case state_question2:
 359                         if (token_char_p(*cur)) {
 360                                 encoding0 = cur;
 361                                 state = state_encoding;
 362                         } else { /* This should never happen */
 363                                 g_string_append_len(new, mark, cur - mark + 1);
 364                                 state = state_start;
 365                         }
 366                         break;
 367                 case state_encoding:
 368                         if (*cur == '?') {
 369                                 state = state_question3;
 370                         } else if (!token_char_p(*cur)) { /* This should never happen */
 371                                 g_string_append_len(new, mark, cur - mark + 1);
 372                                 state = state_start;
 373                         }
 374                         break;
 375                 case state_question3:
 376                         if (encoded_text_char_p(*cur)) {
 377                                 encoded_text0 = cur;
 378                                 state = state_encoded_text;
 379                         } else if (*cur == '?') { /* empty string */
 380                                 encoded_text0 = cur;
 381                                 state = state_question4;
 382                         } else { /* This should never happen */
 383                                 g_string_append_len(new, mark, cur - mark + 1);
 384                                 state = state_start;
 385                         }
 386                         break;
 387                 case state_encoded_text:
 388                         if (*cur == '?') {
 389                                 state = state_question4;
 390                         } else if (!encoded_text_char_p(*cur)) {
 391                                 g_string_append_len(new, mark, cur - mark + 1);
 392                                 state = state_start;
 393                         }
 394                         break;
 395                 case state_question4:
 396                         if (*cur == '=') { /* Got the whole encoded-word */
 397                                 char *charset = g_strndup(charset0, encoding0 - charset0 - 1);
 398                                 char *encoding = g_strndup(encoding0, encoded_text0 - encoding0 - 1);
 399                                 char *encoded_text = g_strndup(encoded_text0, cur - encoded_text0 - 1);
 400                                 guchar *decoded = NULL;
 401                                 gsize dec_len;
 402                                 if (g_ascii_strcasecmp(encoding, "Q") == 0)
 403                                         decoded = purple_quotedp_decode(encoded_text, &dec_len);
 404                                 else if (g_ascii_strcasecmp(encoding, "B") == 0)
 405                                         decoded = purple_base64_decode(encoded_text, &dec_len);
 406                                 else
 407                                         decoded = NULL;
 408                                 if (decoded) {
 409                                         gsize len;
 410                                         char *converted = g_convert((const gchar *)decoded, dec_len, "utf-8", charset, NULL, &len, NULL);
 411
 412                                         if (converted) {
 413                                                 g_string_append_len(new, converted, len);
 414                                                 g_free(converted);
 415                                         }
 416                                         g_free(decoded);
 417                                 }
 418                                 g_free(charset);
 419                                 g_free(encoding);
 420                                 g_free(encoded_text);
 421                                 state = state_equal2; /* Restart the FSM */
 422                         } else { /* This should never happen */
 423                                 g_string_append_len(new, mark, cur - mark + 1);
 424                                 state = state_start;
 425                         }
 426                         break;
 427                 default:
 428                         if (*cur == '=') {
 429                                 mark = cur;
 430                                 state = state_equal1;
 431                         } else {
 432                                 /* Some unencoded text. */
 433                                 g_string_append_c(new, *cur);
 434                         }
 435                         break;
 436                 } /* switch */
 437         } /* for */
 438
 439         if (state != state_start)
 440                 g_string_append_len(new, mark, cur - mark + 1);
 441
 442         return g_string_free(new, FALSE);;
 443 }
 444
 445
 446 /**************************************************************************
 447  * Date/Time Functions
 448  **************************************************************************/
 449
 450 const char *purple_get_tzoff_str(const struct tm *tm, gboolean iso)
 451 {
 452         static char buf[7];
 453         long off;
 454         gint8 min;
 455         gint8 hrs;
 456         struct tm new_tm = *tm;
 457
 458         mktime(&new_tm);
 459
 460         if (new_tm.tm_isdst < 0)
 461                 g_return_val_if_reached("");
 462
 463 #ifdef _WIN32
 464         if ((off = wpurple_get_tz_offset()) == -1)
 465                 return "";
 466 #else
 467 # ifdef HAVE_TM_GMTOFF
 468         off = new_tm.tm_gmtoff;
 469 # else
 470 #  ifdef HAVE_TIMEZONE
 471         tzset();
 472         off = -1 * timezone;
 473 #  endif /* HAVE_TIMEZONE */
 474 # endif /* !HAVE_TM_GMTOFF */
 475 #endif /* _WIN32 */
 476
 477         min = (off / 60) % 60;
 478         hrs = ((off / 60) - min) / 60;
 479
 480         if(iso) {
 481                 if (0 == off) {
 482                         strcpy(buf, "Z");
 483                 } else {
 484                         /* please leave the colons...they're optional for iso, but jabber
 485                          * wants them */
 486                         if(g_snprintf(buf, sizeof(buf), "%+03d:%02d", hrs, ABS(min)) > 6)
 487                                 g_return_val_if_reached("");
 488                 }
 489         } else {
 490                 if (g_snprintf(buf, sizeof(buf), "%+03d%02d", hrs, ABS(min)) > 5)
 491                         g_return_val_if_reached("");
 492         }
 493
 494         return buf;
 495 }
 496
 497 /* Windows doesn't HAVE_STRFTIME_Z_FORMAT, but this seems clearer. -- rlaager */
 498 #if !defined(HAVE_STRFTIME_Z_FORMAT) || defined(_WIN32)
 499 static size_t purple_internal_strftime(char *s, size_t max, const char *format, const struct tm *tm)
 500 {
 501         const char *start;
 502         const char *c;
 503         char *fmt = NULL;
 504
 505         /* Yes, this is checked in purple_utf8_strftime(),
 506          * but better safe than sorry. -- rlaager */
 507         g_return_val_if_fail(format != NULL, 0);
 508
 509         /* This is fairly efficient, and it only gets
 510          * executed on Windows or if the underlying
 511          * system doesn't support the %z format string,
 512          * for strftime() so I think it's good enough.
 513          * -- rlaager */
 514         for (c = start = format; *c ; c++)
 515         {
 516                 if (*c != '%')
 517                         continue;
 518
 519                 c++;
 520
 521 #ifndef HAVE_STRFTIME_Z_FORMAT
 522                 if (*c == 'z')
 523                 {
 524                         char *tmp = g_strdup_printf("%s%.*s%s",
 525                                                     fmt ? fmt : "",
 526                                                     c - start - 1,
 527                                                     start,
 528                                                     purple_get_tzoff_str(tm, FALSE));
 529                         g_free(fmt);
 530                         fmt = tmp;
 531                         start = c + 1;
 532                 }
 533 #endif
 534 #ifdef _WIN32
 535                 if (*c == 'Z')
 536                 {
 537                         char *tmp = g_strdup_printf("%s%.*s%s",
 538                                                     fmt ? fmt : "",
 539                                                     c - start - 1,
 540                                                     start,
 541                                                     wpurple_get_timezone_abbreviation(tm));
 542                         g_free(fmt);
 543                         fmt = tmp;
 544                         start = c + 1;
 545                 }
 546 #endif
 547         }
 548
 549         if (fmt != NULL)
 550         {
 551                 size_t ret;
 552
 553                 if (*start)
 554                 {
 555                         char *tmp = g_strconcat(fmt, start, NULL);
 556                         g_free(fmt);
 557                         fmt = tmp;
 558                 }
 559
 560                 ret = strftime(s, max, fmt, tm);
 561                 g_free(fmt);
 562
 563                 return ret;
 564         }
 565
 566         return strftime(s, max, format, tm);
 567 }
 568 #else /* HAVE_STRFTIME_Z_FORMAT && !_WIN32 */
 569 #define purple_internal_strftime strftime
 570 #endif
 571
 572 const char *
 573 purple_utf8_strftime(const char *format, const struct tm *tm)
 574 {
 575         static char buf[128];
 576         char *locale;
 577         GError *err = NULL;
 578         int len;
 579         char *utf8;
 580
 581         g_return_val_if_fail(format != NULL, NULL);
 582
 583         if (tm == NULL)
 584         {
 585                 time_t now = time(NULL);
 586                 tm = localtime(&now);
 587         }
 588
 589         locale = g_locale_from_utf8(format, -1, NULL, NULL, &err);
 590         if (err != NULL)
 591         {
 592                 purple_debug_error("util", "Format conversion failed in purple_utf8_strftime(): %s\n", err->message);
 593                 g_error_free(err);
 594                 err = NULL;
 595                 locale = g_strdup(format);
 596         }
 597
 598         /* A return value of 0 is either an error (in
 599          * which case, the contents of the buffer are
 600          * undefined) or the empty string (in which
 601          * case, no harm is done here). */
 602         if ((len = purple_internal_strftime(buf, sizeof(buf), locale, tm)) == 0)
 603         {
 604                 g_free(locale);
 605                 return "";
 606         }
 607
 608         g_free(locale);
 609
 610         utf8 = g_locale_to_utf8(buf, len, NULL, NULL, &err);
 611         if (err != NULL)
 612         {
 613                 purple_debug_error("util", "Result conversion failed in purple_utf8_strftime(): %s\n", err->message);
 614                 g_error_free(err);
 615         }
 616         else
 617         {
 618                 purple_strlcpy(buf, utf8);
 619                 g_free(utf8);
 620         }
 621
 622         return buf;
 623 }
 624
 625 const char *
 626 purple_date_format_short(const struct tm *tm)
 627 {
 628         return purple_utf8_strftime("%x", tm);
 629 }
 630
 631 const char *
 632 purple_date_format_long(const struct tm *tm)
 633 {
 634         /*
 635          * This string determines how some dates are displayed.  The default
 636          * string "%x %X" shows the date then the time.  Translators can
 637          * change this to "%X %x" if they want the time to be shown first,
 638          * followed by the date.
 639          */
 640         return purple_utf8_strftime(_("%x %X"), tm);
 641 }
 642
 643 const char *
 644 purple_date_format_full(const struct tm *tm)
 645 {
 646         return purple_utf8_strftime("%c", tm);
 647 }
 648
 649 const char *
 650 purple_time_format(const struct tm *tm)
 651 {
 652         return purple_utf8_strftime("%X", tm);
 653 }
 654
 655 time_t
 656 purple_time_build(int year, int month, int day, int hour, int min, int sec)
 657 {
 658         struct tm tm;
 659
 660         tm.tm_year = year - 1900;
 661         tm.tm_mon = month - 1;
 662         tm.tm_mday = day;
 663         tm.tm_hour = hour;
 664         tm.tm_min = min;
 665         tm.tm_sec = sec >= 0 ? sec : time(NULL) % 60;
 666
 667         return mktime(&tm);
 668 }
 669
 670 /* originally taken from GLib trunk 1-6-11 */
 671 /* originally licensed as LGPL 2+ */
 672 static time_t
 673 mktime_utc(struct tm *tm)
 674 {
 675         time_t retval;
 676
 677 #ifndef HAVE_TIMEGM
 678         static const gint days_before[] =
 679         {
 680                 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
 681         };
 682 #endif
 683
 684 #ifndef HAVE_TIMEGM
 685         if (tm->tm_mon < 0 || tm->tm_mon > 11)
 686                 return (time_t) -1;
 687
 688         retval = (tm->tm_year - 70) * 365;
 689         retval += (tm->tm_year - 68) / 4;
 690         retval += days_before[tm->tm_mon] + tm->tm_mday - 1;
 691
 692         if (tm->tm_year % 4 == 0 && tm->tm_mon < 2)
 693                 retval -= 1;
 694
 695         retval = ((((retval * 24) + tm->tm_hour) * 60) + tm->tm_min) * 60 + tm->tm_sec;
 696 #else
 697         retval = timegm (tm);
 698 #endif /* !HAVE_TIMEGM */
 699
 700         return retval;
 701 }
 702
 703 time_t
 704 purple_str_to_time(const char *timestamp, gboolean utc,
 705         struct tm *tm, long *tz_off, const char **rest)
 706 {
 707         struct tm t;
 708         const gchar *str;
 709         gint year = 0;
 710         long tzoff = PURPLE_NO_TZ_OFF;
 711         time_t retval;
 712         gboolean mktime_with_utc = FALSE;
 713
 714         if (rest != NULL)
 715                 *rest = NULL;
 716
 717         g_return_val_if_fail(timestamp != NULL, 0);
 718
 719         memset(&t, 0, sizeof(struct tm));
 720
 721         str = timestamp;
 722
 723         /* Strip leading whitespace */
 724         while (g_ascii_isspace(*str))
 725                 str++;
 726
 727         if (*str == '\0') {
 728                 if (rest != NULL && *str != '\0')
 729                         *rest = str;
 730
 731                 return 0;
 732         }
 733
 734         if (!g_ascii_isdigit(*str) && *str != '-' && *str != '+') {
 735                 if (rest != NULL && *str != '\0')
 736                         *rest = str;
 737
 738                 return 0;
 739         }
 740
 741         /* 4 digit year */
 742         if (sscanf(str, "%04d", &year) && year >= 1900) {
 743                 str += 4;
 744
 745                 if (*str == '-' || *str == '/')
 746                         str++;
 747
 748                 t.tm_year = year - 1900;
 749         }
 750
 751         /* 2 digit month */
 752         if (!sscanf(str, "%02d", &t.tm_mon)) {
 753                 if (rest != NULL && *str != '\0')
 754                         *rest = str;
 755
 756                 return 0;
 757         }
 758
 759         str += 2;
 760         t.tm_mon -= 1;
 761
 762         if (*str == '-' || *str == '/')
 763                 str++;
 764
 765         /* 2 digit day */
 766         if (!sscanf(str, "%02d", &t.tm_mday)) {
 767                 if (rest != NULL && *str != '\0')
 768                         *rest = str;
 769
 770                 return 0;
 771         }
 772
 773         str += 2;
 774
 775         /* Grab the year off the end if there's still stuff */
 776         if (*str == '/' || *str == '-') {
 777                 /* But make sure we don't read the year twice */
 778                 if (year >= 1900) {
 779                         if (rest != NULL && *str != '\0')
 780                                 *rest = str;
 781
 782                         return 0;
 783                 }
 784
 785                 str++;
 786
 787                 if (!sscanf(str, "%04d", &t.tm_year)) {
 788                         if (rest != NULL && *str != '\0')
 789                                 *rest = str;
 790
 791                         return 0;
 792                 }
 793
 794                 t.tm_year -= 1900;
 795         } else if (*str == 'T' || *str == '.') {
 796                 str++;
 797
 798                 /* Continue grabbing the hours/minutes/seconds */
 799                 if ((sscanf(str, "%02d:%02d:%02d", &t.tm_hour, &t.tm_min, &t.tm_sec) == 3 &&
 800                                 (str += 8)) ||
 801                     (sscanf(str, "%02d%02d%02d", &t.tm_hour, &t.tm_min, &t.tm_sec) == 3 &&
 802                                 (str += 6)))
 803                 {
 804                         gint sign, tzhrs, tzmins;
 805
 806                         if (*str == '.') {
 807                                 /* Cut off those pesky micro-seconds */
 808                                 do {
 809                                         str++;
 810                                 } while (*str >= '0' && *str <= '9');
 811                         }
 812
 813                         sign = (*str == '+') ? 1 : -1;
 814
 815                         /* Process the timezone */
 816                         if (*str == '+' || *str == '-') {
 817                                 str++;
 818
 819                                 if (((sscanf(str, "%02d:%02d", &tzhrs, &tzmins) == 2 && (str += 5)) ||
 820                                         (sscanf(str, "%02d%02d", &tzhrs, &tzmins) == 2 && (str += 4))))
 821                                 {
 822                                         mktime_with_utc = TRUE;
 823                                         tzoff = tzhrs * 60 * 60 + tzmins * 60;
 824                                         tzoff *= sign;
 825                                 }
 826                         } else if (*str == 'Z') {
 827                                 /* 'Z' = Zulu = UTC */
 828                                 str++;
 829                                 mktime_with_utc = TRUE;
 830                                 tzoff = 0;
 831                         }
 832
 833                         if (!mktime_with_utc)
 834                         {
 835                                 /* No timezone specified. */
 836
 837                                 if (utc) {
 838                                         mktime_with_utc = TRUE;
 839                                         tzoff = 0;
 840                                 } else {
 841                                         /* Local Time */
 842                                         t.tm_isdst = -1;
 843                                 }
 844                         }
 845                 }
 846         }
 847
 848         if (rest != NULL && *str != '\0') {
 849                 /* Strip trailing whitespace */
 850                 while (g_ascii_isspace(*str))
 851                         str++;
 852
 853                 if (*str != '\0')
 854                         *rest = str;
 855         }
 856
 857         if (mktime_with_utc)
 858                 retval = mktime_utc(&t);
 859         else
 860                 retval = mktime(&t);
 861
 862         if (tm != NULL)
 863                 *tm = t;
 864
 865         if (tzoff != PURPLE_NO_TZ_OFF)
 866                 retval -= tzoff;
 867
 868         if (tz_off != NULL)
 869                 *tz_off = tzoff;
 870
 871         return retval;
 872 }
 873
 874 /**************************************************************************
 875  * Markup Functions
 876  **************************************************************************/
 877
 878 /*
 879  * This function is stolen from glib's gmarkup.c and modified to not
 880  * replace ' with &apos;
 881  */
 882 static void append_escaped_text(GString *str,
 883                 const gchar *text, gssize length)
 884 {
 885         const gchar *p;
 886         const gchar *end;
 887         gunichar c;
 888
 889         p = text;
 890         end = text + length;
 891
 892         while (p != end)
 893         {
 894                 const gchar *next;
 895                 next = g_utf8_next_char (p);
 896
 897                 switch (*p)
 898                 {
 899                         case '&':
 900                                 g_string_append (str, "&amp;");
 901                                 break;
 902
 903                         case '<':
 904                                 g_string_append (str, "&lt;");
 905                                 break;
 906
 907                         case '>':
 908                                 g_string_append (str, "&gt;");
 909                                 break;
 910
 911                         case '"':
 912                                 g_string_append (str, "&quot;");
 913                                 break;
 914
 915                         default:
 916                                 c = g_utf8_get_char (p);
 917                                 if ((0x1 <= c && c <= 0x8) ||
 918                                                 (0xb <= c && c <= 0xc) ||
 919                                                 (0xe <= c && c <= 0x1f) ||
 920                                                 (0x7f <= c && c <= 0x84) ||
 921                                                 (0x86 <= c && c <= 0x9f))
 922                                         g_string_append_printf (str, "&#x%x;", c);
 923                                 else
 924                                         g_string_append_len (str, p, next - p);
 925                                 break;
 926                 }
 927
 928                 p = next;
 929         }
 930 }
 931
 932 /* This function is stolen from glib's gmarkup.c */
 933 gchar *purple_markup_escape_text(const gchar *text, gssize length)
 934 {
 935         GString *str;
 936
 937         g_return_val_if_fail(text != NULL, NULL);
 938
 939         if (length < 0)
 940                 length = strlen(text);
 941
 942         /* prealloc at least as long as original text */
 943         str = g_string_sized_new(length);
 944         append_escaped_text(str, text, length);
 945
 946         return g_string_free(str, FALSE);
 947 }
 948
 949 const char *
 950 purple_markup_unescape_entity(const char *text, int *length)
 951 {
 952         const char *pln;
 953         int len, pound;
 954         char temp[2];
 955
 956         if (!text || *text != '&')
 957                 return NULL;
 958
 959 #define IS_ENTITY(s)  (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))
 960
 961         if(IS_ENTITY("&amp;"))
 962                 pln = "&";
 963         else if(IS_ENTITY("&lt;"))
 964                 pln = "<";
 965         else if(IS_ENTITY("&gt;"))
 966                 pln = ">";
 967         else if(IS_ENTITY("&nbsp;"))
 968                 pln = " ";
 969         else if(IS_ENTITY("&copy;"))
 970                 pln = "\302\251";      /* or use g_unichar_to_utf8(0xa9); */
 971         else if(IS_ENTITY("&quot;"))
 972                 pln = "\"";
 973         else if(IS_ENTITY("&reg;"))
 974                 pln = "\302\256";      /* or use g_unichar_to_utf8(0xae); */
 975         else if(IS_ENTITY("&apos;"))
 976                 pln = "\'";
 977         else if(text[1] == '#' && g_ascii_isxdigit(text[2])) {
 978                 static char buf[7];
 979                 const char *start = text + 2;
 980                 char *end;
 981                 guint64 pound;
 982                 int base = 10;
 983                 int buflen;
 984
 985                 if (*start == 'x') {
 986                         base = 16;
 987                         start++;
 988                 }
 989
 990                 pound = g_ascii_strtoull(start, &end, base);
 991                 if (pound == 0 || pound > INT_MAX || *end != ';') {
 992                         return NULL;
 993                 }
 994
 995                 len = (end - text) + 1;
 996
 997                 buflen = g_unichar_to_utf8((gunichar)pound, buf);
 998                 buf[buflen] = '\0';
 999                 pln = buf;
1000         }
1001         else
1002                 return NULL;
1003
1004         if (length)
1005                 *length = len;
1006         return pln;
1007 }
1008
1009 char *
1010 purple_markup_get_css_property(const gchar *style,
1011                                 const gchar *opt)
1012 {
1013         const gchar *css_str = style;
1014         const gchar *css_value_start;
1015         const gchar *css_value_end;
1016         gchar *tmp;
1017         gchar *ret;
1018
1019         g_return_val_if_fail(opt != NULL, NULL);
1020
1021         if (!css_str)
1022                 return NULL;
1023
1024         /* find the CSS property */
1025         while (1)
1026         {
1027                 /* skip whitespace characters */
1028                 while (*css_str && g_ascii_isspace(*css_str))
1029                         css_str++;
1030                 if (!g_ascii_isalpha(*css_str))
1031                         return NULL;
1032                 if (g_ascii_strncasecmp(css_str, opt, strlen(opt)))
1033                 {
1034                         /* go to next css property positioned after the next ';' */
1035                         while (*css_str && *css_str != '"' && *css_str != ';')
1036                                 css_str++;
1037                         if(*css_str != ';')
1038                                 return NULL;
1039                         css_str++;
1040                 }
1041                 else
1042                         break;
1043         }
1044
1045         /* find the CSS value position in the string */
1046         css_str += strlen(opt);
1047         while (*css_str && g_ascii_isspace(*css_str))
1048                 css_str++;
1049         if (*css_str != ':')
1050                 return NULL;
1051         css_str++;
1052         while (*css_str && g_ascii_isspace(*css_str))
1053                 css_str++;
1054         if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
1055                 return NULL;
1056
1057         /* mark the CSS value */
1058         css_value_start = css_str;
1059         while (*css_str && *css_str != '"' && *css_str != ';')
1060                 css_str++;
1061         css_value_end = css_str - 1;
1062
1063         /* Removes trailing whitespace */
1064         while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
1065                 css_value_end--;
1066
1067         tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
1068         ret = purple_unescape_html(tmp);
1069         g_free(tmp);
1070
1071         return ret;
1072 }
1073
1074 gboolean purple_markup_is_rtl(const char *html)
1075 {
1076         GData *attributes;
1077         const gchar *start, *end;
1078         gboolean res = FALSE;
1079
1080         if (purple_markup_find_tag("span", html, &start, &end, &attributes))
1081         {
1082                 /* tmp is a member of attributes and is free with g_datalist_clear call */
1083                 const char *tmp = g_datalist_get_data(&attributes, "dir");
1084                 if (tmp && !g_ascii_strcasecmp(tmp, "RTL"))
1085                         res = TRUE;
1086                 if (!res)
1087                 {
1088                         tmp = g_datalist_get_data(&attributes, "style");
1089                         if (tmp)
1090                         {
1091                                 char *tmp2 = purple_markup_get_css_property(tmp, "direction");
1092                                 if (tmp2 && !g_ascii_strcasecmp(tmp2, "RTL"))
1093                                         res = TRUE;
1094                                 g_free(tmp2);
1095                         }
1096
1097                 }
1098                 g_datalist_clear(&attributes);
1099         }
1100         return res;
1101 }
1102
1103 gboolean
1104 purple_markup_find_tag(const char *needle, const char *haystack,
1105                                          const char **start, const char **end, GData **attributes)
1106 {
1107         GData *attribs;
1108         const char *cur = haystack;
1109         char *name = NULL;
1110         gboolean found = FALSE;
1111         gboolean in_tag = FALSE;
1112         gboolean in_attr = FALSE;
1113         const char *in_quotes = NULL;
1114         size_t needlelen;
1115
1116         g_return_val_if_fail(    needle != NULL, FALSE);
1117         g_return_val_if_fail(   *needle != '\0', FALSE);
1118         g_return_val_if_fail(  haystack != NULL, FALSE);
1119         g_return_val_if_fail(     start != NULL, FALSE);
1120         g_return_val_if_fail(       end != NULL, FALSE);
1121         g_return_val_if_fail(attributes != NULL, FALSE);
1122
1123         needlelen = strlen(needle);
1124         g_datalist_init(&attribs);
1125
1126         while (*cur && !found) {
1127                 if (in_tag) {
1128                         if (in_quotes) {
1129                                 const char *close = cur;
1130
1131                                 while (*close && *close != *in_quotes)
1132                                         close++;
1133
1134                                 /* if we got the close quote, store the value and carry on from    *
1135                                  * after it. if we ran to the end of the string, point to the NULL *
1136                                  * and we're outta here */
1137                                 if (*close) {
1138                                         /* only store a value if we have an attribute name */
1139                                         if (name) {
1140                                                 size_t len = close - cur;
1141                                                 char *val = g_strndup(cur, len);
1142
1143                                                 g_datalist_set_data_full(&attribs, name, val, g_free);
1144                                                 g_free(name);
1145                                                 name = NULL;
1146                                         }
1147
1148                                         in_quotes = NULL;
1149                                         cur = close + 1;
1150                                 } else {
1151                                         cur = close;
1152                                 }
1153                         } else if (in_attr) {
1154                                 const char *close = cur;
1155
1156                                 while (*close && *close != '>' && *close != '"' &&
1157                                                 *close != '\'' && *close != ' ' && *close != '=')
1158                                         close++;
1159
1160                                 /* if we got the equals, store the name of the attribute. if we got
1161                                  * the quote, save the attribute and go straight to quote mode.
1162                                  * otherwise the tag closed or we reached the end of the string,
1163                                  * so we can get outta here */
1164                                 switch (*close) {
1165                                 case '"':
1166                                 case '\'':
1167                                         in_quotes = close;
1168                                         /* fall through */
1169                                 case '=':
1170                                         {
1171                                                 size_t len = close - cur;
1172
1173                                                 /* don't store a blank attribute name */
1174                                                 if (len) {
1175                                                         g_free(name);
1176                                                         name = g_ascii_strdown(cur, len);
1177                                                 }
1178
1179                                                 in_attr = FALSE;
1180                                                 cur = close + 1;
1181                                         }
1182                                         break;
1183                                 case ' ':
1184                                 case '>':
1185                                         in_attr = FALSE;
1186                                         /* fall through */
1187                                 default:
1188                                         cur = close;
1189                                         break;
1190                                 }
1191                         } else {
1192                                 switch (*cur) {
1193                                 case ' ':
1194                                         /* swallow extra spaces inside tag */
1195                                         while (*cur && *cur == ' ') cur++;
1196                                         in_attr = TRUE;
1197                                         break;
1198                                 case '>':
1199                                         found = TRUE;
1200                                         *end = cur;
1201                                         break;
1202                                 case '"':
1203                                 case '\'':
1204                                         in_quotes = cur;
1205                                         /* fall through */
1206                                 default:
1207                                         cur++;
1208                                         break;
1209                                 }
1210                         }
1211                 } else {
1212                         /* if we hit a < followed by the name of our tag... */
1213                         if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
1214                                 *start = cur;
1215                                 cur = cur + needlelen + 1;
1216
1217                                 /* if we're pointing at a space or a >, we found the right tag. if *
1218                                  * we're not, we've found a longer tag, so we need to skip to the  *
1219                                  * >, but not being distracted by >s inside quotes.                */
1220                                 if (*cur == ' ' || *cur == '>') {
1221                                         in_tag = TRUE;
1222                                 } else {
1223                                         while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') {
1224                                                 if (*cur == '"') {
1225                                                         cur++;
1226                                                         while (*cur && *cur != '"')
1227                                                                 cur++;
1228                                                 } else if (*cur == '\'') {
1229                                                         cur++;
1230                                                         while (*cur && *cur != '\'')
1231                                                                 cur++;
1232                                                 } else {
1233                                                         cur++;
1234                                                 }
1235                                         }
1236                                 }
1237                         } else {
1238                                 cur++;
1239                         }
1240                 }
1241         }
1242
1243         /* clean up any attribute name from a premature termination */
1244         g_free(name);
1245
1246         if (found) {
1247                 *attributes = attribs;
1248         } else {
1249                 *start = NULL;
1250                 *end = NULL;
1251                 *attributes = NULL;
1252         }
1253
1254         return found;
1255 }
1256
1257 gboolean
1258 purple_markup_extract_info_field(const char *str, int len, PurpleNotifyUserInfo *user_info,
1259                                                            const char *start_token, int skip,
1260                                                            const char *end_token, char check_value,
1261                                                            const char *no_value_token,
1262                                                            const char *display_name, gboolean is_link,
1263                                                            const char *link_prefix,
1264                                                            PurpleInfoFieldFormatCallback format_cb)
1265 {
1266         const char *p, *q;
1267
1268         g_return_val_if_fail(str          != NULL, FALSE);
1269         g_return_val_if_fail(user_info    != NULL, FALSE);
1270         g_return_val_if_fail(start_token  != NULL, FALSE);
1271         g_return_val_if_fail(end_token    != NULL, FALSE);
1272         g_return_val_if_fail(display_name != NULL, FALSE);
1273
1274         p = strstr(str, start_token);
1275
1276         if (p == NULL)
1277                 return FALSE;
1278
1279         p += strlen(start_token) + skip;
1280
1281         if (p >= str + len)
1282                 return FALSE;
1283
1284         if (check_value != '\0' && *p == check_value)
1285                 return FALSE;
1286
1287         q = strstr(p, end_token);
1288
1289         /* Trim leading blanks */
1290         while (*p != '\n' && g_ascii_isspace(*p)) {
1291                 p += 1;
1292         }
1293
1294         /* Trim trailing blanks */
1295         while (q > p && g_ascii_isspace(*(q - 1))) {
1296                 q -= 1;
1297         }
1298
1299         /* Don't bother with null strings */
1300         if (p == q)
1301                 return FALSE;
1302
1303         if (q != NULL && (!no_value_token ||
1304                                           (no_value_token && strncmp(p, no_value_token,
1305                                                                                                  strlen(no_value_token)))))
1306         {
1307                 GString *dest = g_string_new("");
1308
1309                 if (is_link)
1310                 {
1311                         g_string_append(dest, "<a href=\"");
1312
1313                         if (link_prefix)
1314                                 g_string_append(dest, link_prefix);
1315
1316                         if (format_cb != NULL)
1317                         {
1318                                 char *reformatted = format_cb(p, q - p);
1319                                 g_string_append(dest, reformatted);
1320                                 g_free(reformatted);
1321                         }
1322                         else
1323                                 g_string_append_len(dest, p, q - p);
1324                         g_string_append(dest, "\">");
1325
1326                         if (link_prefix)
1327                                 g_string_append(dest, link_prefix);
1328
1329                         g_string_append_len(dest, p, q - p);
1330                         g_string_append(dest, "</a>");
1331                 }
1332                 else
1333                 {
1334                         if (format_cb != NULL)
1335                         {
1336                                 char *reformatted = format_cb(p, q - p);
1337                                 g_string_append(dest, reformatted);
1338                                 g_free(reformatted);
1339                         }
1340                         else
1341                                 g_string_append_len(dest, p, q - p);
1342                 }
1343
1344                 purple_notify_user_info_add_pair(user_info, display_name, dest->str);
1345                 g_string_free(dest, TRUE);
1346
1347                 return TRUE;
1348         }
1349
1350         return FALSE;
1351 }
1352
1353 struct purple_parse_tag {
1354         char *src_tag;
1355         char *dest_tag;
1356         gboolean ignore;
1357 };
1358
/*
 * ALLOW_TAG_ALT(x, y): accept the opening tag x at the current parse
 * position and write it out as tag y.
 *
 * x and y must be string literals (they are pasted onto other literals).
 * The macro expects these names in the enclosing scope:
 *   c     - const char * at the current input position; advanced here
 *   xhtml - GString * receiving XHTML output, may be NULL
 *   plain - GString * receiving plain-text output, may be NULL
 *   tags  - GList * of struct purple_parse_tag, newest entry first
 *
 * Two spellings of the tag are handled:
 *
 *  1. "<x " followed by attributes.  The tag is scanned up to its '>'.
 *     Text outside quotes is copied as is; each quoted value is re-escaped
 *     with g_markup_escape_text().  If a '>' is reached without an unquoted
 *     '<' before it, "<y", the attributes and '>' are appended to xhtml and
 *     c moves past the '>'.  Otherwise the '<' at c is emitted as "&lt;" to
 *     xhtml and as '<' to plain, and c advances by one character.
 *
 *  2. "<x>" or "<x/>".  "<y>" or "<y/>" is appended to xhtml and c moves
 *     past the '>'.
 *
 * Unless the tag is self-closing ('/' directly before the '>'), a
 * purple_parse_tag entry {x, y} is prepended to tags.
 *
 * In both cases the macro ends with 'continue'.
 *
 * NOTE: Do not put `do {} while(0)` around this macro (as this is the method
 *       recommended in the GCC docs). It contains 'continue's that should
 *       affect the while-loop in purple_markup_html_to_xhtml and doing the
 *       above would break that.
 *       Also, remember to put braces in constructs that require them for
 *       multiple statements when using this macro.
 *
 * Inside a quoted value every character up to the matching quote is taken
 * literally; a backslash does not escape the quote.
 */
#define ALLOW_TAG_ALT(x, y) if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
                const char *o = c + strlen("<" x); \
                const char *p = NULL, *q = NULL, *r = NULL; \
                /* o = iterating over full tag \
                 * p = > (end of tag) \
                 * q = start of quoted bit \
                 * r = < inside tag \
                 */ \
                GString *innards = g_string_new(""); \
                while(o && *o) { \
                        if(!q && (*o == '\"' || *o == '\'') ) { \
                                q = o; \
                        } else if(q) { \
                                if(*o == *q) { /* end of quoted bit */ \
                                        char *unescaped = g_strndup(q+1, o-q-1); \
                                        char *escaped = g_markup_escape_text(unescaped, -1); \
                                        g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
                                        g_free(unescaped); \
                                        g_free(escaped); \
                                        q = NULL; \
                                } \
                        } else if(*o == '<') { \
                                r = o; \
                        } else if(*o == '>') { \
                                p = o; \
                                break; \
                        } else { \
                                innards = g_string_append_c(innards, *o); \
                        } \
                        o++; \
                } \
                if(p && !r) { /* got an end of tag and no other < earlier */\
                        if(*(p-1) != '/') { \
                                struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
                                pt->src_tag = x; \
                                pt->dest_tag = y; \
                                tags = g_list_prepend(tags, pt); \
                        } \
                        if(xhtml) { \
                                xhtml = g_string_append(xhtml, "<" y); \
                                xhtml = g_string_append(xhtml, innards->str); \
                                xhtml = g_string_append_c(xhtml, '>'); \
                        } \
                        c = p + 1; \
                } else { /* got end of tag with earlier < *or* didn't get anything */ \
                        if(xhtml) \
                                xhtml = g_string_append(xhtml, "&lt;"); \
                        if(plain) \
                                plain = g_string_append_c(plain, '<'); \
                        c++; \
                } \
                g_string_free(innards, TRUE); \
                continue; \
        } \
        if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
                        (*(c+strlen("<" x)) == '>' || \
                         !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
                if(xhtml) \
                        xhtml = g_string_append(xhtml, "<" y); \
                c += strlen("<" x); \
                if(*c != '/') { \
                        struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
                        pt->src_tag = x; \
                        pt->dest_tag = y; \
                        tags = g_list_prepend(tags, pt); \
                        if(xhtml) \
                                xhtml = g_string_append_c(xhtml, '>'); \
                } else { \
                        if(xhtml) \
                                xhtml = g_string_append(xhtml, "/>");\
                } \
                c = strchr(c, '>') + 1; \
                continue; \
        }
/* ALLOW_TAG(x): accept tag x and emit it under the same name.  The NOTE on
 * ALLOW_TAG_ALT about 'continue' and braces applies here as well. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
1443 void
1444 purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
1445                                                   char **plain_out)
1446 {
1447         GString *xhtml = NULL;
1448         GString *plain = NULL;
1449         GString *url = NULL;
1450         GString *cdata = NULL;
1451         GList *tags = NULL, *tag;
1452         const char *c = html;
1453         char quote = '\0';
1454
1455 #define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
1456                         quote = *(ptr++); \
1457                 else \
1458                         quote = '\0';
1459
1460 #define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))
1461
1462         g_return_if_fail(xhtml_out != NULL || plain_out != NULL);
1463
1464         if(xhtml_out)
1465                 xhtml = g_string_new("");
1466         if(plain_out)
1467                 plain = g_string_new("");
1468
1469         while(c && *c) {
1470                 if(*c == '<') {
1471                         if(*(c+1) == '/') { /* closing tag */
1472                                 tag = tags;
1473                                 while(tag) {
1474                                         struct purple_parse_tag *pt = tag->data;
1475                                         if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
1476                                                 c += strlen(pt->src_tag) + 3;
1477                                                 break;
1478                                         }
1479                                         tag = tag->next;
1480                                 }
1481                                 if(tag) {
1482                                         while(tags) {
1483                                                 struct purple_parse_tag *pt = tags->data;
1484                                                 if(xhtml && !pt->ignore)
1485                                                         g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
1486                                                 if(plain && purple_strequal(pt->src_tag, "a")) {
1487                                                         /* if this is a link, we have to add the url to the plaintext, too */
1488                                                         if (cdata && url &&
1489                                                                         (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
1490                                                                                                          g_utf8_collate(url->str + 7, cdata->str) != 0)))
1491                                                                 g_string_append_printf(plain, " <%s>", g_strstrip(url->str));
1492                                                         if (cdata) {
1493                                                                 g_string_free(cdata, TRUE);
1494                                                                 cdata = NULL;
1495                                                         }
1496
1497                                                 }
1498                                                 if(tags == tag)
1499                                                         break;
1500                                                 tags = g_list_remove(tags, pt);
1501                                                 g_free(pt);
1502                                         }
1503                                         g_free(tag->data);
1504                                         tags = g_list_remove(tags, tag->data);
1505                                 } else {
1506                                         /* a closing tag we weren't expecting...
1507                                          * we'll let it slide, if it's really a tag...if it's
1508                                          * just a </ we'll escape it properly */
1509                                         const char *end = c+2;
1510                                         while(*end && g_ascii_isalpha(*end))
1511                                                 end++;
1512                                         if(*end == '>') {
1513                                                 c = end+1;
1514                                         } else {
1515                                                 if(xhtml)
1516                                                         xhtml = g_string_append(xhtml, "&lt;");
1517                                                 if(plain)
1518                                                         plain = g_string_append_c(plain, '<');
1519                                                 c++;
1520                                         }
1521                                 }
1522                         } else { /* opening tag */
1523                                 ALLOW_TAG("blockquote");
1524                                 ALLOW_TAG("cite");
1525                                 ALLOW_TAG("div");
1526                                 ALLOW_TAG("em");
1527                                 ALLOW_TAG("h1");
1528                                 ALLOW_TAG("h2");
1529                                 ALLOW_TAG("h3");
1530                                 ALLOW_TAG("h4");
1531                                 ALLOW_TAG("h5");
1532                                 ALLOW_TAG("h6");
1533                                 /* we only allow html to start the message */
1534                                 if(c == html) {
1535                                         ALLOW_TAG("html");
1536                                 }
1537                                 ALLOW_TAG_ALT("i", "em");
1538                                 ALLOW_TAG_ALT("italic", "em");
1539                                 ALLOW_TAG("li");
1540                                 ALLOW_TAG("ol");
1541                                 ALLOW_TAG("p");
1542                                 ALLOW_TAG("pre");
1543                                 ALLOW_TAG("q");
1544                                 ALLOW_TAG("span");
1545                                 ALLOW_TAG("ul");
1546
1547
1548                                 /* we skip <HR> because it's not legal in XHTML-IM.  However,
1549                                  * we still want to send something sensible, so we put a
1550                                  * linebreak in its place. <BR> also needs special handling
1551                                  * because putting a </BR> to close it would just be dumb. */
1552                                 if((!g_ascii_strncasecmp(c, "<br", 3)
1553                                                         || !g_ascii_strncasecmp(c, "<hr", 3))
1554                                                 && (*(c+3) == '>' ||
1555                                                         !g_ascii_strncasecmp(c+3, "/>", 2) ||
1556                                                         !g_ascii_strncasecmp(c+3, " />", 3))) {
1557                                         c = strchr(c, '>') + 1;
1558                                         if(xhtml)
1559                                                 xhtml = g_string_append(xhtml, "<br/>");
1560                                         if(plain && *c != '\n')
1561                                                 plain = g_string_append_c(plain, '\n');
1562                                         continue;
1563                                 }
1564                                 if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
1565                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1566                                         if (*(c+2) == '>')
1567                                                 pt->src_tag = "b";
1568                                         else if (*(c+2) == 'o')
1569                                                 pt->src_tag = "bold";
1570                                         else
1571                                                 pt->src_tag = "strong";
1572                                         pt->dest_tag = "span";
1573                                         tags = g_list_prepend(tags, pt);
1574                                         c = strchr(c, '>') + 1;
1575                                         if(xhtml)
1576                                                 xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
1577                                         continue;
1578                                 }
1579                                 if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
1580                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1581                                         pt->src_tag = *(c+2) == '>' ? "u" : "underline";
1582                                         pt->dest_tag = "span";
1583                                         tags = g_list_prepend(tags, pt);
1584                                         c = strchr(c, '>') + 1;
1585                                         if (xhtml)
1586                                                 xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
1587                                         continue;
1588                                 }
1589                                 if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
1590                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1591                                         pt->src_tag = *(c+2) == '>' ? "s" : "strike";
1592                                         pt->dest_tag = "span";
1593                                         tags = g_list_prepend(tags, pt);
1594                                         c = strchr(c, '>') + 1;
1595                                         if(xhtml)
1596                                                 xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
1597                                         continue;
1598                                 }
1599                                 if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
1600                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1601                                         pt->src_tag = "sub";
1602                                         pt->dest_tag = "span";
1603                                         tags = g_list_prepend(tags, pt);
1604                                         c = strchr(c, '>') + 1;
1605                                         if(xhtml)
1606                                                 xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
1607                                         continue;
1608                                 }
1609                                 if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
1610                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1611                                         pt->src_tag = "sup";
1612                                         pt->dest_tag = "span";
1613                                         tags = g_list_prepend(tags, pt);
1614                                         c = strchr(c, '>') + 1;
1615                                         if(xhtml)
1616                                                 xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
1617                                         continue;
1618                                 }
1619                                 if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
1620                                         const char *p = c + 4;
1621                                         GString *src = NULL, *alt = NULL;
1622                                         while (*p && *p != '>') {
1623                                                 if (!g_ascii_strncasecmp(p, "src=", 4)) {
1624                                                         const char *q = p + 4;
1625                                                         if (src)
1626                                                                 g_string_free(src, TRUE);
1627                                                         src = g_string_new("");
1628                                                         CHECK_QUOTE(q);
1629                                                         while (VALID_CHAR(q)) {
1630                                                                 src = g_string_append_c(src, *q);
1631                                                                 q++;
1632                                                         }
1633                                                         p = q;
1634                                                 } else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
1635                                                         const char *q = p + 4;
1636                                                         if (alt)
1637                                                                 g_string_free(alt, TRUE);
1638                                                         alt = g_string_new("");
1639                                                         CHECK_QUOTE(q);
1640                                                         while (VALID_CHAR(q)) {
1641                                                                 alt = g_string_append_c(alt, *q);
1642                                                                 q++;
1643                                                         }
1644                                                         p = q;
1645                                                 } else {
1646                                                         p++;
1647                                                 }
1648                                         }
1649                                         if ((c = strchr(p, '>')) != NULL)
1650                                                 c++;
1651                                         else
1652                                                 c = p;
1653                                         /* src and alt are required! */
1654                                         if(src && xhtml)
1655                                                 g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
1656                                         if(alt) {
1657                                                 if(plain)
1658                                                         plain = g_string_append(plain, alt->str);
1659                                                 if(!src && xhtml)
1660                                                         xhtml = g_string_append(xhtml, alt->str);
1661                                                 g_string_free(alt, TRUE);
1662                                         }
1663                                         g_string_free(src, TRUE);
1664                                         continue;
1665                                 }
1666                                 if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
1667                                         const char *p = c + 2;
1668                                         struct purple_parse_tag *pt;
1669                                         while (*p && *p != '>') {
1670                                                 if (!g_ascii_strncasecmp(p, "href=", 5)) {
1671                                                         const char *q = p + 5;
1672                                                         if (url)
1673                                                                 g_string_free(url, TRUE);
1674                                                         url = g_string_new("");
1675                                                         if (cdata)
1676                                                                 g_string_free(cdata, TRUE);
1677                                                         cdata = g_string_new("");
1678                                                         CHECK_QUOTE(q);
1679                                                         while (VALID_CHAR(q)) {
1680                                                                 int len;
1681                                                                 if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
1682                                                                         url = g_string_append(url, "&amp;");
1683                                                                 else
1684                                                                         url = g_string_append_c(url, *q);
1685                                                                 q++;
1686                                                         }
1687                                                         p = q;
1688                                                 } else {
1689                                                         p++;
1690                                                 }
1691                                         }
1692                                         if ((c = strchr(p, '>')) != NULL)
1693                                                 c++;
1694                                         else
1695                                                 c = p;
1696                                         pt = g_new0(struct purple_parse_tag, 1);
1697                                         pt->src_tag = "a";
1698                                         pt->dest_tag = "a";
1699                                         tags = g_list_prepend(tags, pt);
1700                                         if(xhtml)
1701                                                 g_string_append_printf(xhtml, "<a href=\"%s\">", url ? g_strstrip(url->str) : "");
1702                                         continue;
1703                                 }
1704                                 if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
1705                                         const char *p = c + 5;
1706                                         GString *style = g_string_new("");
1707                                         struct purple_parse_tag *pt;
1708                                         while (*p && *p != '>') {
1709                                                 if (!g_ascii_strncasecmp(p, "back=", 5)) {
1710                                                         const char *q = p + 5;
1711                                                         GString *color = g_string_new("");
1712                                                         CHECK_QUOTE(q);
1713                                                         while (VALID_CHAR(q)) {
1714                                                                 color = g_string_append_c(color, *q);
1715                                                                 q++;
1716                                                         }
1717                                                         g_string_append_printf(style, "background: %s; ", color->str);
1718                                                         g_string_free(color, TRUE);
1719                                                         p = q;
1720                                                 } else if (!g_ascii_strncasecmp(p, "color=", 6)) {
1721                                                         const char *q = p + 6;
1722                                                         GString *color = g_string_new("");
1723                                                         CHECK_QUOTE(q);
1724                                                         while (VALID_CHAR(q)) {
1725                                                                 color = g_string_append_c(color, *q);
1726                                                                 q++;
1727                                                         }
1728                                                         g_string_append_printf(style, "color: %s; ", color->str);
1729                                                         g_string_free(color, TRUE);
1730                                                         p = q;
1731                                                 } else if (!g_ascii_strncasecmp(p, "face=", 5)) {
1732                                                         const char *q = p + 5;
1733                                                         GString *face = g_string_new("");
1734                                                         CHECK_QUOTE(q);
1735                                                         while (VALID_CHAR(q)) {
1736                                                                 face = g_string_append_c(face, *q);
1737                                                                 q++;
1738                                                         }
1739                                                         g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
1740                                                         g_string_free(face, TRUE);
1741                                                         p = q;
1742                                                 } else if (!g_ascii_strncasecmp(p, "size=", 5)) {
1743                                                         const char *q = p + 5;
1744                                                         int sz;
1745                                                         const char *size = "medium";
1746                                                         CHECK_QUOTE(q);
1747                                                         sz = atoi(q);
1748                                                         switch (sz)
1749                                                         {
1750                                                         case 1:
1751                                                           size = "xx-small";
1752                                                           break;
1753                                                         case 2:
1754                                                           size = "small";
1755                                                           break;
1756                                                         case 3:
1757                                                           size = "medium";
1758                                                           break;
1759                                                         case 4:
1760                                                           size = "large";
1761                                                           break;
1762                                                         case 5:
1763                                                           size = "x-large";
1764                                                           break;
1765                                                         case 6:
1766                                                         case 7:
1767                                                           size = "xx-large";
1768                                                           break;
1769                                                         default:
1770                                                           break;
1771                                                         }
1772                                                         g_string_append_printf(style, "font-size: %s; ", size);
1773                                                         p = q;
1774                                                 } else {
1775                                                         p++;
1776                                                 }
1777                                         }
1778                                         if ((c = strchr(p, '>')) != NULL)
1779                                                 c++;
1780                                         else
1781                                                 c = p;
1782                                         pt = g_new0(struct purple_parse_tag, 1);
1783                                         pt->src_tag = "font";
1784                                         pt->dest_tag = "span";
1785                                         tags = g_list_prepend(tags, pt);
1786                                         if(style->len && xhtml)
1787                                                 g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
1788                                         else
1789                                                 pt->ignore = TRUE;
1790                                         g_string_free(style, TRUE);
1791                                         continue;
1792                                 }
1793                                 if (!g_ascii_strncasecmp(c, "<body ", 6)) {
1794                                         const char *p = c + 6;
1795                                         gboolean did_something = FALSE;
1796                                         while (*p && *p != '>') {
1797                                                 if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
1798                                                         const char *q = p + 8;
1799                                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1800                                                         GString *color = g_string_new("");
1801                                                         CHECK_QUOTE(q);
1802                                                         while (VALID_CHAR(q)) {
1803                                                                 color = g_string_append_c(color, *q);
1804                                                                 q++;
1805                                                         }
1806                                                         if (xhtml)
1807                                                                 g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
1808                                                         g_string_free(color, TRUE);
1809                                                         if ((c = strchr(p, '>')) != NULL)
1810                                                                 c++;
1811                                                         else
1812                                                                 c = p;
1813                                                         pt->src_tag = "body";
1814                                                         pt->dest_tag = "span";
1815                                                         tags = g_list_prepend(tags, pt);
1816                                                         did_something = TRUE;
1817                                                         break;
1818                                                 }
1819                                                 p++;
1820                                         }
1821                                         if (did_something) continue;
1822                                 }
1823                                 /* this has to come after the special case for bgcolor */
1824                                 ALLOW_TAG("body");
1825                                 if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
1826                                         char *p = strstr(c + strlen("<!--"), "-->");
1827                                         if(p) {
1828                                                 if(xhtml)
1829                                                         xhtml = g_string_append(xhtml, "<!--");
1830                                                 c += strlen("<!--");
1831                                                 continue;
1832                                         }
1833                                 }
1834
1835                                 if(xhtml)
1836                                         xhtml = g_string_append(xhtml, "&lt;");
1837                                 if(plain)
1838                                         plain = g_string_append_c(plain, '<');
1839                                 c++;
1840                         }
1841                 } else if(*c == '&') {
1842                         char buf[7];
1843                         const char *pln;
1844                         int len;
1845
1846                         if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
1847                                 len = 1;
1848                                 g_snprintf(buf, sizeof(buf), "%c", *c);
1849                                 pln = buf;
1850                         }
1851                         if(xhtml)
1852                                 xhtml = g_string_append_len(xhtml, c, len);
1853                         if(plain)
1854                                 plain = g_string_append(plain, pln);
1855                         if(cdata)
1856                                 cdata = g_string_append_len(cdata, c, len);
1857                         c += len;
1858                 } else {
1859                         if(xhtml)
1860                                 xhtml = g_string_append_c(xhtml, *c);
1861                         if(plain)
1862                                 plain = g_string_append_c(plain, *c);
1863                         if(cdata)
1864                                 cdata = g_string_append_c(cdata, *c);
1865                         c++;
1866                 }
1867         }
1868         if(xhtml) {
1869                 for (tag = tags; tag ; tag = tag->next) {
1870                         struct purple_parse_tag *pt = tag->data;
1871                         if(!pt->ignore)
1872                                 g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
1873                 }
1874         }
1875         g_list_free(tags);
1876         if(xhtml_out)
1877                 *xhtml_out = g_string_free(xhtml, FALSE);
1878         if(plain_out)
1879                 *plain_out = g_string_free(plain, FALSE);
1880         if(url)
1881                 g_string_free(url, TRUE);
1882         if (cdata)
1883                 g_string_free(cdata, TRUE);
1884 #undef CHECK_QUOTE
1885 #undef VALID_CHAR
1886 }
1887
1888 /* The following are probably reasonable changes for purple_markup_strip_html():
1889  * - A literal \n in the input should be converted to a normal space.
1890  * - In addition to <br>, tags such as <p> and <div> etc. should also be converted into \n.
1891  * - A </td> and <td> separated only by whitespace should be turned into a single tab.
1892  * - A </tr> and <tr> separated only by whitespace should be turned into a single \n.
1893  * - <script>...</script> and <style>...</style> should be completely removed.
1894  */
1895
1896 char *
1897 purple_markup_strip_html(const char *str)
1898 {
1899         int i, j, k, entlen;
1900         gboolean visible = TRUE;
1901         gboolean closing_td_p = FALSE;
1902         gchar *str2;
1903         const gchar *cdata_close_tag = NULL, *ent;
1904         gchar *href = NULL;
1905         int href_st = 0;
1906
1907         if(!str)
1908                 return NULL;
1909
1910         str2 = g_strdup(str);
1911
1912         for (i = 0, j = 0; str2[i]; i++)
1913         {
1914                 if (str2[i] == '<')
1915                 {
1916                         if (cdata_close_tag)
1917                         {
1918                                 /* Note: Don't even assume any other tag is a tag in CDATA */
1919                                 if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
1920                                                 strlen(cdata_close_tag)) == 0)
1921                                 {
1922                                         i += strlen(cdata_close_tag) - 1;
1923                                         cdata_close_tag = NULL;
1924                                 }
1925                                 continue;
1926                         }
1927                         else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
1928                         {
1929                                 str2[j++] = '\t';
1930                                 visible = TRUE;
1931                         }
1932                         else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
1933                         {
1934                                 closing_td_p = TRUE;
1935                                 visible = FALSE;
1936                         }
1937                         else
1938                         {
1939                                 closing_td_p = FALSE;
1940                                 visible = TRUE;
1941                         }
1942
1943                         k = i + 1;
1944
1945                         if(g_ascii_isspace(str2[k]))
1946                                 visible = TRUE;
1947                         else if (str2[k])
1948                         {
1949                                 /* Scan until we end the tag either implicitly (closed start
1950                                  * tag) or explicitly, using a sloppy method (i.e., < or >
1951                                  * inside quoted attributes will screw us up)
1952                                  */
1953                                 while (str2[k] && str2[k] != '<' && str2[k] != '>')
1954                                 {
1955                                         k++;
1956                                 }
1957
1958                                 /* If we've got an <a> tag with an href, save the address
1959                                  * to print later. */
1960                                 if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
1961                                     g_ascii_isspace(str2[i+2]))
1962                                 {
1963                                         int st; /* start of href, inclusive [ */
1964                                         int end; /* end of href, exclusive ) */
1965                                         char delim = ' ';
1966                                         /* Find start of href */
1967                                         for (st = i + 3; st < k; st++)
1968                                         {
1969                                                 if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
1970                                                 {
1971                                                         st += 5;
1972                                                         if (str2[st] == '"' || str2[st] == '\'')
1973                                                         {
1974                                                                 delim = str2[st];
1975                                                                 st++;
1976                                                         }
1977                                                         break;
1978                                                 }
1979                                         }
1980                                         /* find end of address */
1981                                         for (end = st; end < k && str2[end] != delim; end++)
1982                                         {
1983                                                 /* All the work is done in the loop construct above. */
1984                                         }
1985
1986                                         /* If there's an address, save it.  If there was
1987                                          * already one saved, kill it. */
1988                                         if (st < k)
1989                                         {
1990                                                 char *tmp;
1991                                                 g_free(href);
1992                                                 tmp = g_strndup(str2 + st, end - st);
1993                                                 href = purple_unescape_html(tmp);
1994                                                 g_free(tmp);
1995                                                 href_st = j;
1996                                         }
1997                                 }
1998
1999                                 /* Replace </a> with an ascii representation of the
2000                                  * address the link was pointing to. */
2001                                 else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
2002                                 {
2003                                         size_t hrlen = strlen(href);
2004
2005                                         /* Only insert the href if it's different from the CDATA. */
2006                                         if ((hrlen != (gsize)(j - href_st) ||
2007                                              strncmp(str2 + href_st, href, hrlen)) &&
2008                                             (hrlen != (gsize)(j - href_st + 7) || /* 7 == strlen("http://") */
2009                                              strncmp(str2 + href_st, href + 7, hrlen - 7)))
2010                                         {
2011                                                 str2[j++] = ' ';
2012                                                 str2[j++] = '(';
2013                                                 g_memmove(str2 + j, href, hrlen);
2014                                                 j += hrlen;
2015                                                 str2[j++] = ')';
2016                                                 g_free(href);
2017                                                 href = NULL;
2018                                         }
2019                                 }
2020
2021                                 /* Check for tags which should be mapped to newline (but ignore some of
2022                                  * the tags at the beginning of the text) */
2023                                 else if ((j && (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
2024                                               || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
2025                                               || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
2026                                               || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
2027                                               || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0))
2028                                  || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
2029                                  || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
2030                                 {
2031                                         str2[j++] = '\n';
2032                                 }
2033                                 /* Check for tags which begin CDATA and need to be closed */
2034 #if 0 /* FIXME.. option is end tag optional, we can't handle this right now */
2035                                 else if (g_ascii_strncasecmp(str2 + i, "<option", 7) == 0)
2036                                 {
2037                                         /* FIXME: We should not do this if the OPTION is SELECT'd */
2038                                         cdata_close_tag = "</option>";
2039                                 }
2040 #endif
2041                                 else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
2042                                 {
2043                                         cdata_close_tag = "</script>";
2044                                 }
2045                                 else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
2046                                 {
2047                                         cdata_close_tag = "</style>";
2048                                 }
2049                                 /* Update the index and continue checking after the tag */
2050                                 i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
2051                                 continue;
2052                         }
2053                 }
2054                 else if (cdata_close_tag)
2055                 {
2056                         continue;
2057                 }
2058                 else if (!g_ascii_isspace(str2[i]))
2059                 {
2060                         visible = TRUE;
2061                 }
2062
2063                 if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
2064                 {
2065                         while (*ent)
2066                                 str2[j++] = *ent++;
2067                         i += entlen - 1;
2068                         continue;
2069                 }
2070
2071                 if (visible)
2072                         str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
2073         }
2074
2075         g_free(href);
2076
2077         str2[j] = '\0';
2078
2079         return str2;
2080 }
2081
2082 static gboolean
2083 badchar(char c)
2084 {
2085         switch (c) {
2086         case ' ':
2087         case ',':
2088         case '\0':
2089         case '\n':
2090         case '\r':
2091         case '<':
2092         case '>':
2093         case '"':
2094                 return TRUE;
2095         default:
2096                 return FALSE;
2097         }
2098 }
2099
2100 static gboolean
2101 badentity(const char *c)
2102 {
2103         if (!g_ascii_strncasecmp(c, "&lt;", 4) ||
2104                 !g_ascii_strncasecmp(c, "&gt;", 4) ||
2105                 !g_ascii_strncasecmp(c, "&quot;", 6)) {
2106                 return TRUE;
2107         }
2108         return FALSE;
2109 }
2110
2111 static const char *
2112 process_link(GString *ret,
2113                 const char *start, const char *c,
2114                 int matchlen,
2115                 const char *urlprefix,
2116                 int inside_paren)
2117 {
2118         char *url_buf, *tmpurlbuf;
2119         const char *t;
2120
2121         for (t = c;; t++) {
2122                 if (!badchar(*t) && !badentity(t))
2123                         continue;
2124
2125                 if (t - c == matchlen)
2126                         break;
2127
2128                 if (*t == ',' && *(t + 1) != ' ') {
2129                         continue;
2130                 }
2131
2132                 if (t > start && *(t - 1) == '.')
2133                         t--;
2134                 if (t > start && *(t - 1) == ')' && inside_paren > 0)
2135                         t--;
2136
2137                 url_buf = g_strndup(c, t - c);
2138                 tmpurlbuf = purple_unescape_html(url_buf);
2139                 g_string_append_printf(ret, "<A HREF=\"%s%s\">%s</A>",
2140                                 urlprefix,
2141                                 tmpurlbuf, url_buf);
2142                 g_free(tmpurlbuf);
2143                 g_free(url_buf);
2144                 return t;
2145         }
2146
2147         return c;
2148 }
2149
/*
 * Returns a newly allocated copy of text in which recognised URLs and e-mail
 * addresses are wrapped in <A HREF="...">...</A> markup.
 *
 * Recognised forms are the http://, https://, ftp://, sftp:// and file://
 * schemes, the bare prefixes "www." and "ftp." (given an http:// or ftp://
 * target respectively), "xmpp:" and "mailto:" URIs, and bare addresses
 * detected around an '@'.  Characters inside HTML tags are copied through
 * unchanged, and an existing <A ...> element is copied verbatim up to its
 * "/A>".
 *
 * Returns NULL when text is NULL; otherwise the result is the buffer
 * released by g_string_free(ret, FALSE), which the caller must g_free().
 */
char *
purple_markup_linkify(const char *text)
{
        const char *c, *t, *q = NULL;
        char *tmpurlbuf, *url_buf;
        gunichar g;
        gboolean inside_html = FALSE;
        int inside_paren = 0;
        GString *ret;

        if (text == NULL)
                return NULL;

        ret = g_string_new("");

        c = text;
        while (*c) {

                /* Count parentheses opened outside of tags; process_link() uses
                 * the count to decide whether a trailing ')' belongs to a URL. */
                if(*c == '(' && !inside_html) {
                        inside_paren++;
                        ret = g_string_append_c(ret, *c);
                        c++;
                }

                if(inside_html) {
                        /* Inside a tag: '>' closes it (this is tested before the
                         * quote state), otherwise q remembers the opening quote
                         * until the same quote character is seen again. */
                        if(*c == '>') {
                                inside_html = FALSE;
                        } else if(!q && (*c == '\"' || *c == '\'')) {
                                q = c;
                        } else if(q) {
                                if(*c == *q)
                                        q = NULL;
                        }
                } else if(*c == '<') {
                        inside_html = TRUE;
                        if (!g_ascii_strncasecmp(c, "<A", 2)) {
                                /* Copy everything up to "/A>" (or the end of the
                                 * string) verbatim so its contents are not
                                 * linkified a second time. */
                                while (1) {
                                        if (!g_ascii_strncasecmp(c, "/A>", 3)) {
                                                inside_html = FALSE;
                                                break;
                                        }
                                        ret = g_string_append_c(ret, *c);
                                        c++;
                                        if (!(*c))
                                                break;
                                }
                        }
                } else if (!g_ascii_strncasecmp(c, "http://", 7)) {
                        c = process_link(ret, text, c, 7, "", inside_paren);
                } else if (!g_ascii_strncasecmp(c, "https://", 8)) {
                        c = process_link(ret, text, c, 8, "", inside_paren);
                } else if (!g_ascii_strncasecmp(c, "ftp://", 6)) {
                        c = process_link(ret, text, c, 6, "", inside_paren);
                } else if (!g_ascii_strncasecmp(c, "sftp://", 7)) {
                        c = process_link(ret, text, c, 7, "", inside_paren);
                } else if (!g_ascii_strncasecmp(c, "file://", 7)) {
                        c = process_link(ret, text, c, 7, "", inside_paren);
                /* The scheme-less prefixes and "xmpp:" are only accepted at the
                 * start of the text or when badchar(c[-1]) or badentity(c-1)
                 * holds; "www." and "ftp." additionally must not be followed by
                 * another '.'. */
                } else if (!g_ascii_strncasecmp(c, "www.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
                        c = process_link(ret, text, c, 4, "http://", inside_paren);
                } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && c[4] != '.' && (c == text || badchar(c[-1]) || badentity(c-1))) {
                        c = process_link(ret, text, c, 4, "ftp://", inside_paren);
                } else if (!g_ascii_strncasecmp(c, "xmpp:", 5) && (c == text || badchar(c[-1]) || badentity(c-1))) {
                        c = process_link(ret, text, c, 5, "", inside_paren);
                } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
                        /* Scan forward to the first terminator, then validate the
                         * address part (up to an optional '?') before linking. */
                        t = c;
                        while (1) {
                                if (badchar(*t) || badentity(t)) {
                                        char *d;
                                        /* Nothing follows "mailto:" - not a link. */
                                        if (t - c == 7) {
                                                break;
                                        }
                                        /* Leave a trailing period out of the link. */
                                        if (t > text && *(t - 1) == '.')
                                                t--;
                                        if ((d = strstr(c + 7, "?")) != NULL && d < t)
                                                url_buf = g_strndup(c + 7, d - c - 7);
                                        else
                                                url_buf = g_strndup(c + 7, t - c - 7);
                                        if (!purple_email_is_valid(url_buf)) {
                                                g_free(url_buf);
                                                break;
                                        }
                                        g_free(url_buf);
                                        url_buf = g_strndup(c, t - c);
                                        tmpurlbuf = purple_unescape_html(url_buf);
                                        g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
                                                          tmpurlbuf, url_buf);
                                        g_free(url_buf);
                                        g_free(tmpurlbuf);
                                        c = t;
                                        break;
                                }
                                t++;
                        }
                } else if (c != text && (*c == '@')) {
                        /* Possible bare e-mail address: collect the local part by
                         * walking backwards from the '@', then the domain by
                         * walking forwards.  flag stays 0 when the characters
                         * next to the '@' already rule an address out. */
                        int flag;
                        GString *gurl_buf = NULL;
                        const char illegal_chars[] = "!@#$%^&*()[]{}/|\\<>\":;\r\n \0";

                        if (strchr(illegal_chars,*(c - 1)) || strchr(illegal_chars, *(c + 1)))
                                flag = 0;
                        else {
                                flag = 1;
                                gurl_buf = g_string_new("");
                        }

                        t = c;
                        while (flag) {
                                /* iterate backwards grabbing the local part of an email address */
                                g = g_utf8_get_char(t);
                                if (badchar(*t) || (g >= 127) || (*t == '(') ||
                                        ((*t == ';') && ((t > (text+2) && (!g_ascii_strncasecmp(t - 3, "&lt;", 4) ||
                                                                       !g_ascii_strncasecmp(t - 3, "&gt;", 4))) ||
                                                     (t > (text+4) && (!g_ascii_strncasecmp(t - 5, "&quot;", 6)))))) {
                                        /* local part will already be part of ret, strip it out */
                                        ret = g_string_truncate(ret, ret->len - (c - t));
                                        ret = g_string_append_unichar(ret, g);
                                        break;
                                } else {
                                        g_string_prepend_unichar(gurl_buf, g);
                                        t = g_utf8_find_prev_char(text, t);
                                        /* Ran off the start of the text: everything
                                         * emitted so far is the local part. */
                                        if (t < text) {
                                                ret = g_string_assign(ret, "");
                                                break;
                                        }
                                }
                        }

                        t = g_utf8_find_next_char(c, NULL);

                        while (flag) {
                                /* iterate forwards grabbing the domain part of an email address */
                                g = g_utf8_get_char(t);
                                if (badchar(*t) || (g >= 127) || (*t == ')') || badentity(t)) {
                                        char *d;

                                        url_buf = g_string_free(gurl_buf, FALSE);

                                        /* strip off trailing periods */
                                        if (strlen(url_buf) > 0) {
                                                for (d = url_buf + strlen(url_buf) - 1; *d == '.'; d--, t--)
                                                        *d = '\0';
                                        }

                                        /* Link the candidate only when it validates;
                                         * otherwise put the text back unchanged. */
                                        tmpurlbuf = purple_unescape_html(url_buf);
                                        if (purple_email_is_valid(tmpurlbuf)) {
                                                g_string_append_printf(ret, "<A HREF=\"mailto:%s\">%s</A>",
                                                                tmpurlbuf, url_buf);
                                        } else {
                                                g_string_append(ret, url_buf);
                                        }
                                        g_free(url_buf);
                                        g_free(tmpurlbuf);
                                        c = t;

                                        break;
                                } else {
                                        g_string_append_unichar(gurl_buf, g);
                                        t = g_utf8_find_next_char(t, NULL);
                                }
                        }
                }

                /* Balance the parenthesis count kept at the top of the loop. */
                if(*c == ')' && !inside_html) {
                        inside_paren--;
                        ret = g_string_append_c(ret, *c);
                        c++;
                }

                /* A branch above may have consumed the rest of the text. */
                if (*c == 0)
                        break;

                /* Copy the current character through unchanged. */
                ret = g_string_append_c(ret, *c);
                c++;

        }
        return g_string_free(ret, FALSE);
}
2327
2328 char *purple_unescape_text(const char *in)
2329 {
2330     GString *ret;
2331     const char *c = in;
2332
2333     if (in == NULL)
2334         return NULL;
2335
2336     ret = g_string_new("");
2337     while (*c) {
2338         int len;
2339         const char *ent;
2340
2341         if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
2342             g_string_append(ret, ent);
2343             c += len;
2344         } else {
2345             g_string_append_c(ret, *c);
2346             c++;
2347         }
2348     }
2349
2350     return g_string_free(ret, FALSE);
2351 }
2352
2353 char *purple_unescape_html(const char *html)
2354 {
2355         GString *ret;
2356         const char *c = html;
2357
2358         if (html == NULL)
2359                 return NULL;
2360
2361         ret = g_string_new("");
2362         while (*c) {
2363                 int len;
2364                 const char *ent;
2365
2366                 if ((ent = purple_markup_unescape_entity(c, &len)) != NULL) {
2367                         g_string_append(ret, ent);
2368                         c += len;
2369                 } else if (!strncmp(c, "<br>", 4)) {
2370                         g_string_append_c(ret, '\n');
2371                         c += 4;
2372                 } else {
2373                         g_string_append_c(ret, *c);
2374                         c++;
2375                 }
2376         }
2377
2378         return g_string_free(ret, FALSE);
2379 }
2380
2381 char *
2382 purple_markup_slice(const char *str, guint x, guint y)
2383 {
2384         GString *ret;
2385         GQueue *q;
2386         guint z = 0;
2387         gboolean appended = FALSE;
2388         gunichar c;
2389         char *tag;
2390
2391         g_return_val_if_fail(str != NULL, NULL);
2392         g_return_val_if_fail(x <= y, NULL);
2393
2394         if (x == y)
2395                 return g_strdup("");
2396
2397         ret = g_string_new("");
2398         q = g_queue_new();
2399
2400         while (*str && (z < y)) {
2401                 c = g_utf8_get_char(str);
2402
2403                 if (c == '<') {
2404                         char *end = strchr(str, '>');
2405
2406                         if (!end) {
2407                                 g_string_free(ret, TRUE);
2408                                 while ((tag = g_queue_pop_head(q)))
2409                                         g_free(tag);
2410                                 g_queue_free(q);
2411                                 return NULL;
2412                         }
2413
2414                         if (!g_ascii_strncasecmp(str, "<img ", 5)) {
2415                                 z += strlen("[Image]");
2416                         } else if (!g_ascii_strncasecmp(str, "<br", 3)) {
2417                                 z += 1;
2418                         } else if (!g_ascii_strncasecmp(str, "<hr>", 4)) {
2419                                 z += strlen("\n---\n");
2420                         } else if (!g_ascii_strncasecmp(str, "</", 2)) {
2421                                 /* pop stack */
2422                                 char *tmp;
2423
2424                                 tmp = g_queue_pop_head(q);
2425                                 g_free(tmp);
2426                                 /* z += 0; */
2427                         } else {
2428                                 /* push it unto the stack */
2429                                 char *tmp;
2430
2431                                 tmp = g_strndup(str, end - str + 1);
2432                                 g_queue_push_head(q, tmp);
2433                                 /* z += 0; */
2434                         }
2435
2436                         if (z >= x) {
2437                                 g_string_append_len(ret, str, end - str + 1);
2438                         }
2439
2440                         str = end;
2441                 } else if (c == '&') {
2442                         char *end = strchr(str, ';');
2443                         if (!end) {
2444                                 g_string_free(ret, TRUE);
2445                                 while ((tag = g_queue_pop_head(q)))
2446                                         g_free(tag);
2447                                 g_queue_free(q);
2448
2449                                 return NULL;
2450                         }
2451
2452                         if (z >= x)
2453                                 g_string_append_len(ret, str, end - str + 1);
2454
2455                         z++;
2456                         str = end;
2457                 } else {
2458                         if (z == x && z > 0 && !appended) {
2459                                 GList *l = q->tail;
2460
2461                                 while (l) {
2462                                         tag = l->data;
2463                                         g_string_append(ret, tag);
2464                                         l = l->prev;
2465                                 }
2466                                 appended = TRUE;
2467                         }
2468
2469                         if (z >= x)
2470                                 g_string_append_unichar(ret, c);
2471                         z++;
2472                 }
2473
2474                 str = g_utf8_next_char(str);
2475         }
2476
2477         while ((tag = g_queue_pop_head(q))) {
2478                 char *name;
2479
2480                 name = purple_markup_get_tag_name(tag);
2481                 g_string_append_printf(ret, "</%s>", name);
2482                 g_free(name);
2483                 g_free(tag);
2484         }
2485
2486         g_queue_free(q);
2487         return g_string_free(ret, FALSE);
2488 }
2489
2490 char *
2491 purple_markup_get_tag_name(const char *tag)
2492 {
2493         int i;
2494         g_return_val_if_fail(tag != NULL, NULL);
2495         g_return_val_if_fail(*tag == '<', NULL);
2496
2497         for (i = 1; tag[i]; i++)
2498                 if (tag[i] == '>' || tag[i] == ' ' || tag[i] == '/')
2499                         break;
2500
2501         return g_strndup(tag+1, i-1);
2502 }
2503
2504 /**************************************************************************
2505  * Path/Filename Functions
2506  **************************************************************************/
/*
 * Returns the directory used as the user's home: wpurple_data_dir() on
 * Windows builds, g_get_home_dir() everywhere else.
 */
const char *
purple_home_dir(void)
{
#ifdef _WIN32
        return wpurple_data_dir();
#else
        return g_get_home_dir();
#endif
}
2516
2517 /* Returns the argument passed to -c IFF it was present, or ~/.purple. */
2518 const char *
2519 purple_user_dir(void)
2520 {
2521         if (custom_user_dir != NULL)
2522                 return custom_user_dir;
2523         else if (!user_dir)
2524                 user_dir = g_build_filename(purple_home_dir(), ".purple", NULL);
2525
2526         return user_dir;
2527 }
2528
2529 void purple_util_set_user_dir(const char *dir)
2530 {
2531         g_free(custom_user_dir);
2532
2533         if (dir != NULL && *dir)
2534                 custom_user_dir = g_strdup(dir);
2535         else
2536                 custom_user_dir = NULL;
2537 }
2538
/*
 * Creates the directory path, together with any missing parent
 * directories, using the permission bits in mode.  The result of
 * g_mkdir_with_parents() is returned unchanged.
 */
int purple_build_dir (const char *path, int mode)
{
        int status = g_mkdir_with_parents(path, mode);

        return status;
}
2543
2544 /*
2545  * This function is long and beautiful, like my--um, yeah.  Anyway,
2546  * it includes lots of error checking so as we don't overwrite
2547  * people's settings if there is a problem writing the new values.
2548  */
2549 gboolean
2550 purple_util_write_data_to_file(const char *filename, const char *data, gssize size)
2551 {
2552         const char *user_dir = purple_user_dir();
2553         gchar *filename_full;
2554         gboolean ret = FALSE;
2555
2556         g_return_val_if_fail(user_dir != NULL, FALSE);
2557
2558         purple_debug_info("util", "Writing file %s to directory %s\n",
2559                                         filename, user_dir);
2560
2561         /* Ensure the user directory exists */
2562         if (!g_file_test(user_dir, G_FILE_TEST_IS_DIR))
2563         {
2564                 if (g_mkdir(user_dir, S_IRUSR | S_IWUSR | S_IXUSR) == -1)
2565                 {
2566                         purple_debug_error("util", "Error creating directory %s: %s\n",
2567                                                          user_dir, g_strerror(errno));
2568                         return FALSE;
2569                 }
2570         }
2571
2572         filename_full = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", user_dir, filename);
2573
2574         ret = purple_util_write_data_to_file_absolute(filename_full, data, size);
2575
2576         g_free(filename_full);
2577         return ret;
2578 }
2579
2580 gboolean
2581 purple_util_write_data_to_file_absolute(const char *filename_full, const char *data, gssize size)
2582 {
2583         gchar *filename_temp;
2584         FILE *file;
2585         size_t real_size, byteswritten;
2586         struct stat st;
2587 #ifndef HAVE_FILENO
2588         int fd;
2589 #endif
2590
2591         purple_debug_info("util", "Writing file %s\n",
2592                                         filename_full);
2593
2594         g_return_val_if_fail((size >= -1), FALSE);
2595
2596         filename_temp = g_strdup_printf("%s.save", filename_full);
2597
2598         /* Remove an old temporary file, if one exists */
2599         if (g_file_test(filename_temp, G_FILE_TEST_EXISTS))
2600         {
2601                 if (g_unlink(filename_temp) == -1)
2602                 {
2603                         purple_debug_error("util", "Error removing old file "
2604                                            "%s: %s\n",
2605                                            filename_temp, g_strerror(errno));
2606                 }
2607         }
2608
2609         /* Open file */
2610         file = g_fopen(filename_temp, "wb");
2611         if (file == NULL)
2612         {
2613                 purple_debug_error("util", "Error opening file %s for "
2614                                    "writing: %s\n",
2615                                    filename_temp, g_strerror(errno));
2616                 g_free(filename_temp);
2617                 return FALSE;
2618         }
2619
2620         /* Write to file */
2621         real_size = (size == -1) ? strlen(data) : (size_t) size;
2622         byteswritten = fwrite(data, 1, real_size, file);
2623
2624 #ifdef HAVE_FILENO
2625 #ifndef _WIN32
2626         /* Set file permissions */
2627         if (fchmod(fileno(file), S_IRUSR | S_IWUSR) == -1) {
2628                 purple_debug_error("util", "Error setting permissions of "
2629                         "file %s: %s\n", filename_temp, g_strerror(errno));
2630         }
2631 #endif
2632
2633         /* Apparently XFS (and possibly other filesystems) do not
2634          * guarantee that file data is flushed before file metadata,
2635          * so this procedure is insufficient without some flushage. */
2636         if (fflush(file) < 0) {
2637                 purple_debug_error("util", "Error flushing %s: %s\n",
2638                                    filename_temp, g_strerror(errno));
2639                 g_free(filename_temp);
2640                 fclose(file);
2641                 return FALSE;
2642         }
2643         if (fsync(fileno(file)) < 0) {
2644                 purple_debug_error("util", "Error syncing file contents for %s: %s\n",
2645                                    filename_temp, g_strerror(errno));
2646                 g_free(filename_temp);
2647                 fclose(file);
2648                 return FALSE;
2649         }
2650 #endif
2651
2652         /* Close file */
2653         if (fclose(file) != 0)
2654         {
2655                 purple_debug_error("util", "Error closing file %s: %s\n",
2656                                    filename_temp, g_strerror(errno));
2657                 g_free(filename_temp);
2658                 return FALSE;
2659         }
2660
2661 #ifndef HAVE_FILENO
2662         /* This is the same effect (we hope) as the HAVE_FILENO block
2663          * above, but for systems without fileno(). */
2664         if ((fd = open(filename_temp, O_RDWR)) < 0) {
2665                 purple_debug_error("util", "Error opening file %s for flush: %s\n",
2666                                    filename_temp, g_strerror(errno));
2667                 g_free(filename_temp);
2668                 return FALSE;
2669         }
2670
2671 #ifndef _WIN32
2672         /* copy-pasta! */
2673         if (fchmod(fd, S_IRUSR | S_IWUSR) == -1) {
2674                 purple_debug_error("util", "Error setting permissions of "
2675                         "file %s: %s\n", filename_temp, g_strerror(errno));
2676         }
2677 #endif
2678
2679         if (fsync(fd) < 0) {
2680                 purple_debug_error("util", "Error syncing %s: %s\n",
2681                                    filename_temp, g_strerror(errno));
2682                 g_free(filename_temp);
2683                 close(fd);
2684                 return FALSE;
2685         }
2686         if (close(fd) < 0) {
2687                 purple_debug_error("util", "Error closing %s after sync: %s\n",
2688                                    filename_temp, g_strerror(errno));
2689                 g_free(filename_temp);
2690                 return FALSE;
2691         }
2692 #endif
2693
2694         /* Ensure the file is the correct size */
2695         if (byteswritten != real_size)
2696         {
2697                 purple_debug_error("util", "Error writing to file %s: Wrote %"
2698                                    G_GSIZE_FORMAT " bytes "
2699                                    "but should have written %" G_GSIZE_FORMAT
2700                                    "; is your disk full?\n",
2701                                    filename_temp, byteswritten, real_size);
2702                 g_free(filename_temp);
2703                 return FALSE;
2704         }
2705 #ifndef __COVERITY__
2706         /* Use stat to be absolutely sure.
2707          * It causes TOCTOU coverity warning (against g_rename below),
2708          * but it's not a threat for us.
2709          */
2710         if ((g_stat(filename_temp, &st) == -1) || ((gsize)st.st_size != real_size))
2711         {
2712                 purple_debug_error("util", "Error writing data to file %s: "
2713                                    "Incomplete file written; is your disk "
2714                                    "full?\n",
2715                                    filename_temp);
2716                 g_free(filename_temp);
2717                 return FALSE;
2718         }
2719 #endif /* __COVERITY__ */
2720
2721         /* Rename to the REAL name */
2722         if (g_rename(filename_temp, filename_full) == -1)
2723         {
2724                 purple_debug_error("util", "Error renaming %s to %s: %s\n",
2725                                    filename_temp, filename_full,
2726                                    g_strerror(errno));
2727         }
2728
2729         g_free(filename_temp);
2730
2731         return TRUE;
2732 }
2733
2734 xmlnode *
2735 purple_util_read_xml_from_file(const char *filename, const char *description)
2736 {
2737         return xmlnode_from_file(purple_user_dir(), filename, description, "util");
2738 }
2739
2740 /*
2741  * Like mkstemp() but returns a file pointer, uses a pre-set template,
2742  * uses the semantics of tempnam() for the directory to use and allocates
2743  * the space for the filepath.
2744  *
2745  * Caller is responsible for closing the file and removing it when done,
2746  * as well as freeing the space pointed-to by "path" with g_free().
2747  *
2748  * Returns NULL on failure and cleans up after itself if so.
2749  */
2750 static const char *purple_mkstemp_templ = {"purpleXXXXXX"};
2751
2752 FILE *
2753 purple_mkstemp(char **fpath, gboolean binary)
2754 {
2755         const gchar *tmpdir;
2756         int fd;
2757         FILE *fp = NULL;
2758
2759         g_return_val_if_fail(fpath != NULL, NULL);
2760
2761         if((tmpdir = (gchar*)g_get_tmp_dir()) != NULL) {
2762                 if((*fpath = g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", tmpdir, purple_mkstemp_templ)) != NULL) {
2763                         fd = g_mkstemp(*fpath);
2764                         if(fd == -1) {
2765                                 purple_debug(PURPLE_DEBUG_ERROR, "purple_mkstemp",
2766                                                    "Couldn't make \"%s\", error: %d\n",
2767                                                    *fpath, errno);
2768                         } else {
2769                                 if((fp = fdopen(fd, "r+")) == NULL) {
2770                                         close(fd);
2771                                         purple_debug(PURPLE_DEBUG_ERROR, "purple_mkstemp",
2772                                                            "Couldn't fdopen(), error: %d\n", errno);
2773                                 }
2774                         }
2775
2776                         if(!fp) {
2777                                 g_free(*fpath);
2778                                 *fpath = NULL;
2779                         }
2780                 }
2781         } else {
2782                 purple_debug(PURPLE_DEBUG_ERROR, "purple_mkstemp",
2783                                    "g_get_tmp_dir() failed!\n");
2784         }
2785
2786         return fp;
2787 }
2788
2789 const char *
2790 purple_util_get_image_extension(gconstpointer data, size_t len)
2791 {
2792         g_return_val_if_fail(data != NULL, NULL);
2793         g_return_val_if_fail(len   > 0,    NULL);
2794
2795         if (len >= 4)
2796         {
2797                 if (!strncmp((char *)data, "GIF8", 4))
2798                         return "gif";
2799                 else if (!strncmp((char *)data, "\xff\xd8\xff", 3)) /* 4th may be e0 through ef */
2800                         return "jpg";
2801                 else if (!strncmp((char *)data, "\x89PNG", 4))
2802                         return "png";
2803                 else if (!strncmp((char *)data, "MM", 2) ||
2804                                  !strncmp((char *)data, "II", 2))
2805                         return "tif";
2806                 else if (!strncmp((char *)data, "BM", 2))
2807                         return "bmp";
2808         }
2809
2810         return "icon";
2811 }
2812
2813 /*
2814  * We thought about using non-cryptographic hashes like CRC32 here.
2815  * They would be faster, but we think using something more secure is
2816  * important, so that it is more difficult for someone to maliciously
2817  * replace one buddy's icon with something else.
2818  */
2819 char *
2820 purple_util_get_image_checksum(gconstpointer image_data, size_t image_len)
2821 {
2822         PurpleCipherContext *context;
2823         gchar digest[41];
2824
2825         context = purple_cipher_context_new_by_name("sha1", NULL);
2826         if (context == NULL)
2827         {
2828                 purple_debug_error("util", "Could not find sha1 cipher\n");
2829                 g_return_val_if_reached(NULL);
2830         }
2831
2832         /* Hash the image data */
2833         purple_cipher_context_append(context, image_data, image_len);
2834         if (!purple_cipher_context_digest_to_str(context, sizeof(digest), digest, NULL))
2835         {
2836                 purple_debug_error("util", "Failed to get SHA-1 digest.\n");
2837                 g_return_val_if_reached(NULL);
2838         }
2839         purple_cipher_context_destroy(context);
2840
2841         return g_strdup(digest);
2842 }
2843
2844 char *
2845 purple_util_get_image_filename(gconstpointer image_data, size_t image_len)
2846 {
2847         /* Return the filename */
2848         char *checksum = purple_util_get_image_checksum(image_data, image_len);
2849         char *filename = g_strdup_printf("%s.%s", checksum,
2850                                purple_util_get_image_extension(image_data, image_len));
2851         g_free(checksum);
2852         return filename;
2853 }
2854
2855 gboolean
2856 purple_program_is_valid(const char *program)
2857 {
2858         GError *error = NULL;
2859         char **argv;
2860         gchar *progname;
2861         gboolean is_valid = FALSE;
2862
2863         g_return_val_if_fail(program != NULL,  FALSE);
2864         g_return_val_if_fail(*program != '\0', FALSE);
2865
2866         if (!g_shell_parse_argv(program, NULL, &argv, &error)) {
2867                 purple_debug(PURPLE_DEBUG_ERROR, "program_is_valid",
2868                                    "Could not parse program '%s': %s\n",
2869                                    program, error->message);
2870                 g_error_free(error);
2871                 return FALSE;
2872         }
2873
2874         if (argv == NULL) {
2875                 return FALSE;
2876         }
2877
2878         progname = g_find_program_in_path(argv[0]);
2879         is_valid = (progname != NULL);
2880
2881         if(purple_debug_is_verbose())
2882                 purple_debug_info("program_is_valid", "Tested program %s.  %s.\n", program,
2883                                 is_valid ? "Valid" : "Invalid");
2884
2885         g_strfreev(argv);
2886         g_free(progname);
2887
2888         return is_valid;
2889 }
2890
2891
2892 gboolean
2893 purple_running_gnome(void)
2894 {
2895 #ifndef _WIN32
2896         gchar *tmp = g_find_program_in_path("gnome-open");
2897
2898         if (tmp == NULL)
2899                 return FALSE;
2900         g_free(tmp);
2901
2902         tmp = (gchar *)g_getenv("GNOME_DESKTOP_SESSION_ID");
2903
2904         return ((tmp != NULL) && (*tmp != '\0'));
2905 #else
2906         return FALSE;
2907 #endif
2908 }
2909
2910 gboolean
2911 purple_running_kde(void)
2912 {
2913 #ifndef _WIN32
2914         gchar *tmp = g_find_program_in_path("kfmclient");
2915         const char *session;
2916
2917         if (tmp == NULL)
2918                 return FALSE;
2919         g_free(tmp);
2920
2921         session = g_getenv("KDE_FULL_SESSION");
2922         if (purple_strequal(session, "true"))
2923                 return TRUE;
2924
2925         /* If you run Purple from Konsole under !KDE, this will provide a
2926          * a false positive.  Since we do the GNOME checks first, this is
2927          * only a problem if you're running something !(KDE || GNOME) and
2928          * you run Purple from Konsole. This really shouldn't be a problem. */
2929         return ((g_getenv("KDEDIR") != NULL) || g_getenv("KDEDIRS") != NULL);
2930 #else
2931         return FALSE;
2932 #endif
2933 }
2934
2935 gboolean
2936 purple_running_osx(void)
2937 {
2938 #if defined(__APPLE__)
2939         return TRUE;
2940 #else
2941         return FALSE;
2942 #endif
2943 }
2944
/*
 * Overlay of the socket address structures used in this file, so that one
 * object can be handed to getsockname() as a generic sockaddr and then
 * read through the view matching its address family.
 */
typedef union purple_sockaddr {
        struct sockaddr         sa;       /* generic view, carries sa_family */
        struct sockaddr_in      sa_in;    /* AF_INET view */
#ifdef AF_INET6
        struct sockaddr_in6     sa_in6;   /* AF_INET6 view, when available */
#endif
        struct sockaddr_storage sa_stor;  /* sockaddr_storage view */
} PurpleSockaddr;
2953
2954 char *
2955 purple_fd_get_ip(int fd)
2956 {
2957         PurpleSockaddr addr;
2958         socklen_t namelen = sizeof(addr);
2959         int family;
2960
2961         g_return_val_if_fail(fd != 0, NULL);
2962
2963         if (getsockname(fd, &(addr.sa), &namelen))
2964                 return NULL;
2965
2966         family = addr.sa.sa_family;
2967
2968         if (family == AF_INET) {
2969                 return g_strdup(inet_ntoa(addr.sa_in.sin_addr));
2970         }
2971 #if defined(AF_INET6) && defined(HAVE_INET_NTOP)
2972         else if (family == AF_INET6) {
2973                 char host[INET6_ADDRSTRLEN];
2974                 const char *tmp;
2975
2976                 tmp = inet_ntop(family, &(addr.sa_in6.sin6_addr), host, sizeof(host));
2977                 return g_strdup(tmp);
2978         }
2979 #endif
2980
2981         return NULL;
2982 }
2983
2984 int
2985 purple_socket_get_family(int fd)
2986 {
2987         PurpleSockaddr addr;
2988         socklen_t len = sizeof(addr);
2989
2990         g_return_val_if_fail(fd >= 0, -1);
2991
2992         if (getsockname(fd, &(addr.sa), &len))
2993                 return -1;
2994
2995         return addr.sa.sa_family;
2996 }
2997
2998 gboolean
2999 purple_socket_speaks_ipv4(int fd)
3000 {
3001         int family;
3002
3003         g_return_val_if_fail(fd >= 0, FALSE);
3004
3005         family = purple_socket_get_family(fd);
3006
3007         switch (family) {
3008         case AF_INET:
3009                 return TRUE;
3010 #if defined(IPV6_V6ONLY)
3011         case AF_INET6:
3012         {
3013                 int val = 0;
3014                 guint len = sizeof(val);
3015
3016                 if (getsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &val, &len) != 0)
3017                         return FALSE;
3018                 return !val;
3019         }
3020 #endif
3021         default:
3022                 return FALSE;
3023         }
3024 }
3025
3026 /**************************************************************************
3027  * String Functions
3028  **************************************************************************/
3029 gboolean
3030 purple_strequal(const gchar *left, const gchar *right)
3031 {
3032 #if GLIB_CHECK_VERSION(2,16,0)
3033         return (g_strcmp0(left, right) == 0);
3034 #else
3035         return ((left == NULL && right == NULL) ||
3036                 (left != NULL && right != NULL && strcmp(left, right) == 0));
3037 #endif
3038 }
3039
3040 const char *
3041 purple_normalize(const PurpleAccount *account, const char *str)
3042 {
3043         const char *ret = NULL;
3044         static char buf[BUF_LEN];
3045
3046         /* This should prevent a crash if purple_normalize gets called with NULL str, see #10115 */
3047         g_return_val_if_fail(str != NULL, "");
3048
3049         if (account != NULL)
3050         {
3051                 PurplePlugin *prpl = purple_find_prpl(purple_account_get_protocol_id(account));
3052
3053                 if (prpl != NULL)
3054                 {
3055                         PurplePluginProtocolInfo *prpl_info = PURPLE_PLUGIN_PROTOCOL_INFO(prpl);
3056
3057                         if (prpl_info->normalize)
3058                                 ret = prpl_info->normalize(account, str);
3059                 }
3060         }
3061
3062         if (ret == NULL)
3063         {
3064                 char *tmp;
3065
3066                 tmp = g_utf8_normalize(str, -1, G_NORMALIZE_DEFAULT);
3067                 g_snprintf(buf, sizeof(buf), "%s", tmp);
3068                 g_free(tmp);
3069
3070                 ret = buf;
3071         }
3072
3073         return ret;
3074 }
3075
3076 /*
3077  * You probably don't want to call this directly, it is
3078  * mainly for use as a PRPL callback function.  See the
3079  * comments in util.h.
3080  */
3081 const char *
3082 purple_normalize_nocase(const PurpleAccount *account, const char *str)
3083 {
3084         static char buf[BUF_LEN];
3085         char *tmp1, *tmp2;
3086
3087         g_return_val_if_fail(str != NULL, NULL);
3088
3089         tmp1 = g_utf8_strdown(str, -1);
3090         tmp2 = g_utf8_normalize(tmp1, -1, G_NORMALIZE_DEFAULT);
3091         g_snprintf(buf, sizeof(buf), "%s", tmp2 ? tmp2 : "");
3092         g_free(tmp2);
3093         g_free(tmp1);
3094
3095         return buf;
3096 }
3097
3098 gchar *
3099 purple_strdup_withhtml(const gchar *src)
3100 {
3101         gulong destsize, i, j;
3102         gchar *dest;
3103
3104         g_return_val_if_fail(src != NULL, NULL);
3105
3106         /* New length is (length of src) + (number of \n's * 3) - (number of \r's) + 1 */
3107         destsize = 1;
3108         for (i = 0; src[i] != '\0'; i++)
3109         {
3110                 if (src[i] == '\n')
3111                         destsize += 4;
3112                 else if (src[i] != '\r')
3113                         destsize++;
3114         }
3115
3116         dest = g_malloc(destsize);
3117
3118         /* Copy stuff, ignoring \r's, because they are dumb */
3119         for (i = 0, j = 0; src[i] != '\0'; i++) {
3120                 if (src[i] == '\n') {
3121                         strcpy(&dest[j], "<BR>");
3122                         j += 4;
3123                 } else if (src[i] != '\r')
3124                         dest[j++] = src[i];
3125         }
3126
3127         dest[destsize-1] = '\0';
3128
3129         return dest;
3130 }
3131
3132 gboolean
3133 purple_str_has_prefix(const char *s, const char *p)
3134 {
3135         return g_str_has_prefix(s, p);
3136 }
3137
3138 gboolean
3139 purple_str_has_suffix(const char *s, const char *x)
3140 {
3141         return g_str_has_suffix(s, x);
3142 }
3143
3144 char *
3145 purple_str_add_cr(const char *text)
3146 {
3147         char *ret = NULL;
3148         int count = 0, j;
3149         guint i;
3150
3151         g_return_val_if_fail(text != NULL, NULL);
3152
3153         if (text[0] == '\n')
3154                 count++;
3155         for (i = 1; i < strlen(text); i++)
3156                 if (text[i] == '\n' && text[i - 1] != '\r')
3157                         count++;
3158
3159         if (count == 0)
3160                 return g_strdup(text);
3161
3162         ret = g_malloc0(strlen(text) + count + 1);
3163
3164         i = 0; j = 0;
3165         if (text[i] == '\n')
3166                 ret[j++] = '\r';
3167         ret[j++] = text[i++];
3168         for (; i < strlen(text); i++) {
3169                 if (text[i] == '\n' && text[i - 1] != '\r')
3170                         ret[j++] = '\r';
3171                 ret[j++] = text[i];
3172         }
3173
3174         return ret;
3175 }
3176
3177 void
3178 purple_str_strip_char(char *text, char thechar)
3179 {
3180         int i, j;
3181
3182         g_return_if_fail(text != NULL);
3183
3184         for (i = 0, j = 0; text[i]; i++)
3185                 if (text[i] != thechar)
3186                         text[j++] = text[i];
3187
3188         text[j] = '\0';
3189 }
3190
3191 void
3192 purple_util_chrreplace(char *string, char delimiter,
3193                                          char replacement)
3194 {
3195         int i = 0;
3196
3197         g_return_if_fail(string != NULL);
3198
3199         while (string[i] != '\0')
3200         {
3201                 if (string[i] == delimiter)
3202                         string[i] = replacement;
3203                 i++;
3204         }
3205 }
3206
3207 gchar *
3208 purple_strreplace(const char *string, const char *delimiter,
3209                                 const char *replacement)
3210 {
3211         gchar **split;
3212         gchar *ret;
3213
3214         g_return_val_if_fail(string      != NULL, NULL);
3215         g_return_val_if_fail(delimiter   != NULL, NULL);
3216         g_return_val_if_fail(replacement != NULL, NULL);
3217
3218         split = g_strsplit(string, delimiter, 0);
3219         ret = g_strjoinv(replacement, split);
3220         g_strfreev(split);
3221
3222         return ret;
3223 }
3224
3225 gchar *
3226 purple_strcasereplace(const char *string, const char *delimiter,
3227                                         const char *replacement)
3228 {
3229         gchar *ret;
3230         int length_del, length_rep, i, j;
3231
3232         g_return_val_if_fail(string      != NULL, NULL);
3233         g_return_val_if_fail(delimiter   != NULL, NULL);
3234         g_return_val_if_fail(replacement != NULL, NULL);
3235
3236         length_del = strlen(delimiter);
3237         length_rep = strlen(replacement);
3238
3239         /* Count how many times the delimiter appears */
3240         i = 0; /* position in the source string */
3241         j = 0; /* number of occurrences of "delimiter" */
3242         while (string[i] != '\0') {
3243                 if (!g_ascii_strncasecmp(&string[i], delimiter, length_del)) {
3244                         i += length_del;
3245                         j += length_rep;
3246                 } else {
3247                         i++;
3248                         j++;
3249                 }
3250         }
3251
3252         ret = g_malloc(j+1);
3253
3254         i = 0; /* position in the source string */
3255         j = 0; /* position in the destination string */
3256         while (string[i] != '\0') {
3257                 if (!g_ascii_strncasecmp(&string[i], delimiter, length_del)) {
3258                         strncpy(&ret[j], replacement, length_rep);
3259                         i += length_del;
3260                         j += length_rep;
3261                 } else {
3262                         ret[j] = string[i];
3263                         i++;
3264                         j++;
3265                 }
3266         }
3267
3268         ret[j] = '\0';
3269
3270         return ret;
3271 }
3272
3273 /** TODO: Expose this when we can add API */
3274 static const char *
3275 purple_strcasestr_len(const char *haystack, gssize hlen, const char *needle, gssize nlen)
3276 {
3277         const char *tmp, *ret;
3278
3279         g_return_val_if_fail(haystack != NULL, NULL);
3280         g_return_val_if_fail(needle != NULL, NULL);
3281
3282         if (hlen == -1)
3283                 hlen = strlen(haystack);
3284         if (nlen == -1)
3285                 nlen = strlen(needle);
3286         tmp = haystack,
3287         ret = NULL;
3288
3289         g_return_val_if_fail(hlen > 0, NULL);
3290         g_return_val_if_fail(nlen > 0, NULL);
3291
3292         while (*tmp && !ret && (hlen - (tmp - haystack)) >= nlen) {
3293                 if (!g_ascii_strncasecmp(needle, tmp, nlen))
3294                         ret = tmp;
3295                 else
3296                         tmp++;
3297         }
3298
3299         return ret;
3300 }
3301
/*
 * ASCII case-insensitive substring search over two NUL-terminated
 * strings; delegates to purple_strcasestr_len() with both lengths set to
 * -1 and returns its result.
 */
const char *
purple_strcasestr(const char *haystack, const char *needle)
{
        const char *match;

        match = purple_strcasestr_len(haystack, -1, needle, -1);

        return match;
}
3307
3308 char *
3309 purple_str_size_to_units(size_t size)
3310 {
3311         static const char * const size_str[] = { "bytes", "KiB", "MiB", "GiB" };
3312         float size_mag;
3313         int size_index = 0;
3314
3315         if (size == (size_t)-1) {
3316                 return g_strdup(_("Calculating..."));
3317         }
3318         else if (size == 0) {
3319                 return g_strdup(_("Unknown."));
3320         }
3321         else {
3322                 size_mag = (float)size;
3323
3324                 while ((size_index < 3) && (size_mag > 1024)) {
3325                         size_mag /= 1024;
3326                         size_index++;
3327                 }
3328
3329                 if (size_index == 0) {
3330                         return g_strdup_printf("%" G_GSIZE_FORMAT " %s", size, size_str[size_index]);
3331                 } else {
3332                         return g_strdup_printf("%.2f %s", size_mag, size_str[size_index]);
3333                 }
3334         }
3335 }
3336
3337 char *
3338 purple_str_seconds_to_string(guint secs)
3339 {
3340         char *ret = NULL;
3341         guint days, hrs, mins;
3342
3343         if (secs < 60)
3344         {
3345                 return g_strdup_printf(dngettext(PACKAGE, "%d second", "%d seconds", secs), secs);
3346         }
3347
3348         days = secs / (60 * 60 * 24);
3349         secs = secs % (60 * 60 * 24);
3350         hrs  = secs / (60 * 60);
3351         secs = secs % (60 * 60);
3352         mins = secs / 60;
3353         secs = secs % 60;
3354
3355         if (days > 0)
3356         {
3357                 ret = g_strdup_printf(dngettext(PACKAGE, "%d day", "%d days", days), days);
3358         }
3359
3360         if (hrs > 0)
3361         {
3362                 if (ret != NULL)
3363                 {
3364                         char *tmp = g_strdup_printf(
3365                                         dngettext(PACKAGE, "%s, %d hour", "%s, %d hours", hrs),
3366                                                         ret, hrs);
3367                         g_free(ret);
3368                         ret = tmp;
3369                 }
3370                 else
3371                         ret = g_strdup_printf(dngettext(PACKAGE, "%d hour", "%d hours", hrs), hrs);
3372         }
3373
3374         if (mins > 0)
3375         {
3376                 if (ret != NULL)
3377                 {
3378                         char *tmp = g_strdup_printf(
3379                                         dngettext(PACKAGE, "%s, %d minute", "%s, %d minutes", mins),
3380                                                         ret, mins);
3381                         g_free(ret);
3382                         ret = tmp;
3383                 }
3384                 else
3385                         ret = g_strdup_printf(dngettext(PACKAGE, "%d minute", "%d minutes", mins), mins);
3386         }
3387
3388         return ret;
3389 }
3390
3391
3392 char *
3393 purple_str_binary_to_ascii(const unsigned char *binary, guint len)
3394 {
3395         GString *ret;
3396         guint i;
3397
3398         g_return_val_if_fail(len > 0, NULL);
3399
3400         ret = g_string_sized_new(len);
3401
3402         for (i = 0; i < len; i++)
3403                 if (binary[i] < 32 || binary[i] > 126)
3404                         g_string_append_printf(ret, "\\x%02hhx", binary[i]);
3405                 else if (binary[i] == '\\')
3406                         g_string_append(ret, "\\\\");
3407                 else
3408                         g_string_append_c(ret, binary[i]);
3409
3410         return g_string_free(ret, FALSE);
3411 }
3412
3413 /**************************************************************************
3414  * URI/URL Functions
3415  **************************************************************************/
3416
3417 void purple_got_protocol_handler_uri(const char *uri)
3418 {
3419         char proto[11];
3420         char delimiter;
3421         const char *tmp, *param_string;
3422         char *cmd;
3423         GHashTable *params = NULL;
3424         int len;
3425         if (!(tmp = strchr(uri, ':')) || tmp == uri) {
3426                 purple_debug_error("util", "Malformed protocol handler message - missing protocol.\n");
3427                 return;
3428         }
3429
3430         len = MIN(sizeof(proto) - 1, (gsize)(tmp - uri));
3431
3432         strncpy(proto, uri, len);
3433         proto[len] = '\0';
3434
3435         tmp++;
3436
3437         if (purple_strequal(proto, "xmpp"))
3438                 delimiter = ';';
3439         else
3440                 delimiter = '&';
3441
3442         purple_debug_info("util", "Processing message '%s' for protocol '%s' using delimiter '%c'.\n", tmp, proto, delimiter);
3443
3444         if ((param_string = strchr(tmp, '?'))) {
3445                 const char *keyend = NULL, *pairstart;
3446                 char *key, *value = NULL;
3447
3448                 cmd = g_strndup(tmp, (param_string - tmp));
3449                 param_string++;
3450
3451                 params = g_hash_table_new_full(g_str_hash, g_str_equal, g_free, g_free);
3452                 pairstart = tmp = param_string;
3453
3454                 while (*tmp || *pairstart) {
3455                         if (*tmp == delimiter || !(*tmp)) {
3456                                 /* If there is no explicit value */
3457                                 if (keyend == NULL)
3458                                         keyend = tmp;
3459
3460                                 if (keyend && keyend != pairstart) {
3461                                         char *p;
3462                                         key = g_strndup(pairstart, (keyend - pairstart));
3463                                         /* If there is an explicit value */
3464                                         if (keyend != tmp && keyend != (tmp - 1))
3465                                                 value = g_strndup(keyend + 1, (tmp - keyend - 1));
3466                                         for (p = key; *p; ++p)
3467                                                 *p = g_ascii_tolower(*p);
3468                                         g_hash_table_insert(params, key, value);
3469                                 }
3470                                 keyend = value = NULL;
3471                                 pairstart = (*tmp) ? tmp + 1 : tmp;
3472                         } else if (*tmp == '=')
3473                                 keyend = tmp;
3474
3475                         if (*tmp)
3476                                 tmp++;
3477                 }
3478         } else
3479                 cmd = g_strdup(tmp);
3480
3481         purple_signal_emit_return_1(purple_get_core(), "uri-handler", proto, cmd, params);
3482
3483         g_free(cmd);
3484         if (params)
3485                 g_hash_table_destroy(params);
3486 }
3487
3488 /*
3489  * TODO: Should probably add a "gboolean *ret_ishttps" parameter that
3490  *       is set to TRUE if this URL is https, otherwise it is set to
3491  *       FALSE.  But that change will break the API.
3492  *
3493  *       This is important for Yahoo! web messenger login.  They now
3494  *       force https login, and if you access the web messenger login
3495  *       page via http then it redirects you to the https version, but
3496  *       purple_util_fetch_url() ignores the "https" and attempts to
3497  *       fetch the URL via http again, which gets redirected again.
3498  */
3499 gboolean
3500 purple_url_parse(const char *url, char **ret_host, int *ret_port,
3501                            char **ret_path, char **ret_user, char **ret_passwd)
3502 {
3503         gboolean is_https = FALSE;
3504         const char * scan_info;
3505         char port_str[6];
3506         int f;
3507         const char *at, *slash;
3508         const char *turl;
3509         char host[256], path[256], user[256], passwd[256];
3510         int port = 0;
3511         /* hyphen at end includes it in control set */
3512
3513 #define ADDR_CTRL "A-Za-z0-9.-"
3514 #define PORT_CTRL "0-9"
3515 #define PAGE_CTRL "A-Za-z0-9.~_/:*!@&%%?=+^-"
3516 #define USER_CTRL "A-Za-z0-9.~_/*!&%%?=+^-"
3517 #define PASSWD_CTRL "A-Za-z0-9.~_/*!&%%?=+^-"
3518
3519         g_return_val_if_fail(url != NULL, FALSE);
3520
3521         if ((turl = purple_strcasestr(url, "http://")) != NULL)
3522         {
3523                 turl += 7;
3524                 url = turl;
3525         }
3526         else if ((turl = purple_strcasestr(url, "https://")) != NULL)
3527         {
3528                 is_https = TRUE;
3529                 turl += 8;
3530                 url = turl;
3531         }
3532
3533         /* parse out authentication information if supplied */
3534         /* Only care about @ char BEFORE the first / */
3535         at = strchr(url, '@');
3536         slash = strchr(url, '/');
3537         f = 0;
3538         if (at && (!slash || at < slash)) {
3539                 scan_info = "%255[" USER_CTRL "]:%255[" PASSWD_CTRL "]^@";
3540                 f = sscanf(url, scan_info, user, passwd);
3541
3542                 if (f == 1) {
3543                         /* No passwd, possibly just username supplied */
3544                         scan_info = "%255[" USER_CTRL "]^@";
3545                         f = sscanf(url, scan_info, user);
3546                 }
3547
3548                 url = at+1; /* move pointer after the @ char */
3549         }
3550
3551         if (f < 1) {
3552                 *user = '\0';
3553                 *passwd = '\0';
3554         } else if (f == 1)
3555                 *passwd = '\0';
3556
3557         scan_info = "%255[" ADDR_CTRL "]:%5[" PORT_CTRL "]/%255[" PAGE_CTRL "]";
3558         f = sscanf(url, scan_info, host, port_str, path);
3559
3560         if (f == 1)
3561         {
3562                 scan_info = "%255[" ADDR_CTRL "]/%255[" PAGE_CTRL "]";
3563                 f = sscanf(url, scan_info, host, path);
3564                 /* Use the default port */
3565                 if (is_https)
3566                         g_snprintf(port_str, sizeof(port_str), "443");
3567                 else
3568                         g_snprintf(port_str, sizeof(port_str), "80");
3569         }
3570
3571         if (f == 0)
3572                 *host = '\0';
3573
3574         if (f <= 1)
3575                 *path = '\0';
3576
3577         if (sscanf(port_str, "%d", &port) != 1)
3578                 purple_debug_error("util", "Error parsing URL port from %s\n", url);
3579
3580         if (ret_host != NULL) *ret_host = g_strdup(host);
3581         if (ret_port != NULL) *ret_port = port;
3582         if (ret_path != NULL) *ret_path = g_strdup(path);
3583         if (ret_user != NULL) *ret_user = g_strdup(user);
3584         if (ret_passwd != NULL) *ret_passwd = g_strdup(passwd);
3585
3586         return ((*host != '\0') ? TRUE : FALSE);
3587
3588 #undef ADDR_CTRL
3589 #undef PORT_CTRL
3590 #undef PAGE_CTRL
3591 #undef USER_CTRL
3592 #undef PASSWD_CTRL
3593 }
3594
3595 /**
3596  * The arguments to this function are similar to printf.
3597  */
3598 static void
3599 purple_util_fetch_url_error(PurpleUtilFetchUrlData *gfud, const char *format, ...)
3600 {
3601         gchar *error_message;
3602         va_list args;
3603
3604         va_start(args, format);
3605         error_message = g_strdup_vprintf(format, args);
3606         va_end(args);
3607
3608         gfud->callback(gfud, gfud->user_data, NULL, 0, error_message);
3609         g_free(error_message);
3610         purple_util_fetch_url_cancel(gfud);
3611 }
3612
3613 static void url_fetch_connect_cb(gpointer url_data, gint source, const gchar *error_message);
3614 static void ssl_url_fetch_connect_cb(gpointer data, PurpleSslConnection *ssl_connection, PurpleInputCondition cond);
3615 static void ssl_url_fetch_error_cb(PurpleSslConnection *ssl_connection, PurpleSslErrorType error, gpointer data);
3616
3617 static gboolean
3618 parse_redirect(const char *data, gsize data_len,
3619                            PurpleUtilFetchUrlData *gfud)
3620 {
3621         gchar *s;
3622         gchar *new_url, *temp_url, *end;
3623         gboolean full;
3624         int len;
3625
3626         if ((s = g_strstr_len(data, data_len, "\nLocation: ")) == NULL)
3627                 /* We're not being redirected */
3628                 return FALSE;
3629
3630         s += strlen("Location: ");
3631         end = strchr(s, '\r');
3632
3633         /* Just in case :) */
3634         if (end == NULL)
3635                 end = strchr(s, '\n');
3636
3637         if (end == NULL)
3638                 return FALSE;
3639
3640         len = end - s;
3641
3642         new_url = g_malloc(len + 1);
3643         strncpy(new_url, s, len);
3644         new_url[len] = '\0';
3645
3646         full = gfud->full;
3647
3648         if (*new_url == '/' || g_strstr_len(new_url, len, "://") == NULL)
3649         {
3650                 temp_url = new_url;
3651
3652                 new_url = g_strdup_printf("%s:%d%s", gfud->website.address,
3653                                                                   gfud->website.port, temp_url);
3654
3655                 g_free(temp_url);
3656
3657                 full = FALSE;
3658         }
3659
3660         purple_debug_info("util", "Redirecting to %s\n", new_url);
3661
3662         gfud->num_times_redirected++;
3663         if (gfud->num_times_redirected >= 5)
3664         {
3665                 purple_util_fetch_url_error(gfud,
3666                                 _("Could not open %s: Redirected too many times"),
3667                                 gfud->url);
3668                 g_free(new_url);
3669                 return TRUE;
3670         }
3671
3672         /*
3673          * Try again, with this new location.  This code is somewhat
3674          * ugly, but we need to reuse the gfud because whoever called
3675          * us is holding a reference to it.
3676          */
3677         g_free(gfud->url);
3678         gfud->url = new_url;
3679         gfud->full = full;
3680         g_free(gfud->request);
3681         gfud->request = NULL;
3682
3683         if (gfud->is_ssl) {
3684                 gfud->is_ssl = FALSE;
3685                 purple_ssl_close(gfud->ssl_connection);
3686                 gfud->ssl_connection = NULL;
3687         } else {
3688                 purple_input_remove(gfud->inpa);
3689                 gfud->inpa = 0;
3690                 close(gfud->fd);
3691                 gfud->fd = -1;
3692         }
3693         gfud->request_written = 0;
3694         gfud->len = 0;
3695         gfud->data_len = 0;
3696
3697         g_free(gfud->website.user);
3698         g_free(gfud->website.passwd);
3699         g_free(gfud->website.address);
3700         g_free(gfud->website.page);
3701         purple_url_parse(new_url, &gfud->website.address, &gfud->website.port,
3702                                    &gfud->website.page, &gfud->website.user, &gfud->website.passwd);
3703
3704         if (purple_strcasestr(new_url, "https://") != NULL) {
3705                 gfud->is_ssl = TRUE;
3706                 gfud->ssl_connection = purple_ssl_connect(gfud->account,
3707                                 gfud->website.address, gfud->website.port,
3708                                 ssl_url_fetch_connect_cb, ssl_url_fetch_error_cb, gfud);
3709         } else {
3710                 gfud->connect_data = purple_proxy_connect(NULL, gfud->account,
3711                                 gfud->website.address, gfud->website.port,
3712                                 url_fetch_connect_cb, gfud);
3713         }
3714
3715         if (gfud->ssl_connection == NULL && gfud->connect_data == NULL)
3716         {
3717                 purple_util_fetch_url_error(gfud, _("Unable to connect to %s"),
3718                                 gfud->website.address);
3719         }
3720
3721         return TRUE;
3722 }
3723
3724 /* find the starting point of the content for the specified header and make
3725  * sure that the content is safe to pass to sscanf */
3726 static const char *
3727 find_header_content(const char *data, gsize data_len, const char *header)
3728 {
3729         const char *p = NULL;
3730
3731         gsize header_len = strlen(header);
3732
3733         if (data_len > header_len) {
3734                 /* Check if the first header matches (data won't start with a \n") */
3735                 if (header[0] == '\n')
3736                         p = (g_ascii_strncasecmp(data, header + 1, header_len - 1) == 0) ? data : NULL;
3737                 if (!p)
3738                         p = purple_strcasestr_len(data, data_len, header, header_len);
3739                 if (p)
3740                         p += header_len;
3741         }
3742
3743         /* If we can find the header at all, try to sscanf it.
3744          * Response headers should end with at least \r\n, so sscanf is safe,
3745          * if we make sure that there is indeed a \n in our header.
3746          */
3747         if (p && g_strstr_len(p, data_len - (p - data), "\n")) {
3748                 return p;
3749         }
3750
3751         return NULL;
3752 }
3753
3754 static gsize
3755 parse_content_len(const char *data, gsize data_len)
3756 {
3757         gsize content_len = 0;
3758         const char *p = NULL;
3759
3760         p = find_header_content(data, data_len, "\nContent-Length: ");
3761         if (p) {
3762                 if (sscanf(p, "%" G_GSIZE_FORMAT, &content_len) != 1) {
3763                         purple_debug_warning("util", "invalid number format\n");
3764                         content_len = 0;
3765                 }
3766                 purple_debug_misc("util", "parsed %" G_GSIZE_FORMAT "\n", content_len);
3767         }
3768
3769         return content_len;
3770 }
3771
3772 static gboolean
3773 content_is_chunked(const char *data, gsize data_len)
3774 {
3775         const char *p = find_header_content(data, data_len, "\nTransfer-Encoding: ");
3776         if (p && g_ascii_strncasecmp(p, "chunked", 7) == 0)
3777                 return TRUE;
3778
3779         return FALSE;
3780 }
3781
3782 /* Process in-place */
3783 static void
3784 process_chunked_data(char *data, gsize *len)
3785 {
3786         gsize sz;
3787         gsize newlen = 0;
3788         char *p = data;
3789         char *s = data;
3790
3791         while (*s) {
3792                 /* Read the size of this chunk */
3793                 if (sscanf(s, "%" G_GSIZE_MODIFIER "x", &sz) != 1)
3794                 {
3795                         purple_debug_error("util", "Error processing chunked data: "
3796                                         "Expected data length, found: %s\n", s);
3797                         break;
3798                 }
3799                 if (sz == 0) {
3800                         /* We've reached the last chunk */
3801                         /*
3802                          * TODO: The spec allows "footers" to follow the last chunk.
3803                          *       If there is more data after this line then we should
3804                          *       treat it like a header.
3805                          */
3806                         break;
3807                 }
3808
3809                 /* Advance to the start of the data */
3810                 s = strstr(s, "\r\n");
3811                 if (s == NULL)
3812                         break;
3813                 s += 2;
3814
3815                 if (sz > MAX_HTTP_CHUNK_SIZE || s + sz > data + *len) {
3816                         purple_debug_error("util", "Error processing chunked data: "
3817                                         "Chunk size %" G_GSIZE_FORMAT " bytes was longer "
3818                                         "than the data remaining in the buffer (%"
3819                                         G_GSIZE_FORMAT " bytes)\n", sz, data + *len - s);
3820                         break;
3821                 }
3822
3823                 /* Move all data overtop of the chunk length that we read in earlier */
3824                 g_memmove(p, s, sz);
3825                 p += sz;
3826                 s += sz;
3827                 newlen += sz;
3828                 if (*s == '\0' || (*s != '\r' && *(s + 1) != '\n')) {
3829                         purple_debug_error("util", "Error processing chunked data: "
3830                                         "Expected \\r\\n, found: %s\n", s);
3831                         break;
3832                 }
3833                 s += 2;
3834         }
3835
3836         /* NULL terminate the data */
3837         *p = 0;
3838
3839         *len = newlen;
3840 }
3841
3842 static void
3843 url_fetch_recv_cb(gpointer url_data, gint source, PurpleInputCondition cond)
3844 {
3845         PurpleUtilFetchUrlData *gfud = url_data;
3846         int len;
3847         char buf[4096];
3848         char *data_cursor;
3849         gboolean got_eof = FALSE;
3850
3851         if (!gfud->is_ssl && source < 0) {
3852                 g_warn_if_reached();
3853                 len = -1;
3854                 errno = EINVAL;
3855         }
3856
3857         /*
3858          * Read data in a loop until we can't read any more!  This is a
3859          * little confusing because we read using a different function
3860          * depending on whether the socket is ssl or cleartext.
3861          */
3862         while ((gfud->is_ssl && ((len = purple_ssl_read(gfud->ssl_connection, buf, sizeof(buf))) > 0)) ||
3863                 (!gfud->is_ssl && source >= 0 && (len = read(source, buf, sizeof(buf))) > 0))
3864         {
3865                 if((gfud->len + len) > gfud->max_len) {
3866                         purple_util_fetch_url_error(gfud, _("Error reading from %s: response too long (%d bytes limit)"),
3867                                                     gfud->website.address, gfud->max_len);
3868                         return;
3869                 }
3870
3871                 /* If we've filled up our buffer, make it bigger */
3872                 if((gfud->len + len) >= gfud->data_len) {
3873                         while((gfud->len + len) >= gfud->data_len)
3874                                 gfud->data_len += sizeof(buf);
3875
3876                         gfud->webdata = g_realloc(gfud->webdata, gfud->data_len);
3877                 }
3878
3879                 data_cursor = gfud->webdata + gfud->len;
3880
3881                 gfud->len += len;
3882
3883                 memcpy(data_cursor, buf, len);
3884
3885                 gfud->webdata[gfud->len] = '\0';
3886
3887                 if(!gfud->got_headers) {
3888                         char *end_of_headers;
3889
3890                         /* See if we've reached the end of the headers yet */
3891                         end_of_headers = strstr(gfud->webdata, "\r\n\r\n");
3892                         if (end_of_headers) {
3893                                 guint header_len = (end_of_headers + 4 - gfud->webdata);
3894                                 gsize content_len;
3895
3896                                 purple_debug_misc("util", "Response headers: '%.*s'\n",
3897                                         header_len, gfud->webdata);
3898
3899                                 /* See if we can find a redirect. */
3900                                 if(parse_redirect(gfud->webdata, header_len, gfud))
3901                                         return;
3902
3903                                 gfud->got_headers = TRUE;
3904
3905                                 /* No redirect. See if we can find a content length. */
3906                                 content_len = parse_content_len(gfud->webdata, header_len);
3907                                 gfud->chunked = content_is_chunked(gfud->webdata, header_len);
3908
3909                                 if (content_len == 0) {
3910                                         /* We'll stick with an initial 8192 */
3911                                         content_len = 8192;
3912                                 } else {
3913                                         gfud->has_explicit_data_len = TRUE;
3914                                         if (content_len > gfud->max_len) {
3915                                                 purple_debug_error("util",
3916                                                                 "Overriding explicit Content-Length of %" G_GSIZE_FORMAT " with max of %" G_GSSIZE_FORMAT "\n",
3917                                                                 content_len, gfud->max_len);
3918                                                 content_len = gfud->max_len;
3919                                         }
3920                                 }
3921
3922
3923                                 /* If we're returning the headers too, we don't need to clean them out */
3924                                 if (gfud->include_headers) {
3925                                         char *new_data;
3926                                         gfud->data_len = content_len + header_len;
3927                                         new_data = g_try_realloc(gfud->webdata, gfud->data_len);
3928                                         if (new_data == NULL) {
3929                                                 purple_debug_error("util",
3930                                                                 "Failed to allocate %" G_GSIZE_FORMAT " bytes: %s\n",
3931                                                                 content_len, g_strerror(errno));
3932                                                 purple_util_fetch_url_error(gfud,
3933                                                                 _("Unable to allocate enough memory to hold "
3934                                                                   "the contents from %s.  The web server may "
3935                                                                   "be trying something malicious."),
3936                                                                 gfud->website.address);
3937
3938                                                 return;
3939                                         }
3940                                         gfud->webdata = new_data;
3941                                 } else {
3942                                         char *new_data;
3943                                         gsize body_len = gfud->len - header_len;
3944
3945                                         content_len = MAX(content_len, body_len);
3946
3947                                         new_data = g_try_malloc(content_len);
3948                                         if (new_data == NULL) {
3949                                                 purple_debug_error("util",
3950                                                                 "Failed to allocate %" G_GSIZE_FORMAT " bytes: %s\n",
3951                                                                 content_len, g_strerror(errno));
3952                                                 purple_util_fetch_url_error(gfud,
3953                                                                 _("Unable to allocate enough memory to hold "
3954                                                                   "the contents from %s.  The web server may "
3955                                                                   "be trying something malicious."),
3956                                                                 gfud->website.address);
3957
3958                                                 return;
3959                                         }
3960
3961                                         /* We may have read part of the body when reading the headers, don't lose it */
3962                                         if (body_len > 0) {
3963                                                 memcpy(new_data, end_of_headers + 4, body_len);
3964                                         }
3965
3966                                         /* Out with the old... */
3967                                         g_free(gfud->webdata);
3968
3969                                         /* In with the new. */
3970                                         gfud->len = body_len;
3971                                         gfud->data_len = content_len;
3972                                         gfud->webdata = new_data;
3973                                 }
3974                         }
3975                 }
3976
3977                 if(gfud->has_explicit_data_len && gfud->len >= gfud->data_len) {
3978                         got_eof = TRUE;
3979                         break;
3980                 }
3981         }
3982
3983         if(len < 0) {
3984                 if(errno == EAGAIN) {
3985                         return;
3986                 } else {
3987                         purple_util_fetch_url_error(gfud, _("Error reading from %s: %s"),
3988                                         gfud->website.address, g_strerror(errno));
3989                         return;
3990                 }
3991         }
3992
3993         if((len == 0) || got_eof) {
3994                 gfud->webdata = g_realloc(gfud->webdata, gfud->len + 1);
3995                 gfud->webdata[gfud->len] = '\0';
3996
3997                 if (!gfud->include_headers && gfud->chunked) {
3998                         /* Process only if we don't want the headers. */
3999                         process_chunked_data(gfud->webdata, &gfud->len);
4000                 }
4001
4002                 gfud->callback(gfud, gfud->user_data, gfud->webdata, gfud->len, NULL);
4003                 purple_util_fetch_url_cancel(gfud);
4004         }
4005 }
4006
4007 static void ssl_url_fetch_recv_cb(gpointer data, PurpleSslConnection *ssl_connection, PurpleInputCondition cond)
4008 {
4009         url_fetch_recv_cb(data, -1, cond);
4010 }
4011
4012 /**
4013  * This function is called when the socket is available to be written
4014  * to.
4015  *
4016  * @param source The file descriptor that can be written to.  This can
4017  *        be an http connection or it can be the SSL connection of an
4018  *        https request.  So be careful what you use it for!  If it's
4019  *        an https request then use purple_ssl_write() instead of
4020  *        writing to it directly.
4021  */
4022 static void
4023 url_fetch_send_cb(gpointer data, gint source, PurpleInputCondition cond)
4024 {
4025         PurpleUtilFetchUrlData *gfud;
4026         int len, total_len;
4027
4028         gfud = data;
4029
4030         if (gfud->request == NULL) {
4031
4032                 PurpleProxyInfo *gpi = purple_proxy_get_setup(gfud->account);
4033                 GString *request_str = g_string_new(NULL);
4034
4035                 g_string_append_printf(request_str, "GET %s%s HTTP/%s\r\n"
4036                                                     "Connection: close\r\n",
4037                         (gfud->full ? "" : "/"),
4038                         (gfud->full ? (gfud->url ? gfud->url : "") : (gfud->website.page ? gfud->website.page : "")),
4039                         (gfud->http11 ? "1.1" : "1.0"));
4040
4041                 if (gfud->user_agent)
4042                         g_string_append_printf(request_str, "User-Agent: %s\r\n", gfud->user_agent);
4043
4044                 /* Host header is not forbidden in HTTP/1.0 requests, and HTTP/1.1
4045                  * clients must know how to handle the "chunked" transfer encoding.
4046                  * Purple doesn't know how to handle "chunked", so should always send
4047                  * the Host header regardless, to get around some observed problems
4048                  */
4049                 g_string_append_printf(request_str, "Accept: */*\r\n"
4050                                                     "Host: %s\r\n",
4051                         (gfud->website.address ? gfud->website.address : ""));
4052
4053                 if (purple_proxy_info_get_username(gpi) != NULL
4054                                 && (purple_proxy_info_get_type(gpi) == PURPLE_PROXY_USE_ENVVAR
4055                                         || purple_proxy_info_get_type(gpi) == PURPLE_PROXY_HTTP)) {
4056                         /* This chunk of code was copied from proxy.c http_start_connect_tunneling()
4057                          * This is really a temporary hack - we need a more complete proxy handling solution,
4058                          * so I didn't think it was worthwhile to refactor for reuse
4059                          */
4060                         char *t1, *t2, *ntlm_type1;
4061                         char hostname[256];
4062                         int ret;
4063
4064                         ret = gethostname(hostname, sizeof(hostname));
4065                         hostname[sizeof(hostname) - 1] = '\0';
4066                         if (ret < 0 || hostname[0] == '\0') {
4067                                 purple_debug_warning("util", "proxy - gethostname() failed -- is your hostname set?");
4068                                 strcpy(hostname, "localhost");
4069                         }
4070
4071                         t1 = g_strdup_printf("%s:%s",
4072                                 purple_proxy_info_get_username(gpi),
4073                                 purple_proxy_info_get_password(gpi) ?
4074                                         purple_proxy_info_get_password(gpi) : "");
4075                         t2 = purple_base64_encode((const guchar *)t1, strlen(t1));
4076                         g_free(t1);
4077
4078                         ntlm_type1 = purple_ntlm_gen_type1(hostname, "");
4079
4080                         g_string_append_printf(request_str,
4081                                 "Proxy-Authorization: Basic %s\r\n"
4082                                 "Proxy-Authorization: NTLM %s\r\n"
4083                                 "Proxy-Connection: Keep-Alive\r\n",
4084                                 t2, ntlm_type1);
4085                         g_free(ntlm_type1);
4086                         g_free(t2);
4087                 }
4088
4089                 g_string_append(request_str, "\r\n");
4090
4091                 gfud->request = g_string_free(request_str, FALSE);
4092                 gfud->request_len = strlen(gfud->request);
4093         }
4094
4095         if(purple_debug_is_unsafe())
4096                 purple_debug_misc("util", "Request: '%.*s'\n", (int) gfud->request_len, gfud->request);
4097         else
4098                 purple_debug_misc("util", "request constructed\n");
4099
4100         total_len = gfud->request_len;
4101
4102         if (gfud->is_ssl)
4103                 len = purple_ssl_write(gfud->ssl_connection, gfud->request + gfud->request_written,
4104                                 total_len - gfud->request_written);
4105         else
4106                 len = write(gfud->fd, gfud->request + gfud->request_written,
4107                                 total_len - gfud->request_written);
4108
4109         if (len < 0 && errno == EAGAIN)
4110                 return;
4111         else if (len < 0) {
4112                 purple_util_fetch_url_error(gfud, _("Error writing to %s: %s"),
4113                                 gfud->website.address, g_strerror(errno));
4114                 return;
4115         }
4116         gfud->request_written += len;
4117
4118         if (gfud->request_written < (gsize)total_len)
4119                 return;
4120
4121         /* We're done writing our request, now start reading the response */
4122         if (gfud->is_ssl) {
4123                 purple_input_remove(gfud->inpa);
4124                 gfud->inpa = 0;
4125                 purple_ssl_input_add(gfud->ssl_connection, ssl_url_fetch_recv_cb, gfud);
4126         } else {
4127                 purple_input_remove(gfud->inpa);
4128                 gfud->inpa = purple_input_add(gfud->fd, PURPLE_INPUT_READ, url_fetch_recv_cb,
4129                         gfud);
4130         }
4131 }
4132
4133 static void
4134 url_fetch_connect_cb(gpointer url_data, gint source, const gchar *error_message)
4135 {
4136         PurpleUtilFetchUrlData *gfud;
4137
4138         gfud = url_data;
4139         gfud->connect_data = NULL;
4140
4141         if (source == -1)
4142         {
4143                 purple_util_fetch_url_error(gfud, _("Unable to connect to %s: %s"),
4144                                 (gfud->website.address ? gfud->website.address : ""), error_message);
4145                 return;
4146         }
4147
4148         gfud->fd = source;
4149
4150         gfud->inpa = purple_input_add(source, PURPLE_INPUT_WRITE,
4151                                                                 url_fetch_send_cb, gfud);
4152         url_fetch_send_cb(gfud, source, PURPLE_INPUT_WRITE);
4153 }
4154
4155 static void ssl_url_fetch_connect_cb(gpointer data, PurpleSslConnection *ssl_connection, PurpleInputCondition cond)
4156 {
4157         PurpleUtilFetchUrlData *gfud;
4158
4159         gfud = data;
4160
4161         gfud->inpa = purple_input_add(ssl_connection->fd, PURPLE_INPUT_WRITE,
4162                         url_fetch_send_cb, gfud);
4163         url_fetch_send_cb(gfud, ssl_connection->fd, PURPLE_INPUT_WRITE);
4164 }
4165
4166 static void ssl_url_fetch_error_cb(PurpleSslConnection *ssl_connection, PurpleSslErrorType error, gpointer data)
4167 {
4168         PurpleUtilFetchUrlData *gfud;
4169
4170         gfud = data;
4171         gfud->ssl_connection = NULL;
4172
4173         purple_util_fetch_url_error(gfud, _("Unable to connect to %s: %s"),
4174                         (gfud->website.address ? gfud->website.address : ""),
4175         purple_ssl_strerror(error));
4176 }
4177
4178 PurpleUtilFetchUrlData *
4179 purple_util_fetch_url_request(const char *url, gboolean full,
4180                 const char *user_agent, gboolean http11,
4181                 const char *request, gboolean include_headers,
4182                 PurpleUtilFetchUrlCallback callback, void *user_data)
4183 {
4184         return purple_util_fetch_url_request_len_with_account(NULL, url, full,
4185                                              user_agent, http11,
4186                                              request, include_headers, -1,
4187                                              callback, user_data);
4188 }
4189
4190 PurpleUtilFetchUrlData *
4191 purple_util_fetch_url_request_len(const char *url, gboolean full,
4192                 const char *user_agent, gboolean http11,
4193                 const char *request, gboolean include_headers, gssize max_len,
4194                 PurpleUtilFetchUrlCallback callback, void *user_data)
4195 {
4196         return purple_util_fetch_url_request_len_with_account(NULL, url, full,
4197                         user_agent, http11, request, include_headers, max_len, callback,
4198                         user_data);
4199 }
4200
4201 PurpleUtilFetchUrlData *
4202 purple_util_fetch_url_request_len_with_account(PurpleAccount *account,
4203                 const char *url, gboolean full, const char *user_agent, gboolean http11,
4204                 const char *request, gboolean include_headers, gssize max_len,
4205                 PurpleUtilFetchUrlCallback callback, void *user_data)
4206 {
4207         return purple_util_fetch_url_request_data_len_with_account(account, url, full,
4208                 user_agent, http11, request, request ? strlen (request) : 0, include_headers, max_len, callback,
4209                         user_data);
4210 }
4211
4212 PurpleUtilFetchUrlData *
4213 purple_util_fetch_url_request_data_len_with_account(PurpleAccount *account,
4214                 const char *url, gboolean full, const char *user_agent, gboolean http11,
4215                 const char *request, gsize request_len, gboolean include_headers, gssize max_len,
4216                 PurpleUtilFetchUrlCallback callback, void *user_data)
4217 {
4218         PurpleUtilFetchUrlData *gfud;
4219
4220         g_return_val_if_fail(url      != NULL, NULL);
4221         g_return_val_if_fail(callback != NULL, NULL);
4222
4223         if(purple_debug_is_unsafe())
4224                 purple_debug_info("util",
4225                                  "requested to fetch (%s), full=%d, user_agent=(%s), http11=%d\n",
4226                                  url, full, user_agent?user_agent:"(null)", http11);
4227         else
4228                 purple_debug_info("util", "requesting to fetch a URL\n");
4229
4230         gfud = g_new0(PurpleUtilFetchUrlData, 1);
4231
4232         gfud->callback = callback;
4233         gfud->user_data  = user_data;
4234         gfud->url = g_strdup(url);
4235         gfud->user_agent = g_strdup(user_agent);
4236         gfud->http11 = http11;
4237         gfud->full = full;
4238         gfud->request = request_len ? g_memdup(request, request_len) : NULL;
4239         gfud->request_len = request_len;
4240         gfud->include_headers = include_headers;
4241         gfud->fd = -1;
4242         if (max_len <= 0) {
4243                 max_len = DEFAULT_MAX_HTTP_DOWNLOAD;
4244                 purple_debug_error("util", "Defaulting max download from %s to %" G_GSSIZE_FORMAT "\n", url, max_len);
4245         }
4246         gfud->max_len = (gsize) max_len;
4247         gfud->account = account;
4248
4249         purple_url_parse(url, &gfud->website.address, &gfud->website.port,
4250                                    &gfud->website.page, &gfud->website.user, &gfud->website.passwd);
4251
4252         if (purple_strcasestr(url, "https://") != NULL) {
4253                 if (!purple_ssl_is_supported()) {
4254                         purple_util_fetch_url_error(gfud,
4255                                         _("Unable to connect to %s: %s"),
4256                                         gfud->website.address,
4257                                         _("Server requires TLS/SSL, but no TLS/SSL support was found."));
4258                         return NULL;
4259                 }
4260
4261                 gfud->is_ssl = TRUE;
4262                 gfud->ssl_connection = purple_ssl_connect(account,
4263                                 gfud->website.address, gfud->website.port,
4264                                 ssl_url_fetch_connect_cb, ssl_url_fetch_error_cb, gfud);
4265         } else {
4266                 gfud->connect_data = purple_proxy_connect(NULL, account,
4267                                 gfud->website.address, gfud->website.port,
4268                                 url_fetch_connect_cb, gfud);
4269         }
4270
4271         if (gfud->ssl_connection == NULL && gfud->connect_data == NULL)
4272         {
4273                 purple_util_fetch_url_error(gfud, _("Unable to connect to %s"),
4274                                 gfud->website.address);
4275                 return NULL;
4276         }
4277
4278         return gfud;
4279 }
4280
4281 void
4282 purple_util_fetch_url_cancel(PurpleUtilFetchUrlData *gfud)
4283 {
4284         if (gfud->ssl_connection != NULL)
4285                 purple_ssl_close(gfud->ssl_connection);
4286
4287         if (gfud->connect_data != NULL)
4288                 purple_proxy_connect_cancel(gfud->connect_data);
4289
4290         if (gfud->inpa > 0)
4291                 purple_input_remove(gfud->inpa);
4292
4293         if (gfud->fd >= 0)
4294                 close(gfud->fd);
4295
4296         g_free(gfud->website.user);
4297         g_free(gfud->website.passwd);
4298         g_free(gfud->website.address);
4299         g_free(gfud->website.page);
4300         g_free(gfud->url);
4301         g_free(gfud->user_agent);
4302         g_free(gfud->request);
4303         g_free(gfud->webdata);
4304
4305         g_free(gfud);
4306 }
4307
4308 const char *
4309 purple_url_decode(const char *str)
4310 {
4311         static char buf[BUF_LEN];
4312         guint i, j = 0;
4313         char *bum;
4314         char hex[3];
4315
4316         g_return_val_if_fail(str != NULL, NULL);
4317
4318         /*
4319          * XXX - This check could be removed and buf could be made
4320          * dynamically allocated, but this is easier.
4321          */
4322         if (strlen(str) >= BUF_LEN)
4323                 return NULL;
4324
4325         for (i = 0; i < strlen(str); i++) {
4326
4327                 if (str[i] != '%')
4328                         buf[j++] = str[i];
4329                 else {
4330                         strncpy(hex, str + ++i, 2);
4331                         hex[2] = '\0';
4332
4333                         /* i is pointing to the start of the number */
4334                         i++;
4335
4336                         /*
4337                          * Now it's at the end and at the start of the for loop
4338                          * will be at the next character.
4339                          */
4340                         buf[j++] = strtol(hex, NULL, 16);
4341                 }
4342         }
4343
4344         buf[j] = '\0';
4345
4346         if (!g_utf8_validate(buf, -1, (const char **)&bum))
4347                 *bum = '\0';
4348
4349         return buf;
4350 }
4351
4352 const char *
4353 purple_url_encode(const char *str)
4354 {
4355         const char *iter;
4356         static char buf[BUF_LEN];
4357         char utf_char[6];
4358         guint i, j = 0;
4359
4360         g_return_val_if_fail(str != NULL, NULL);
4361         g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL);
4362
4363         iter = str;
4364         for (; *iter && j < (BUF_LEN - 1) ; iter = g_utf8_next_char(iter)) {
4365                 gunichar c = g_utf8_get_char(iter);
4366                 /* If the character is an ASCII character and is alphanumeric
4367                  * no need to escape */
4368                 if (c < 128 && (isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~')) {
4369                         buf[j++] = c;
4370                 } else {
4371                         int bytes = g_unichar_to_utf8(c, utf_char);
4372                         for (i = 0; (int)i < bytes; i++) {
4373                                 if (j > (BUF_LEN - 4))
4374                                         break;
4375                                 if (i >= sizeof(utf_char)) {
4376                                         g_warn_if_reached();
4377                                         break;
4378                                 }
4379                                 sprintf(buf + j, "%%%02X", utf_char[i] & 0xff);
4380                                 j += 3;
4381                         }
4382                 }
4383         }
4384
4385         buf[j] = '\0';
4386
4387         return buf;
4388 }
4389
4390 /* Originally lifted from
4391  * http://www.oreillynet.com/pub/a/network/excerpt/spcookbook_chap03/index3.html
4392  * ... and slightly modified to be a bit more rfc822 compliant
4393  * ... and modified a bit more to make domain checking rfc1035 compliant
4394  *     with the exception permitted in rfc1101 for domains to start with digit
4395  *     but not completely checking to avoid conflicts with IP addresses
4396  */
4397 gboolean
4398 purple_email_is_valid(const char *address)
4399 {
4400         const char *c, *domain;
4401         static char *rfc822_specials = "()<>@,;:\\\"[]";
4402
4403         g_return_val_if_fail(address != NULL, FALSE);
4404
4405         if (*address == '.') return FALSE;
4406
4407         /* first we validate the name portion (name@domain) (rfc822)*/
4408         for (c = address;  *c;  c++) {
4409                 if (*c == '\"' && (c == address || *(c - 1) == '.' || *(c - 1) == '\"')) {
4410                         while (*++c) {
4411                                 if (*c == '\\') {
4412                                         if (*c++ && *c < 127 && *c != '\n' && *c != '\r') continue;
4413                                         else return FALSE;
4414                                 }
4415                                 if (*c == '\"') break;
4416                                 if (*c < ' ' || *c >= 127) return FALSE;
4417                         }
4418                         if (!*c++) return FALSE;
4419                         if (*c == '@') break;
4420                         if (*c != '.') return FALSE;
4421                         continue;
4422                 }
4423                 if (*c == '@') break;
4424                 if (*c <= ' ' || *c >= 127) return FALSE;
4425                 if (strchr(rfc822_specials, *c)) return FALSE;
4426         }
4427
4428         /* It's obviously not an email address if we didn't find an '@' above */
4429         if (*c == '\0') return FALSE;
4430
4431         /* strictly we should return false if (*(c - 1) == '.') too, but I think
4432          * we should permit user.@domain type addresses - they do work :) */
4433         if (c == address) return FALSE;
4434
4435         /* next we validate the domain portion (name@domain) (rfc1035 & rfc1011) */
4436         if (!*(domain = ++c)) return FALSE;
4437         do {
4438                 if (*c == '.' && (c == domain || *(c - 1) == '.' || *(c - 1) == '-'))
4439                         return FALSE;
4440                 if (*c == '-' && (*(c - 1) == '.' || *(c - 1) == '@')) return FALSE;
4441                 if ((*c < '0' && *c != '-' && *c != '.') || (*c > '9' && *c < 'A') ||
4442                         (*c > 'Z' && *c < 'a') || (*c > 'z')) return FALSE;
4443         } while (*++c);
4444
4445         if (*(c - 1) == '-') return FALSE;
4446
4447         return ((c - domain) > 3 ? TRUE : FALSE);
4448 }
4449
4450 gboolean
4451 purple_ipv4_address_is_valid(const char *ip)
4452 {
4453         int c, o1, o2, o3, o4;
4454         char end;
4455
4456         g_return_val_if_fail(ip != NULL, FALSE);
4457
4458         c = sscanf(ip, "%d.%d.%d.%d%c", &o1, &o2, &o3, &o4, &end);
4459         if (c != 4 || o1 < 0 || o1 > 255 || o2 < 0 || o2 > 255 || o3 < 0 || o3 > 255 || o4 < 0 || o4 > 255)
4460                 return FALSE;
4461         return TRUE;
4462 }
4463
4464 gboolean
4465 purple_ipv6_address_is_valid(const gchar *ip)
4466 {
4467         const gchar *c;
4468         gboolean double_colon = FALSE;
4469         gint chunks = 1;
4470         gint in = 0;
4471
4472         g_return_val_if_fail(ip != NULL, FALSE);
4473
4474         if (*ip == '\0')
4475                 return FALSE;
4476
4477         for (c = ip; *c; ++c) {
4478                 if ((*c >= '0' && *c <= '9') ||
4479                         (*c >= 'a' && *c <= 'f') ||
4480                         (*c >= 'A' && *c <= 'F')) {
4481                         if (++in > 4)
4482                                 /* Only four hex digits per chunk */
4483                                 return FALSE;
4484                         continue;
4485                 } else if (*c == ':') {
4486                         /* The start of a new chunk */
4487                         ++chunks;
4488                         in = 0;
4489                         if (*(c + 1) == ':') {
4490                                 /*
4491                                  * '::' indicates a consecutive series of chunks full
4492                                  * of zeroes. There can be only one of these per address.
4493                                  */
4494                                 if (double_colon)
4495                                         return FALSE;
4496                                 double_colon = TRUE;
4497                         }
4498                 } else
4499                         return FALSE;
4500         }
4501
4502         /*
4503          * Either we saw a '::' and there were fewer than 8 chunks -or-
4504          * we didn't see a '::' and saw exactly 8 chunks.
4505          */
4506         return (double_colon && chunks < 8) || (!double_colon && chunks == 8);
4507 }
4508
4509 /* TODO 3.0.0: Add ipv6 check, too */
4510 gboolean
4511 purple_ip_address_is_valid(const char *ip)
4512 {
4513         return purple_ipv4_address_is_valid(ip);
4514 }
4515
4516 /* Stolen from gnome_uri_list_extract_uris */
4517 GList *
4518 purple_uri_list_extract_uris(const gchar *uri_list)
4519 {
4520         const gchar *p, *q;
4521         gchar *retval;
4522         GList *result = NULL;
4523
4524         g_return_val_if_fail (uri_list != NULL, NULL);
4525
4526         p = uri_list;
4527
4528         /* We don't actually try to validate the URI according to RFC
4529         * 2396, or even check for allowed characters - we just ignore
4530         * comments and trim whitespace off the ends.  We also
4531         * allow LF delimination as well as the specified CRLF.
4532         */
4533         while (p) {
4534                 if (*p != '#') {
4535                         while (isspace(*p))
4536                                 p++;
4537
4538                         q = p;
4539                         while (*q && (*q != '\n') && (*q != '\r'))
4540                                 q++;
4541
4542                         if (q > p) {
4543                                 q--;
4544                                 while (q > p && isspace(*q))
4545                                         q--;
4546
4547                                 retval = (gchar*)g_malloc (q - p + 2);
4548                                 strncpy (retval, p, q - p + 1);
4549                                 retval[q - p + 1] = '\0';
4550
4551                                 result = g_list_prepend (result, retval);
4552                         }
4553                 }
4554                 p = strchr (p, '\n');
4555                 if (p)
4556                         p++;
4557         }
4558
4559         return g_list_reverse (result);
4560 }
4561
4562
4563 /* Stolen from gnome_uri_list_extract_filenames */
4564 GList *
4565 purple_uri_list_extract_filenames(const gchar *uri_list)
4566 {
4567         GList *tmp_list, *node, *result;
4568
4569         g_return_val_if_fail (uri_list != NULL, NULL);
4570
4571         result = purple_uri_list_extract_uris(uri_list);
4572
4573         tmp_list = result;
4574         while (tmp_list) {
4575                 gchar *s = (gchar*)tmp_list->data;
4576
4577                 node = tmp_list;
4578                 tmp_list = tmp_list->next;
4579
4580                 if (!strncmp (s, "file:", 5)) {
4581                         node->data = g_filename_from_uri (s, NULL, NULL);
4582                         /* not sure if this fallback is useful at all */
4583                         if (!node->data) node->data = g_strdup (s+5);
4584                 } else {
4585                         result = g_list_delete_link(result, node);
4586                 }
4587                 g_free (s);
4588         }
4589         return result;
4590 }
4591
4592 /**************************************************************************
4593  * UTF8 String Functions
4594  **************************************************************************/
4595 gchar *
4596 purple_utf8_try_convert(const char *str)
4597 {
4598         gsize converted;
4599         gchar *utf8;
4600
4601         g_return_val_if_fail(str != NULL, NULL);
4602
4603         if (g_utf8_validate(str, -1, NULL)) {
4604                 return g_strdup(str);
4605         }
4606
4607         utf8 = g_locale_to_utf8(str, -1, &converted, NULL, NULL);
4608         if (utf8 != NULL)
4609                 return utf8;
4610
4611         utf8 = g_convert(str, -1, "UTF-8", "ISO-8859-15", &converted, NULL, NULL);
4612         if ((utf8 != NULL) && (converted == strlen(str)))
4613                 return utf8;
4614
4615         g_free(utf8);
4616
4617         return NULL;
4618 }
4619
4620 #define utf8_first(x) ((x & 0x80) == 0 || (x & 0xe0) == 0xc0 \
4621                        || (x & 0xf0) == 0xe0 || (x & 0xf8) == 0xf0)
4622 gchar *
4623 purple_utf8_salvage(const char *str)
4624 {
4625         GString *workstr;
4626         const char *end;
4627
4628         g_return_val_if_fail(str != NULL, NULL);
4629
4630         workstr = g_string_sized_new(strlen(str));
4631
4632         do {
4633                 (void)g_utf8_validate(str, -1, &end);
4634                 workstr = g_string_append_len(workstr, str, end - str);
4635                 str = end;
4636                 if (*str == '\0')
4637                         break;
4638                 do {
4639                         workstr = g_string_append_c(workstr, '?');
4640                         str++;
4641                 } while (!utf8_first(*str));
4642         } while (*str != '\0');
4643
4644         return g_string_free(workstr, FALSE);
4645 }
4646
4647 gchar *
4648 purple_utf8_strip_unprintables(const gchar *str)
4649 {
4650         gchar *workstr, *iter;
4651         const gchar *bad;
4652
4653         if (str == NULL)
4654                 /* Act like g_strdup */
4655                 return NULL;
4656
4657         if (!g_utf8_validate(str, -1, &bad)) {
4658                 purple_debug_error("util", "purple_utf8_strip_unprintables(%s) failed; "
4659                                            "first bad character was %02x (%c)\n",
4660                                    str, *bad, *bad);
4661                 g_return_val_if_reached(NULL);
4662         }
4663
4664         workstr = iter = g_new(gchar, strlen(str) + 1);
4665         while (*str) {
4666                 gunichar ch = g_utf8_get_char(str);
4667                 gchar *next = g_utf8_next_char(str);
4668                 /*
4669                  * Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
4670                  *          [#x10000-#x10FFFF]
4671                  */
4672                 if ((ch == '\t' || ch == '\n' || ch == '\r') ||
4673                                 (ch >= 0x20 && ch <= 0xD7FF) ||
4674                                 (ch >= 0xE000 && ch <= 0xFFFD) ||
4675                                 (ch >= 0x10000 && ch <= 0x10FFFF)) {
4676                         memcpy(iter, str, next - str);
4677                         iter += (next - str);
4678                 }
4679
4680                 str = next;
4681         }
4682
4683         /* nul-terminate the new string */
4684         *iter = '\0';
4685
4686         return workstr;
4687 }
4688
4689 /*
4690  * This function is copied from g_strerror() but changed to use
4691  * gai_strerror().
4692  */
4693 G_CONST_RETURN gchar *
4694 purple_gai_strerror(gint errnum)
4695 {
4696 #if GLIB_CHECK_VERSION(2, 32, 0)
4697         static GPrivate msg_private = G_PRIVATE_INIT(g_free);
4698 #else
4699         static GStaticPrivate msg_private = G_STATIC_PRIVATE_INIT;
4700 #endif
4701         char *msg;
4702         int saved_errno = errno;
4703
4704         const char *msg_locale;
4705
4706         msg_locale = gai_strerror(errnum);
4707         if (g_get_charset(NULL))
4708         {
4709                 /* This string is already UTF-8--great! */
4710                 errno = saved_errno;
4711                 return msg_locale;
4712         }
4713         else
4714         {
4715                 gchar *msg_utf8 = g_locale_to_utf8(msg_locale, -1, NULL, NULL, NULL);
4716                 if (msg_utf8)
4717                 {
4718                         /* Stick in the quark table so that we can return a static result */
4719                         GQuark msg_quark = g_quark_from_string(msg_utf8);
4720                         g_free(msg_utf8);
4721
4722                         msg_utf8 = (gchar *)g_quark_to_string(msg_quark);
4723                         errno = saved_errno;
4724                         return msg_utf8;
4725                 }
4726         }
4727
4728 #if GLIB_CHECK_VERSION(2, 32, 0)
4729         msg = g_private_get(&msg_private);
4730 #else
4731         msg = g_static_private_get(&msg_private);
4732 #endif
4733         if (!msg)
4734         {
4735                 msg = g_new(gchar, 64);
4736 #if GLIB_CHECK_VERSION(2, 32, 0)
4737                 g_private_set(&msg_private, msg);
4738 #else
4739                 g_static_private_set(&msg_private, msg, g_free);
4740 #endif
4741         }
4742
4743         sprintf(msg, "unknown error (%d)", errnum);
4744
4745         errno = saved_errno;
4746         return msg;
4747 }
4748
4749 char *
4750 purple_utf8_ncr_encode(const char *str)
4751 {
4752         GString *out;
4753
4754         g_return_val_if_fail(str != NULL, NULL);
4755         g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL);
4756
4757         out = g_string_new("");
4758
4759         for(; *str; str = g_utf8_next_char(str)) {
4760                 gunichar wc = g_utf8_get_char(str);
4761
4762                 /* super simple check. hopefully not too wrong. */
4763                 if(wc >= 0x80) {
4764                         g_string_append_printf(out, "&#%u;", (guint32) wc);
4765                 } else {
4766                         g_string_append_unichar(out, wc);
4767                 }
4768         }
4769
4770         return g_string_free(out, FALSE);
4771 }
4772
4773
4774 char *
4775 purple_utf8_ncr_decode(const char *str)
4776 {
4777         GString *out;
4778         char *buf, *b;
4779
4780         g_return_val_if_fail(str != NULL, NULL);
4781         g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL);
4782
4783         buf = (char *) str;
4784         out = g_string_new("");
4785
4786         while( (b = strstr(buf, "&#")) ) {
4787                 gunichar wc;
4788                 int base = 0;
4789
4790                 /* append everything leading up to the &# */
4791                 g_string_append_len(out, buf, b-buf);
4792
4793                 b += 2; /* skip past the &# */
4794
4795                 /* strtoul will treat 0x prefix as hex, but not just x */
4796                 if(*b == 'x' || *b == 'X') {
4797                         base = 16;
4798                         b++;
4799                 }
4800
4801                 /* advances buf to the end of the ncr segment */
4802                 wc = (gunichar) strtoul(b, &buf, base);
4803
4804                 /* this mimics the previous impl of ncr_decode */
4805                 if(*buf == ';') {
4806                         g_string_append_unichar(out, wc);
4807                         buf++;
4808                 }
4809         }
4810
4811         /* append whatever's left */
4812         g_string_append(out, buf);
4813
4814         return g_string_free(out, FALSE);
4815 }
4816
4817
4818 int
4819 purple_utf8_strcasecmp(const char *a, const char *b)
4820 {
4821         char *a_norm = NULL;
4822         char *b_norm = NULL;
4823         int ret = -1;
4824
4825         if(!a && b)
4826                 return -1;
4827         else if(!b && a)
4828                 return 1;
4829         else if(!a && !b)
4830                 return 0;
4831
4832         if(!g_utf8_validate(a, -1, NULL) || !g_utf8_validate(b, -1, NULL))
4833         {
4834                 purple_debug_error("purple_utf8_strcasecmp",
4835                                                  "One or both parameters are invalid UTF8\n");
4836                 return ret;
4837         }
4838
4839         a_norm = g_utf8_casefold(a, -1);
4840         b_norm = g_utf8_casefold(b, -1);
4841         ret = g_utf8_collate(a_norm, b_norm);
4842         g_free(a_norm);
4843         g_free(b_norm);
4844
4845         return ret;
4846 }
4847
4848 /* previously conversation::find_nick() */
4849 gboolean
4850 purple_utf8_has_word(const char *haystack, const char *needle)
4851 {
4852         char *hay, *pin, *p;
4853         const char *start, *prev_char;
4854         gunichar before, after;
4855         int n;
4856         gboolean ret = FALSE;
4857
4858         start = hay = g_utf8_strdown(haystack, -1);
4859
4860         pin = g_utf8_strdown(needle, -1);
4861         n = strlen(pin);
4862
4863         while ((p = strstr(start, pin)) != NULL) {
4864                 prev_char = g_utf8_find_prev_char(hay, p);
4865                 before = (gunichar)-2;
4866                 if (prev_char) {
4867                         before = g_utf8_get_char(prev_char);
4868                 }
4869                 after = g_utf8_get_char_validated(p + n, - 1);
4870
4871                 if ((p == hay ||
4872                                 /* The character before is a reasonable guess for a word boundary
4873                                    ("!g_unichar_isalnum()" is not a valid way to determine word
4874                                     boundaries, but it is the only reasonable thing to do here),
4875                                    and isn't the '&' from a "&amp;" or some such entity*/
4876                                 (before != (gunichar)-2 && !g_unichar_isalnum(before) && *(p - 1) != '&'))
4877                                 && after != (gunichar)-2 && !g_unichar_isalnum(after)) {
4878                         ret = TRUE;
4879                         break;
4880                 }
4881                 start = p + 1;
4882         }
4883
4884         g_free(pin);
4885         g_free(hay);
4886
4887         return ret;
4888 }
4889
4890 void
4891 purple_print_utf8_to_console(FILE *filestream, char *message)
4892 {
4893         gchar *message_conv;
4894         GError *error = NULL;
4895
4896         /* Try to convert 'message' to user's locale */
4897         message_conv = g_locale_from_utf8(message, -1, NULL, NULL, &error);
4898         if (message_conv != NULL) {
4899                 fputs(message_conv, filestream);
4900                 g_free(message_conv);
4901         }
4902         else
4903         {
4904                 /* use 'message' as a fallback */
4905                 g_warning("%s\n", error->message);
4906                 g_error_free(error);
4907                 fputs(message, filestream);
4908         }
4909 }
4910
4911 gboolean purple_message_meify(char *message, gssize len)
4912 {
4913         char *c;
4914         gboolean inside_html = FALSE;
4915
4916         g_return_val_if_fail(message != NULL, FALSE);
4917
4918         if(len == -1)
4919                 len = strlen(message);
4920
4921         for (c = message; *c; c++, len--) {
4922                 if(inside_html) {
4923                         if(*c == '>')
4924                                 inside_html = FALSE;
4925                 } else {
4926                         if(*c == '<')
4927                                 inside_html = TRUE;
4928                         else
4929                                 break;
4930                 }
4931         }
4932
4933         if(*c && !g_ascii_strncasecmp(c, "/me ", 4)) {
4934                 memmove(c, c+4, len-3);
4935                 return TRUE;
4936         }
4937
4938         return FALSE;
4939 }
4940
/*
 * Returns a copy of a label with its mnemonic underscores removed.
 *
 *  - "__" becomes a literal "_".
 *  - A single "_" is dropped.
 *  - A "(_X)" group, where X is a single ASCII character, is removed
 *    entirely (CJK-style shortcut).
 *  - Bytes of multibyte UTF-8 characters are copied without being
 *    inspected for underscores.
 *
 * in: the label text; must not be NULL.
 *
 * Returns a newly allocated string owned by the caller.
 */
char *purple_text_strip_mnemonic(const char *in)
{
	char *out;
	char *dst;
	char *mark;
	const char *src;

	g_return_val_if_fail(in != NULL, NULL);

	/* Output is never longer than the input. */
	out = g_malloc(strlen(in)+1);
	dst = out;
	src = in;

	/* Output position to rewind to when a "(_X)" group is removed. */
	mark = dst;

	while (*src != '\0') {
		if (*src == '_') {
			if (dst > out && src > in && src[-1] == '(' && src[1] &&
			    !(src[1] & 0x80) && src[2] == ')') {
				/* Detected CJK style shortcut (Bug 875311) */
				dst = mark; /* undo the left parenthesis */
				src += 3;   /* and skip the whole mess */
			} else if (src[1] == '_') {
				*dst++ = '_';
				src += 2;
				mark = dst;
			} else {
				src++;
			}
			continue;
		}

		/* We don't want to corrupt the middle of UTF-8 characters */
		if (!(*src & 0x80)) {
			/* other 1-byte char */
			if (*src != ' ')
				mark = dst;
			*dst++ = *src++;
			continue;
		}

		/* Multibyte utf8 char, don't look for _ inside these */
		{
			int seq_len;
			int copied;

			if ((*src & 0xe0) == 0xc0)
				seq_len = 2;
			else if ((*src & 0xf0) == 0xe0)
				seq_len = 3;
			else if ((*src & 0xf8) == 0xf0)
				seq_len = 4;
			else if ((*src & 0xfc) == 0xf8)
				seq_len = 5;
			else if ((*src & 0xfe) == 0xfc)
				seq_len = 6;
			else
				seq_len = 1; /* Illegal utf8 */

			mark = dst; /* unless we want to delete CJK spaces too */
			for (copied = 0; copied < seq_len && *src; copied++)
				*dst++ = *src++;
		}
	}
	*dst = '\0';

	return out;
}
5001
/*
 * Reverses purple_escape_filename().  The escaping is percent-encoding, so
 * this hands the string to purple_url_decode() and returns its result
 * unchanged.
 */
const char* purple_unescape_filename(const char *escaped) {
	const char *decoded = purple_url_decode(escaped);

	return decoded;
}
5005
5006
5007 /* this is almost identical to purple_url_encode (hence purple_url_decode
5008  * being used above), but we want to keep certain characters unescaped
5009  * for compat reasons */
5010 const char *
5011 purple_escape_filename(const char *str)
5012 {
5013         const char *iter;
5014         static char buf[BUF_LEN];
5015         char utf_char[6];
5016         guint i, j = 0;
5017
5018         g_return_val_if_fail(str != NULL, NULL);
5019         g_return_val_if_fail(g_utf8_validate(str, -1, NULL), NULL);
5020
5021         iter = str;
5022         for (; *iter && j < (BUF_LEN - 1) ; iter = g_utf8_next_char(iter)) {
5023                 gunichar c = g_utf8_get_char(iter);
5024                 /* If the character is an ASCII character and is alphanumeric,
5025                  * or one of the specified values, no need to escape */
5026                 if (c < 128 && (g_ascii_isalnum(c) || c == '@' || c == '-' ||
5027                                 c == '_' || c == '.' || c == '#')) {
5028                         buf[j++] = c;
5029                 } else {
5030                         int bytes = g_unichar_to_utf8(c, utf_char);
5031                         for (i = 0; (int)i < bytes; i++) {
5032                                 if (j > (BUF_LEN - 4))
5033                                         break;
5034                                 if (i >= sizeof(utf_char)) {
5035                                         g_warn_if_reached();
5036                                         break;
5037                                 }
5038                                 sprintf(buf + j, "%%%02x", utf_char[i] & 0xff);
5039                                 j += 3;
5040                         }
5041                 }
5042         }
5043 #ifdef _WIN32
5044         /* File/Directory names in windows cannot end in periods/spaces.
5045          * http://msdn.microsoft.com/en-us/library/aa365247%28VS.85%29.aspx
5046          */
5047         while (j > 0 && (buf[j - 1] == '.' || buf[j - 1] == ' '))
5048                 j--;
5049 #endif
5050         buf[j] = '\0';
5051
5052         return buf;
5053 }
5054
/*
 * Maps the combined "prpl-oscar" protocol id to a specific one based on
 * the account name: "prpl-icq" if the name consists only of ASCII digits,
 * "prpl-aim" otherwise.
 *
 * act:      the account name; if NULL, protocol is returned unchanged.
 * protocol: the protocol id to map.
 *
 * Returns a string literal for the mapped id, or protocol itself when it
 * is not "prpl-oscar" (or act is NULL).
 */
const char *_purple_oscar_convert(const char *act, const char *protocol)
{
	if (act && purple_strequal(protocol, "prpl-oscar")) {
		const char *ch;

		/* g_ascii_isdigit() is well defined for every char value,
		 * unlike isdigit() on a negative plain char. */
		for (ch = act; *ch != '\0'; ch++)
			if (!g_ascii_isdigit(*ch))
				return "prpl-aim";
		return "prpl-icq";
	}
	return protocol;
}
5066
/*
 * Resets the disposition of the signals listed below to SIG_DFL.
 * Compiles to a no-op on Windows or when HAVE_SIGNAL_H is not defined.
 */
void purple_restore_default_signal_handlers(void)
{
#ifndef _WIN32
#ifdef HAVE_SIGNAL_H
	/* Processed in this order. */
	static const int defaulted[] = {
		SIGHUP,         /* 1: terminal line hangup */
		SIGINT,         /* 2: interrupt program */
		SIGQUIT,        /* 3: quit program */
		SIGILL,         /* 4:  illegal instruction (not reset when caught) */
		SIGTRAP,        /* 5:  trace trap (not reset when caught) */
		SIGABRT,        /* 6:  abort program */

#ifdef SIGPOLL
		SIGPOLL,        /* 7:  pollable event (POSIX) */
#endif /* SIGPOLL */

#ifdef SIGEMT
		SIGEMT,         /* 7:  EMT instruction (Non-POSIX) */
#endif /* SIGEMT */

		SIGFPE,         /* 8:  floating point exception */
		SIGBUS,         /* 10: bus error */
		SIGSEGV,        /* 11: segmentation violation */
		SIGSYS,         /* 12: bad argument to system call */
		SIGPIPE,        /* 13: write on a pipe with no reader */
		SIGALRM,        /* 14: real-time timer expired */
		SIGTERM,        /* 15: software termination signal */
		SIGCHLD,        /* 20: child status has changed */
		SIGXCPU,        /* 24: exceeded CPU time limit */
		SIGXFSZ         /* 25: exceeded file size limit */
	};
	unsigned int idx;

	for (idx = 0; idx < sizeof(defaulted) / sizeof(defaulted[0]); idx++)
		signal(defaulted[idx], SIG_DFL);
#endif /* HAVE_SIGNAL_H */
#endif /* !_WIN32 */
}
5099
5100 static void
5101 set_status_with_attrs(PurpleStatus *status, ...)
5102 {
5103         va_list args;
5104         va_start(args, status);
5105         purple_status_set_active_with_attrs(status, TRUE, args);
5106         va_end(args);
5107 }
5108
5109 void purple_util_set_current_song(const char *title, const char *artist, const char *album)
5110 {
5111         GList *list = purple_accounts_get_all();
5112         for (; list; list = list->next) {
5113                 PurplePresence *presence;
5114                 PurpleStatus *tune;
5115                 PurpleAccount *account = list->data;
5116                 if (!purple_account_get_enabled(account, purple_core_get_ui()))
5117                         continue;
5118
5119                 presence = purple_account_get_presence(account);
5120                 tune = purple_presence_get_status(presence, "tune");
5121                 if (!tune)
5122                         continue;
5123                 if (title) {
5124                         set_status_with_attrs(tune,
5125                                         PURPLE_TUNE_TITLE, title,
5126                                         PURPLE_TUNE_ARTIST, artist,
5127                                         PURPLE_TUNE_ALBUM, album,
5128                                         NULL);
5129                 } else {
5130                         purple_status_set_active(tune, FALSE);
5131                 }
5132         }
5133 }
5134
5135 char * purple_util_format_song_info(const char *title, const char *artist, const char *album, gpointer unused)
5136 {
5137         GString *string;
5138         char *esc;
5139
5140         if (!title || !*title)
5141                 return NULL;
5142
5143         esc = g_markup_escape_text(title, -1);
5144         string = g_string_new("");
5145         g_string_append_printf(string, "%s", esc);
5146         g_free(esc);
5147
5148         if (artist && *artist) {
5149                 esc = g_markup_escape_text(artist, -1);
5150                 g_string_append_printf(string, _(" - %s"), esc);
5151                 g_free(esc);
5152         }
5153
5154         if (album && *album) {
5155                 esc = g_markup_escape_text(album, -1);
5156                 g_string_append_printf(string, _(" (%s)"), esc);
5157                 g_free(esc);
5158         }
5159
5160         return g_string_free(string, FALSE);
5161 }
5162
5163 const gchar *
5164 purple_get_host_name(void)
5165 {
5166         return g_get_host_name();
5167 }
5168
5169 gchar *
5170 purple_uuid_random(void)
5171 {
5172         guint32 tmp, a, b;
5173
5174         tmp = g_random_int();
5175         a = 0x4000 | (tmp & 0xFFF); /* 0x4000 to 0x4FFF */
5176         tmp >>= 12;
5177         b = ((1 << 3) << 12) | (tmp & 0x3FFF); /* 0x8000 to 0xBFFF */
5178
5179         tmp = g_random_int();
5180
5181         return g_strdup_printf("%08x-%04x-%04x-%04x-%04x%08x",
5182                         g_random_int(),
5183                         tmp & 0xFFFF,
5184                         a,
5185                         b,
5186                         (tmp >> 16) & 0xFFFF, g_random_int());
5187 }