gconvert.c

   1 /* GLIB - Library of useful routines for C programming
   2  *
   3  * gconvert.c: Convert between character sets using iconv
   4  * Copyright Red Hat Inc., 2000
   5  * Authors: Havoc Pennington <hp@redhat.com>, Owen Taylor <otaylor@redhat.com>
   6  *
   7  * This library is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2 of the License, or (at your option) any later version.
  11  *
  12  * This library is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with this library; if not, write to the
  19  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20  * Boston, MA 02111-1307, USA.
  21  */
  22
  23 #include <iconv.h>
  24 #include <errno.h>
  25 #include <string.h>
  26 #include <stdlib.h>
  27
  28 #include "glib.h"
  29 #include "config.h"
  30
  31 #ifdef G_PLATFORM_WIN32
  32 #define STRICT
  33 #include <windows.h>
  34 #undef STRICT
  35 #endif
  36
  37 #include "glibintl.h"
  38
  39 GQuark
  40 g_convert_error_quark()
  41 {
  42   static GQuark quark;
  43   if (!quark)
  44     quark = g_quark_from_static_string ("g_convert_error");
  45
  46   return quark;
  47 }
  48
  49 #if defined(USE_LIBICONV) && !defined (_LIBICONV_H)
  50 #error libiconv in use but included iconv.h not from libiconv
  51 #endif
  52 #if !defined(USE_LIBICONV) && defined (_LIBICONV_H)
  53 #error libiconv not in use but included iconv.h is from libiconv
  54 #endif
  55
  56 GIConv
  57 g_iconv_open (const gchar  *to_codeset,
  58               const gchar  *from_codeset)
  59 {
  60   iconv_t cd = iconv_open (to_codeset, from_codeset);
  61
  62   return (GIConv)cd;
  63 }
  64
  65 size_t
  66 g_iconv (GIConv   converter,
  67          gchar  **inbuf,
  68          size_t  *inbytes_left,
  69          gchar  **outbuf,
  70          size_t  *outbytes_left)
  71 {
  72   iconv_t cd = (iconv_t)converter;
  73
  74   return iconv (cd, inbuf, inbytes_left, outbuf, outbytes_left);
  75 }
  76
  77 gint
  78 g_iconv_close (GIConv converter)
  79 {
  80   iconv_t cd = (iconv_t)converter;
  81
  82   return iconv_close (cd);
  83 }
  84
  85 static GIConv
  86 open_converter (const gchar *to_codeset,
  87                 const gchar *from_codeset,
  88                 GError     **error)
  89 {
  90   GIConv cd = g_iconv_open (to_codeset, from_codeset);
  91
  92   if (cd == (iconv_t) -1)
  93     {
  94       /* Something went wrong.  */
  95       if (errno == EINVAL)
  96         g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
  97                      _("Conversion from character set `%s' to `%s' is not supported"),
  98                      from_codeset, to_codeset);
  99       else
 100         g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
 101                      _("Could not open converter from `%s' to `%s': %s"),
 102                      from_codeset, to_codeset, strerror (errno));
 103     }
 104
 105   return cd;
 106
 107 }
 108
 109 /**
 110  * g_convert:
 111  * @str:           the string to convert
 112  * @len:           the length of the string
 113  * @to_codeset:    name of character set into which to convert @str
 114  * @from_codeset:  character set of @str.
 115  * @bytes_read:    location to store the number of bytes in the
 116  *                 input string that were successfully converted, or %NULL.
 117  *                 Even if the conversion was succesful, this may be
 118  *                 less than len if there were partial characters
 119  *                 at the end of the input. If the error
 120  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 121  *                 stored will the byte fofset after the last valid
 122  *                 input sequence.
 123  * @bytes_written: the stored in the output buffer (not including the
 124  *                 terminating nul.
 125  * @error:         location to store the error occuring, or %NULL to ignore
 126  *                 errors. Any of the errors in #GConvertError may occur.
 127  *
 128  * Convert a string from one character set to another.
 129  *
 130  * Return value: If the conversion was successful, a newly allocated
 131  *               NUL-terminated string, which must be freed with
 132  *               g_free. Otherwise %NULL and @error will be set.
 133  **/
 134 gchar*
 135 g_convert (const gchar *str,
 136            gint         len,
 137            const gchar *to_codeset,
 138            const gchar *from_codeset,
 139            gint        *bytes_read,
 140            gint        *bytes_written,
 141            GError     **error)
 142 {
 143   gchar *dest;
 144   gchar *outp;
 145   const gchar *p;
 146   size_t inbytes_remaining;
 147   size_t outbytes_remaining;
 148   size_t err;
 149   GIConv cd;
 150   size_t outbuf_size;
 151   gboolean have_error = FALSE;
 152
 153   g_return_val_if_fail (str != NULL, NULL);
 154   g_return_val_if_fail (to_codeset != NULL, NULL);
 155   g_return_val_if_fail (from_codeset != NULL, NULL);
 156
 157   cd = open_converter (to_codeset, from_codeset, error);
 158
 159   if (cd == (GIConv) -1)
 160     {
 161       if (bytes_read)
 162         *bytes_read = 0;
 163
 164       if (bytes_written)
 165         *bytes_written = 0;
 166
 167       return NULL;
 168     }
 169
 170   if (len < 0)
 171     len = strlen (str);
 172
 173   p = str;
 174   inbytes_remaining = len;
 175
 176   /* Due to a GLIBC bug, round outbuf_size up to a multiple of 4 */
 177   /* + 1 for nul in case len == 1 */
 178   outbuf_size = ((len + 3) & ~3) + 1;
 179
 180   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
 181   outp = dest = g_malloc (outbuf_size);
 182
 183  again:
 184
 185   err = g_iconv (cd, (char **)&p, &inbytes_remaining, &outp, &outbytes_remaining);
 186
 187   if (err == (size_t) -1)
 188     {
 189       switch (errno)
 190         {
 191         case EINVAL:
 192           /* Incomplete text, do not report an error */
 193           break;
 194         case E2BIG:
 195           {
 196             size_t used = outp - dest;
 197
 198             /* glibc's iconv can return E2BIG even if there is space
 199              * remaining if an internal buffer is exhausted. The
 200              * folllowing is a heuristic to catch this. The 16 is
 201              * pretty arbitrary.
 202              */
 203             if (used + 16 > outbuf_size)
 204               {
 205                 outbuf_size = (outbuf_size - 1) * 2 + 1;
 206                 dest = g_realloc (dest, outbuf_size);
 207
 208                 outp = dest + used;
 209                 outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
 210               }
 211
 212             goto again;
 213           }
 214         case EILSEQ:
 215           g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
 216                        _("Invalid byte sequence in conversion input"));
 217           have_error = TRUE;
 218           break;
 219         default:
 220           g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
 221                        _("Error during conversion: %s"),
 222                        strerror (errno));
 223           have_error = TRUE;
 224           break;
 225         }
 226     }
 227
 228   *outp = '\0';
 229
 230   g_iconv_close (cd);
 231
 232   if (bytes_read)
 233     *bytes_read = p - str;
 234   else
 235     {
 236       if ((p - str) != len)
 237         {
 238           if (!have_error)
 239             {
 240               g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
 241                            _("Partial character sequence at end of input"));
 242               have_error = TRUE;
 243             }
 244         }
 245     }
 246
 247   if (bytes_written)
 248     *bytes_written = outp - dest;       /* Doesn't include '\0' */
 249
 250   if (have_error)
 251     {
 252       g_free (dest);
 253       return NULL;
 254     }
 255   else
 256     return dest;
 257 }
 258
 259 /**
 260  * g_convert_with_fallback:
 261  * @str:          the string to convert
 262  * @len:          the length of the string
 263  * @to_codeset:   name of character set into which to convert @str
 264  * @from_codeset: character set of @str.
 265  * @fallback:     UTF-8 string to use in place of character not
 266  *                present in the target encoding. (This must be
 267  *                in the target encoding), if %NULL, characters
 268  *                not in the target encoding will be represented
 269  *                as Unicode escapes \x{XXXX} or \x{XXXXXX}.
 270  * @bytes_read:   location to store the number of bytes in the
 271  *                input string that were successfully converted, or %NULL.
 272  *                Even if the conversion was succesful, this may be
 273  *                less than len if there were partial characters
 274  *                at the end of the input. If the error
 275  *                G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 276  *                stored will the byte fofset after the last valid
 277  *                input sequence.
 278  * @bytes_written: the stored in the output buffer (not including the
 279  *                 terminating nul.
 280  * @error:        location to store the error occuring, or %NULL to ignore
 281  *                errors. Any of the errors in #GConvertError may occur.
 282  *
 283  * Convert a string from one character set to another, possibly
 284  * including fallback sequences for characters not representable
 285  * in the output. Note that it is not guaranteed that the specification
 286  * for the fallback sequences in @fallback will be honored. Some
 287  * systems may do a approximate conversion from @from_codeset
 288  * to @to_codeset in their iconv() functions, in which case GLib
 289  * will simply return that approximate conversion.
 290  *
 291  * Return value: If the conversion was successful, a newly allocated
 292  *               NUL-terminated string, which must be freed with
 293  *               g_free. Otherwise %NULL and @error will be set.
 294  **/
 295 gchar*
 296 g_convert_with_fallback (const gchar *str,
 297                          gint         len,
 298                          const gchar *to_codeset,
 299                          const gchar *from_codeset,
 300                          gchar       *fallback,
 301                          gint        *bytes_read,
 302                          gint        *bytes_written,
 303                          GError     **error)
 304 {
 305   gchar *utf8;
 306   gchar *dest;
 307   gchar *outp;
 308   const gchar *insert_str = NULL;
 309   const gchar *p;
 310   int inbytes_remaining;
 311   const gchar *save_p = NULL;
 312   size_t save_inbytes = 0;
 313   size_t outbytes_remaining;
 314   size_t err;
 315   GIConv cd;
 316   size_t outbuf_size;
 317   gboolean have_error = FALSE;
 318   gboolean done = FALSE;
 319
 320   GError *local_error = NULL;
 321
 322   g_return_val_if_fail (str != NULL, NULL);
 323   g_return_val_if_fail (to_codeset != NULL, NULL);
 324   g_return_val_if_fail (from_codeset != NULL, NULL);
 325
 326   if (len < 0)
 327     len = strlen (str);
 328
 329   /* Try an exact conversion; we only proceed if this fails
 330    * due to an illegal sequence in the input string.
 331    */
 332   dest = g_convert (str, len, to_codeset, from_codeset,
 333                     bytes_read, bytes_written, &local_error);
 334   if (!local_error)
 335     return dest;
 336
 337   if (!g_error_matches (local_error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE))
 338     {
 339       g_propagate_error (error, local_error);
 340       return NULL;
 341     }
 342   else
 343     g_error_free (local_error);
 344
 345   local_error = NULL;
 346
 347   /* No go; to proceed, we need a converter from "UTF-8" to
 348    * to_codeset, and the string as UTF-8.
 349    */
 350   cd = open_converter (to_codeset, "UTF-8", error);
 351   if (cd == (GIConv) -1)
 352     {
 353       if (bytes_read)
 354         *bytes_read = 0;
 355
 356       if (bytes_written)
 357         *bytes_written = 0;
 358
 359       return NULL;
 360     }
 361
 362   utf8 = g_convert (str, len, "UTF-8", from_codeset,
 363                     bytes_read, &inbytes_remaining, error);
 364   if (!utf8)
 365     return NULL;
 366
 367   /* Now the heart of the code. We loop through the UTF-8 string, and
 368    * whenever we hit an offending character, we form fallback, convert
 369    * the fallback to the target codeset, and then go back to
 370    * converting the original string after finishing with the fallback.
 371    *
 372    * The variables save_p and save_inbytes store the input state
 373    * for the original string while we are converting the fallback
 374    */
 375   p = utf8;
 376   /* Due to a GLIBC bug, round outbuf_size up to a multiple of 4 */
 377   /* + 1 for nul in case len == 1 */
 378   outbuf_size = ((len + 3) & ~3) + 1;
 379   outbytes_remaining = outbuf_size - 1; /* -1 for nul */
 380   outp = dest = g_malloc (outbuf_size);
 381
 382   while (!done && !have_error)
 383     {
 384       size_t inbytes_tmp = inbytes_remaining;
 385       err = g_iconv (cd, (char **)&p, &inbytes_tmp, &outp, &outbytes_remaining);
 386       inbytes_remaining = inbytes_tmp;
 387
 388       if (err == (size_t) -1)
 389         {
 390           switch (errno)
 391             {
 392             case EINVAL:
 393               g_assert_not_reached();
 394               break;
 395             case E2BIG:
 396               {
 397                 size_t used = outp - dest;
 398
 399                 /* glibc's iconv can return E2BIG even if there is space
 400                  * remaining if an internal buffer is exhausted. The
 401                  * folllowing is a heuristic to catch this. The 16 is
 402                  * pretty arbitrary.
 403                  */
 404                 if (used + 16 > outbuf_size)
 405                   {
 406                     outbuf_size = (outbuf_size - 1) * 2 + 1;
 407                     dest = g_realloc (dest, outbuf_size);
 408
 409                     outp = dest + used;
 410                     outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
 411                   }
 412
 413                 break;
 414               }
 415             case EILSEQ:
 416               if (save_p)
 417                 {
 418                   /* Error converting fallback string - fatal
 419                    */
 420                   g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_ILLEGAL_SEQUENCE,
 421                                _("Cannot convert fallback '%s' to codeset '%s'"),
 422                                insert_str, to_codeset);
 423                   have_error = TRUE;
 424                   break;
 425                 }
 426               else
 427                 {
 428                   if (!fallback)
 429                     {
 430                       gunichar ch = g_utf8_get_char (p);
 431                       insert_str = g_strdup_printf ("\\x{%0*X}",
 432                                                     (ch < 0x10000) ? 4 : 6,
 433                                                     ch);
 434                     }
 435                   else
 436                     insert_str = fallback;
 437
 438                   save_p = g_utf8_next_char (p);
 439                   save_inbytes = inbytes_remaining - (save_p - p);
 440                   p = insert_str;
 441                   inbytes_remaining = strlen (p);
 442                 }
 443               break;
 444             default:
 445               g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
 446                            _("Error during conversion: %s"),
 447                            strerror (errno));
 448               have_error = TRUE;
 449               break;
 450             }
 451         }
 452       else
 453         {
 454           if (save_p)
 455             {
 456               if (!fallback)
 457                 g_free ((gchar *)insert_str);
 458               p = save_p;
 459               inbytes_remaining = save_inbytes;
 460               save_p = NULL;
 461             }
 462           else
 463             done = TRUE;
 464         }
 465     }
 466
 467   /* Cleanup
 468    */
 469   *outp = '\0';
 470
 471   g_iconv_close (cd);
 472
 473   if (bytes_written)
 474     *bytes_written = outp - str;        /* Doesn't include '\0' */
 475
 476   g_free (utf8);
 477
 478   if (have_error)
 479     {
 480       if (save_p && !fallback)
 481         g_free ((gchar *)insert_str);
 482       g_free (dest);
 483       return NULL;
 484     }
 485   else
 486     return dest;
 487 }
 488
 489 /*
 490  * g_locale_to_utf8
 491  *
 492  *
 493  */
 494
 495 /**
 496  * g_locale_to_utf8:
 497  * @opsysstring:   a string in the encoding of the current locale
 498  * @len:           the length of the string, or -1 if the string is
 499  *                 NULL-terminated.
 500  * @bytes_read:    location to store the number of bytes in the
 501  *                 input string that were successfully converted, or %NULL.
 502  *                 Even if the conversion was succesful, this may be
 503  *                 less than len if there were partial characters
 504  *                 at the end of the input. If the error
 505  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 506  *                 stored will the byte fofset after the last valid
 507  *                 input sequence.
 508  * @bytes_written: the stored in the output buffer (not including the
 509  *                 terminating nul.
 510  * @error: location to store the error occuring, or %NULL to ignore
 511  *                 errors. Any of the errors in #GConvertError may occur.
 512  *
 513  * Converts a string which is in the encoding used for strings by
 514  * the C runtime (usually the same as that used by the operating
 515  * system) in the current locale into a UTF-8 string.
 516  *
 517  * Return value: The converted string, or %NULL on an error.
 518  **/
 519 gchar *
 520 g_locale_to_utf8 (const gchar  *opsysstring,
 521                   gint          len,
 522                   gint         *bytes_read,
 523                   gint         *bytes_written,
 524                   GError      **error)
 525 {
 526 #ifdef G_PLATFORM_WIN32
 527
 528   gint i, clen, total_len, wclen, first;
 529   wchar_t *wcs, wc;
 530   gchar *result, *bp;
 531   const wchar_t *wcp;
 532
 533   if (len == -1)
 534     len = strlen (opsysstring);
 535
 536   wcs = g_new (wchar_t, len);
 537   wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len);
 538
 539   wcp = wcs;
 540   total_len = 0;
 541   for (i = 0; i < wclen; i++)
 542     {
 543       wc = *wcp++;
 544
 545       if (wc < 0x80)
 546         total_len += 1;
 547       else if (wc < 0x800)
 548         total_len += 2;
 549       else if (wc < 0x10000)
 550         total_len += 3;
 551       else if (wc < 0x200000)
 552         total_len += 4;
 553       else if (wc < 0x4000000)
 554         total_len += 5;
 555       else
 556         total_len += 6;
 557     }
 558
 559   result = g_malloc (total_len + 1);
 560
 561   wcp = wcs;
 562   bp = result;
 563   for (i = 0; i < wclen; i++)
 564     {
 565       wc = *wcp++;
 566
 567       if (wc < 0x80)
 568         {
 569           first = 0;
 570           clen = 1;
 571         }
 572       else if (wc < 0x800)
 573         {
 574           first = 0xc0;
 575           clen = 2;
 576         }
 577       else if (wc < 0x10000)
 578         {
 579           first = 0xe0;
 580           clen = 3;
 581         }
 582       else if (wc < 0x200000)
 583         {
 584           first = 0xf0;
 585           clen = 4;
 586         }
 587       else if (wc < 0x4000000)
 588         {
 589           first = 0xf8;
 590           clen = 5;
 591         }
 592       else
 593         {
 594           first = 0xfc;
 595           clen = 6;
 596         }
 597
 598       /* Woo-hoo! */
 599       switch (clen)
 600         {
 601         case 6: bp[5] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 602         case 5: bp[4] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 603         case 4: bp[3] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 604         case 3: bp[2] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 605         case 2: bp[1] = (wc & 0x3f) | 0x80; wc >>= 6; /* Fall through */
 606         case 1: bp[0] = wc | first;
 607         }
 608
 609       bp += clen;
 610     }
 611   *bp = 0;
 612
 613   g_free (wcs);
 614
 615   if (bytes_read)
 616     *bytes_read = len;
 617   if (bytes_written)
 618     *bytes_written = total_len;
 619
 620   return result;
 621
 622 #else  /* !G_PLATFORM_WIN32 */
 623
 624   char *charset, *str;
 625
 626   if (g_get_charset (&charset))
 627     return g_strdup (opsysstring);
 628
 629   str = g_convert (opsysstring, len,
 630                    "UTF-8", charset, bytes_read, bytes_written, error);
 631
 632   return str;
 633 #endif /* !G_PLATFORM_WIN32 */
 634 }
 635
 636 /**
 637  * g_locale_from_utf8:
 638  * @utf8string:    a UTF-8 encoded string
 639  * @len:           the length of the string, or -1 if the string is
 640  *                 NULL-terminated.
 641  * @bytes_read:    location to store the number of bytes in the
 642  *                 input string that were successfully converted, or %NULL.
 643  *                 Even if the conversion was succesful, this may be
 644  *                 less than len if there were partial characters
 645  *                 at the end of the input. If the error
 646  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 647  *                 stored will the byte fofset after the last valid
 648  *                 input sequence.
 649  * @bytes_written: the stored in the output buffer (not including the
 650  *                 terminating nul.
 651  * @error: location to store the error occuring, or %NULL to ignore
 652  *                 errors. Any of the errors in #GConvertError may occur.
 653  *
 654  * Converts a string from UTF-8 to the encoding used for strings by
 655  * the C runtime (usually the same as that used by the operating
 656  * system) in the current locale.
 657  *
 658  * Return value: The converted string, or %NULL on an error.
 659  **/
 660 gchar *
 661 g_locale_from_utf8 (const gchar *utf8string,
 662                     gint         len,
 663                     gint        *bytes_read,
 664                     gint        *bytes_written,
 665                     GError     **error)
 666 {
 667 #ifdef G_PLATFORM_WIN32
 668
 669   gint i, mask, clen, mblen;
 670   wchar_t *wcs, *wcp;
 671   gchar *result;
 672   guchar *cp, *end, c;
 673   gint n;
 674
 675   if (len == -1)
 676     len = strlen (utf8string);
 677
 678   /* First convert to wide chars */
 679   cp = (guchar *) utf8string;
 680   end = cp + len;
 681   n = 0;
 682   wcs = g_new (wchar_t, len + 1);
 683   wcp = wcs;
 684   while (cp != end)
 685     {
 686       mask = 0;
 687       c = *cp;
 688
 689       if (c < 0x80)
 690         {
 691           clen = 1;
 692           mask = 0x7f;
 693         }
 694       else if ((c & 0xe0) == 0xc0)
 695         {
 696           clen = 2;
 697           mask = 0x1f;
 698         }
 699       else if ((c & 0xf0) == 0xe0)
 700         {
 701           clen = 3;
 702           mask = 0x0f;
 703         }
 704       else if ((c & 0xf8) == 0xf0)
 705         {
 706           clen = 4;
 707           mask = 0x07;
 708         }
 709       else if ((c & 0xfc) == 0xf8)
 710         {
 711           clen = 5;
 712           mask = 0x03;
 713         }
 714       else if ((c & 0xfc) == 0xfc)
 715         {
 716           clen = 6;
 717           mask = 0x01;
 718         }
 719       else
 720         {
 721           g_free (wcs);
 722           return NULL;
 723         }
 724
 725       if (cp + clen > end)
 726         {
 727           g_free (wcs);
 728           return NULL;
 729         }
 730
 731       *wcp = (cp[0] & mask);
 732       for (i = 1; i < clen; i++)
 733         {
 734           if ((cp[i] & 0xc0) != 0x80)
 735             {
 736               g_free (wcs);
 737               return NULL;
 738             }
 739           *wcp <<= 6;
 740           *wcp |= (cp[i] & 0x3f);
 741         }
 742
 743       cp += clen;
 744       wcp++;
 745       n++;
 746     }
 747   if (cp != end)
 748     {
 749       g_free (wcs);
 750       return NULL;
 751     }
 752
 753   /* n is the number of wide chars constructed */
 754
 755   /* Convert to a string in the current ANSI codepage */
 756
 757   result = g_new (gchar, 3 * n + 1);
 758   mblen = WideCharToMultiByte (CP_ACP, 0, wcs, n, result, 3*n, NULL, NULL);
 759   result[mblen] = 0;
 760   g_free (wcs);
 761
 762   if (bytes_read)
 763     *bytes_read = len;
 764   if (bytes_written)
 765     *bytes_written = mblen;
 766
 767   return result;
 768
 769 #else  /* !G_PLATFORM_WIN32 */
 770
 771   gchar *charset, *str;
 772
 773   if (g_get_charset (&charset))
 774     return g_strdup (utf8string);
 775
 776   str = g_convert (utf8string, strlen (utf8string),
 777                    charset, "UTF-8", bytes_read, bytes_written, error);
 778
 779   return str;
 780
 781 #endif /* !G_PLATFORM_WIN32 */
 782 }
 783
 784 /**
 785  * g_filename_to_utf8:
 786  * @opsysstring:   a string in the encoding for filenames
 787  * @len:           the length of the string, or -1 if the string is
 788  *                 NULL-terminated.
 789  * @bytes_read:    location to store the number of bytes in the
 790  *                 input string that were successfully converted, or %NULL.
 791  *                 Even if the conversion was succesful, this may be
 792  *                 less than len if there were partial characters
 793  *                 at the end of the input. If the error
 794  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 795  *                 stored will the byte fofset after the last valid
 796  *                 input sequence.
 797  * @bytes_written: the stored in the output buffer (not including the
 798  *                 terminating nul.
 799  * @error: location to store the error occuring, or %NULL to ignore
 800  *                 errors. Any of the errors in #GConvertError may occur.
 801  *
 802  * Converts a string which is in the encoding used for filenames
 803  * into a UTF-8 string.
 804  *
 805  * Return value: The converted string, or %NULL on an error.
 806  **/
 807 gchar*
 808 g_filename_to_utf8 (const gchar *opsysstring,
 809                     gint         len,
 810                     gint        *bytes_read,
 811                     gint        *bytes_written,
 812                     GError     **error)
 813 {
 814 #ifdef G_PLATFORM_WIN32
 815   return g_locale_to_utf8 (opsysstring, len,
 816                            bytes_read, bytes_written,
 817                            error);
 818 #else  /* !G_PLATFORM_WIN32 */
 819   if (getenv ("G_BROKEN_FILENAMES"))
 820     return g_locale_to_utf8 (opsysstring, len,
 821                              bytes_read, bytes_written,
 822                              error);
 823
 824   if (bytes_read || bytes_written)
 825     {
 826       gint len = strlen (opsysstring);
 827
 828       if (bytes_read)
 829         *bytes_read = len;
 830       if (bytes_written)
 831         *bytes_written = len;
 832     }
 833
 834   if (len < 0)
 835     return g_strdup (opsysstring);
 836   else
 837     return g_strndup (opsysstring, len);
 838 #endif /* !G_PLATFORM_WIN32 */
 839 }
 840
 841 /**
 842  * g_filename_from_utf8:
 843  * @utf8string:    a UTF-8 encoded string
 844  * @len:           the length of the string, or -1 if the string is
 845  *                 NULL-terminated.
 846  * @bytes_read:    location to store the number of bytes in the
 847  *                 input string that were successfully converted, or %NULL.
 848  *                 Even if the conversion was succesful, this may be
 849  *                 less than len if there were partial characters
 850  *                 at the end of the input. If the error
 851  *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
 852  *                 stored will the byte fofset after the last valid
 853  *                 input sequence.
 854  * @bytes_written: the stored in the output buffer (not including the
 855  *                 terminating nul.
 856  * @error: location to store the error occuring, or %NULL to ignore
 857  *                 errors. Any of the errors in #GConvertError may occur.
 858  *
 859  * Converts a string from UTF-8 to the encoding used for filenames.
 860  *
 861  * Return value: The converted string, or %NULL on an error.
 862  **/
 863 gchar*
 864 g_filename_from_utf8 (const gchar *utf8string,
 865                       gint         len,
 866                       gint        *bytes_read,
 867                       gint        *bytes_written,
 868                       GError     **error)
 869 {
 870 #ifdef G_PLATFORM_WIN32
 871   return g_locale_from_utf8 (utf8string, len,
 872                              bytes_read, bytes_written,
 873                              error);
 874 #else  /* !G_PLATFORM_WIN32 */
 875   if (getenv ("G_BROKEN_FILENAMES"))
 876     return g_locale_from_utf8 (utf8string, len,
 877                                bytes_read, bytes_written,
 878                                error);
 879
 880   if (bytes_read || bytes_written)
 881     {
 882       gint len = strlen (utf8string);
 883
 884       if (bytes_read)
 885         *bytes_read = len;
 886       if (bytes_written)
 887         *bytes_written = len;
 888     }
 889
 890   if (len < 0)
 891     return g_strdup (utf8string);
 892   else
 893     return g_strndup (utf8string, len);
 894 #endif /* !G_PLATFORM_WIN32 */
 895 }