binutils/winduni.c

   1 /* winduni.c -- unicode support for the windres program.
   2    Copyright 1997, 1998, 2000, 2001, 2003, 2005, 2007, 2009
   3    Free Software Foundation, Inc.
   4    Written by Ian Lance Taylor, Cygnus Support.
   5    Rewritten by Kai Tietz, Onevision.
   6
   7    This file is part of GNU Binutils.
   8
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3 of the License, or
  12    (at your option) any later version.
  13
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
  22    02110-1301, USA.  */
  23
  24
  25 /* This file contains unicode support routines for the windres
  26    program.  Ideally, we would have generic unicode support which
  27    would work on all systems.  However, we don't.  Instead, on a
  28    Windows host, we are prepared to call some Windows routines.  This
  29    means that we will generate different output on Windows and Unix
  30    hosts, but that seems better than not really supporting unicode at
  31    all.  */
  32
  33 #include "sysdep.h"
  34 #include "bfd.h"
  35 #include "libiberty.h" /* for xstrdup */
  36 #include "bucomm.h"
/* Must be included before windows.h and winnls.h.  */
  38 #if defined (_WIN32) || defined (__CYGWIN__)
  39 #include <windows.h>
  40 #include <winnls.h>
  41 #endif
  42 #include "winduni.h"
  43 #include "safe-ctype.h"
  44
  45 #if HAVE_ICONV
  46 #include <iconv.h>
  47 #endif
  48
  49 static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
  50 static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
  51 static int unichar_isascii (const unichar *, rc_uint_type);
  52
/* Static code page and language tables.  They are only compiled on hosts
   that are neither _WIN32 nor __CYGWIN__; on Windows hosts the lookup
   functions below query the operating system instead.  */
  55
  56 #if !defined (_WIN32) && !defined (__CYGWIN__)
  57
  58 /* Codepages mapped.  */
  59 static local_iconv_map codepages[] =
  60 {
  61   { 0, "MS-ANSI" },
  62   { 1, "WINDOWS-1252" },
  63   { 437, "MS-ANSI" },
  64   { 737, "MS-GREEK" },
  65   { 775, "WINBALTRIM" },
  66   { 850, "MS-ANSI" },
  67   { 852, "MS-EE" },
  68   { 857, "MS-TURK" },
  69   { 862, "CP862" },
  70   { 864, "CP864" },
  71   { 866, "MS-CYRL" },
  72   { 874, "WINDOWS-874" },
  73   { 932, "CP932" },
  74   { 936, "CP936" },
  75   { 949, "CP949" },
  76   { 950, "CP950" },
  77   { 1250, "WINDOWS-1250" },
  78   { 1251, "WINDOWS-1251" },
  79   { 1252, "WINDOWS-1252" },
  80   { 1253, "WINDOWS-1253" },
  81   { 1254, "WINDOWS-1254" },
  82   { 1255, "WINDOWS-1255" },
  83   { 1256, "WINDOWS-1256" },
  84   { 1257, "WINDOWS-1257" },
  85   { 1258, "WINDOWS-1258" },
  86   { CP_UTF7, "UTF-7" },
  87   { CP_UTF8, "UTF-8" },
  88   { CP_UTF16, "UTF-16" },
  89   { (rc_uint_type) -1, NULL }
  90 };
  91
  92 /* Languages supported.  */
  93 static const wind_language_t languages[] =
  94 {
  95   { 0x0000, 437, 1252, "Neutral", "Neutral" },
  96   { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" },    { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
  97   { 0x0403, 850, 1252, "Catalan", "Spain" },          { 0x0404, 950,  950, "Chinese", "Taiwan" },
  98   { 0x0405, 852, 1250, "Czech", "Czech Republic" },   { 0x0406, 850, 1252, "Danish", "Denmark" },
  99   { 0x0407, 850, 1252, "German", "Germany" },         { 0x0408, 737, 1253, "Greek", "Greece" },
 100   { 0x0409, 437, 1252, "English", "United States" },  { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
 101   { 0x040B, 850, 1252, "Finnish", "Finland" },        { 0x040C, 850, 1252, "French", "France" },
 102   { 0x040D, 862, 1255, "Hebrew", "Israel" },          { 0x040E, 852, 1250, "Hungarian", "Hungary" },
 103   { 0x040F, 850, 1252, "Icelandic", "Iceland" },      { 0x0410, 850, 1252, "Italian", "Italy" },
 104   { 0x0411, 932,  932, "Japanese", "Japan" },         { 0x0412, 949,  949, "Korean", "Korea (south)" },
 105   { 0x0413, 850, 1252, "Dutch", "Netherlands" },      { 0x0414, 850, 1252, "Norwegian (Bokmål)", "Norway" },
 106   { 0x0415, 852, 1250, "Polish", "Poland" },          { 0x0416, 850, 1252, "Portuguese", "Brazil" },
 107   { 0x0418, 852, 1250, "Romanian", "Romania" },       { 0x0419, 866, 1251, "Russian", "Russia" },
 108   { 0x041A, 852, 1250, "Croatian", "Croatia" },       { 0x041B, 852, 1250, "Slovak", "Slovakia" },
 109   { 0x041C, 852, 1250, "Albanian", "Albania" },       { 0x041D, 850, 1252, "Swedish", "Sweden" },
 110   { 0x041E, 874,  874, "Thai", "Thailand" },          { 0x041F, 857, 1254, "Turkish", "Turkey" },
 111   { 0x0421, 850, 1252, "Indonesian", "Indonesia" },   { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
 112   { 0x0423, 866, 1251, "Belarusian", "Belarus" },     { 0x0424, 852, 1250, "Slovene", "Slovenia" },
 113   { 0x0425, 775, 1257, "Estonian", "Estonia" },       { 0x0426, 775, 1257, "Latvian", "Latvia" },
 114   { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
 115   { 0x0429, 864, 1256, "Arabic", "Farsi" },           { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
 116   { 0x042D, 850, 1252, "Basque", "Spain" },
 117   { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
 118   { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
 119   { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
 120   { 0x043C, 437, 1252, "Irish", "Ireland" },
 121   { 0x043E, 850, 1252, "Malay", "Malaysia" },
 122   { 0x0801, 864, 1256, "Arabic", "Iraq" },
 123   { 0x0804, 936,  936, "Chinese (People's republic of China)", "People's republic of China" },
 124   { 0x0807, 850, 1252, "German", "Switzerland" },
 125   { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
 126   { 0x080C, 850, 1252, "French", "Belgium" },
 127   { 0x0810, 850, 1252, "Italian", "Switzerland" },
 128   { 0x0813, 850, 1252, "Dutch", "Belgium" },          { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
 129   { 0x0816, 850, 1252, "Portuguese", "Portugal" },
 130   { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
 131   { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
 132   { 0x0C01, 864, 1256, "Arabic", "Egypt" },
 133   { 0x0C04, 950,  950, "Chinese", "Hong Kong" },
 134   { 0x0C07, 850, 1252, "German", "Austria" },
 135   { 0x0C09, 850, 1252, "English", "Australia" },      { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
 136   { 0x0C0C, 850, 1252, "French", "Canada"},
 137   { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
 138   { 0x1001, 864, 1256, "Arabic", "Libya" },
 139   { 0x1004, 936,  936, "Chinese", "Singapore" },
 140   { 0x1007, 850, 1252, "German", "Luxembourg" },
 141   { 0x1009, 850, 1252, "English", "Canada" },
 142   { 0x100A, 850, 1252, "Spanish", "Guatemala" },
 143   { 0x100C, 850, 1252, "French", "Switzerland" },
 144   { 0x1401, 864, 1256, "Arabic", "Algeria" },
 145   { 0x1407, 850, 1252, "German", "Liechtenstein" },
 146   { 0x1409, 850, 1252, "English", "New Zealand" },    { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
 147   { 0x140C, 850, 1252, "French", "Luxembourg" },
 148   { 0x1801, 864, 1256, "Arabic", "Morocco" },
 149   { 0x1809, 850, 1252, "English", "Ireland" },        { 0x180A, 850, 1252, "Spanish", "Panama" },
 150   { 0x180C, 850, 1252, "French", "Monaco" },
 151   { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
 152   { 0x1C09, 437, 1252, "English", "South Africa" },   { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
 153   { 0x2001, 864, 1256, "Arabic", "Oman" },
 154   { 0x2009, 850, 1252, "English", "Jamaica" },        { 0x200A, 850, 1252, "Spanish", "Venezuela" },
 155   { 0x2401, 864, 1256, "Arabic", "Yemen" },
 156   { 0x2409, 850, 1252, "English", "Caribbean" },      { 0x240A, 850, 1252, "Spanish", "Colombia" },
 157   { 0x2801, 864, 1256, "Arabic", "Syria" },
 158   { 0x2809, 850, 1252, "English", "Belize" },         { 0x280A, 850, 1252, "Spanish", "Peru" },
 159   { 0x2C01, 864, 1256, "Arabic", "Jordan" },
 160   { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
 161   { 0x3001, 864, 1256, "Arabic", "Lebanon" },
 162   { 0x3009, 437, 1252, "English", "Zimbabwe" },       { 0x300A, 850, 1252, "Spanish", "Ecuador" },
 163   { 0x3401, 864, 1256, "Arabic", "Kuwait" },
 164   { 0x3409, 437, 1252, "English", "Philippines" },    { 0x340A, 850, 1252, "Spanish", "Chile" },
 165   { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
 166   { 0x380A, 850, 1252, "Spanish", "Uruguay" },
 167   { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
 168   { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
 169   { 0x4001, 864, 1256, "Arabic", "Qatar" },
 170   { 0x400A, 850, 1252, "Spanish", "Bolivia" },
 171   { 0x440A, 850, 1252, "Spanish", "El Salvador" },
 172   { 0x480A, 850, 1252, "Spanish", "Honduras" },
 173   { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
 174   { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
 175   { (unsigned) -1,  0,      0, NULL, NULL }
 176 };
 177
 178 #endif
 179
/* Specifies the default codepage to be used for unicode
   transformations.  It is initialized to CP_ACP; nothing in this file
   changes it afterwards.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode transformations.
   This is the codepage unicode_from_ascii, unicode_from_ascii_len and
   ascii_from_unicode convert with.  It is initialized to CP_ACP.  */
rc_uint_type wind_current_codepage = CP_ACP;
 187
 188 /* Convert an ASCII string to a unicode string.  We just copy it,
 189    expanding chars to shorts, rather than doing something intelligent.  */
 190
 191 void
 192 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
 193 {
 194   unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
 195 }
 196
 197 /* Convert an ASCII string with length A_LENGTH to a unicode string.  We just
 198    copy it, expanding chars to shorts, rather than doing something intelligent.
 199    This routine converts also \0 within a string.  */
 200
 201 void
 202 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
 203 {
 204   char *tmp, *p;
 205   rc_uint_type tlen, elen, idx = 0;
 206
 207   *unicode = NULL;
 208
 209   if (!a_length)
 210     {
 211       if (length)
 212         *length = 0;
 213       return;
 214     }
 215
 216   /* Make sure we have zero terminated string.  */
 217   p = tmp = (char *) alloca (a_length + 1);
 218   memcpy (tmp, ascii, a_length);
 219   tmp[a_length] = 0;
 220
 221   while (a_length > 0)
 222     {
 223       unichar *utmp, *up;
 224
 225       tlen = strlen (p);
 226
 227       if (tlen > a_length)
 228         tlen = a_length;
 229       if (*p == 0)
 230         {
 231           /* Make room for one more character.  */
 232           utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
 233           if (idx > 0)
 234             {
 235               memcpy (utmp, *unicode, idx * sizeof (unichar));
 236             }
 237           *unicode = utmp;
 238           utmp[idx++] = 0;
 239           --a_length;
 240           p++;
 241           continue;
 242         }
 243       utmp = NULL;
 244       elen = 0;
 245       elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
 246       if (elen)
 247         {
 248           utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
 249           wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
 250           elen /= sizeof (unichar);
 251           elen --;
 252         }
 253       else
 254         {
 255           /* Make room for one more character.  */
 256           utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
 257           if (idx > 0)
 258             {
 259               memcpy (utmp, *unicode, idx * sizeof (unichar));
 260             }
 261           *unicode = utmp;
 262           utmp[idx++] = ((unichar) *p) & 0xff;
 263           --a_length;
 264           p++;
 265           continue;
 266         }
 267       p += tlen;
 268       a_length -= tlen;
 269
 270       up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
 271       if (idx > 0)
 272         memcpy (up, *unicode, idx * sizeof (unichar));
 273
 274       *unicode = up;
 275       if (elen)
 276         memcpy (&up[idx], utmp, sizeof (unichar) * elen);
 277
 278       idx += elen;
 279     }
 280
 281   if (length)
 282     *length = idx;
 283 }
 284
 285 /* Convert an unicode string to an ASCII string.  We just copy it,
 286    shrink shorts to chars, rather than doing something intelligent.
 287    Shorts with not within the char range are replaced by '_'.  */
 288
 289 void
 290 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
 291 {
 292   codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
 293 }
 294
 295 /* Print the unicode string UNICODE to the file E.  LENGTH is the
 296    number of characters to print, or -1 if we should print until the
 297    end of the string.  FIXME: On a Windows host, we should be calling
 298    some Windows function, probably WideCharToMultiByte.  */
 299
 300 void
 301 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
 302 {
 303   while (1)
 304     {
 305       unichar ch;
 306
 307       if (length == 0)
 308         return;
 309       if ((bfd_signed_vma) length > 0)
 310         --length;
 311
 312       ch = *unicode;
 313
 314       if (ch == 0 && (bfd_signed_vma) length < 0)
 315         return;
 316
 317       ++unicode;
 318
 319       if ((ch & 0x7f) == ch)
 320         {
 321           if (ch == '\\')
 322             fputs ("\\\\", e);
 323           else if (ch == '"')
 324             fputs ("\"\"", e);
 325           else if (ISPRINT (ch))
 326             putc (ch, e);
 327           else
 328             {
 329               switch (ch)
 330                 {
 331                 case ESCAPE_A:
 332                   fputs ("\\a", e);
 333                   break;
 334
 335                 case ESCAPE_B:
 336                   fputs ("\\b", e);
 337                   break;
 338
 339                 case ESCAPE_F:
 340                   fputs ("\\f", e);
 341                   break;
 342
 343                 case ESCAPE_N:
 344                   fputs ("\\n", e);
 345                   break;
 346
 347                 case ESCAPE_R:
 348                   fputs ("\\r", e);
 349                   break;
 350
 351                 case ESCAPE_T:
 352                   fputs ("\\t", e);
 353                   break;
 354
 355                 case ESCAPE_V:
 356                   fputs ("\\v", e);
 357                   break;
 358
 359                 default:
 360                   fprintf (e, "\\%03o", (unsigned int) ch);
 361                   break;
 362                 }
 363             }
 364         }
 365       else if ((ch & 0xff) == ch)
 366         fprintf (e, "\\%03o", (unsigned int) ch);
 367       else
 368         fprintf (e, "\\x%04x", (unsigned int) ch);
 369     }
 370 }
 371
 372 /* Print a unicode string to a file.  */
 373
 374 void
 375 ascii_print (FILE *e, const char *s, rc_uint_type length)
 376 {
 377   while (1)
 378     {
 379       char ch;
 380
 381       if (length == 0)
 382         return;
 383       if ((bfd_signed_vma) length > 0)
 384         --length;
 385
 386       ch = *s;
 387
 388       if (ch == 0 && (bfd_signed_vma) length < 0)
 389         return;
 390
 391       ++s;
 392
 393       if ((ch & 0x7f) == ch)
 394         {
 395           if (ch == '\\')
 396             fputs ("\\\\", e);
 397           else if (ch == '"')
 398             fputs ("\"\"", e);
 399           else if (ISPRINT (ch))
 400             putc (ch, e);
 401           else
 402             {
 403               switch (ch)
 404                 {
 405                 case ESCAPE_A:
 406                   fputs ("\\a", e);
 407                   break;
 408
 409                 case ESCAPE_B:
 410                   fputs ("\\b", e);
 411                   break;
 412
 413                 case ESCAPE_F:
 414                   fputs ("\\f", e);
 415                   break;
 416
 417                 case ESCAPE_N:
 418                   fputs ("\\n", e);
 419                   break;
 420
 421                 case ESCAPE_R:
 422                   fputs ("\\r", e);
 423                   break;
 424
 425                 case ESCAPE_T:
 426                   fputs ("\\t", e);
 427                   break;
 428
 429                 case ESCAPE_V:
 430                   fputs ("\\v", e);
 431                   break;
 432
 433                 default:
 434                   fprintf (e, "\\%03o", (unsigned int) ch);
 435                   break;
 436                 }
 437             }
 438         }
 439       else
 440         fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
 441     }
 442 }
 443
 444 rc_uint_type
 445 unichar_len (const unichar *unicode)
 446 {
 447   rc_uint_type r = 0;
 448
 449   if (unicode)
 450     while (unicode[r] != 0)
 451       r++;
 452   else
 453     --r;
 454   return r;
 455 }
 456
 457 unichar *
 458 unichar_dup (const unichar *unicode)
 459 {
 460   unichar *r;
 461   int len;
 462
 463   if (! unicode)
 464     return NULL;
 465   for (len = 0; unicode[len] != 0; ++len)
 466     ;
 467   ++len;
 468   r = ((unichar *) res_alloc (len * sizeof (unichar)));
 469   memcpy (r, unicode, len * sizeof (unichar));
 470   return r;
 471 }
 472
 473 unichar *
 474 unichar_dup_uppercase (const unichar *u)
 475 {
 476   unichar *r = unichar_dup (u);
 477   int i;
 478
 479   if (! r)
 480     return NULL;
 481
 482   for (i = 0; r[i] != 0; ++i)
 483     {
 484       if (r[i] >= 'a' && r[i] <= 'z')
 485         r[i] &= 0xdf;
 486     }
 487   return r;
 488 }
 489
 490 static int
 491 unichar_isascii (const unichar *u, rc_uint_type len)
 492 {
 493   rc_uint_type i;
 494
 495   if ((bfd_signed_vma) len < 0)
 496     {
 497       if (u)
 498         len = (rc_uint_type) unichar_len (u);
 499       else
 500         len = 0;
 501     }
 502
 503   for (i = 0; i < len; i++)
 504     if ((u[i] & 0xff80) != 0)
 505       return 0;
 506   return 1;
 507 }
 508
 509 void
 510 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
 511 {
 512   if (! unichar_isascii (u, len))
 513     fputc ('L', e);
 514   fputc ('"', e);
 515   unicode_print (e, u, len);
 516   fputc ('"', e);
 517 }
 518
 519 int
 520 unicode_is_valid_codepage (rc_uint_type cp)
 521 {
 522   if ((cp & 0xffff) != cp)
 523     return 0;
 524   if (cp == CP_UTF16 || cp == CP_ACP)
 525     return 1;
 526
 527 #if !defined (_WIN32) && !defined (__CYGWIN__)
 528   if (! wind_find_codepage_info (cp))
 529     return 0;
 530   return 1;
 531 #else
 532   return !! IsValidCodePage ((UINT) cp);
 533 #endif
 534 }
 535
 536 #if defined (_WIN32) || defined (__CYGWIN__)
 537
 538 #define max_cp_string_len 6
 539
 540 static unsigned int
 541 codepage_from_langid (unsigned short langid)
 542 {
 543   char cp_string [max_cp_string_len];
 544   int c;
 545
 546   memset (cp_string, 0, max_cp_string_len);
 547   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
 548      but is unavailable on Win95.  */
 549   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 550                       LOCALE_IDEFAULTANSICODEPAGE,
 551                       cp_string, max_cp_string_len);
 552   /* If codepage data for an LCID is not installed on users's system,
 553      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 554      default. */
 555   if (c == 0)
 556     return CP_ACP;
 557   return strtoul (cp_string, 0, 10);
 558 }
 559
 560 static unsigned int
 561 wincodepage_from_langid (unsigned short langid)
 562 {
 563   char cp_string [max_cp_string_len];
 564   int c;
 565
 566   memset (cp_string, 0, max_cp_string_len);
 567   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
 568      but is unavailable on Win95.  */
 569   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 570                       LOCALE_IDEFAULTCODEPAGE,
 571                       cp_string, max_cp_string_len);
 572   /* If codepage data for an LCID is not installed on users's system,
 573      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 574      default. */
 575   if (c == 0)
 576     return CP_OEM;
 577   return strtoul (cp_string, 0, 10);
 578 }
 579
 580 static char *
 581 lang_from_langid (unsigned short langid)
 582 {
 583   char cp_string[261];
 584   int c;
 585
 586   memset (cp_string, 0, 261);
 587   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 588                       LOCALE_SENGLANGUAGE,
 589                       cp_string, 260);
 590   /* If codepage data for an LCID is not installed on users's system,
 591      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 592      default. */
 593   if (c == 0)
 594     strcpy (cp_string, "Neutral");
 595   return xstrdup (cp_string);
 596 }
 597
 598 static char *
 599 country_from_langid (unsigned short langid)
 600 {
 601   char cp_string[261];
 602   int c;
 603
 604   memset (cp_string, 0, 261);
 605   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 606                       LOCALE_SENGCOUNTRY,
 607                       cp_string, 260);
 608   /* If codepage data for an LCID is not installed on users's system,
 609      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 610      default. */
 611   if (c == 0)
 612     strcpy (cp_string, "Neutral");
 613   return xstrdup (cp_string);
 614 }
 615
 616 #endif
 617
 618 const wind_language_t *
 619 wind_find_language_by_id (unsigned id)
 620 {
 621 #if !defined (_WIN32) && !defined (__CYGWIN__)
 622   int i;
 623
 624   if (! id)
 625     return NULL;
 626   for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
 627     ;
 628   if (languages[i].id == id)
 629     return &languages[i];
 630   return NULL;
 631 #else
 632   static wind_language_t wl;
 633
 634   wl.id = id;
 635   wl.doscp = codepage_from_langid ((unsigned short) id);
 636   wl.wincp = wincodepage_from_langid ((unsigned short) id);
 637   wl.name = lang_from_langid ((unsigned short) id);
 638   wl.country = country_from_langid ((unsigned short) id);
 639
 640   return & wl;
 641 #endif
 642 }
 643
 644 const local_iconv_map *
 645 wind_find_codepage_info (unsigned cp)
 646 {
 647 #if !defined (_WIN32) && !defined (__CYGWIN__)
 648   int i;
 649
 650   for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
 651     ;
 652   if (codepages[i].codepage == (rc_uint_type) -1)
 653     return NULL;
 654   return &codepages[i];
 655 #else
 656   static local_iconv_map lim;
 657   if (!unicode_is_valid_codepage (cp))
 658         return NULL;
 659   lim.codepage = cp;
 660   lim.iconv_name = "";
 661   return & lim;
 662 #endif
 663 }
 664
 665 /* Convert an Codepage string to a unicode string.  */
 666
 667 void
 668 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
 669 {
 670   rc_uint_type len;
 671
 672   len = wind_MultiByteToWideChar (cp, src, NULL, 0);
 673   if (len)
 674     {
 675       *u = ((unichar *) res_alloc (len));
 676       wind_MultiByteToWideChar (cp, src, *u, len);
 677     }
 678   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
 679      this will set *length to -1.  */
 680   len -= sizeof (unichar);
 681
 682   if (length != NULL)
 683     *length = len / sizeof (unichar);
 684 }
 685
 686 /* Convert an unicode string to an codepage string.  */
 687
 688 void
 689 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
 690 {
 691   rc_uint_type len;
 692
 693   len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
 694   if (len)
 695     {
 696       *ascii = (char *) res_alloc (len * sizeof (char));
 697       wind_WideCharToMultiByte (cp, unicode, *ascii, len);
 698     }
 699   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
 700      this will set *length to -1.  */
 701   len--;
 702
 703   if (length != NULL)
 704     *length = len;
 705 }
 706
 707 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
 708 static int
 709 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
 710 {
 711   int i;
 712
 713   for (i = 1; i <= 32; i++)
 714     {
 715       char *tmp_d = d;
 716       ICONV_CONST char *tmp_s = s;
 717       size_t ret;
 718       size_t s_left = (size_t) i;
 719       size_t d_left = (size_t) d_len;
 720
 721       ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
 722
 723       if (ret != (size_t) -1)
 724         {
 725           *n_s = tmp_s;
 726           *n_d = tmp_d;
 727           return 0;
 728         }
 729     }
 730
 731   return 1;
 732 }
 733
 734 static const char *
 735 wind_iconv_cp (rc_uint_type cp)
 736 {
 737   const local_iconv_map *lim = wind_find_codepage_info (cp);
 738
 739   if (!lim)
 740     return NULL;
 741   return lim->iconv_name;
 742 }
 743 #endif /* HAVE_ICONV */
 744
 745 static rc_uint_type
 746 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
 747                           unichar *u, rc_uint_type u_len)
 748 {
 749   rc_uint_type ret = 0;
 750
 751 #if defined (_WIN32) || defined (__CYGWIN__)
 752   rc_uint_type conv_flags = MB_PRECOMPOSED;
 753
 754   /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
 755      MultiByteToWideChar will set the last error to
 756      ERROR_INVALID_FLAGS if we do. */
 757   if (cp == CP_UTF8 || cp == CP_UTF7)
 758     conv_flags = 0;
 759
 760   ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
 761                                             mb, -1, u, u_len);
 762   /* Convert to bytes. */
 763   ret *= sizeof (unichar);
 764
 765 #elif defined (HAVE_ICONV)
 766   int first = 1;
 767   char tmp[32];
 768   char *p_tmp;
 769   const char *iconv_name = wind_iconv_cp (cp);
 770
 771   if (!mb || !iconv_name)
 772     return 0;
 773   iconv_t cd = iconv_open ("UTF-16", iconv_name);
 774
 775   while (1)
 776     {
 777       int iret;
 778       const char *n_mb = "";
 779       char *n_tmp = "";
 780
 781       p_tmp = tmp;
 782       iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
 783       if (first)
 784         {
 785           first = 0;
 786           continue;
 787         }
 788       if (!iret)
 789         {
 790           size_t l_tmp = (size_t) (n_tmp - p_tmp);
 791
 792           if (u)
 793             {
 794               if ((size_t) u_len < l_tmp)
 795                 break;
 796               memcpy (u, tmp, l_tmp);
 797               u += l_tmp/2;
 798               u_len -= l_tmp;
 799             }
 800           ret += l_tmp;
 801         }
 802       else
 803         break;
 804       if (tmp[0] == 0 && tmp[1] == 0)
 805         break;
 806       mb = n_mb;
 807     }
 808   iconv_close (cd);
 809 #else
 810   if (cp)
 811     ret = 0;
 812   ret = strlen (mb) + 1;
 813   ret *= sizeof (unichar);
 814   if (u != NULL && u_len != 0)
 815     {
 816       do
 817         {
 818           *u++ = ((unichar) *mb) & 0xff;
 819           --u_len; mb++;
 820         }
 821       while (u_len != 0 && mb[-1] != 0);
 822     }
 823   if (u != NULL && u_len != 0)
 824     *u = 0;
 825 #endif
 826   return ret;
 827 }
 828
 829 static rc_uint_type
 830 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
 831 {
 832   rc_uint_type ret = 0;
 833 #if defined (_WIN32) || defined (__CYGWIN__)
 834   WINBOOL used_def = FALSE;
 835
 836   ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
 837                                             NULL, & used_def);
 838 #elif defined (HAVE_ICONV)
 839   int first = 1;
 840   char tmp[32];
 841   char *p_tmp;
 842   const char *iconv_name = wind_iconv_cp (cp);
 843
 844   if (!u || !iconv_name)
 845     return 0;
 846   iconv_t cd = iconv_open (iconv_name, "UTF-16");
 847
 848   while (1)
 849     {
 850       int iret;
 851       const char *n_u = "";
 852       char *n_tmp = "";
 853
 854       p_tmp = tmp;
 855       iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
 856       if (first)
 857         {
 858           first = 0;
 859           continue;
 860         }
 861       if (!iret)
 862         {
 863           size_t l_tmp = (size_t) (n_tmp - p_tmp);
 864
 865           if (mb)
 866             {
 867               if ((size_t) mb_len < l_tmp)
 868                 break;
 869               memcpy (mb, tmp, l_tmp);
 870               mb += l_tmp;
 871               mb_len -= l_tmp;
 872             }
 873           ret += l_tmp;
 874         }
 875       else
 876         break;
 877       if (u[0] == 0)
 878         break;
 879       u = (const unichar *) n_u;
 880     }
 881   iconv_close (cd);
 882 #else
 883   if (cp)
 884     ret = 0;
 885
 886   while (u[ret] != 0)
 887     ++ret;
 888
 889   ++ret;
 890
 891   if (mb)
 892     {
 893       while (*u != 0 && mb_len != 0)
 894         {
 895           if (u[0] == (u[0] & 0x7f))
 896             *mb++ = (char) u[0];
 897           else
 898             *mb++ = '_';
 899           ++u; --mb_len;
 900         }
 901       if (mb_len != 0)
 902         *mb = 0;
 903     }
 904 #endif
 905   return ret;
 906 }