binutils/winduni.c

   1 /* winduni.c -- unicode support for the windres program.
   2    Copyright (C) 1997-2024 Free Software Foundation, Inc.
   3    Written by Ian Lance Taylor, Cygnus Support.
   4    Rewritten by Kai Tietz, Onevision.
   5
   6    This file is part of GNU Binutils.
   7
   8    This program is free software; you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
  21    02110-1301, USA.  */
  22
  23
  24 /* This file contains unicode support routines for the windres
  25    program.  Ideally, we would have generic unicode support which
  26    would work on all systems.  However, we don't.  Instead, on a
  27    Windows host, we are prepared to call some Windows routines.  This
  28    means that we will generate different output on Windows and Unix
  29    hosts, but that seems better than not really supporting unicode at
  30    all.  */
  31
  32 #include "sysdep.h"
  33 #include "bfd.h"
  34 #include "libiberty.h" /* for xstrdup */
  35 #include "bucomm.h"
/* Must be included before windows.h and winnls.h.  */
  37 #if defined (_WIN32) || defined (__CYGWIN__)
  38 #include <windows.h>
  39 #include <winnls.h>
  40 #endif
  41 #include "winduni.h"
  42 #include "safe-ctype.h"
  43
  44 #if HAVE_ICONV
  45 #include <iconv.h>
  46 #endif
  47
/* Forward declarations of the file-local helpers defined further down.
   The two converters are modelled on the Windows API functions of the
   same name and report sizes that include the terminating NUL.  */
static rc_uint_type wind_WideCharToMultiByte (rc_uint_type, const unichar *, char *, rc_uint_type);
static rc_uint_type wind_MultiByteToWideChar (rc_uint_type, const char *, unichar *, rc_uint_type);
static int unichar_isascii (const unichar *, rc_uint_type);
  51
  52 /* Convert an ASCII string to a unicode string.  We just copy it,
  53    expanding chars to shorts, rather than doing something intelligent.  */
  54
  55 #if !defined (_WIN32) && !defined (__CYGWIN__)
  56
/* Codepages mapped.  Each entry pairs a Windows codepage number with
   an encoding name; wind_find_codepage_info searches this table by
   number and the name is what ends up being handed to iconv_open.
   The table is terminated by an entry whose codepage is
   (rc_uint_type) -1 and whose name is NULL.  */
static local_iconv_map codepages[] =
{
  { 0, "cp1252" },
  { 1, "WINDOWS-1252" },
  { 437, "MS-ANSI" },
  { 737, "MS-GREEK" },
  { 775, "WINBALTRIM" },
  { 850, "MS-ANSI" },
  { 852, "MS-EE" },
  { 857, "MS-TURK" },
  { 862, "CP862" },
  { 864, "CP864" },
  { 866, "MS-CYRL" },
  { 874, "WINDOWS-874" },
  { 932, "CP932" },
  { 936, "CP936" },
  { 949, "CP949" },
  { 950, "CP950" },
  { 1250, "WINDOWS-1250" },
  { 1251, "WINDOWS-1251" },
  { 1252, "WINDOWS-1252" },
  { 1253, "WINDOWS-1253" },
  { 1254, "WINDOWS-1254" },
  { 1255, "WINDOWS-1255" },
  { 1256, "WINDOWS-1256" },
  { 1257, "WINDOWS-1257" },
  { 1258, "WINDOWS-1258" },
  { CP_UTF7, "UTF-7" },
  { CP_UTF8, "UTF-8" },
  { CP_UTF16, "UTF-16LE" },
  /* End-of-table sentinel.  */
  { (rc_uint_type) -1, NULL }
};
  90
/* Languages supported.  This table is searched by
   wind_find_language_by_id on hosts without the Windows locale API.
   Each initializer presumably lists, in struct order, the language
   id, the DOS codepage, the Windows codepage, the language name and
   the country name -- TODO confirm the field order against the
   wind_language_t declaration.  The table is terminated by an entry
   whose id is (unsigned) -1.

   NOTE(review): entry 0x081A pairs codepage 852 with 1252 whereas
   every other 852 entry uses 1250 -- confirm whether that is
   intended.  */
static const wind_language_t languages[] =
{
  { 0x0000, 437, 1252, "Neutral", "Neutral" },
  { 0x0401, 864, 1256, "Arabic", "Saudi Arabia" },    { 0x0402, 866, 1251, "Bulgarian", "Bulgaria" },
  { 0x0403, 850, 1252, "Catalan", "Spain" },          { 0x0404, 950,  950, "Chinese", "Taiwan" },
  { 0x0405, 852, 1250, "Czech", "Czech Republic" },   { 0x0406, 850, 1252, "Danish", "Denmark" },
  { 0x0407, 850, 1252, "German", "Germany" },         { 0x0408, 737, 1253, "Greek", "Greece" },
  { 0x0409, 437, 1252, "English", "United States" },  { 0x040A, 850, 1252, "Spanish - Traditional Sort", "Spain" },
  { 0x040B, 850, 1252, "Finnish", "Finland" },        { 0x040C, 850, 1252, "French", "France" },
  { 0x040D, 862, 1255, "Hebrew", "Israel" },          { 0x040E, 852, 1250, "Hungarian", "Hungary" },
  { 0x040F, 850, 1252, "Icelandic", "Iceland" },      { 0x0410, 850, 1252, "Italian", "Italy" },
  { 0x0411, 932,  932, "Japanese", "Japan" },         { 0x0412, 949,  949, "Korean", "Korea (south)" },
  { 0x0413, 850, 1252, "Dutch", "Netherlands" },      { 0x0414, 850, 1252, "Norwegian (Bokm\345l)", "Norway" },
  { 0x0415, 852, 1250, "Polish", "Poland" },          { 0x0416, 850, 1252, "Portuguese", "Brazil" },
  { 0x0418, 852, 1250, "Romanian", "Romania" },       { 0x0419, 866, 1251, "Russian", "Russia" },
  { 0x041A, 852, 1250, "Croatian", "Croatia" },       { 0x041B, 852, 1250, "Slovak", "Slovakia" },
  { 0x041C, 852, 1250, "Albanian", "Albania" },       { 0x041D, 850, 1252, "Swedish", "Sweden" },
  { 0x041E, 874,  874, "Thai", "Thailand" },          { 0x041F, 857, 1254, "Turkish", "Turkey" },
  { 0x0421, 850, 1252, "Indonesian", "Indonesia" },   { 0x0422, 866, 1251, "Ukrainian", "Ukraine" },
  { 0x0423, 866, 1251, "Belarusian", "Belarus" },     { 0x0424, 852, 1250, "Slovene", "Slovenia" },
  { 0x0425, 775, 1257, "Estonian", "Estonia" },       { 0x0426, 775, 1257, "Latvian", "Latvia" },
  { 0x0427, 775, 1257, "Lithuanian", "Lithuania" },
  { 0x0429, 864, 1256, "Arabic", "Farsi" },           { 0x042A,1258, 1258, "Vietnamese", "Vietnam" },
  { 0x042D, 850, 1252, "Basque", "Spain" },
  { 0x042F, 866, 1251, "Macedonian", "Former Yugoslav Republic of Macedonia" },
  { 0x0436, 850, 1252, "Afrikaans", "South Africa" },
  { 0x0438, 850, 1252, "Faroese", "Faroe Islands" },
  { 0x043C, 437, 1252, "Irish", "Ireland" },
  { 0x043E, 850, 1252, "Malay", "Malaysia" },
  { 0x0801, 864, 1256, "Arabic", "Iraq" },
  { 0x0804, 936,  936, "Chinese (People's republic of China)", "People's republic of China" },
  { 0x0807, 850, 1252, "German", "Switzerland" },
  { 0x0809, 850, 1252, "English", "United Kingdom" }, { 0x080A, 850, 1252, "Spanish", "Mexico" },
  { 0x080C, 850, 1252, "French", "Belgium" },
  { 0x0810, 850, 1252, "Italian", "Switzerland" },
  { 0x0813, 850, 1252, "Dutch", "Belgium" },          { 0x0814, 850, 1252, "Norwegian (Nynorsk)", "Norway" },
  { 0x0816, 850, 1252, "Portuguese", "Portugal" },
  { 0x081A, 852, 1252, "Serbian (latin)", "Yugoslavia" },
  { 0x081D, 850, 1252, "Swedish (Finland)", "Finland" },
  { 0x0C01, 864, 1256, "Arabic", "Egypt" },
  { 0x0C04, 950,  950, "Chinese", "Hong Kong" },
  { 0x0C07, 850, 1252, "German", "Austria" },
  { 0x0C09, 850, 1252, "English", "Australia" },      { 0x0C0A, 850, 1252, "Spanish - International Sort", "Spain" },
  { 0x0C0C, 850, 1252, "French", "Canada"},
  { 0x0C1A, 855, 1251, "Serbian (Cyrillic)", "Serbia" },
  { 0x1001, 864, 1256, "Arabic", "Libya" },
  { 0x1004, 936,  936, "Chinese", "Singapore" },
  { 0x1007, 850, 1252, "German", "Luxembourg" },
  { 0x1009, 850, 1252, "English", "Canada" },
  { 0x100A, 850, 1252, "Spanish", "Guatemala" },
  { 0x100C, 850, 1252, "French", "Switzerland" },
  { 0x1401, 864, 1256, "Arabic", "Algeria" },
  { 0x1407, 850, 1252, "German", "Liechtenstein" },
  { 0x1409, 850, 1252, "English", "New Zealand" },    { 0x140A, 850, 1252, "Spanish", "Costa Rica" },
  { 0x140C, 850, 1252, "French", "Luxembourg" },
  { 0x1801, 864, 1256, "Arabic", "Morocco" },
  { 0x1809, 850, 1252, "English", "Ireland" },        { 0x180A, 850, 1252, "Spanish", "Panama" },
  { 0x180C, 850, 1252, "French", "Monaco" },
  { 0x1C01, 864, 1256, "Arabic", "Tunisia" },
  { 0x1C09, 437, 1252, "English", "South Africa" },   { 0x1C0A, 850, 1252, "Spanish", "Dominican Republic" },
  { 0x2001, 864, 1256, "Arabic", "Oman" },
  { 0x2009, 850, 1252, "English", "Jamaica" },        { 0x200A, 850, 1252, "Spanish", "Venezuela" },
  { 0x2401, 864, 1256, "Arabic", "Yemen" },
  { 0x2409, 850, 1252, "English", "Caribbean" },      { 0x240A, 850, 1252, "Spanish", "Colombia" },
  { 0x2801, 864, 1256, "Arabic", "Syria" },
  { 0x2809, 850, 1252, "English", "Belize" },         { 0x280A, 850, 1252, "Spanish", "Peru" },
  { 0x2C01, 864, 1256, "Arabic", "Jordan" },
  { 0x2C09, 437, 1252, "English", "Trinidad & Tobago" },{ 0x2C0A, 850, 1252, "Spanish", "Argentina" },
  { 0x3001, 864, 1256, "Arabic", "Lebanon" },
  { 0x3009, 437, 1252, "English", "Zimbabwe" },       { 0x300A, 850, 1252, "Spanish", "Ecuador" },
  { 0x3401, 864, 1256, "Arabic", "Kuwait" },
  { 0x3409, 437, 1252, "English", "Philippines" },    { 0x340A, 850, 1252, "Spanish", "Chile" },
  { 0x3801, 864, 1256, "Arabic", "United Arab Emirates" },
  { 0x380A, 850, 1252, "Spanish", "Uruguay" },
  { 0x3C01, 864, 1256, "Arabic", "Bahrain" },
  { 0x3C0A, 850, 1252, "Spanish", "Paraguay" },
  { 0x4001, 864, 1256, "Arabic", "Qatar" },
  { 0x400A, 850, 1252, "Spanish", "Bolivia" },
  { 0x440A, 850, 1252, "Spanish", "El Salvador" },
  { 0x480A, 850, 1252, "Spanish", "Honduras" },
  { 0x4C0A, 850, 1252, "Spanish", "Nicaragua" },
  { 0x500A, 850, 1252, "Spanish", "Puerto Rico" },
  /* End-of-table sentinel.  */
  { (unsigned) -1,  0,      0, NULL, NULL }
};
 176
 177 #endif
 178
/* Specifies the default codepage to be used for unicode
   transformations.  By default this is CP_ACP.  Nothing in this file
   reads it; it is exported for the rest of the program.  */
rc_uint_type wind_default_codepage = CP_ACP;

/* Specifies the currently used codepage for unicode
   transformations.  By default this is CP_ACP.  This is the codepage
   that unicode_from_ascii, unicode_from_ascii_len and
   ascii_from_unicode convert with.  */
rc_uint_type wind_current_codepage = CP_ACP;
 186
 187 /* Convert an ASCII string to a unicode string.  We just copy it,
 188    expanding chars to shorts, rather than doing something intelligent.  */
 189
 190 void
 191 unicode_from_ascii (rc_uint_type *length, unichar **unicode, const char *ascii)
 192 {
 193   unicode_from_codepage (length, unicode, ascii, wind_current_codepage);
 194 }
 195
 196 /* Convert an ASCII string with length A_LENGTH to a unicode string.  We just
 197    copy it, expanding chars to shorts, rather than doing something intelligent.
 198    This routine converts also \0 within a string.  */
 199
 200 void
 201 unicode_from_ascii_len (rc_uint_type *length, unichar **unicode, const char *ascii, rc_uint_type a_length)
 202 {
 203   char *tmp, *p;
 204   rc_uint_type tlen, elen, idx = 0;
 205
 206   *unicode = NULL;
 207
 208   if (!a_length)
 209     {
 210       if (length)
 211         *length = 0;
 212       return;
 213     }
 214
 215   /* Make sure we have zero terminated string.  */
 216   p = tmp = (char *) xmalloc (a_length + 1);
 217   memcpy (tmp, ascii, a_length);
 218   tmp[a_length] = 0;
 219
 220   while (a_length > 0)
 221     {
 222       unichar *utmp, *up;
 223
 224       tlen = strlen (p);
 225
 226       if (tlen > a_length)
 227         tlen = a_length;
 228       if (*p == 0)
 229         {
 230           /* Make room for one more character.  */
 231           utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
 232           if (idx > 0)
 233             {
 234               memcpy (utmp, *unicode, idx * sizeof (unichar));
 235             }
 236           *unicode = utmp;
 237           utmp[idx++] = 0;
 238           --a_length;
 239           p++;
 240           continue;
 241         }
 242       utmp = NULL;
 243       elen = 0;
 244       elen = wind_MultiByteToWideChar (wind_current_codepage, p, NULL, 0);
 245       if (elen)
 246         {
 247           utmp = ((unichar *) res_alloc (elen + sizeof (unichar) * 2));
 248           wind_MultiByteToWideChar (wind_current_codepage, p, utmp, elen);
 249           elen /= sizeof (unichar);
 250           elen --;
 251         }
 252       else
 253         {
 254           /* Make room for one more character.  */
 255           utmp = (unichar *) res_alloc (sizeof (unichar) * (idx + 1));
 256           if (idx > 0)
 257             {
 258               memcpy (utmp, *unicode, idx * sizeof (unichar));
 259             }
 260           *unicode = utmp;
 261           utmp[idx++] = ((unichar) *p) & 0xff;
 262           --a_length;
 263           p++;
 264           continue;
 265         }
 266       p += tlen;
 267       a_length -= tlen;
 268
 269       up = (unichar *) res_alloc (sizeof (unichar) * (idx + elen));
 270       if (idx > 0)
 271         memcpy (up, *unicode, idx * sizeof (unichar));
 272
 273       *unicode = up;
 274       if (elen)
 275         memcpy (&up[idx], utmp, sizeof (unichar) * elen);
 276
 277       idx += elen;
 278     }
 279
 280   if (length)
 281     *length = idx;
 282
 283   free (tmp);
 284 }
 285
 286 /* Convert an unicode string to an ASCII string.  We just copy it,
 287    shrink shorts to chars, rather than doing something intelligent.
 288    Shorts with not within the char range are replaced by '_'.  */
 289
 290 void
 291 ascii_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii)
 292 {
 293   codepage_from_unicode (length, unicode, ascii, wind_current_codepage);
 294 }
 295
 296 /* Print the unicode string UNICODE to the file E.  LENGTH is the
 297    number of characters to print, or -1 if we should print until the
 298    end of the string.  FIXME: On a Windows host, we should be calling
 299    some Windows function, probably WideCharToMultiByte.  */
 300
 301 void
 302 unicode_print (FILE *e, const unichar *unicode, rc_uint_type length)
 303 {
 304   while (1)
 305     {
 306       unichar ch;
 307
 308       if (length == 0)
 309         return;
 310       if ((bfd_signed_vma) length > 0)
 311         --length;
 312
 313       ch = *unicode;
 314
 315       if (ch == 0 && (bfd_signed_vma) length < 0)
 316         return;
 317
 318       ++unicode;
 319
 320       if ((ch & 0x7f) == ch)
 321         {
 322           if (ch == '\\')
 323             fputs ("\\\\", e);
 324           else if (ch == '"')
 325             fputs ("\"\"", e);
 326           else if (ISPRINT (ch))
 327             putc (ch, e);
 328           else
 329             {
 330               switch (ch)
 331                 {
 332                 case ESCAPE_A:
 333                   fputs ("\\a", e);
 334                   break;
 335
 336                 case ESCAPE_B:
 337                   fputs ("\\b", e);
 338                   break;
 339
 340                 case ESCAPE_F:
 341                   fputs ("\\f", e);
 342                   break;
 343
 344                 case ESCAPE_N:
 345                   fputs ("\\n", e);
 346                   break;
 347
 348                 case ESCAPE_R:
 349                   fputs ("\\r", e);
 350                   break;
 351
 352                 case ESCAPE_T:
 353                   fputs ("\\t", e);
 354                   break;
 355
 356                 case ESCAPE_V:
 357                   fputs ("\\v", e);
 358                   break;
 359
 360                 default:
 361                   fprintf (e, "\\%03o", (unsigned int) ch);
 362                   break;
 363                 }
 364             }
 365         }
 366       else if ((ch & 0xff) == ch)
 367         fprintf (e, "\\%03o", (unsigned int) ch);
 368       else
 369         fprintf (e, "\\x%04x", (unsigned int) ch);
 370     }
 371 }
 372
 373 /* Print a unicode string to a file.  */
 374
 375 void
 376 ascii_print (FILE *e, const char *s, rc_uint_type length)
 377 {
 378   while (1)
 379     {
 380       char ch;
 381
 382       if (length == 0)
 383         return;
 384       if ((bfd_signed_vma) length > 0)
 385         --length;
 386
 387       ch = *s;
 388
 389       if (ch == 0 && (bfd_signed_vma) length < 0)
 390         return;
 391
 392       ++s;
 393
 394       if ((ch & 0x7f) == ch)
 395         {
 396           if (ch == '\\')
 397             fputs ("\\\\", e);
 398           else if (ch == '"')
 399             fputs ("\"\"", e);
 400           else if (ISPRINT (ch))
 401             putc (ch, e);
 402           else
 403             {
 404               switch (ch)
 405                 {
 406                 case ESCAPE_A:
 407                   fputs ("\\a", e);
 408                   break;
 409
 410                 case ESCAPE_B:
 411                   fputs ("\\b", e);
 412                   break;
 413
 414                 case ESCAPE_F:
 415                   fputs ("\\f", e);
 416                   break;
 417
 418                 case ESCAPE_N:
 419                   fputs ("\\n", e);
 420                   break;
 421
 422                 case ESCAPE_R:
 423                   fputs ("\\r", e);
 424                   break;
 425
 426                 case ESCAPE_T:
 427                   fputs ("\\t", e);
 428                   break;
 429
 430                 case ESCAPE_V:
 431                   fputs ("\\v", e);
 432                   break;
 433
 434                 default:
 435                   fprintf (e, "\\%03o", (unsigned int) ch);
 436                   break;
 437                 }
 438             }
 439         }
 440       else
 441         fprintf (e, "\\%03o", (unsigned int) ch & 0xff);
 442     }
 443 }
 444
 445 rc_uint_type
 446 unichar_len (const unichar *unicode)
 447 {
 448   rc_uint_type r = 0;
 449
 450   if (unicode)
 451     while (unicode[r] != 0)
 452       r++;
 453   else
 454     --r;
 455   return r;
 456 }
 457
 458 unichar *
 459 unichar_dup (const unichar *unicode)
 460 {
 461   unichar *r;
 462   int len;
 463
 464   if (! unicode)
 465     return NULL;
 466   for (len = 0; unicode[len] != 0; ++len)
 467     ;
 468   ++len;
 469   r = ((unichar *) res_alloc (len * sizeof (unichar)));
 470   memcpy (r, unicode, len * sizeof (unichar));
 471   return r;
 472 }
 473
 474 unichar *
 475 unichar_dup_uppercase (const unichar *u)
 476 {
 477   unichar *r = unichar_dup (u);
 478   int i;
 479
 480   if (! r)
 481     return NULL;
 482
 483   for (i = 0; r[i] != 0; ++i)
 484     {
 485       if (r[i] >= 'a' && r[i] <= 'z')
 486         r[i] &= 0xdf;
 487     }
 488   return r;
 489 }
 490
 491 static int
 492 unichar_isascii (const unichar *u, rc_uint_type len)
 493 {
 494   rc_uint_type i;
 495
 496   if ((bfd_signed_vma) len < 0)
 497     {
 498       if (u)
 499         len = (rc_uint_type) unichar_len (u);
 500       else
 501         len = 0;
 502     }
 503
 504   for (i = 0; i < len; i++)
 505     if ((u[i] & 0xff80) != 0)
 506       return 0;
 507   return 1;
 508 }
 509
 510 void
 511 unicode_print_quoted (FILE *e, const unichar *u, rc_uint_type len)
 512 {
 513   if (! unichar_isascii (u, len))
 514     fputc ('L', e);
 515   fputc ('"', e);
 516   unicode_print (e, u, len);
 517   fputc ('"', e);
 518 }
 519
 520 int
 521 unicode_is_valid_codepage (rc_uint_type cp)
 522 {
 523   if ((cp & 0xffff) != cp)
 524     return 0;
 525   if (cp == CP_UTF16 || cp == CP_ACP)
 526     return 1;
 527
 528 #if !defined (_WIN32) && !defined (__CYGWIN__)
 529   if (! wind_find_codepage_info (cp))
 530     return 0;
 531   return 1;
 532 #else
 533   return !! IsValidCodePage ((UINT) cp);
 534 #endif
 535 }
 536
 537 #if defined (_WIN32) || defined (__CYGWIN__)
 538
 539 #define max_cp_string_len 6
 540
 541 static unsigned int
 542 codepage_from_langid (unsigned short langid)
 543 {
 544   char cp_string [max_cp_string_len];
 545   int c;
 546
 547   memset (cp_string, 0, max_cp_string_len);
 548   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
 549      but is unavailable on Win95.  */
 550   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 551                       LOCALE_IDEFAULTANSICODEPAGE,
 552                       cp_string, max_cp_string_len);
 553   /* If codepage data for an LCID is not installed on users's system,
 554      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 555      default. */
 556   if (c == 0)
 557     return CP_ACP;
 558   return strtoul (cp_string, 0, 10);
 559 }
 560
 561 static unsigned int
 562 wincodepage_from_langid (unsigned short langid)
 563 {
 564   char cp_string [max_cp_string_len];
 565   int c;
 566
 567   memset (cp_string, 0, max_cp_string_len);
 568   /* LOCALE_RETURN_NUMBER flag would avoid strtoul conversion,
 569      but is unavailable on Win95.  */
 570   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 571                       LOCALE_IDEFAULTCODEPAGE,
 572                       cp_string, max_cp_string_len);
 573   /* If codepage data for an LCID is not installed on users's system,
 574      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 575      default. */
 576   if (c == 0)
 577     return CP_OEM;
 578   return strtoul (cp_string, 0, 10);
 579 }
 580
 581 static char *
 582 lang_from_langid (unsigned short langid)
 583 {
 584   char cp_string[261];
 585   int c;
 586
 587   memset (cp_string, 0, 261);
 588   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 589                       LOCALE_SENGLANGUAGE,
 590                       cp_string, 260);
 591   /* If codepage data for an LCID is not installed on users's system,
 592      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 593      default. */
 594   if (c == 0)
 595     strcpy (cp_string, "Neutral");
 596   return xstrdup (cp_string);
 597 }
 598
 599 static char *
 600 country_from_langid (unsigned short langid)
 601 {
 602   char cp_string[261];
 603   int c;
 604
 605   memset (cp_string, 0, 261);
 606   c = GetLocaleInfoA (MAKELCID (langid, SORT_DEFAULT),
 607                       LOCALE_SENGCOUNTRY,
 608                       cp_string, 260);
 609   /* If codepage data for an LCID is not installed on users's system,
 610      GetLocaleInfo returns an empty string.  Fall back to system ANSI
 611      default. */
 612   if (c == 0)
 613     strcpy (cp_string, "Neutral");
 614   return xstrdup (cp_string);
 615 }
 616
 617 #endif
 618
 619 const wind_language_t *
 620 wind_find_language_by_id (unsigned id)
 621 {
 622 #if !defined (_WIN32) && !defined (__CYGWIN__)
 623   int i;
 624
 625   if (! id)
 626     return NULL;
 627   for (i = 0; languages[i].id != (unsigned) -1 && languages[i].id != id; i++)
 628     ;
 629   if (languages[i].id == id)
 630     return &languages[i];
 631   return NULL;
 632 #else
 633   static wind_language_t wl;
 634
 635   wl.id = id;
 636   wl.doscp = codepage_from_langid ((unsigned short) id);
 637   wl.wincp = wincodepage_from_langid ((unsigned short) id);
 638   wl.name = lang_from_langid ((unsigned short) id);
 639   wl.country = country_from_langid ((unsigned short) id);
 640
 641   return & wl;
 642 #endif
 643 }
 644
 645 const local_iconv_map *
 646 wind_find_codepage_info (unsigned cp)
 647 {
 648 #if !defined (_WIN32) && !defined (__CYGWIN__)
 649   int i;
 650
 651   for (i = 0; codepages[i].codepage != (rc_uint_type) -1 && codepages[i].codepage != cp; i++)
 652     ;
 653   if (codepages[i].codepage == (rc_uint_type) -1)
 654     return NULL;
 655   return &codepages[i];
 656 #else
 657   static local_iconv_map lim;
 658   if (!unicode_is_valid_codepage (cp))
 659         return NULL;
 660   lim.codepage = cp;
 661   lim.iconv_name = "";
 662   return & lim;
 663 #endif
 664 }
 665
 666 /* Convert an Codepage string to a unicode string.  */
 667
 668 void
 669 unicode_from_codepage (rc_uint_type *length, unichar **u, const char *src, rc_uint_type cp)
 670 {
 671   rc_uint_type len;
 672
 673   len = wind_MultiByteToWideChar (cp, src, NULL, 0);
 674   if (len)
 675     {
 676       *u = ((unichar *) res_alloc (len));
 677       wind_MultiByteToWideChar (cp, src, *u, len);
 678     }
 679   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
 680      this will set *length to -1.  */
 681   len -= sizeof (unichar);
 682
 683   if (length != NULL)
 684     *length = len / sizeof (unichar);
 685 }
 686
 687 /* Convert an unicode string to an codepage string.  */
 688
 689 void
 690 codepage_from_unicode (rc_uint_type *length, const unichar *unicode, char **ascii, rc_uint_type cp)
 691 {
 692   rc_uint_type len;
 693
 694   len = wind_WideCharToMultiByte (cp, unicode, NULL, 0);
 695   if (len)
 696     {
 697       *ascii = (char *) res_alloc (len * sizeof (char));
 698       wind_WideCharToMultiByte (cp, unicode, *ascii, len);
 699     }
 700   /* Discount the trailing '/0'.  If MultiByteToWideChar failed,
 701      this will set *length to -1.  */
 702   len--;
 703
 704   if (length != NULL)
 705     *length = len;
 706 }
 707
 708 #if defined (HAVE_ICONV) && !defined (_WIN32) && !defined (__CYGWIN__)
 709 static int
 710 iconv_onechar (iconv_t cd, ICONV_CONST char *s, char *d, int d_len, const char **n_s, char **n_d)
 711 {
 712   int i;
 713
 714   for (i = 1; i <= 32; i++)
 715     {
 716       char *tmp_d = d;
 717       ICONV_CONST char *tmp_s = s;
 718       size_t ret;
 719       size_t s_left = (size_t) i;
 720       size_t d_left = (size_t) d_len;
 721
 722       ret = iconv (cd, & tmp_s, & s_left, & tmp_d, & d_left);
 723
 724       if (ret != (size_t) -1)
 725         {
 726           *n_s = tmp_s;
 727           *n_d = tmp_d;
 728           return 0;
 729         }
 730     }
 731
 732   return 1;
 733 }
 734
 735 static const char *
 736 wind_iconv_cp (rc_uint_type cp)
 737 {
 738   const local_iconv_map *lim = wind_find_codepage_info (cp);
 739
 740   if (!lim)
 741     return NULL;
 742   return lim->iconv_name;
 743 }
 744 #endif /* HAVE_ICONV */
 745
 746 static rc_uint_type
 747 wind_MultiByteToWideChar (rc_uint_type cp, const char *mb,
 748                           unichar *u, rc_uint_type u_len)
 749 {
 750   rc_uint_type ret = 0;
 751
 752 #if defined (_WIN32) || defined (__CYGWIN__)
 753   rc_uint_type conv_flags = MB_PRECOMPOSED;
 754
 755   /* MB_PRECOMPOSED is not allowed for UTF-7 or UTF-8.
 756      MultiByteToWideChar will set the last error to
 757      ERROR_INVALID_FLAGS if we do. */
 758   if (cp == CP_UTF8 || cp == CP_UTF7)
 759     conv_flags = 0;
 760
 761   ret = (rc_uint_type) MultiByteToWideChar (cp, conv_flags,
 762                                             mb, -1, u, u_len);
 763   /* Convert to bytes. */
 764   ret *= sizeof (unichar);
 765
 766 #elif defined (HAVE_ICONV)
 767   int first = 1;
 768   char tmp[32];
 769   char *p_tmp;
 770   const char *iconv_name = wind_iconv_cp (cp);
 771
 772   if (!mb || !iconv_name)
 773     return 0;
 774   iconv_t cd = iconv_open (
 775 #if WORDS_BIGENDIAN
 776                            "UTF-16BE",
 777 #else
 778                            "UTF-16LE",
 779 #endif
 780                            iconv_name);
 781
 782   while (1)
 783     {
 784       int iret;
 785       const char *n_mb = "";
 786       char *n_tmp = "";
 787
 788       p_tmp = tmp;
 789       iret = iconv_onechar (cd, (ICONV_CONST char *) mb, p_tmp, 32, & n_mb, & n_tmp);
 790       if (first)
 791         {
 792           first = 0;
 793           continue;
 794         }
 795       if (!iret)
 796         {
 797           size_t l_tmp = (size_t) (n_tmp - p_tmp);
 798
 799           if (u)
 800             {
 801               if ((size_t) u_len < l_tmp)
 802                 break;
 803               memcpy (u, tmp, l_tmp);
 804               u += l_tmp/2;
 805               u_len -= l_tmp;
 806             }
 807           ret += l_tmp;
 808         }
 809       else
 810         break;
 811       if (tmp[0] == 0 && tmp[1] == 0)
 812         break;
 813       mb = n_mb;
 814     }
 815   iconv_close (cd);
 816 #else
 817   if (cp)
 818     ret = 0;
 819   ret = strlen (mb) + 1;
 820   ret *= sizeof (unichar);
 821   if (u != NULL && u_len != 0)
 822     {
 823       do
 824         {
 825           *u++ = ((unichar) *mb) & 0xff;
 826           --u_len; mb++;
 827         }
 828       while (u_len != 0 && mb[-1] != 0);
 829     }
 830   if (u != NULL && u_len != 0)
 831     *u = 0;
 832 #endif
 833   return ret;
 834 }
 835
 836 static rc_uint_type
 837 wind_WideCharToMultiByte (rc_uint_type cp, const unichar *u, char *mb, rc_uint_type mb_len)
 838 {
 839   rc_uint_type ret = 0;
 840 #if defined (_WIN32) || defined (__CYGWIN__)
 841   WINBOOL used_def = false;
 842
 843   ret = (rc_uint_type) WideCharToMultiByte (cp, 0, u, -1, mb, mb_len,
 844                                             NULL, & used_def);
 845 #elif defined (HAVE_ICONV)
 846   int first = 1;
 847   char tmp[32];
 848   char *p_tmp;
 849   const char *iconv_name = wind_iconv_cp (cp);
 850
 851   if (!u || !iconv_name)
 852     return 0;
 853   iconv_t cd = iconv_open (iconv_name,
 854 #if WORDS_BIGENDIAN
 855                            "UTF-16BE"
 856 #else
 857                            "UTF-16LE"
 858 #endif
 859                            );
 860
 861   while (1)
 862     {
 863       int iret;
 864       const char *n_u = "";
 865       char *n_tmp = "";
 866
 867       p_tmp = tmp;
 868       iret = iconv_onechar (cd, (ICONV_CONST char *) u, p_tmp, 32, &n_u, & n_tmp);
 869       if (first)
 870         {
 871           first = 0;
 872           continue;
 873         }
 874       if (!iret)
 875         {
 876           size_t l_tmp = (size_t) (n_tmp - p_tmp);
 877
 878           if (mb)
 879             {
 880               if ((size_t) mb_len < l_tmp)
 881                 break;
 882               memcpy (mb, tmp, l_tmp);
 883               mb += l_tmp;
 884               mb_len -= l_tmp;
 885             }
 886           ret += l_tmp;
 887         }
 888       else
 889         break;
 890       if (u[0] == 0)
 891         break;
 892       u = (const unichar *) n_u;
 893     }
 894   iconv_close (cd);
 895 #else
 896   if (cp)
 897     ret = 0;
 898
 899   while (u[ret] != 0)
 900     ++ret;
 901
 902   ++ret;
 903
 904   if (mb)
 905     {
 906       while (*u != 0 && mb_len != 0)
 907         {
 908           if (u[0] == (u[0] & 0x7f))
 909             *mb++ = (char) u[0];
 910           else
 911             *mb++ = '_';
 912           ++u; --mb_len;
 913         }
 914       if (mb_len != 0)
 915         *mb = 0;
 916     }
 917 #endif
 918   return ret;
 919 }