winsup/cygwin/nlsfuncs.cc

   1 /* nlsfuncs.cc: NLS helper functions
   2
   3 This file is part of Cygwin.
   4
   5 This software is a copyrighted work licensed under the terms of the
   6 Cygwin license.  Please consult the file "CYGWIN_LICENSE" for
   7 details. */
   8
   9 #include "winsup.h"
  10 #include <stdio.h>
  11 #include <stdlib.h>
  12 #include <locale.h>
  13 #include <wchar.h>
  14 #include <wctype.h>
  15 #include "path.h"
  16 #include "fhandler.h"
  17 #include "dtable.h"
  18 #include "cygheap.h"
  19 #include "tls_pbuf.h"
  20 #include "collate.h"
  21 #include "lc_msg.h"
  22 #include "lc_era.h"
  23 #include "lc_collelem.h"
  24 #include "lc_def_codesets.h"
  25
  26 #define _LC(x)  &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
  27
  28 #define getlocaleinfo(category,type) \
  29             __getlocaleinfo(win_locale,(type),_LC(category))
  30 #define getlocaleint(type) \
  31             __getlocaleint(win_locale,(type))
  32 #define setlocaleinfo(category,val) \
  33             __setlocaleinfo(_LC(category),(val))
  34 #define eval_datetimefmt(type,flags) \
  35             __eval_datetimefmt(win_locale,(type),(flags),&lc_time_ptr,\
  36                                lc_time_end-lc_time_ptr)
  37 #define charfromwchar(category,in) \
  38             __charfromwchar (_##category##_locale->in,_LC(category),f_wctomb)
  39
  40 /* Check for @cjk* modifier.  Try to be as fast as possible */
  41 #define __is_cjk_modifier(_in, _cmp, _L) ({ \
  42         _in[1] == 'c' \
  43         && _in[2] == 'j' \
  44         && _in[3] == 'k'\
  45         && (_cmp (_in + 4, _L##"narrow") == 0 \
  46             || _cmp (_in + 4, _L##"wide") == 0 \
  47             || _cmp (_in + 4, _L##"single") == 0); \
  48 })
  49 #define is_cjk_modifier(_in)    __is_cjk_modifier(_in, strcmp, )
  50 #define w_is_cjk_modifier(_in)  __is_cjk_modifier(_in, wcscmp, L)
  51
   52 /* ResolveLocaleName does not do what we want.  It converts anything which
  53    vaguely resembles a locale into some other locale it supports.  Bad
  54    examples are: "en-XY" gets converted to "en-US", and worse, "ff-BF" gets
  55    converted to "ff-Latn-SN", even though "ff-Adlm-BF" exists!  Useless.
  56    To check if a locale is supported, we have to enumerate all valid
  57    Windows locales, and return the match, even if the locale in Windows
  58    requires a script. */
  59 struct res_loc_t {
  60   const wchar_t *search_iso639;
  61   const wchar_t *search_iso3166;
  62   wchar_t *resolved_locale;
  63   int res_len;
  64 };
  65
  66 static BOOL
  67 resolve_locale_proc (LPWSTR win_locale, DWORD info, LPARAM param)
  68 {
  69   res_loc_t *loc = (res_loc_t *) param;
  70   wchar_t *iso639, *iso639_end;
  71   wchar_t *iso3166;
  72
  73   iso639 = win_locale;
  74   iso639_end = wcschr (iso639, L'-');
  75   if (!iso639_end)
  76     return TRUE;
  77   if (wcsncmp (loc->search_iso639, iso639, iso639_end - iso639) != 0)
  78     return TRUE;
  79   iso3166 = ++iso639_end;
  80   /* Territory is all upper case */
  81   while (!iswupper (iso3166[0]) || !iswupper (iso3166[1]))
  82     {
  83       iso3166 = wcschr (iso3166, L'-');
  84       if (!iso3166)
  85         return TRUE;
  86       ++iso3166;
  87     }
  88   if (wcsncmp (loc->search_iso3166, iso3166, wcslen (loc->search_iso3166)))
  89     return TRUE;
  90   wcsncat (loc->resolved_locale, win_locale, loc->res_len - 1);
  91   return FALSE;
  92 }
  93
  94 static int
  95 resolve_locale_name (const wchar_t *search, wchar_t *result, int rlen)
  96 {
  97   res_loc_t loc;
  98
  99   loc.search_iso639 = search;
 100   loc.search_iso3166 = wcschr (search, L'-') + 1;
 101   loc.resolved_locale = result;
 102   loc.res_len = rlen;
 103   result[0] = L'\0';
 104   EnumSystemLocalesEx (resolve_locale_proc,
 105                        LOCALE_WINDOWS | LOCALE_SUPPLEMENTAL,
 106                        (LPARAM) &loc, NULL);
 107   return wcslen (result);
 108 }
 109
 110 /* Fetch Windows RFC 5646 locale from POSIX locale specifier.
 111    Return values:
 112
 113      -1: Invalid locale
 114       0: C or POSIX
 115       1: valid locale
 116 */
 117 static int
 118 __get_rfc5646_from_locale (const char *name, wchar_t *win_locale)
 119 {
 120   wchar_t wlocale[ENCODING_LEN + 1] = { 0 };
 121   wchar_t locale[ENCODING_LEN + 1];
 122   wchar_t *c;
 123
 124   win_locale[0] = L'\0';
 125   mbstowcs (locale, name, ENCODING_LEN + 1);
 126   /* Remember modifier for later use. */
 127   const char *modifier = strchr (name, '@') ? : "";
 128   /* Drop charset and modifier */
 129   c = wcschr (locale, L'.');
 130   if (!c)
 131     c = wcschr (locale, L'@');
 132   if (c)
 133     *c = L'\0';
 134   /* "POSIX" already converted to "C" in loadlocale. */
 135   if (!wcscmp (locale, L"C"))
 136     return 0;
 137   c = wcschr (locale, '_');
 138   if (!c)
 139     {
 140       /* try if the locale can be resolved from the language tag
 141          fix up Linux-only locale first */
 142       if (!wcscmp (locale, L"ber"))
 143         wcscpy (locale, L"tzm");
 144       if (ResolveLocaleName (locale, wlocale, ENCODING_LEN + 1) <= 1)
 145         {
 146           set_errno (ENOENT);
 147           return -1;
 148         }
 149       wcpcpy (win_locale, wlocale);
 150       return 1;
 151     }
 152
 153   /* Convert to RFC 5646 syntax. */
 154   *c = '-';
 155   /* Override a few locales with a different default script as used
 156      on Linux.  Linux also supports no_NO which is equivalent to nb_NO,
 157      but Windows can resolve that nicely.  Also, "tzm" and "zgh" are
 158      subsumed under "ber" on Linux. */
 159   struct {
 160     const wchar_t *loc;
 161     const wchar_t *wloc;
 162   } override_locale[] = {
 163     { L"ber-DZ" , L"tzm-Latn-DZ" },
 164     { L"ber-MA" , L"zgh-Tfng-MA" },
 165     { L"mn-CN" , L"mn-Mong-CN"   },
 166     { L"mn-MN" , L"mn-Mong-MN"   },
 167     { L"pa-PK" , L"pa-Arab-PK"   },
 168     { L"sd-IN" , L"sd-Deva-IN"   },
 169     { L"sr-BA" , L"sr-Cyrl-BA"   },
 170     { L"sr-ME" , L"sr-Cyrl-ME"   },
 171     { L"sr-RS" , L"sr-Cyrl-RS"   },
 172     { L"sr-XK" , L"sr-Cyrl-XK"   },
 173     { L"tzm-MA", L"tzm-Tfng-MA"  },
 174     { NULL    , NULL         }
 175   };
 176
 177   for (int i = 0; override_locale[i].loc
 178                   && override_locale[i].loc[0] <= locale[0]; ++i)
 179     {
 180       if (!wcscmp (locale, override_locale[i].loc))
 181         {
 182           wcscpy (wlocale, override_locale[i].wloc);
 183           break;
 184         }
 185     }
 186   /* If resolve_locale_name returns with error, or if it returns a
 187      locale other than the input locale, we don't support this locale. */
 188   if (!wlocale[0]
 189       && !resolve_locale_name (locale, wlocale, ENCODING_LEN + 1))
 190     {
 191       set_errno (ENOENT);
 192       return -1;
 193     }
 194
 195   /* Check for modifiers changing the script */
 196   const wchar_t *iso15924_script[] = { L"Latn-", L"Cyrl-", L"Deva-", L"Adlm-" };
 197   int idx = -1;
 198
 199   if (modifier[0])
 200     {
 201       if (!strcmp (++modifier, "latin"))
 202         idx = 0;
 203       else if (!strcmp (modifier, "cyrillic"))
 204         idx = 1;
 205       else if (!strcmp (modifier, "devanagari"))
 206         idx = 2;
 207       else if (!strcmp (modifier, "adlam"))
 208         idx = 3;
 209     }
 210   if (idx >= 0)
 211     {
 212       wchar_t *iso3166 = wcschr (wlocale, L'-') + 1;
 213       wchar_t *wlp;
 214
 215       /* Copy iso639 language part including dash */
 216       wlp = wcpncpy (win_locale, wlocale, iso3166 - wlocale);
 217       /* Concat new iso15924 script */
 218       wlp = wcpcpy (wlp, iso15924_script[idx]);
 219       /* Concat iso3166 territory.  Skip script, if already in the locale */
 220       wchar_t *skip_script = wcschr (iso3166, L'-');
 221       if (skip_script)
 222         iso3166 = skip_script + 1;
 223        wcpcpy (wlp, iso3166);
 224     }
 225   else
 226     wcpcpy (win_locale, wlocale);
 227   return 1;
 228 }
 229
 230 /* Never returns -1.  Just skips invalid chars instead.  Only if return_invalid
 231    is set, s==NULL returns -1 since then it's used to recognize invalid strings
 232    in the used charset. */
 233 static size_t
 234 lc_wcstombs (wctomb_p f_wctomb, char *s, const wchar_t *pwcs, size_t n,
 235              bool return_invalid = false)
 236 {
 237   char *ptr = s;
 238   size_t max = n;
 239   char buf[8];
 240   size_t i, bytes, num_to_copy;
 241   mbstate_t state;
 242
 243   memset (&state, 0, sizeof state);
 244   if (s == NULL)
 245     {
 246       size_t num_bytes = 0;
 247       while (*pwcs != 0)
 248         {
 249           bytes = f_wctomb (_REENT, buf, *pwcs++, &state);
 250           if (bytes != (size_t) -1)
 251             num_bytes += bytes;
 252           else if (return_invalid)
 253             return (size_t) -1;
 254         }
 255       return num_bytes;
 256     }
 257   while (n > 0)
 258     {
 259       bytes = f_wctomb (_REENT, buf, *pwcs, &state);
 260       if (bytes == (size_t) -1)
 261         {
 262           memset (&state, 0, sizeof state);
 263           ++pwcs;
 264           continue;
 265         }
 266       num_to_copy = (n > bytes ? bytes : n);
 267       for (i = 0; i < num_to_copy; ++i)
 268         *ptr++ = buf[i];
 269
 270       if (*pwcs == 0x00)
 271         return ptr - s - (n >= bytes);
 272       ++pwcs;
 273       n -= num_to_copy;
 274     }
 275   return max;
 276 }
 277
 278 /* Never returns -1.  Invalid sequences are translated to replacement
 279    wide-chars. */
 280 static size_t
 281 lc_mbstowcs (mbtowc_p f_mbtowc, wchar_t *pwcs, const char *s, size_t n)
 282 {
 283   size_t ret = 0;
 284   char *t = (char *) s;
 285   size_t bytes;
 286   mbstate_t state;
 287
 288   memset (&state, 0, sizeof state);
 289   if (!pwcs)
 290     n = 1;
 291   while (n > 0)
 292     {
 293       bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */, &state);
 294       if (bytes == (size_t) -1)
 295         {
 296           state.__count = 0;
 297           bytes = 1;
 298           if (pwcs)
 299             *pwcs = L' ';
 300         }
 301       else if (bytes == 0)
 302         break;
 303       t += bytes;
 304       ++ret;
 305       if (pwcs)
 306         {
 307           ++pwcs;
 308           --n;
 309         }
 310     }
 311   return ret;
 312 }
 313
 314 static int
 315 locale_cmp (const void *a, const void *b)
 316 {
 317   char **la = (char **) a;
 318   char **lb = (char **) b;
 319   return strcmp (*la, *lb);
 320 }
 321
 322 /* Helper function to workaround reallocs which move blocks even if they shrink.
 323    Cygwin's realloc is not doing this, but tcsh's, for instance.  All lc_foo
 324    structures consist entirely of pointers so they are practically pointer
 325    arrays.  What we do here is just treat the lc_foo pointers as char ** and
 326    rebase all char * pointers within, up to the given size of the structure. */
 327 static void
 328 rebase_locale_buf (const void *ptrv, const void *ptrvend, const char *newbase,
 329                    const char *oldbase, const char *oldend)
 330 {
 331   const char **ptrsend = (const char **) ptrvend;
 332   for (const char **ptrs = (const char **) ptrv; ptrs < ptrsend; ++ptrs)
 333     if (*ptrs >= oldbase && *ptrs < oldend)
 334       *ptrs += newbase - oldbase;
 335 }
 336
 337 static wchar_t *
 338 __getlocaleinfo (wchar_t *loc, LCTYPE type, char **ptr, size_t size)
 339 {
 340   size_t num;
 341   wchar_t *ret;
 342
 343   if ((uintptr_t) *ptr % 1)
 344     ++*ptr;
 345   ret = (wchar_t *) *ptr;
 346   num = GetLocaleInfoEx (loc, type, ret, size / sizeof (wchar_t));
 347   *ptr = (char *) (ret + num);
 348   return ret;
 349 }
 350
 351 static wchar_t *
 352 __setlocaleinfo (char **ptr, size_t size, wchar_t val)
 353 {
 354   wchar_t *ret;
 355
 356   if ((uintptr_t) *ptr % 1)
 357     ++*ptr;
 358   ret = (wchar_t *) *ptr;
 359   ret[0] = val;
 360   ret[1] = L'\0';
 361   *ptr = (char *) (ret + 2);
 362   return ret;
 363 }
 364
 365 static char *
 366 __charfromwchar (const wchar_t *in, char **ptr, size_t size, wctomb_p f_wctomb)
 367 {
 368   size_t num;
 369   char *ret;
 370
 371   num = lc_wcstombs (f_wctomb, ret = *ptr, in, size);
 372   *ptr += num + 1;
 373   return ret;
 374 }
 375
 376 static UINT
 377 __getlocaleint (wchar_t *loc, LCTYPE type)
 378 {
 379   UINT val;
 380   return GetLocaleInfoEx (loc, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val,
 381                          sizeof val) ? val : 0;
 382 }
 383
 384 enum dt_flags {
 385   DT_DEFAULT    = 0x00,
 386   DT_AMPM       = 0x01, /* Enforce 12 hour time format. */
 387   DT_ABBREV     = 0x02, /* Enforce abbreviated month and day names. */
 388 };
 389
 390 static wchar_t *
 391 __eval_datetimefmt (wchar_t *loc, LCTYPE type, dt_flags flags, char **ptr,
 392                     size_t size)
 393 {
 394   wchar_t buf[80];
 395   wchar_t fc;
 396   size_t idx;
 397   const wchar_t *day_str = L"edaA";
 398   const wchar_t *mon_str = L"mmbB";
 399   const wchar_t *year_str = L"yyyY";
 400   const wchar_t *hour12_str = L"lI";
 401   const wchar_t *hour24_str = L"kH";
 402   const wchar_t *t_str;
 403
 404   if ((uintptr_t) *ptr % 1)
 405     ++*ptr;
 406   wchar_t *ret = (wchar_t *) *ptr;
 407   wchar_t *p = (wchar_t *) *ptr;
 408   GetLocaleInfoEx (loc, type, buf, 80);
 409   for (wchar_t *fmt = buf; *fmt; ++fmt)
 410     switch (fc = *fmt)
 411       {
 412       case L'\'':
 413         if (fmt[1] == L'\'')
 414           *p++ = L'\'';
 415         else
 416           while (fmt[1] && *++fmt != L'\'')
 417             *p++ = *fmt;
 418         break;
 419       case L'd':
 420       case L'M':
 421       case L'y':
 422         t_str = (fc == L'd' ? day_str : fc == L'M' ? mon_str : year_str);
 423         for (idx = 0; fmt[1] == fc; ++idx, ++fmt);
 424         if (idx > 3)
 425           idx = 3;
 426         if ((flags & DT_ABBREV) && fc != L'y' && idx == 3)
 427           idx = 2;
 428         *p++ = L'%';
 429         *p++ = t_str[idx];
 430         break;
 431       case L'g':
 432         /* TODO */
 433         break;
 434       case L'h':
 435       case L'H':
 436         t_str = (fc == L'h' || (flags & DT_AMPM) ? hour12_str : hour24_str);
 437         idx = 0;
 438         if (fmt[1] == fc)
 439           {
 440             ++fmt;
 441             idx = 1;
 442           }
 443         *p++ = L'%';
 444         *p++ = t_str[idx];
 445         break;
 446       case L'm':
 447       case L's':
 448       case L't':
 449         if (fmt[1] == fc)
 450           ++fmt;
 451         *p++ = L'%';
 452         *p++ = (fc == L'm' ? L'M' : fc == L's' ? L'S' : L'p');
 453         break;
 454       case L'\t':
 455       case L'\n':
 456       case L'%':
 457         *p++ = L'%';
 458         *p++ = fc;
 459         break;
 460       default:
 461         *p++ = *fmt;
 462         break;
 463       }
 464   *p++ = L'\0';
 465   *ptr = (char *) p;
 466   return ret;
 467 }
 468
 469 /* Convert Windows grouping format into POSIX grouping format. */
 470 static char *
 471 conv_grouping (wchar_t *loc, LCTYPE type, char **lc_ptr)
 472 {
 473   wchar_t buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
 474   bool repeat = false;
 475   char *ptr = *lc_ptr;
 476   char *ret = ptr;
 477
 478   GetLocaleInfoEx (loc, type, buf, 10);
 479   /* Convert Windows grouping format into POSIX grouping format. Note that
 480      only ASCII chars are used in the grouping format. */
 481   for (wchar_t *c = buf; *c; ++c)
 482     {
 483       if (*c < L'0' || *c > L'9')
 484         continue;
 485       char val = *c - L'0';
 486       if (!val)
 487         {
 488           repeat = true;
 489           break;
 490         }
 491       *ptr++ = val;
 492     }
 493   if (!repeat)
 494     *ptr++ = CHAR_MAX;
 495   *ptr++ = '\0';
 496   *lc_ptr = ptr;
 497   return ret;
 498 }
 499
 500 /* Called from newlib's setlocale() via __time_load_locale() if category
 501    is LC_TIME.  Returns LC_TIME values fetched from Windows locale data
 502    in the structure pointed to by _time_locale.  This is subsequently
 503    accessed by functions like nl_langinfo, strftime, strptime. */
 504 extern "C" int
 505 __set_lc_time_from_win (const char *name,
 506                         const struct lc_time_T *_C_time_locale,
 507                         struct lc_time_T *_time_locale,
 508                         char **lc_time_buf, wctomb_p f_wctomb,
 509                         const char *charset)
 510 {
 511   wchar_t win_locale[ENCODING_LEN + 1];
 512   int ret = __get_rfc5646_from_locale (name, win_locale);
 513   if (ret < 0)
 514     return ret;
 515   if (!ret && !strcmp (charset, "ASCII"))
 516     return 0;
 517
 518 # define MAX_TIME_BUFFER_SIZE   4096
 519
 520   char *new_lc_time_buf = (char *) malloc (MAX_TIME_BUFFER_SIZE);
 521   const char *lc_time_end = new_lc_time_buf + MAX_TIME_BUFFER_SIZE;
 522
 523   if (!new_lc_time_buf)
 524     return -1;
 525   char *lc_time_ptr = new_lc_time_buf;
 526
 527   /* C.foo is just a copy of "C" with fixed charset. */
 528   if (!ret)
 529     memcpy (_time_locale, _C_time_locale, sizeof (struct lc_time_T));
 530   /* codeset */
 531   _time_locale->codeset = lc_time_ptr;
 532   lc_time_ptr = stpcpy (lc_time_ptr, charset) + 1;
 533
 534   if (ret)
 535     {
 536       char locale[ENCODING_LEN + 1];
 537       strcpy (locale, name);
 538       /* Removes the charset from the locale and attach the modifier to the
 539          language_TERRITORY part. */
 540       char *c = strchr (locale, '.');
 541       if (c)
 542         {
 543           *c = '\0';
 544           char *c2 = strchr (c + 1, '@');
 545           /* Ignore @cjk* modifiers, they are newlib specials. */
 546           if (c2 && !is_cjk_modifier (c2))
 547             memmove (c, c2, strlen (c2) + 1);
 548         }
 549       /* Now search in the alphabetically order lc_era array for the
 550          locale. */
 551       lc_era_t locale_key = { locale, NULL, NULL, NULL, NULL, NULL ,
 552                                       NULL, NULL, NULL, NULL, NULL };
 553       lc_era_t *era = (lc_era_t *) bsearch ((void *) &locale_key,
 554                                             (void *) lc_era,
 555                                             sizeof lc_era / sizeof *lc_era,
 556                                             sizeof *lc_era, locale_cmp);
 557
 558       /* mon */
 559       /* Windows has a bug in "ja-JP" and "ko-KR" (but not in "ko-KP").
 560          In these locales, strings returned for LOCALE_SABBREVMONTHNAME*
 561          are missing the suffix representing a month.
 562
 563          A Japanese article describing the problem was
 564          https://msdn.microsoft.com/ja-jp/library/cc422084.aspx, which is
 565          only available via
 566          https://web.archive.org/web/20110922195821/https://msdn.microsoft.com/ja-jp/library/cc422084.aspx
 567          these days.  Testing indicates that this problem is still present
 568          in Windows 11.
 569
 570          The workaround is to use LOCALE_SMONTHNAME* in these locales,
 571          even for the abbreviated month name. */
 572       const LCTYPE mon_base = !wcscmp (win_locale, L"ja-JP")
 573                               || !wcscmp (win_locale, L"ko-KR")
 574                               ? LOCALE_SMONTHNAME1 : LOCALE_SABBREVMONTHNAME1;
 575       for (int i = 0; i < 12; ++i)
 576         {
 577           _time_locale->wmon[i] = getlocaleinfo (time, mon_base + i);
 578           _time_locale->mon[i] = charfromwchar (time, wmon[i]);
 579         }
 580       /* month and alt_month */
 581       for (int i = 0; i < 12; ++i)
 582         {
 583           _time_locale->wmonth[i] = getlocaleinfo (time,
 584                                                    LOCALE_SMONTHNAME1 + i);
 585           _time_locale->month[i] = _time_locale->alt_month[i]
 586                                  = charfromwchar (time, wmonth[i]);
 587         }
 588       /* wday */
 589       _time_locale->wwday[0] = getlocaleinfo (time, LOCALE_SABBREVDAYNAME7);
 590       _time_locale->wday[0] = charfromwchar (time, wwday[0]);
 591       for (int i = 0; i < 6; ++i)
 592         {
 593           _time_locale->wwday[i + 1] = getlocaleinfo (time,
 594                                                       LOCALE_SABBREVDAYNAME1 + i);
 595           _time_locale->wday[i + 1] = charfromwchar (time, wwday[i + 1]);
 596         }
 597       /* weekday */
 598       _time_locale->wweekday[0] = getlocaleinfo (time, LOCALE_SDAYNAME7);
 599       _time_locale->weekday[0] = charfromwchar (time, wweekday[0]);
 600       for (int i = 0; i < 6; ++i)
 601         {
 602           _time_locale->wweekday[i + 1] = getlocaleinfo (time,
 603                                                          LOCALE_SDAYNAME1 + i);
 604           _time_locale->weekday[i + 1] = charfromwchar (time, wweekday[i + 1]);
 605         }
 606       size_t len;
 607       /* X_fmt */
 608       if (era && *era->t_fmt)
 609         {
 610           _time_locale->wX_fmt = (const wchar_t *) lc_time_ptr;
 611           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wX_fmt,
 612                                           era->t_fmt) + 1);
 613         }
 614       else
 615         _time_locale->wX_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
 616       _time_locale->X_fmt = charfromwchar (time, wX_fmt);
 617       /* x_fmt */
 618       if (era && *era->d_fmt)
 619         {
 620           _time_locale->wx_fmt = (const wchar_t *) lc_time_ptr;
 621           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wx_fmt,
 622                                           era->d_fmt) + 1);
 623         }
 624       else
 625         _time_locale->wx_fmt = eval_datetimefmt (LOCALE_SSHORTDATE, DT_DEFAULT);
 626       _time_locale->x_fmt = charfromwchar (time, wx_fmt);
 627       /* c_fmt */
 628       if (era && *era->d_t_fmt)
 629         {
 630           _time_locale->wc_fmt = (const wchar_t *) lc_time_ptr;
 631           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wc_fmt,
 632                                           era->d_t_fmt) + 1);
 633         }
 634       else
 635         {
 636           _time_locale->wc_fmt = eval_datetimefmt (LOCALE_SLONGDATE, DT_ABBREV);
 637           ((wchar_t *) lc_time_ptr)[-1] = L' ';
 638           eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
 639         }
 640       _time_locale->c_fmt = charfromwchar (time, wc_fmt);
 641       /* AM/PM */
 642       _time_locale->wam_pm[0] = getlocaleinfo (time, LOCALE_S1159);
 643       _time_locale->wam_pm[1] = getlocaleinfo (time, LOCALE_S2359);
 644       _time_locale->am_pm[0] = charfromwchar (time, wam_pm[0]);
 645       _time_locale->am_pm[1] = charfromwchar (time, wam_pm[1]);
 646       /* date_fmt */
 647       if (era && *era->date_fmt)
 648         {
 649           _time_locale->wdate_fmt = (const wchar_t *) lc_time_ptr;
 650           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wdate_fmt,
 651                                           era->date_fmt) + 1);
 652         }
 653       else
 654         _time_locale->wdate_fmt = _time_locale->wc_fmt;
 655       _time_locale->date_fmt = charfromwchar (time, wdate_fmt);
 656       /* md */
 657       {
 658         wchar_t buf[80];
 659         GetLocaleInfoEx (win_locale, LOCALE_IDATE, buf, 80);
 660         _time_locale->md_order = (const char *) lc_time_ptr;
 661         lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1;
 662       }
 663       /* ampm_fmt */
 664       if (era)
 665         {
 666           _time_locale->wampm_fmt = (const wchar_t *) lc_time_ptr;
 667           lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wampm_fmt,
 668                                           era->t_fmt_ampm) + 1);
 669         }
 670       else
 671         _time_locale->wampm_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_AMPM);
 672       _time_locale->ampm_fmt = charfromwchar (time, wampm_fmt);
 673
 674       if (era)
 675         {
 676           /* Evaluate string length in target charset.  Characters invalid in the
 677              target charset are simply ignored, as on Linux. */
 678           len = 0;
 679           len += lc_wcstombs (f_wctomb, NULL, era->era, 0) + 1;
 680           len += lc_wcstombs (f_wctomb, NULL, era->era_d_fmt, 0) + 1;
 681           len += lc_wcstombs (f_wctomb, NULL, era->era_d_t_fmt, 0) + 1;
 682           len += lc_wcstombs (f_wctomb, NULL, era->era_t_fmt, 0) + 1;
 683           len += lc_wcstombs (f_wctomb, NULL, era->alt_digits, 0) + 1;
 684           len += (wcslen (era->era) + 1) * sizeof (wchar_t);
 685           len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t);
 686           len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t);
 687           len += (wcslen (era->era_t_fmt) + 1) * sizeof (wchar_t);
 688           len += (wcslen (era->alt_digits) + 1) * sizeof (wchar_t);
 689
 690           /* Make sure data fits into the buffer */
 691           if (lc_time_ptr + len > lc_time_end)
 692             {
 693               len = lc_time_ptr + len - new_lc_time_buf;
 694               char *tmp = (char *) realloc (new_lc_time_buf, len);
 695               if (!tmp)
 696                 era = NULL;
 697               else
 698                 {
 699                   if (tmp != new_lc_time_buf)
 700                     rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
 701                                        new_lc_time_buf, lc_time_ptr);
 702                   lc_time_ptr = tmp + (lc_time_ptr - new_lc_time_buf);
 703                   new_lc_time_buf = tmp;
 704                   lc_time_end = new_lc_time_buf + len;
 705                 }
 706             }
 707           /* Copy over */
 708           if (era)
 709             {
 710               /* era */
 711               _time_locale->wera = (const wchar_t *) lc_time_ptr;
 712               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera,
 713                                               era->era) + 1);
 714               _time_locale->era = charfromwchar (time, wera);
 715               /* era_d_fmt */
 716               _time_locale->wera_d_fmt = (const wchar_t *) lc_time_ptr;
 717               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_fmt,
 718                                               era->era_d_fmt) + 1);
 719               _time_locale->era_d_fmt = charfromwchar (time, wera_d_fmt);
 720               /* era_d_t_fmt */
 721               _time_locale->wera_d_t_fmt = (const wchar_t *) lc_time_ptr;
 722               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_t_fmt,
 723                                               era->era_d_t_fmt) + 1);
 724               _time_locale->era_d_t_fmt = charfromwchar (time, wera_d_t_fmt);
 725               /* era_t_fmt */
 726               _time_locale->wera_t_fmt = (const wchar_t *) lc_time_ptr;
 727               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_t_fmt,
 728                                               era->era_t_fmt) + 1);
 729               _time_locale->era_t_fmt = charfromwchar (time, wera_t_fmt);
 730               /* alt_digits */
 731               _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
 732               lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->walt_digits,
 733                                               era->alt_digits) + 1);
 734               _time_locale->alt_digits = charfromwchar (time, walt_digits);
 735             }
 736         }
 737       if (!era)
 738         {
 739           _time_locale->wera =
 740           _time_locale->wera_d_fmt =
 741           _time_locale->wera_d_t_fmt =
 742           _time_locale->wera_t_fmt =
 743           _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
 744           _time_locale->era =
 745           _time_locale->era_d_fmt =
 746           _time_locale->era_d_t_fmt =
 747           _time_locale->era_t_fmt =
 748           _time_locale->alt_digits = (const char *) lc_time_ptr;
 749           /* Twice, to make sure wide char strings are correctly terminated. */
 750           *lc_time_ptr++ = '\0';
 751           *lc_time_ptr++ = '\0';
 752         }
 753     }
 754
 755   char *tmp = (char *) realloc (new_lc_time_buf, lc_time_ptr - new_lc_time_buf);
 756   if (!tmp)
 757     {
 758       free (new_lc_time_buf);
 759       return -1;
 760     }
 761   if (tmp != new_lc_time_buf)
 762     rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
 763                        new_lc_time_buf, lc_time_ptr);
 764   *lc_time_buf = tmp;
 765   return 1;
 766 }
 767
 768 /* Called from newlib's setlocale() via __ctype_load_locale() if category
 769    is LC_CTYPE.  Returns LC_CTYPE values fetched from Windows locale data
 770    in the structure pointed to by _ctype_locale.  This is subsequently
 771    accessed by functions like nl_langinfo, localeconv, printf, etc. */
 772 extern "C" int
 773 __set_lc_ctype_from_win (const char *name,
 774                          const struct lc_ctype_T *_C_ctype_locale,
 775                          struct lc_ctype_T *_ctype_locale,
 776                          char **lc_ctype_buf, wctomb_p f_wctomb,
 777                          const char *charset, int mb_cur_max)
 778 {
 779   wchar_t win_locale[ENCODING_LEN + 1];
 780   int ret = __get_rfc5646_from_locale (name, win_locale);
 781   if (ret < 0)
 782     return ret;
 783   if (!ret && !strcmp (charset, "ASCII"))
 784     return 0;
 785
 786 # define MAX_CTYPE_BUFFER_SIZE  256
 787
 788   char *new_lc_ctype_buf = (char *) malloc (MAX_CTYPE_BUFFER_SIZE);
 789
 790   if (!new_lc_ctype_buf)
 791     return -1;
 792   char *lc_ctype_ptr = new_lc_ctype_buf;
 793   /* C.foo is just a copy of "C" with fixed charset. */
 794   if (!ret)
 795     memcpy (_ctype_locale, _C_ctype_locale, sizeof (struct lc_ctype_T));
 796   /* codeset */
 797   _ctype_locale->codeset = lc_ctype_ptr;
 798   lc_ctype_ptr = stpcpy (lc_ctype_ptr, charset) + 1;
 799   /* mb_cur_max */
 800   _ctype_locale->mb_cur_max = lc_ctype_ptr;
 801   *lc_ctype_ptr++ = mb_cur_max;
 802   *lc_ctype_ptr++ = '\0';
 803   if (ret)
 804     {
 805       /* outdigits and woutdigits */
 806       wchar_t digits[11];
 807       GetLocaleInfoEx (win_locale, LOCALE_SNATIVEDIGITS, digits, 11);
 808       for (int i = 0; i <= 9; ++i)
 809         {
 810           mbstate_t state;
 811
 812           /* Make sure the wchar_t's are always 2 byte aligned. */
 813           if ((uintptr_t) lc_ctype_ptr % 2)
 814             ++lc_ctype_ptr;
 815           wchar_t *woutdig = (wchar_t *) lc_ctype_ptr;
 816           _ctype_locale->woutdigits[i] = (const wchar_t *) woutdig;
 817           *woutdig++ = digits[i];
 818           *woutdig++ = L'\0';
 819           lc_ctype_ptr = (char *) woutdig;
 820           _ctype_locale->outdigits[i] = lc_ctype_ptr;
 821           memset (&state, 0, sizeof state);
 822           lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], &state);
 823           *lc_ctype_ptr++ = '\0';
 824         }
 825     }
 826
 827   char *tmp = (char *) realloc (new_lc_ctype_buf,
 828                                 lc_ctype_ptr - new_lc_ctype_buf);
 829   if (!tmp)
 830     {
 831       free (new_lc_ctype_buf);
 832       return -1;
 833     }
 834   if (tmp != new_lc_ctype_buf)
 835     rebase_locale_buf (_ctype_locale, _ctype_locale + 1, tmp,
 836                        new_lc_ctype_buf, lc_ctype_ptr);
 837   *lc_ctype_buf = tmp;
 838   return 1;
 839 }
 840
 841 /* Called from newlib's setlocale() via __numeric_load_locale() if category
 842    is LC_NUMERIC.  Returns LC_NUMERIC values fetched from Windows locale data
 843    in the structure pointed to by _numeric_locale.  This is subsequently
 844    accessed by functions like nl_langinfo, localeconv, printf, etc. */
 845 extern "C" int
 846 __set_lc_numeric_from_win (const char *name,
 847                            const struct lc_numeric_T *_C_numeric_locale,
 848                            struct lc_numeric_T *_numeric_locale,
 849                            char **lc_numeric_buf, wctomb_p f_wctomb,
 850                            const char *charset)
 851 {
 852   wchar_t win_locale[ENCODING_LEN + 1];
 853   int ret = __get_rfc5646_from_locale (name, win_locale);
 854   if (ret < 0)
 855     return ret;
 856   if (!ret && !strcmp (charset, "ASCII"))
 857     return 0;
 858
 859 # define MAX_NUMERIC_BUFFER_SIZE        256
 860
 861   char *new_lc_numeric_buf = (char *) malloc (MAX_NUMERIC_BUFFER_SIZE);
 862   const char *lc_numeric_end = new_lc_numeric_buf + MAX_NUMERIC_BUFFER_SIZE;
 863
 864   if (!new_lc_numeric_buf)
 865     return -1;
 866   char *lc_numeric_ptr = new_lc_numeric_buf;
 867   /* C.foo is just a copy of "C" with fixed charset. */
 868   if (!ret)
 869     memcpy (_numeric_locale, _C_numeric_locale, sizeof (struct lc_numeric_T));
 870   else
 871     {
 872       /* decimal_point and thousands_sep */
 873       /* fa_IR.  Windows decimal_point is slash, correct is dot */
 874       if (!wcscmp (win_locale, L"fa-IR"))
 875         {
 876           _numeric_locale->wdecimal_point = setlocaleinfo (numeric, L'.');
 877           _numeric_locale->wthousands_sep = setlocaleinfo (numeric, L',');
 878         }
 879       /* ps_AF.  Windows decimal_point is dot, thousands_sep is comma,
 880                  correct are arabic separators. */
 881       else if (!wcscmp (win_locale, L"ps-AF"))
 882         {
 883           _numeric_locale->wdecimal_point = setlocaleinfo (numeric, 0x066b);
 884           _numeric_locale->wthousands_sep = setlocaleinfo (numeric, 0x066c);
 885         }
 886       else
 887         {
 888           _numeric_locale->wdecimal_point = getlocaleinfo (numeric,
 889                                                            LOCALE_SDECIMAL);
 890           _numeric_locale->wthousands_sep = getlocaleinfo (numeric,
 891                                                            LOCALE_STHOUSAND);
 892         }
 893       _numeric_locale->decimal_point = charfromwchar (numeric, wdecimal_point);
 894       _numeric_locale->thousands_sep = charfromwchar (numeric, wthousands_sep);
 895       /* grouping */
 896       _numeric_locale->grouping = conv_grouping (win_locale, LOCALE_SGROUPING,
 897                                                  &lc_numeric_ptr);
 898     }
 899   /* codeset */
 900   _numeric_locale->codeset = lc_numeric_ptr;
 901   lc_numeric_ptr = stpcpy (lc_numeric_ptr, charset) + 1;
 902
 903   char *tmp = (char *) realloc (new_lc_numeric_buf,
 904                                 lc_numeric_ptr - new_lc_numeric_buf);
 905   if (!tmp)
 906     {
 907       free (new_lc_numeric_buf);
 908       return -1;
 909     }
 910   if (tmp != new_lc_numeric_buf)
 911     rebase_locale_buf (_numeric_locale, _numeric_locale + 1, tmp,
 912                        new_lc_numeric_buf, lc_numeric_ptr);
 913   *lc_numeric_buf = tmp;
 914   return 1;
 915 }
 916
 917 /* Called from newlib's setlocale() via __monetary_load_locale() if category
 918    is LC_MONETARY.  Returns LC_MONETARY values fetched from Windows locale data
 919    in the structure pointed to by _monetary_locale.  This is subsequently
 920    accessed by functions like nl_langinfo, localeconv, printf, etc. */
 921 extern "C" int
 922 __set_lc_monetary_from_win (const char *name,
 923                             const struct lc_monetary_T *_C_monetary_locale,
 924                             struct lc_monetary_T *_monetary_locale,
 925                             char **lc_monetary_buf, wctomb_p f_wctomb,
 926                             const char *charset)
 927 {
 928   wchar_t win_locale[ENCODING_LEN + 1];
 929   int ret = __get_rfc5646_from_locale (name, win_locale);
 930   if (ret < 0)
 931     return ret;
 932   if (!ret && !strcmp (charset, "ASCII"))
 933     return 0;
 934
 935 # define MAX_MONETARY_BUFFER_SIZE       512
 936
 937   char *new_lc_monetary_buf = (char *) malloc (MAX_MONETARY_BUFFER_SIZE);
 938   const char *lc_monetary_end = new_lc_monetary_buf + MAX_MONETARY_BUFFER_SIZE;
 939
 940   if (!new_lc_monetary_buf)
 941     return -1;
 942   char *lc_monetary_ptr = new_lc_monetary_buf;
 943   /* C.foo is just a copy of "C" with fixed charset. */
 944   if (!ret)
 945     memcpy (_monetary_locale, _C_monetary_locale, sizeof (struct lc_monetary_T));
 946   else
 947     {
 948       /* int_curr_symbol */
 949       _monetary_locale->wint_curr_symbol = getlocaleinfo (monetary,
 950                                                           LOCALE_SINTLSYMBOL);
 951       /* No spacing char means space. */
 952       if (!_monetary_locale->wint_curr_symbol[3])
 953         {
 954           wchar_t *wc = (wchar_t *) _monetary_locale->wint_curr_symbol + 3;
 955           *wc++ = L' ';
 956           *wc++ = L'\0';
 957           lc_monetary_ptr = (char *) wc;
 958         }
 959       _monetary_locale->int_curr_symbol = charfromwchar (monetary,
 960                                                          wint_curr_symbol);
 961       /* currency_symbol */
 962       _monetary_locale->wcurrency_symbol = getlocaleinfo (monetary,
 963                                                           LOCALE_SCURRENCY);
 964       /* As on Linux:  If the currency_symbol can't be represented in the
 965          given charset, use int_curr_symbol. */
 966       if (lc_wcstombs (f_wctomb, NULL, _monetary_locale->wcurrency_symbol,
 967                        0, true) == (size_t) -1)
 968         _monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol;
 969       else
 970         _monetary_locale->currency_symbol = charfromwchar (monetary,
 971                                                            wcurrency_symbol);
 972       /* mon_decimal_point and mon_thousands_sep */
 973       /* fa_IR or ps_AF.  Windows mon_decimal_point is slash and comma,
 974                           mon_thousands_sep is comma and dot, correct
 975                           are arabic separators. */
 976       if (!wcscmp (win_locale, L"fa-IR")
 977           || !wcscmp (win_locale, L"ps-AF"))
 978         {
 979           _monetary_locale->wmon_decimal_point = setlocaleinfo (monetary,
 980                                                                 0x066b);
 981           _monetary_locale->wmon_thousands_sep = setlocaleinfo (monetary,
 982                                                                 0x066c);
 983         }
 984       else
 985         {
 986           _monetary_locale->wmon_decimal_point = getlocaleinfo (monetary,
 987                                                         LOCALE_SMONDECIMALSEP);
 988           _monetary_locale->wmon_thousands_sep = getlocaleinfo (monetary,
 989                                                         LOCALE_SMONTHOUSANDSEP);
 990         }
 991       _monetary_locale->mon_decimal_point = charfromwchar (monetary,
 992                                                            wmon_decimal_point);
 993       _monetary_locale->mon_thousands_sep = charfromwchar (monetary,
 994                                                            wmon_thousands_sep);
 995       /* mon_grouping */
 996       _monetary_locale->mon_grouping = conv_grouping (win_locale,
 997                                                       LOCALE_SMONGROUPING,
 998                                                       &lc_monetary_ptr);
 999       /* positive_sign */
1000       _monetary_locale->wpositive_sign = getlocaleinfo (monetary,
1001                                                         LOCALE_SPOSITIVESIGN);
1002       _monetary_locale->positive_sign = charfromwchar (monetary, wpositive_sign);
1003       /* negative_sign */
1004       _monetary_locale->wnegative_sign = getlocaleinfo (monetary,
1005                                                         LOCALE_SNEGATIVESIGN);
1006       _monetary_locale->negative_sign = charfromwchar (monetary, wnegative_sign);
1007       /* int_frac_digits */
1008       *lc_monetary_ptr = (char) getlocaleint (LOCALE_IINTLCURRDIGITS);
1009       _monetary_locale->int_frac_digits = lc_monetary_ptr++;
1010       /* frac_digits */
1011       *lc_monetary_ptr = (char) getlocaleint (LOCALE_ICURRDIGITS);
1012       _monetary_locale->frac_digits = lc_monetary_ptr++;
1013       /* p_cs_precedes and int_p_cs_precedes */
1014       *lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSYMPRECEDES);
1015       _monetary_locale->p_cs_precedes
1016             = _monetary_locale->int_p_cs_precedes = lc_monetary_ptr++;
1017       /* p_sep_by_space and int_p_sep_by_space */
1018       *lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSEPBYSPACE);
1019       _monetary_locale->p_sep_by_space
1020             = _monetary_locale->int_p_sep_by_space = lc_monetary_ptr++;
1021       /* n_cs_precedes and int_n_cs_precedes */
1022       *lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSYMPRECEDES);
1023       _monetary_locale->n_cs_precedes
1024             = _monetary_locale->int_n_cs_precedes = lc_monetary_ptr++;
1025       /* n_sep_by_space and int_n_sep_by_space */
1026       *lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSEPBYSPACE);
1027       _monetary_locale->n_sep_by_space
1028             = _monetary_locale->int_n_sep_by_space = lc_monetary_ptr++;
1029       /* p_sign_posn and int_p_sign_posn */
1030       *lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSIGNPOSN);
1031       _monetary_locale->p_sign_posn
1032             = _monetary_locale->int_p_sign_posn = lc_monetary_ptr++;
1033       /* n_sign_posn and int_n_sign_posn */
1034       *lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSIGNPOSN);
1035       _monetary_locale->n_sign_posn
1036             = _monetary_locale->int_n_sign_posn = lc_monetary_ptr++;
1037     }
1038   /* codeset */
1039   _monetary_locale->codeset = lc_monetary_ptr;
1040   lc_monetary_ptr = stpcpy (lc_monetary_ptr, charset) + 1;
1041
1042   char *tmp = (char *) realloc (new_lc_monetary_buf,
1043                                 lc_monetary_ptr - new_lc_monetary_buf);
1044   if (!tmp)
1045     {
1046       free (new_lc_monetary_buf);
1047       return -1;
1048     }
1049   if (tmp != new_lc_monetary_buf)
1050     rebase_locale_buf (_monetary_locale, _monetary_locale + 1, tmp,
1051                        new_lc_monetary_buf, lc_monetary_ptr);
1052   *lc_monetary_buf = tmp;
1053   return 1;
1054 }
1055
1056 extern "C" int
1057 __set_lc_messages_from_win (const char *name,
1058                             const struct lc_messages_T *_C_messages_locale,
1059                             struct lc_messages_T *_messages_locale,
1060                             char **lc_messages_buf,
1061                             wctomb_p f_wctomb, const char *charset)
1062 {
1063   wchar_t win_locale[ENCODING_LEN + 1];
1064   int ret = __get_rfc5646_from_locale (name, win_locale);
1065   if (ret < 0)
1066     return ret;
1067   if (!ret && !strcmp (charset, "ASCII"))
1068     return 0;
1069
1070   char locale[ENCODING_LEN + 1];
1071   char *c, *c2;
1072   lc_msg_t *msg = NULL;
1073
1074   /* C.foo is just a copy of "C" with fixed charset. */
1075   if (!ret)
1076     memcpy (_messages_locale, _C_messages_locale, sizeof (struct lc_messages_T));
1077   else
1078     {
1079       strcpy (locale, name);
1080       /* Removes the charset from the locale and attach the modifer to the
1081          language_TERRITORY part. */
1082       c = strchr (locale, '.');
1083       if (c)
1084         {
1085           *c = '\0';
1086           c2 = strchr (c + 1, '@');
1087           /* Ignore @cjk* modifiers, they are newlib specials. */
1088           if (c2 && !is_cjk_modifier (c2))
1089             memmove (c, c2, strlen (c2) + 1);
1090         }
1091       /* Now search in the alphabetically order lc_msg array for the
1092          locale. */
1093       lc_msg_t locale_key = { locale, NULL, NULL, NULL, NULL };
1094       msg = (lc_msg_t *) bsearch ((void *) &locale_key, (void *) lc_msg,
1095                                   sizeof lc_msg / sizeof *lc_msg,
1096                                   sizeof *lc_msg, locale_cmp);
1097       if (!msg)
1098         return 0;
1099     }
1100
1101   /* Evaluate string length in target charset.  Characters invalid in the
1102      target charset are simply ignored, as on Linux. */
1103   size_t len = 0;
1104   len += (strlen (charset) + 1);
1105   if (ret)
1106     {
1107       len += lc_wcstombs (f_wctomb, NULL, msg->yesexpr, 0) + 1;
1108       len += lc_wcstombs (f_wctomb, NULL, msg->noexpr, 0) + 1;
1109       len += lc_wcstombs (f_wctomb, NULL, msg->yesstr, 0) + 1;
1110       len += lc_wcstombs (f_wctomb, NULL, msg->nostr, 0) + 1;
1111       len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t);
1112       len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t);
1113       len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t);
1114       len += (wcslen (msg->nostr) + 1) * sizeof (wchar_t);
1115       if (len % 1)
1116         ++len;
1117     }
1118   /* Allocate. */
1119   char *new_lc_messages_buf = (char *) malloc (len);
1120   const char *lc_messages_end = new_lc_messages_buf + len;
1121
1122   if (!new_lc_messages_buf)
1123     return -1;
1124   /* Copy over. */
1125   c = new_lc_messages_buf;
1126   /* codeset */
1127   _messages_locale->codeset = c;
1128   c = stpcpy (c, charset) + 1;
1129   if (ret)
1130     {
1131       _messages_locale->yesexpr = (const char *) c;
1132       len = lc_wcstombs (f_wctomb, c, msg->yesexpr, lc_messages_end - c);
1133       _messages_locale->noexpr = (const char *) (c += len + 1);
1134       len = lc_wcstombs (f_wctomb, c, msg->noexpr, lc_messages_end - c);
1135       _messages_locale->yesstr = (const char *) (c += len + 1);
1136       len = lc_wcstombs (f_wctomb, c, msg->yesstr, lc_messages_end - c);
1137       _messages_locale->nostr = (const char *) (c += len + 1);
1138       len = lc_wcstombs (f_wctomb, c, msg->nostr, lc_messages_end - c);
1139       c += len + 1;
1140       if ((uintptr_t) c % 1)
1141         ++c;
1142       wchar_t *wc = (wchar_t *) c;
1143       _messages_locale->wyesexpr = (const wchar_t *) wc;
1144       wc = wcpcpy (wc, msg->yesexpr) + 1;
1145       _messages_locale->wnoexpr = (const wchar_t *) wc;
1146       wc = wcpcpy (wc, msg->noexpr) + 1;
1147       _messages_locale->wyesstr = (const wchar_t *) wc;
1148       wc = wcpcpy (wc, msg->yesstr) + 1;
1149       _messages_locale->wnostr = (const wchar_t *) wc;
1150       wcpcpy (wc, msg->nostr);
1151     }
1152   *lc_messages_buf = new_lc_messages_buf;
1153   return 1;
1154 }
1155
/* Collation data used for the "C" locale; __collate_load_locale installs a
   pointer to it when no Windows locale applies.  The initializers are
   presumably in the member order win_locale, mbtowc, codeset (the members
   __collate_load_locale fills in for other locales) -- confirm against the
   lc_collate_T declaration. */
const struct lc_collate_T _C_collate_locale =
{
  L"",                  /* empty Windows locale name */
  __ascii_mbtowc,       /* multibyte to wide-char conversion function */
  "ASCII"               /* codeset name */
};
1162
1163 /* Called from newlib's setlocale() if category is LC_COLLATE.  Stores
1164    LC_COLLATE locale information.  This is subsequently accessed by the
1165    below functions strcoll, strxfrm, wcscoll, wcsxfrm. */
1166 extern "C" int
1167 __collate_load_locale (struct __locale_t *locale, const char *name,
1168                        void *f_mbtowc, const char *charset)
1169 {
1170   char *bufp = NULL;
1171   struct lc_collate_T *cop = NULL;
1172
1173   wchar_t win_locale[ENCODING_LEN + 1];
1174   int ret = __get_rfc5646_from_locale (name, win_locale);
1175   if (ret < 0)
1176     return ret;
1177   if (ret)
1178     {
1179       bufp = (char *) malloc (1);       /* dummy */
1180       if (!bufp)
1181         return -1;
1182       cop = (struct lc_collate_T *) calloc (1, sizeof (struct lc_collate_T));
1183       if (!cop)
1184         {
1185           free (bufp);
1186           return -1;
1187         }
1188       wcscpy (cop->win_locale, win_locale);
1189       cop->mbtowc = (mbtowc_p) f_mbtowc;
1190       stpcpy (cop->codeset, charset);
1191     }
1192   struct __lc_cats tmp = locale->lc_cat[LC_COLLATE];
1193   locale->lc_cat[LC_COLLATE].ptr = !win_locale[0] ? &_C_collate_locale : cop;
1194   locale->lc_cat[LC_COLLATE].buf = bufp;
1195   /* If buf is not NULL, both pointers have been alloc'ed */
1196   if (tmp.buf)
1197     {
1198       free ((void *) tmp.ptr);
1199       free (tmp.buf);
1200     }
1201   return 0;
1202 }
1203
1204 /* We use the Windows functions for locale-specific string comparison and
1205    transformation.  The advantage is that we don't need any files with
1206    collation information. */
1207
1208 extern "C" int
1209 wcscoll_l (const wchar_t *__restrict ws1, const wchar_t *__restrict ws2,
1210            struct __locale_t *locale)
1211 {
1212   int ret;
1213   const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
1214
1215   if (!collate_locale[0])
1216     return wcscmp (ws1, ws2);
1217   ret = CompareStringEx (collate_locale, 0, ws1, -1, ws2, -1, NULL, NULL, 0);
1218   if (!ret)
1219     set_errno (EINVAL);
1220   return ret - CSTR_EQUAL;
1221 }
1222
1223 extern "C" int
1224 wcscoll (const wchar_t *__restrict ws1, const wchar_t *__restrict ws2)
1225 {
1226   return wcscoll_l (ws1, ws2, __get_current_locale ());
1227 }
1228
1229 extern "C" int
1230 strcoll_l (const char *__restrict s1, const char *__restrict s2,
1231            struct __locale_t *locale)
1232 {
1233   size_t n1, n2;
1234   wchar_t *ws1, *ws2;
1235   tmp_pathbuf tp;
1236   int ret;
1237   const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
1238
1239   if (!collate_locale[0])
1240     return strcmp (s1, s2);
1241   mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc;
1242   n1 = lc_mbstowcs (collate_mbtowc, NULL, s1, 0) + 1;
1243   ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
1244                           : tp.w_get ());
1245   lc_mbstowcs (collate_mbtowc, ws1, s1, n1);
1246   n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1;
1247   ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1248                           : tp.w_get ());
1249   lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
1250   ret = CompareStringEx (collate_locale, 0, ws1, -1, ws2, -1, NULL, NULL, 0);
1251   if (n1 > NT_MAX_PATH)
1252     free (ws1);
1253   if (n2 > NT_MAX_PATH)
1254     free (ws2);
1255   if (!ret)
1256     set_errno (EINVAL);
1257   return ret - CSTR_EQUAL;
1258 }
1259
1260 extern "C" int
1261 strcoll (const char *__restrict s1, const char *__restrict s2)
1262 {
1263   return strcoll_l (s1, s2, __get_current_locale ());
1264 }
1265
/* BSD.  Used from glob.cc, fnmatch.c and regcomp.c.

   Compare two single characters with wcscoll.  Each character is turned
   into a NUL-terminated wide-char string first; values above 0xffff are
   stored as a surrogate pair. */
extern "C" int
__wcollate_range_cmp (wint_t c1, wint_t c2)
{
  const wint_t chars[2] = { c1, c2 };
  wchar_t strs[2][3] = { { L'\0', L'\0', L'\0' }, { L'\0', L'\0', L'\0' } };

  for (int i = 0; i < 2; ++i)
    {
      if (chars[i] > 0xffff)
        {
          /* Handle Unicode values >= 0x10000, convert to surrogate pair */
          strs[i][0] = ((chars[i] - 0x10000) >> 10) + 0xd800;
          strs[i][1] = ((chars[i] - 0x10000) & 0x3ff) + 0xdc00;
        }
      else
        strs[i][0] = (wchar_t) chars[i];
    }
  return wcscoll (strs[0], strs[1]);
}
1286
1287 /* Not so much BSD.  Used from glob.cc, fnmatch.c and regcomp.c.
1288
1289    The args are pointers to wint_t strings.  This allows to compare
1290    against collating symbols. */
1291 extern "C" int
1292 __wscollate_range_cmp (wint_t *c1, wint_t *c2,
1293                        size_t c1len, size_t c2len)
1294 {
1295   wchar_t s1[c1len * 2 + 1] = { 0 };    /* # of chars if all are surrogates */
1296   wchar_t s2[c2len * 2 + 1] = { 0 };
1297
1298   /* wcscoll() ignores case in many locales. but we don't want that
1299      for filenames... */
1300   if ((iswupper (*c1) && !iswupper (*c2))
1301       || (iswlower (*c1) && !iswlower (*c2)))
1302     return *c1 - *c2;
1303
1304   wcintowcs (s1, c1, c1len);
1305   wcintowcs (s2, c2, c2len);
1306   return wcscoll_l (s1, s2, __get_current_locale ());
1307 }
1308
1309 const size_t ce_size = sizeof collating_element / sizeof *collating_element;
1310 const size_t ce_e_size = sizeof *collating_element;
1311
1312 /* Check if UTF-32 input character `test' is in the same equivalence class
1313    as UTF-32 character 'eqv'.
1314    Note that we only recognize input in Unicode normalization form C, that
1315    is, we expect all letters to be composed.  A single character is all we
1316    look at.
1317    To check equivalence, decompose pattern letter and input letter into
1318    normalization form KD and check the base character for equality.  Also,
1319    convert all digits to the ASCII digits 0 - 9 and compare. */
1320 extern "C" int
1321 is_unicode_equiv (wint_t test, wint_t eqv)
1322 {
1323         wchar_t decomp_testc[24] = { 0 };
1324         wchar_t decomp_eqvc[24] = { 0 };
1325         wchar_t testc[3] = { 0 };
1326         wchar_t eqvc[3] = { 0 };
1327
1328         /* For equivalence classes, case doesn't matter.  However, be careful.
1329            Only convert chars which have a "upper" to "lower". */
1330         if (iswupper (eqv))
1331                 eqv = towlower (eqv);
1332         if (iswupper (test))
1333                 test = towlower (test);
1334         /* Convert to UTF-16 string */
1335         if (eqv > 0x10000) {
1336                 eqvc[0] = ((eqv - 0x10000) >> 10) + 0xd800;
1337                 eqvc[1] = ((eqv - 0x10000) & 0x3ff) + 0xdc00;
1338         } else
1339                 eqvc[0] = eqv;
1340         if (test > 0x10000) {
1341                 testc[0] = ((test - 0x10000) >> 10) + 0xd800;
1342                 testc[1] = ((test - 0x10000) & 0x3ff) + 0xdc00;
1343         } else
1344                 testc[0] = test;
1345         /* Convert to decomposed form */
1346         FoldStringW (MAP_COMPOSITE | MAP_FOLDCZONE | MAP_FOLDDIGITS,
1347                      eqvc, -1, decomp_eqvc, 24);
1348         FoldStringW (MAP_COMPOSITE | MAP_FOLDCZONE | MAP_FOLDDIGITS,
1349                      testc, -1, decomp_testc, 24);
1350         /* If they are equivalent, the base char must be the same. */
1351         if (decomp_eqvc[0] != decomp_testc[0])
1352                 return 0;
1353         /* If it's a surrogate pair, check the second char, too */
1354         if (decomp_eqvc[0] >= 0xd800 && decomp_eqvc[0] <= 0xdbff &&
1355             decomp_eqvc[1] != decomp_testc[1])
1356                 return 0;
1357         return 1;
1358 }
1359
1360 static int
1361 comp_coll_elem (const void *key, const void *array_member)
1362 {
1363   collating_element_t *ckey = (collating_element_t *) key;
1364   collating_element_t *carray_member = (collating_element_t *) array_member;
1365
1366   int ret = wcicmp ((const wint_t *) ckey->element,
1367                     (const wint_t *) carray_member->element);
1368   /* The locale in the collating_element array never has a codeset
1369      attached.  So the length of the collating_element locale is
1370      always <= length of the key locale, and that's all we need to
1371      check.  Also, if the collating_element locale is empty, we're
1372      all set. */
1373   if (ret == 0 && carray_member->locale[0])
1374     ret = strncmp (ckey->locale, carray_member->locale,
1375                    strlen (carray_member->locale));
1376   return ret;
1377 }
1378
1379 extern "C" int
1380 is_unicode_coll_elem (const wint_t *test)
1381 {
1382   collating_element_t ct = {
1383     (const char32_t *) test,
1384     __get_current_locale ()->categories[LC_COLLATE]
1385   };
1386   collating_element_t *cmatch;
1387
1388   if (wcilen (test) == 1)
1389     return 1;
1390   cmatch = (collating_element_t *)
1391            bsearch (&ct, collating_element, ce_size, ce_e_size, comp_coll_elem);
1392   return !!cmatch;
1393 }
1394
1395 static int
1396 comp_coll_elem_n (const void *key, const void *array_member)
1397 {
1398   collating_element_t *ckey = (collating_element_t *) key;
1399   collating_element_t *carray_member = (collating_element_t *) array_member;
1400
1401   int ret = wcincmp ((const wint_t *) ckey->element,
1402                      (const wint_t *) carray_member->element,
1403                      wcilen ((const wint_t *) carray_member->element));
1404   /* The locale in the collating_element array never has a codeset
1405      attached.  So the length of the collating_element locale is
1406      always <= length of the key locale, and that's all we need to
1407      check.  Also, if the collating_element locale is empty, we're
1408      all set. */
1409   if (ret == 0 && carray_member->locale[0])
1410     ret = strncmp (ckey->locale, carray_member->locale,
1411                    strlen (carray_member->locale));
1412   return ret;
1413 }
1414
1415 /* Return the number of UTF-32 chars making up the next full character in
1416    inp, taking valid collation elements in the current locale into account. */
1417 extern "C" size_t
1418 next_unicode_char (wint_t *inp)
1419 {
1420   collating_element_t ct = {
1421     (const char32_t *) inp,
1422     __get_current_locale ()->categories[LC_COLLATE]
1423   };
1424   collating_element_t *cmatch;
1425
1426   if (wcilen (inp) > 1)
1427     {
1428       cmatch = (collating_element_t *)
1429                bsearch (&ct, collating_element, ce_size, ce_e_size,
1430                         comp_coll_elem_n);
1431       if (cmatch)
1432         return wcilen ((const wint_t *) cmatch->element);
1433     }
1434   return 1;
1435 }
1436
1437 extern "C" size_t
1438 wcsxfrm_l (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn,
1439            struct __locale_t *locale)
1440 {
1441   size_t ret;
1442   const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
1443
1444   if (!collate_locale[0])
1445     return wcslcpy (ws1, ws2, wsn);
1446   /* Don't use LCMAP_SORTKEY in conjunction with LCMAP_BYTEREV.  The cchDest
1447      parameter is used as byte count with LCMAP_SORTKEY but as char count with
1448      LCMAP_BYTEREV. */
1449   ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1, ws1,
1450                        wsn * sizeof (wchar_t), NULL, NULL, 0);
1451   if (ret)
1452     {
1453       ret /= sizeof (wchar_t);
1454       if (wsn)
1455         {
1456           /* Byte swap the array ourselves here. */
1457           for (size_t idx = 0; idx < ret; ++idx)
1458             ws1[idx] = __builtin_bswap16 (ws1[idx]);
1459           /* LCMapStringW returns byte count including the terminating NUL char.
1460              wcsxfrm is supposed to return length in wchar_t excluding the NUL.
1461              Since the array is only single byte NUL-terminated yet, make sure
1462              the result is wchar_t-NUL terminated. */
1463           if (ret < wsn)
1464             ws1[ret] = L'\0';
1465         }
1466       return ret;
1467     }
1468   if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
1469     set_errno (EINVAL);
1470   else
1471     {
1472       ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
1473                            NULL, 0, NULL, NULL, 0);
1474       if (ret)
1475         wsn = ret / sizeof (wchar_t);
1476     }
1477   return wsn;
1478 }
1479
1480 extern "C" size_t
1481 wcsxfrm (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn)
1482 {
1483   return wcsxfrm_l (ws1, ws2, wsn, __get_current_locale ());
1484 }
1485
1486 extern "C" size_t
1487 strxfrm_l (char *__restrict s1, const char *__restrict s2, size_t sn,
1488            struct __locale_t *locale)
1489 {
1490   size_t ret = 0;
1491   size_t n2;
1492   wchar_t *ws2;
1493   tmp_pathbuf tp;
1494   const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
1495
1496   if (!collate_locale[0])
1497     return strlcpy (s1, s2, sn);
1498   mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc;
1499   n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1;
1500   ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1501                           : tp.w_get ());
1502   if (ws2)
1503     {
1504       lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
1505       /* The sort key is a NUL-terminated byte string. */
1506       ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
1507                           (PWCHAR) s1, sn, NULL, NULL, 0);
1508     }
1509   if (ret == 0)
1510     {
1511       ret = sn + 1;
1512       if (!ws2 || GetLastError () != ERROR_INSUFFICIENT_BUFFER)
1513         set_errno (EINVAL);
1514       else
1515         ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
1516                              NULL, 0, NULL, NULL, 0);
1517     }
1518   if (ws2 && n2 > NT_MAX_PATH)
1519     free (ws2);
1520   /* LCMapStringW returns byte count including the terminating NUL character.
1521      strxfrm is supposed to return length excluding the NUL. */
1522   return ret - 1;
1523 }
1524
1525 extern "C" size_t
1526 strxfrm (char *__restrict s1, const char *__restrict s2, size_t sn)
1527 {
1528   return strxfrm_l (s1, s2, sn, __get_current_locale ());
1529 }
1530
1531 /* Fetch default ANSI codepage from locale info and generate a setlocale
1532    compatible character set code.  Called from newlib's setlocale(), if the
1533    charset isn't given explicitely in the POSIX compatible locale specifier. */
1534 extern "C" void
1535 __set_charset_from_locale (const char *loc, char *charset)
1536 {
1537   wchar_t win_locale[ENCODING_LEN + 1];
1538   char locale[ENCODING_LEN + 1];
1539   char *modifier;
1540   char *c;
1541   UINT cp;
1542
1543   /* Cut out explicit codeset */
1544   stpcpy (locale, loc);
1545   modifier = strchr (loc, '@');
1546   if ((c = strchr (locale, '.')))
1547     stpcpy (c, modifier ?: "");
1548   /* Ignore @cjk* modifiers, they are newlib specials. */
1549   modifier = strchr (locale, '@');
1550   if (modifier && is_cjk_modifier (modifier))
1551     *modifier = '\0';
1552
1553   default_codeset_t srch_dc = { locale, NULL };
1554   default_codeset_t *dc = (default_codeset_t *)
1555          bsearch ((void *) &srch_dc, (void *) default_codeset,
1556                   sizeof default_codeset / sizeof *default_codeset,
1557                   sizeof *default_codeset, locale_cmp);
1558   if (dc)
1559     {
1560       stpcpy (charset, dc->codeset);
1561       return;
1562     }
1563
1564   /* "C" locale, or invalid locale? */
1565   if (__get_rfc5646_from_locale (locale, win_locale) <= 0)
1566     cp = 20127;
1567   else if (GetLocaleInfoEx (win_locale,
1568                             LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
1569                             (PWCHAR) &cp, sizeof cp))
1570     cp = 0;
1571   /* Translate codepage and lcid to a charset closely aligned with the default
1572      charsets defined in Glibc. */
1573   const char *cs;
1574   switch (cp)
1575     {
1576     case 20127:
1577       cs = "ASCII";
1578       break;
1579     case 874:
1580       cs = "CP874";
1581       break;
1582     case 932:
1583       cs = "EUCJP";
1584       break;
1585     case 936:
1586       cs = "GB2312";
1587       break;
1588     case 949:
1589       cs = "EUCKR";
1590       break;
1591     case 950:
1592       cs = "BIG5";
1593       break;
1594     case 1250:
1595       cs = "ISO-8859-2";
1596       break;
1597     case 1251:
1598       cs = "ISO-8859-5";
1599       break;
1600     case 1252:
1601       cs = "ISO-8859-1";
1602       break;
1603     case 1253:
1604       cs = "ISO-8859-7";
1605       break;
1606     case 1254:
1607       cs = "ISO-8859-9";
1608       break;
1609     case 1255:
1610       cs = "ISO-8859-8";
1611       break;
1612     case 1256:
1613       cs = "ISO-8859-6";
1614       break;
1615     case 1257:
1616       cs = "ISO-8859-13";
1617       break;
1618     case 1258:
1619       cs = "UTF-8";
1620       break;
1621     default:
1622       /* Some (pretty new) EU locales don't exist in GLibc and haven't been
1623          catched above.  Check for @euro modifier again and make these locales
1624          always use ISO-8859-15. */
1625       if (modifier && !strcmp (modifier + 1, "euro"))
1626         cs = "ISO-8859-15";
1627       else
1628         cs = "UTF-8";
1629       break;
1630     }
1631   stpcpy (charset, cs);
1632 }
1633
1634 /* Called from fhandler_tty::setup_locale.  Set a codepage which reflects the
1635    internal charset setting.  This is *not* necessarily the Windows
1636    codepage connected to a locale by default, so we have to set this
1637    up explicitely. */
1638 UINT
1639 __eval_codepage_from_internal_charset ()
1640 {
1641   const char *charset = __locale_charset (__get_global_locale ());
1642   UINT codepage = CP_UTF8; /* Default UTF8 */
1643
1644   /* The internal charset names are well defined, so we can use shortcuts. */
1645   switch (charset[0])
1646     {
1647     case 'B': /* BIG5 */
1648       codepage = 950;
1649       break;
1650     case 'C': /* CPxxx */
1651       codepage = strtoul (charset + 2, NULL, 10);
1652       break;
1653     case 'E': /* EUCxx */
1654       switch (charset[3])
1655         {
1656         case 'J': /* EUCJP */
1657           codepage = 20932;
1658           break;
1659         case 'K': /* EUCKR */
1660           codepage = 949;
1661           break;
1662         case 'C': /* EUCCN */
1663           codepage = 936;
1664           break;
1665         }
1666       break;
1667     case 'G': /* GBK/GB2312/GB18030 */
1668       codepage = (charset[2] == '1') ? 54936 : 936;
1669       break;
1670     case 'I': /* ISO-8859-x */
1671       codepage = strtoul (charset + 9, NULL, 10) + 28590;
1672       break;
1673     case 'S': /* SJIS */
1674       codepage = 932;
1675       break;
1676     default: /* All set to UTF8 already */
1677       break;
1678     }
1679   return codepage;
1680 }
1681
1682 /* This function is called from newlib's loadlocale if the locale identifier
1683    was invalid, one way or the other.  It looks for the file
1684
1685      /usr/share/locale/locale.alias
1686
1687    which is part of the gettext package, and if it finds the locale alias
1688    in that file, it replaces the locale with the correct locale string from
1689    that file.
1690
1691    If successful, it returns a pointer to new_locale, NULL otherwise.*/
1692 extern "C" char *
1693 __set_locale_from_locale_alias (const char *locale, char *new_locale)
1694 {
1695   wchar_t wlocale[ENCODING_LEN + 1];
1696   wchar_t walias[ENCODING_LEN + 1];
1697 #define LOCALE_ALIAS_LINE_LEN 255
1698   char alias_buf[LOCALE_ALIAS_LINE_LEN + 1], *c;
1699   wchar_t *wc;
1700   const char *alias, *replace;
1701   char *ret = NULL;
1702
1703   FILE *fp = fopen ("/usr/share/locale/locale.alias", "rt");
1704   if (!fp)
1705     return NULL;
1706   /* The incoming locale is given in the application charset, or in
1707      the Cygwin internal charset.  We try both. */
1708   if (mbstowcs (wlocale, locale, ENCODING_LEN + 1) == (size_t) -1)
1709     sys_mbstowcs (wlocale, ENCODING_LEN + 1, locale);
1710   wlocale[ENCODING_LEN] = L'\0';
1711   /* Ignore @cjk* modifiers, they are newlib specials. */
1712   wc = wcschr (wlocale, L'@');
1713   if (wc && w_is_cjk_modifier (wc))
1714     *wc = L'\0';
1715   while (fgets (alias_buf, LOCALE_ALIAS_LINE_LEN + 1, fp))
1716     {
1717       alias_buf[LOCALE_ALIAS_LINE_LEN] = '\0';
1718       c = strrchr (alias_buf, '\n');
1719       if (c)
1720         *c = '\0';
1721       c = alias_buf;
1722       c += strspn (c, " \t");
1723       if (!*c || *c == '#')
1724         continue;
1725       alias = c;
1726       c += strcspn (c, " \t");
1727       *c++ = '\0';
1728       c += strspn (c, " \t");
1729       if (*c == '#')
1730         continue;
1731       replace = c;
1732       c += strcspn (c, " \t");
1733       *c++ = '\0';
1734       if (strlen (replace) > ENCODING_LEN)
1735         continue;
1736       /* The file is latin1 encoded */
1737       lc_mbstowcs (__iso_mbtowc (1), walias, alias, ENCODING_LEN + 1);
1738       walias[ENCODING_LEN] = L'\0';
1739       if (!wcscmp (wlocale, walias))
1740         {
1741           ret = strcpy (new_locale, replace);
1742           break;
1743         }
1744     }
1745   fclose (fp);
1746   return ret;
1747 }
1748
1749 /* Can be called via cygwin_internal (CW_INTERNAL_SETLOCALE) for application
1750    which really (think they) know what they are doing. */
1751 extern "C" void
1752 internal_setlocale ()
1753 {
1754   /* Each setlocale from the environment potentially changes the
1755      multibyte representation of the CWD.  Therefore we have to
1756      reevaluate the CWD's posix path and store in the new charset.
1757      Same for the PATH environment variable. */
1758   /* FIXME: Other buffered paths might be affected as well. */
1759   /* FIXME: It could be necessary to convert the entire environment,
1760             not just PATH. */
1761   tmp_pathbuf tp;
1762   char *path;
1763   wchar_t *w_path = NULL, *w_cwd;
1764
1765   /* Don't do anything if the charset hasn't actually changed. */
1766   if (cygheap->locale.mbtowc == __get_global_locale ()->mbtowc)
1767     return;
1768
1769   debug_printf ("Global charset set to %s",
1770                 __locale_charset (__get_global_locale ()));
1771   /* Fetch PATH and CWD and convert to wchar_t in previous charset. */
1772   path = getenv ("PATH");
1773   if (path && *path)    /* $PATH can be potentially unset. */
1774     {
1775       w_path = tp.w_get ();
1776       _sys_mbstowcs (cygheap->locale.mbtowc, w_path, 32768, path);
1777     }
1778   w_cwd = tp.w_get ();
1779   cwdstuff::acquire_write ();
1780   _sys_mbstowcs (cygheap->locale.mbtowc, w_cwd, 32768,
1781                    cygheap->cwd.get_posix ());
1782   /* Set charset for internal conversion functions. */
1783   cygheap->locale.mbtowc = __get_global_locale ()->mbtowc;
1784   if (cygheap->locale.mbtowc == __ascii_mbtowc)
1785     cygheap->locale.mbtowc = __utf8_mbtowc;
1786   /* Restore CWD and PATH in new charset. */
1787   cygheap->cwd.reset_posix (w_cwd);
1788   cwdstuff::release_write ();
1789   if (w_path)
1790     {
1791       char *c_path = tp.c_get ();
1792       sys_wcstombs (c_path, 32768, w_path);
1793       setenv ("PATH", c_path, 1);
1794     }
1795 }
1796
1797 /* Called from dll_crt0_1, before fetching the command line from Windows.
1798    Set the internal charset according to the environment locale settings.
1799    Check if a required codepage is available, and only switch internal
1800    charset if so.
1801    Make sure to reset the application locale to "C" per POSIX. */
1802 void
1803 initial_setlocale ()
1804 {
1805   char *ret = _setlocale_r (_REENT, LC_CTYPE, "");
1806   if (ret)
1807     internal_setlocale ();
1808 }