Cygwin: (mostly) drop NT4 and Samba < 3.0 support
[newlib-cygwin.git] / winsup / cygwin / nlsfuncs.cc
blobb32fecc8ec33e802fc9c3379c615d53a4552096d
1 /* nlsfuncs.cc: NLS helper functions
3 This file is part of Cygwin.
5 This software is a copyrighted work licensed under the terms of the
6 Cygwin license. Please consult the file "CYGWIN_LICENSE" for
7 details. */
9 #include "winsup.h"
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <locale.h>
13 #include <wchar.h>
14 #include <wctype.h>
15 #include "path.h"
16 #include "fhandler.h"
17 #include "dtable.h"
18 #include "cygheap.h"
19 #include "tls_pbuf.h"
20 #include "collate.h"
21 #include "lc_msg.h"
22 #include "lc_era.h"
23 #include "lc_collelem.h"
24 #include "lc_def_codesets.h"
/* _LC(x) expands to the two trailing arguments taken by the buffer-filling
   helpers below: the address of the write cursor lc_<x>_ptr, and the
   distance from that cursor to lc_<x>_end.  Both variables have to be in
   scope wherever the macro is used. */
#define _LC(x) &lc_##x##_ptr, lc_##x##_end - lc_##x##_ptr

/* Shorthands for the helper functions.  They pick up the locals
   "win_locale", "f_wctomb", "lc_<category>_ptr", "lc_<category>_end" and
   "_<category>_locale" from the calling function. */
#define getlocaleinfo(category, type) \
  __getlocaleinfo (win_locale, (type), _LC (category))

#define getlocaleint(type) \
  __getlocaleint (win_locale, (type))

#define setlocaleinfo(category, val) \
  __setlocaleinfo (_LC (category), (val))

#define eval_datetimefmt(type, flags) \
  __eval_datetimefmt (win_locale, (type), (flags), &lc_time_ptr, \
                      lc_time_end - lc_time_ptr)

#define charfromwchar(category, in) \
  __charfromwchar (_##category##_locale->in, _LC (category), f_wctomb)
/* Check for an @cjk* modifier, i.e. "cjk" at offsets 1..3 of _in followed
   by exactly "narrow", "wide" or "single".  The character at offset 0 is
   not examined; the callers pass a pointer to the '@'.  The three single
   character tests come first so the common non-match is rejected without
   calling the string comparison function. */
#define __is_cjk_modifier(_in, _cmp, _L) \
  (_in[1] == 'c' \
   && _in[2] == 'j' \
   && _in[3] == 'k' \
   && (_cmp (_in + 4, _L##"narrow") == 0 \
       || _cmp (_in + 4, _L##"wide") == 0 \
       || _cmp (_in + 4, _L##"single") == 0))

/* Multibyte and wide character flavours of the check. */
#define is_cjk_modifier(_in) __is_cjk_modifier(_in, strcmp, )
#define w_is_cjk_modifier(_in) __is_cjk_modifier(_in, wcscmp, L)
/* ResolveLocaleName does not do what we want.  It converts anything which
   vaguely resembles a locale into some other locale it supports.  Bad
   examples are: "en-XY" gets converted to "en-US", and worse, "ff-BF" gets
   converted to "ff-Latn-SN", even though "ff-Adlm-BF" exists!  Useless.

   To check if a locale is supported, we have to enumerate all valid
   Windows locales and return the match, even if the locale in Windows
   requires a script.

   res_loc_t carries the search request into the enumeration callback and
   the result back out of it. */
struct res_loc_t
{
  /* Start of the "language-TERRITORY" string searched for. */
  const wchar_t *search_iso639;
  /* Points behind the dash in the same string, i.e. at the territory. */
  const wchar_t *search_iso3166;
  /* Caller-supplied buffer receiving the matching Windows locale name. */
  wchar_t *resolved_locale;
  /* Size of resolved_locale, counted in wchar_t's. */
  int res_len;
};
66 static BOOL
67 resolve_locale_proc (LPWSTR win_locale, DWORD info, LPARAM param)
69 res_loc_t *loc = (res_loc_t *) param;
70 wchar_t *iso639, *iso639_end;
71 wchar_t *iso3166;
73 iso639 = win_locale;
74 iso639_end = wcschr (iso639, L'-');
75 if (!iso639_end)
76 return TRUE;
77 if (wcsncmp (loc->search_iso639, iso639, iso639_end - iso639) != 0)
78 return TRUE;
79 iso3166 = ++iso639_end;
80 /* Territory is all upper case */
81 while (!iswupper (iso3166[0]) || !iswupper (iso3166[1]))
83 iso3166 = wcschr (iso3166, L'-');
84 if (!iso3166)
85 return TRUE;
86 ++iso3166;
88 if (wcsncmp (loc->search_iso3166, iso3166, wcslen (loc->search_iso3166)))
89 return TRUE;
90 wcsncat (loc->resolved_locale, win_locale, loc->res_len - 1);
91 return FALSE;
94 static int
95 resolve_locale_name (const wchar_t *search, wchar_t *result, int rlen)
97 res_loc_t loc;
99 loc.search_iso639 = search;
100 loc.search_iso3166 = wcschr (search, L'-') + 1;
101 loc.resolved_locale = result;
102 loc.res_len = rlen;
103 result[0] = L'\0';
104 EnumSystemLocalesEx (resolve_locale_proc,
105 LOCALE_WINDOWS | LOCALE_SUPPLEMENTAL,
106 (LPARAM) &loc, NULL);
107 return wcslen (result);
110 /* Fetch Windows RFC 5646 locale from POSIX locale specifier.
111 Return values:
113 -1: Invalid locale
114 0: C or POSIX
115 1: valid locale
117 static int
118 __get_rfc5646_from_locale (const char *name, wchar_t *win_locale)
120 wchar_t wlocale[ENCODING_LEN + 1] = { 0 };
121 wchar_t locale[ENCODING_LEN + 1];
122 wchar_t *c;
124 win_locale[0] = L'\0';
125 mbstowcs (locale, name, ENCODING_LEN + 1);
126 /* Remember modifier for later use. */
127 const char *modifier = strchr (name, '@') ? : "";
128 /* Drop charset and modifier */
129 c = wcschr (locale, L'.');
130 if (!c)
131 c = wcschr (locale, L'@');
132 if (c)
133 *c = L'\0';
134 /* "POSIX" already converted to "C" in loadlocale. */
135 if (!wcscmp (locale, L"C"))
136 return 0;
137 c = wcschr (locale, '_');
138 if (!c)
140 /* try if the locale can be resolved from the language tag
141 fix up Linux-only locale first */
142 if (!wcscmp (locale, L"ber"))
143 wcscpy (locale, L"tzm");
144 if (ResolveLocaleName (locale, wlocale, ENCODING_LEN + 1) <= 1)
146 set_errno (ENOENT);
147 return -1;
149 wcpcpy (win_locale, wlocale);
150 return 1;
153 /* Convert to RFC 5646 syntax. */
154 *c = '-';
155 /* Override a few locales with a different default script as used
156 on Linux. Linux also supports no_NO which is equivalent to nb_NO,
157 but Windows can resolve that nicely. Also, "tzm" and "zgh" are
158 subsumed under "ber" on Linux. */
159 struct {
160 const wchar_t *loc;
161 const wchar_t *wloc;
162 } override_locale[] = {
163 { L"ber-DZ" , L"tzm-Latn-DZ" },
164 { L"ber-MA" , L"zgh-Tfng-MA" },
165 { L"mn-CN" , L"mn-Mong-CN" },
166 { L"mn-MN" , L"mn-Mong-MN" },
167 { L"pa-PK" , L"pa-Arab-PK" },
168 { L"sd-IN" , L"sd-Deva-IN" },
169 { L"sr-BA" , L"sr-Cyrl-BA" },
170 { L"sr-ME" , L"sr-Cyrl-ME" },
171 { L"sr-RS" , L"sr-Cyrl-RS" },
172 { L"sr-XK" , L"sr-Cyrl-XK" },
173 { L"tzm-MA", L"tzm-Tfng-MA" },
174 { NULL , NULL }
177 for (int i = 0; override_locale[i].loc
178 && override_locale[i].loc[0] <= locale[0]; ++i)
180 if (!wcscmp (locale, override_locale[i].loc))
182 wcscpy (wlocale, override_locale[i].wloc);
183 break;
186 /* If resolve_locale_name returns with error, or if it returns a
187 locale other than the input locale, we don't support this locale. */
188 if (!wlocale[0]
189 && !resolve_locale_name (locale, wlocale, ENCODING_LEN + 1))
191 set_errno (ENOENT);
192 return -1;
195 /* Check for modifiers changing the script */
196 const wchar_t *iso15924_script[] = { L"Latn-", L"Cyrl-", L"Deva-", L"Adlm-" };
197 int idx = -1;
199 if (modifier[0])
201 if (!strcmp (++modifier, "latin"))
202 idx = 0;
203 else if (!strcmp (modifier, "cyrillic"))
204 idx = 1;
205 else if (!strcmp (modifier, "devanagari"))
206 idx = 2;
207 else if (!strcmp (modifier, "adlam"))
208 idx = 3;
210 if (idx >= 0)
212 wchar_t *iso3166 = wcschr (wlocale, L'-') + 1;
213 wchar_t *wlp;
215 /* Copy iso639 language part including dash */
216 wlp = wcpncpy (win_locale, wlocale, iso3166 - wlocale);
217 /* Concat new iso15924 script */
218 wlp = wcpcpy (wlp, iso15924_script[idx]);
219 /* Concat iso3166 territory. Skip script, if already in the locale */
220 wchar_t *skip_script = wcschr (iso3166, L'-');
221 if (skip_script)
222 iso3166 = skip_script + 1;
223 wcpcpy (wlp, iso3166);
225 else
226 wcpcpy (win_locale, wlocale);
227 return 1;
230 /* Never returns -1. Just skips invalid chars instead. Only if return_invalid
231 is set, s==NULL returns -1 since then it's used to recognize invalid strings
232 in the used charset. */
233 static size_t
234 lc_wcstombs (wctomb_p f_wctomb, char *s, const wchar_t *pwcs, size_t n,
235 bool return_invalid = false)
237 char *ptr = s;
238 size_t max = n;
239 char buf[8];
240 size_t i, bytes, num_to_copy;
241 mbstate_t state;
243 memset (&state, 0, sizeof state);
244 if (s == NULL)
246 size_t num_bytes = 0;
247 while (*pwcs != 0)
249 bytes = f_wctomb (_REENT, buf, *pwcs++, &state);
250 if (bytes != (size_t) -1)
251 num_bytes += bytes;
252 else if (return_invalid)
253 return (size_t) -1;
255 return num_bytes;
257 while (n > 0)
259 bytes = f_wctomb (_REENT, buf, *pwcs, &state);
260 if (bytes == (size_t) -1)
262 memset (&state, 0, sizeof state);
263 ++pwcs;
264 continue;
266 num_to_copy = (n > bytes ? bytes : n);
267 for (i = 0; i < num_to_copy; ++i)
268 *ptr++ = buf[i];
270 if (*pwcs == 0x00)
271 return ptr - s - (n >= bytes);
272 ++pwcs;
273 n -= num_to_copy;
275 return max;
278 /* Never returns -1. Invalid sequences are translated to replacement
279 wide-chars. */
280 static size_t
281 lc_mbstowcs (mbtowc_p f_mbtowc, wchar_t *pwcs, const char *s, size_t n)
283 size_t ret = 0;
284 char *t = (char *) s;
285 size_t bytes;
286 mbstate_t state;
288 memset (&state, 0, sizeof state);
289 if (!pwcs)
290 n = 1;
291 while (n > 0)
293 bytes = f_mbtowc (_REENT, pwcs, t, 6 /* fake, always enough */, &state);
294 if (bytes == (size_t) -1)
296 state.__count = 0;
297 bytes = 1;
298 if (pwcs)
299 *pwcs = L' ';
301 else if (bytes == 0)
302 break;
303 t += bytes;
304 ++ret;
305 if (pwcs)
307 ++pwcs;
308 --n;
311 return ret;
/* Comparison function for bsearch.  Both arguments point to a pointer to
   a string; the strings pointed to are compared with strcmp. */
static int
locale_cmp (const void *a, const void *b)
{
  const char *lhs = *(const char *const *) a;
  const char *rhs = *(const char *const *) b;

  return strcmp (lhs, rhs);
}
/* Helper function to workaround reallocs which move blocks even if they
   shrink.  Cygwin's realloc is not doing this, but tcsh's, for instance.
   All lc_foo structures consist entirely of pointers so they are
   practically pointer arrays.  What we do here is just treat the lc_foo
   pointers as char ** and rebase all char * pointers within, up to the
   given size of the structure.

   ptrv and ptrvend delimit the pointer array.  Every pointer in it which
   points into [oldbase, oldend) is moved by the distance between oldbase
   and newbase; all other pointers are left alone. */
static void
rebase_locale_buf (const void *ptrv, const void *ptrvend, const char *newbase,
                   const char *oldbase, const char *oldend)
{
  const char **slot = (const char **) ptrv;
  const char **const stop = (const char **) ptrvend;

  while (slot < stop)
    {
      if (oldbase <= *slot && *slot < oldend)
        *slot += newbase - oldbase;
      ++slot;
    }
}
337 static wchar_t *
338 __getlocaleinfo (wchar_t *loc, LCTYPE type, char **ptr, size_t size)
340 size_t num;
341 wchar_t *ret;
343 if ((uintptr_t) *ptr % 1)
344 ++*ptr;
345 ret = (wchar_t *) *ptr;
346 num = GetLocaleInfoEx (loc, type, ret, size / sizeof (wchar_t));
347 *ptr = (char *) (ret + num);
348 return ret;
351 static wchar_t *
352 __setlocaleinfo (char **ptr, size_t size, wchar_t val)
354 wchar_t *ret;
356 if ((uintptr_t) *ptr % 1)
357 ++*ptr;
358 ret = (wchar_t *) *ptr;
359 ret[0] = val;
360 ret[1] = L'\0';
361 *ptr = (char *) (ret + 2);
362 return ret;
365 static char *
366 __charfromwchar (const wchar_t *in, char **ptr, size_t size, wctomb_p f_wctomb)
368 size_t num;
369 char *ret;
371 num = lc_wcstombs (f_wctomb, ret = *ptr, in, size);
372 *ptr += num + 1;
373 return ret;
376 static UINT
377 __getlocaleint (wchar_t *loc, LCTYPE type)
379 UINT val;
380 return GetLocaleInfoEx (loc, type | LOCALE_RETURN_NUMBER, (PWCHAR) &val,
381 sizeof val) ? val : 0;
/* Flags modifying the conversion performed by __eval_datetimefmt.  The
   non-zero values are distinct bits so they can be tested with '&'. */
enum dt_flags
{
  DT_DEFAULT = 0,       /* Convert the format as is. */
  DT_AMPM    = 1 << 0,  /* Enforce 12 hour time format. */
  DT_ABBREV  = 1 << 1,  /* Enforce abbreviated month and day names. */
};
390 static wchar_t *
391 __eval_datetimefmt (wchar_t *loc, LCTYPE type, dt_flags flags, char **ptr,
392 size_t size)
394 wchar_t buf[80];
395 wchar_t fc;
396 size_t idx;
397 const wchar_t *day_str = L"edaA";
398 const wchar_t *mon_str = L"mmbB";
399 const wchar_t *year_str = L"yyyY";
400 const wchar_t *hour12_str = L"lI";
401 const wchar_t *hour24_str = L"kH";
402 const wchar_t *t_str;
404 if ((uintptr_t) *ptr % 1)
405 ++*ptr;
406 wchar_t *ret = (wchar_t *) *ptr;
407 wchar_t *p = (wchar_t *) *ptr;
408 GetLocaleInfoEx (loc, type, buf, 80);
409 for (wchar_t *fmt = buf; *fmt; ++fmt)
410 switch (fc = *fmt)
412 case L'\'':
413 if (fmt[1] == L'\'')
414 *p++ = L'\'';
415 else
416 while (fmt[1] && *++fmt != L'\'')
417 *p++ = *fmt;
418 break;
419 case L'd':
420 case L'M':
421 case L'y':
422 t_str = (fc == L'd' ? day_str : fc == L'M' ? mon_str : year_str);
423 for (idx = 0; fmt[1] == fc; ++idx, ++fmt);
424 if (idx > 3)
425 idx = 3;
426 if ((flags & DT_ABBREV) && fc != L'y' && idx == 3)
427 idx = 2;
428 *p++ = L'%';
429 *p++ = t_str[idx];
430 break;
431 case L'g':
432 /* TODO */
433 break;
434 case L'h':
435 case L'H':
436 t_str = (fc == L'h' || (flags & DT_AMPM) ? hour12_str : hour24_str);
437 idx = 0;
438 if (fmt[1] == fc)
440 ++fmt;
441 idx = 1;
443 *p++ = L'%';
444 *p++ = t_str[idx];
445 break;
446 case L'm':
447 case L's':
448 case L't':
449 if (fmt[1] == fc)
450 ++fmt;
451 *p++ = L'%';
452 *p++ = (fc == L'm' ? L'M' : fc == L's' ? L'S' : L'p');
453 break;
454 case L'\t':
455 case L'\n':
456 case L'%':
457 *p++ = L'%';
458 *p++ = fc;
459 break;
460 default:
461 *p++ = *fmt;
462 break;
464 *p++ = L'\0';
465 *ptr = (char *) p;
466 return ret;
469 /* Convert Windows grouping format into POSIX grouping format. */
470 static char *
471 conv_grouping (wchar_t *loc, LCTYPE type, char **lc_ptr)
473 wchar_t buf[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
474 bool repeat = false;
475 char *ptr = *lc_ptr;
476 char *ret = ptr;
478 GetLocaleInfoEx (loc, type, buf, 10);
479 /* Convert Windows grouping format into POSIX grouping format. Note that
480 only ASCII chars are used in the grouping format. */
481 for (wchar_t *c = buf; *c; ++c)
483 if (*c < L'0' || *c > L'9')
484 continue;
485 char val = *c - L'0';
486 if (!val)
488 repeat = true;
489 break;
491 *ptr++ = val;
493 if (!repeat)
494 *ptr++ = CHAR_MAX;
495 *ptr++ = '\0';
496 *lc_ptr = ptr;
497 return ret;
500 /* Called from newlib's setlocale() via __time_load_locale() if category
501 is LC_TIME. Returns LC_TIME values fetched from Windows locale data
502 in the structure pointed to by _time_locale. This is subsequently
503 accessed by functions like nl_langinfo, strftime, strptime. */
504 extern "C" int
505 __set_lc_time_from_win (const char *name,
506 const struct lc_time_T *_C_time_locale,
507 struct lc_time_T *_time_locale,
508 char **lc_time_buf, wctomb_p f_wctomb,
509 const char *charset)
511 wchar_t win_locale[ENCODING_LEN + 1];
512 int ret = __get_rfc5646_from_locale (name, win_locale);
513 if (ret < 0)
514 return ret;
515 if (!ret && !strcmp (charset, "ASCII"))
516 return 0;
518 # define MAX_TIME_BUFFER_SIZE 4096
520 char *new_lc_time_buf = (char *) malloc (MAX_TIME_BUFFER_SIZE);
521 const char *lc_time_end = new_lc_time_buf + MAX_TIME_BUFFER_SIZE;
523 if (!new_lc_time_buf)
524 return -1;
525 char *lc_time_ptr = new_lc_time_buf;
527 /* C.foo is just a copy of "C" with fixed charset. */
528 if (!ret)
529 memcpy (_time_locale, _C_time_locale, sizeof (struct lc_time_T));
530 /* codeset */
531 _time_locale->codeset = lc_time_ptr;
532 lc_time_ptr = stpcpy (lc_time_ptr, charset) + 1;
534 if (ret)
536 char locale[ENCODING_LEN + 1];
537 strcpy (locale, name);
538 /* Removes the charset from the locale and attach the modifier to the
539 language_TERRITORY part. */
540 char *c = strchr (locale, '.');
541 if (c)
543 *c = '\0';
544 char *c2 = strchr (c + 1, '@');
545 /* Ignore @cjk* modifiers, they are newlib specials. */
546 if (c2 && !is_cjk_modifier (c2))
547 memmove (c, c2, strlen (c2) + 1);
549 /* Now search in the alphabetically order lc_era array for the
550 locale. */
551 lc_era_t locale_key = { locale, NULL, NULL, NULL, NULL, NULL ,
552 NULL, NULL, NULL, NULL, NULL };
553 lc_era_t *era = (lc_era_t *) bsearch ((void *) &locale_key,
554 (void *) lc_era,
555 sizeof lc_era / sizeof *lc_era,
556 sizeof *lc_era, locale_cmp);
558 /* mon */
559 /* Windows has a bug in "ja-JP" and "ko-KR" (but not in "ko-KP").
560 In these locales, strings returned for LOCALE_SABBREVMONTHNAME*
561 are missing the suffix representing a month.
563 A Japanese article describing the problem was
564 https://msdn.microsoft.com/ja-jp/library/cc422084.aspx, which is
565 only available via
566 https://web.archive.org/web/20110922195821/https://msdn.microsoft.com/ja-jp/library/cc422084.aspx
567 these days. Testing indicates that this problem is still present
568 in Windows 11.
570 The workaround is to use LOCALE_SMONTHNAME* in these locales,
571 even for the abbreviated month name. */
572 const LCTYPE mon_base = !wcscmp (win_locale, L"ja-JP")
573 || !wcscmp (win_locale, L"ko-KR")
574 ? LOCALE_SMONTHNAME1 : LOCALE_SABBREVMONTHNAME1;
575 for (int i = 0; i < 12; ++i)
577 _time_locale->wmon[i] = getlocaleinfo (time, mon_base + i);
578 _time_locale->mon[i] = charfromwchar (time, wmon[i]);
580 /* month and alt_month */
581 for (int i = 0; i < 12; ++i)
583 _time_locale->wmonth[i] = getlocaleinfo (time,
584 LOCALE_SMONTHNAME1 + i);
585 _time_locale->month[i] = _time_locale->alt_month[i]
586 = charfromwchar (time, wmonth[i]);
588 /* wday */
589 _time_locale->wwday[0] = getlocaleinfo (time, LOCALE_SABBREVDAYNAME7);
590 _time_locale->wday[0] = charfromwchar (time, wwday[0]);
591 for (int i = 0; i < 6; ++i)
593 _time_locale->wwday[i + 1] = getlocaleinfo (time,
594 LOCALE_SABBREVDAYNAME1 + i);
595 _time_locale->wday[i + 1] = charfromwchar (time, wwday[i + 1]);
597 /* weekday */
598 _time_locale->wweekday[0] = getlocaleinfo (time, LOCALE_SDAYNAME7);
599 _time_locale->weekday[0] = charfromwchar (time, wweekday[0]);
600 for (int i = 0; i < 6; ++i)
602 _time_locale->wweekday[i + 1] = getlocaleinfo (time,
603 LOCALE_SDAYNAME1 + i);
604 _time_locale->weekday[i + 1] = charfromwchar (time, wweekday[i + 1]);
606 size_t len;
607 /* X_fmt */
608 if (era && *era->t_fmt)
610 _time_locale->wX_fmt = (const wchar_t *) lc_time_ptr;
611 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wX_fmt,
612 era->t_fmt) + 1);
614 else
615 _time_locale->wX_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
616 _time_locale->X_fmt = charfromwchar (time, wX_fmt);
617 /* x_fmt */
618 if (era && *era->d_fmt)
620 _time_locale->wx_fmt = (const wchar_t *) lc_time_ptr;
621 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wx_fmt,
622 era->d_fmt) + 1);
624 else
625 _time_locale->wx_fmt = eval_datetimefmt (LOCALE_SSHORTDATE, DT_DEFAULT);
626 _time_locale->x_fmt = charfromwchar (time, wx_fmt);
627 /* c_fmt */
628 if (era && *era->d_t_fmt)
630 _time_locale->wc_fmt = (const wchar_t *) lc_time_ptr;
631 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wc_fmt,
632 era->d_t_fmt) + 1);
634 else
636 _time_locale->wc_fmt = eval_datetimefmt (LOCALE_SLONGDATE, DT_ABBREV);
637 ((wchar_t *) lc_time_ptr)[-1] = L' ';
638 eval_datetimefmt (LOCALE_STIMEFORMAT, DT_DEFAULT);
640 _time_locale->c_fmt = charfromwchar (time, wc_fmt);
641 /* AM/PM */
642 _time_locale->wam_pm[0] = getlocaleinfo (time, LOCALE_S1159);
643 _time_locale->wam_pm[1] = getlocaleinfo (time, LOCALE_S2359);
644 _time_locale->am_pm[0] = charfromwchar (time, wam_pm[0]);
645 _time_locale->am_pm[1] = charfromwchar (time, wam_pm[1]);
646 /* date_fmt */
647 if (era && *era->date_fmt)
649 _time_locale->wdate_fmt = (const wchar_t *) lc_time_ptr;
650 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wdate_fmt,
651 era->date_fmt) + 1);
653 else
654 _time_locale->wdate_fmt = _time_locale->wc_fmt;
655 _time_locale->date_fmt = charfromwchar (time, wdate_fmt);
656 /* md */
658 wchar_t buf[80];
659 GetLocaleInfoEx (win_locale, LOCALE_IDATE, buf, 80);
660 _time_locale->md_order = (const char *) lc_time_ptr;
661 lc_time_ptr = stpcpy (lc_time_ptr, *buf == L'1' ? "dm" : "md") + 1;
663 /* ampm_fmt */
664 if (era)
666 _time_locale->wampm_fmt = (const wchar_t *) lc_time_ptr;
667 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wampm_fmt,
668 era->t_fmt_ampm) + 1);
670 else
671 _time_locale->wampm_fmt = eval_datetimefmt (LOCALE_STIMEFORMAT, DT_AMPM);
672 _time_locale->ampm_fmt = charfromwchar (time, wampm_fmt);
674 if (era)
676 /* Evaluate string length in target charset. Characters invalid in the
677 target charset are simply ignored, as on Linux. */
678 len = 0;
679 len += lc_wcstombs (f_wctomb, NULL, era->era, 0) + 1;
680 len += lc_wcstombs (f_wctomb, NULL, era->era_d_fmt, 0) + 1;
681 len += lc_wcstombs (f_wctomb, NULL, era->era_d_t_fmt, 0) + 1;
682 len += lc_wcstombs (f_wctomb, NULL, era->era_t_fmt, 0) + 1;
683 len += lc_wcstombs (f_wctomb, NULL, era->alt_digits, 0) + 1;
684 len += (wcslen (era->era) + 1) * sizeof (wchar_t);
685 len += (wcslen (era->era_d_fmt) + 1) * sizeof (wchar_t);
686 len += (wcslen (era->era_d_t_fmt) + 1) * sizeof (wchar_t);
687 len += (wcslen (era->era_t_fmt) + 1) * sizeof (wchar_t);
688 len += (wcslen (era->alt_digits) + 1) * sizeof (wchar_t);
690 /* Make sure data fits into the buffer */
691 if (lc_time_ptr + len > lc_time_end)
693 len = lc_time_ptr + len - new_lc_time_buf;
694 char *tmp = (char *) realloc (new_lc_time_buf, len);
695 if (!tmp)
696 era = NULL;
697 else
699 if (tmp != new_lc_time_buf)
700 rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
701 new_lc_time_buf, lc_time_ptr);
702 lc_time_ptr = tmp + (lc_time_ptr - new_lc_time_buf);
703 new_lc_time_buf = tmp;
704 lc_time_end = new_lc_time_buf + len;
707 /* Copy over */
708 if (era)
710 /* era */
711 _time_locale->wera = (const wchar_t *) lc_time_ptr;
712 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera,
713 era->era) + 1);
714 _time_locale->era = charfromwchar (time, wera);
715 /* era_d_fmt */
716 _time_locale->wera_d_fmt = (const wchar_t *) lc_time_ptr;
717 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_fmt,
718 era->era_d_fmt) + 1);
719 _time_locale->era_d_fmt = charfromwchar (time, wera_d_fmt);
720 /* era_d_t_fmt */
721 _time_locale->wera_d_t_fmt = (const wchar_t *) lc_time_ptr;
722 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_d_t_fmt,
723 era->era_d_t_fmt) + 1);
724 _time_locale->era_d_t_fmt = charfromwchar (time, wera_d_t_fmt);
725 /* era_t_fmt */
726 _time_locale->wera_t_fmt = (const wchar_t *) lc_time_ptr;
727 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->wera_t_fmt,
728 era->era_t_fmt) + 1);
729 _time_locale->era_t_fmt = charfromwchar (time, wera_t_fmt);
730 /* alt_digits */
731 _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
732 lc_time_ptr = (char *) (wcpcpy ((wchar_t *) _time_locale->walt_digits,
733 era->alt_digits) + 1);
734 _time_locale->alt_digits = charfromwchar (time, walt_digits);
737 if (!era)
739 _time_locale->wera =
740 _time_locale->wera_d_fmt =
741 _time_locale->wera_d_t_fmt =
742 _time_locale->wera_t_fmt =
743 _time_locale->walt_digits = (const wchar_t *) lc_time_ptr;
744 _time_locale->era =
745 _time_locale->era_d_fmt =
746 _time_locale->era_d_t_fmt =
747 _time_locale->era_t_fmt =
748 _time_locale->alt_digits = (const char *) lc_time_ptr;
749 /* Twice, to make sure wide char strings are correctly terminated. */
750 *lc_time_ptr++ = '\0';
751 *lc_time_ptr++ = '\0';
755 char *tmp = (char *) realloc (new_lc_time_buf, lc_time_ptr - new_lc_time_buf);
756 if (!tmp)
758 free (new_lc_time_buf);
759 return -1;
761 if (tmp != new_lc_time_buf)
762 rebase_locale_buf (_time_locale, _time_locale + 1, tmp,
763 new_lc_time_buf, lc_time_ptr);
764 *lc_time_buf = tmp;
765 return 1;
768 /* Called from newlib's setlocale() via __ctype_load_locale() if category
769 is LC_CTYPE. Returns LC_CTYPE values fetched from Windows locale data
770 in the structure pointed to by _ctype_locale. This is subsequently
771 accessed by functions like nl_langinfo, localeconv, printf, etc. */
772 extern "C" int
773 __set_lc_ctype_from_win (const char *name,
774 const struct lc_ctype_T *_C_ctype_locale,
775 struct lc_ctype_T *_ctype_locale,
776 char **lc_ctype_buf, wctomb_p f_wctomb,
777 const char *charset, int mb_cur_max)
779 wchar_t win_locale[ENCODING_LEN + 1];
780 int ret = __get_rfc5646_from_locale (name, win_locale);
781 if (ret < 0)
782 return ret;
783 if (!ret && !strcmp (charset, "ASCII"))
784 return 0;
786 # define MAX_CTYPE_BUFFER_SIZE 256
788 char *new_lc_ctype_buf = (char *) malloc (MAX_CTYPE_BUFFER_SIZE);
790 if (!new_lc_ctype_buf)
791 return -1;
792 char *lc_ctype_ptr = new_lc_ctype_buf;
793 /* C.foo is just a copy of "C" with fixed charset. */
794 if (!ret)
795 memcpy (_ctype_locale, _C_ctype_locale, sizeof (struct lc_ctype_T));
796 /* codeset */
797 _ctype_locale->codeset = lc_ctype_ptr;
798 lc_ctype_ptr = stpcpy (lc_ctype_ptr, charset) + 1;
799 /* mb_cur_max */
800 _ctype_locale->mb_cur_max = lc_ctype_ptr;
801 *lc_ctype_ptr++ = mb_cur_max;
802 *lc_ctype_ptr++ = '\0';
803 if (ret)
805 /* outdigits and woutdigits */
806 wchar_t digits[11];
807 GetLocaleInfoEx (win_locale, LOCALE_SNATIVEDIGITS, digits, 11);
808 for (int i = 0; i <= 9; ++i)
810 mbstate_t state;
812 /* Make sure the wchar_t's are always 2 byte aligned. */
813 if ((uintptr_t) lc_ctype_ptr % 2)
814 ++lc_ctype_ptr;
815 wchar_t *woutdig = (wchar_t *) lc_ctype_ptr;
816 _ctype_locale->woutdigits[i] = (const wchar_t *) woutdig;
817 *woutdig++ = digits[i];
818 *woutdig++ = L'\0';
819 lc_ctype_ptr = (char *) woutdig;
820 _ctype_locale->outdigits[i] = lc_ctype_ptr;
821 memset (&state, 0, sizeof state);
822 lc_ctype_ptr += f_wctomb (_REENT, lc_ctype_ptr, digits[i], &state);
823 *lc_ctype_ptr++ = '\0';
827 char *tmp = (char *) realloc (new_lc_ctype_buf,
828 lc_ctype_ptr - new_lc_ctype_buf);
829 if (!tmp)
831 free (new_lc_ctype_buf);
832 return -1;
834 if (tmp != new_lc_ctype_buf)
835 rebase_locale_buf (_ctype_locale, _ctype_locale + 1, tmp,
836 new_lc_ctype_buf, lc_ctype_ptr);
837 *lc_ctype_buf = tmp;
838 return 1;
841 /* Called from newlib's setlocale() via __numeric_load_locale() if category
842 is LC_NUMERIC. Returns LC_NUMERIC values fetched from Windows locale data
843 in the structure pointed to by _numeric_locale. This is subsequently
844 accessed by functions like nl_langinfo, localeconv, printf, etc. */
845 extern "C" int
846 __set_lc_numeric_from_win (const char *name,
847 const struct lc_numeric_T *_C_numeric_locale,
848 struct lc_numeric_T *_numeric_locale,
849 char **lc_numeric_buf, wctomb_p f_wctomb,
850 const char *charset)
852 wchar_t win_locale[ENCODING_LEN + 1];
853 int ret = __get_rfc5646_from_locale (name, win_locale);
854 if (ret < 0)
855 return ret;
856 if (!ret && !strcmp (charset, "ASCII"))
857 return 0;
859 # define MAX_NUMERIC_BUFFER_SIZE 256
861 char *new_lc_numeric_buf = (char *) malloc (MAX_NUMERIC_BUFFER_SIZE);
862 const char *lc_numeric_end = new_lc_numeric_buf + MAX_NUMERIC_BUFFER_SIZE;
864 if (!new_lc_numeric_buf)
865 return -1;
866 char *lc_numeric_ptr = new_lc_numeric_buf;
867 /* C.foo is just a copy of "C" with fixed charset. */
868 if (!ret)
869 memcpy (_numeric_locale, _C_numeric_locale, sizeof (struct lc_numeric_T));
870 else
872 /* decimal_point and thousands_sep */
873 /* fa_IR. Windows decimal_point is slash, correct is dot */
874 if (!wcscmp (win_locale, L"fa-IR"))
876 _numeric_locale->wdecimal_point = setlocaleinfo (numeric, L'.');
877 _numeric_locale->wthousands_sep = setlocaleinfo (numeric, L',');
879 /* ps_AF. Windows decimal_point is dot, thousands_sep is comma,
880 correct are arabic separators. */
881 else if (!wcscmp (win_locale, L"ps-AF"))
883 _numeric_locale->wdecimal_point = setlocaleinfo (numeric, 0x066b);
884 _numeric_locale->wthousands_sep = setlocaleinfo (numeric, 0x066c);
886 else
888 _numeric_locale->wdecimal_point = getlocaleinfo (numeric,
889 LOCALE_SDECIMAL);
890 _numeric_locale->wthousands_sep = getlocaleinfo (numeric,
891 LOCALE_STHOUSAND);
893 _numeric_locale->decimal_point = charfromwchar (numeric, wdecimal_point);
894 _numeric_locale->thousands_sep = charfromwchar (numeric, wthousands_sep);
895 /* grouping */
896 _numeric_locale->grouping = conv_grouping (win_locale, LOCALE_SGROUPING,
897 &lc_numeric_ptr);
899 /* codeset */
900 _numeric_locale->codeset = lc_numeric_ptr;
901 lc_numeric_ptr = stpcpy (lc_numeric_ptr, charset) + 1;
903 char *tmp = (char *) realloc (new_lc_numeric_buf,
904 lc_numeric_ptr - new_lc_numeric_buf);
905 if (!tmp)
907 free (new_lc_numeric_buf);
908 return -1;
910 if (tmp != new_lc_numeric_buf)
911 rebase_locale_buf (_numeric_locale, _numeric_locale + 1, tmp,
912 new_lc_numeric_buf, lc_numeric_ptr);
913 *lc_numeric_buf = tmp;
914 return 1;
917 /* Called from newlib's setlocale() via __monetary_load_locale() if category
918 is LC_MONETARY. Returns LC_MONETARY values fetched from Windows locale data
919 in the structure pointed to by _monetary_locale. This is subsequently
920 accessed by functions like nl_langinfo, localeconv, printf, etc. */
921 extern "C" int
922 __set_lc_monetary_from_win (const char *name,
923 const struct lc_monetary_T *_C_monetary_locale,
924 struct lc_monetary_T *_monetary_locale,
925 char **lc_monetary_buf, wctomb_p f_wctomb,
926 const char *charset)
928 wchar_t win_locale[ENCODING_LEN + 1];
929 int ret = __get_rfc5646_from_locale (name, win_locale);
930 if (ret < 0)
931 return ret;
932 if (!ret && !strcmp (charset, "ASCII"))
933 return 0;
935 # define MAX_MONETARY_BUFFER_SIZE 512
937 char *new_lc_monetary_buf = (char *) malloc (MAX_MONETARY_BUFFER_SIZE);
938 const char *lc_monetary_end = new_lc_monetary_buf + MAX_MONETARY_BUFFER_SIZE;
940 if (!new_lc_monetary_buf)
941 return -1;
942 char *lc_monetary_ptr = new_lc_monetary_buf;
943 /* C.foo is just a copy of "C" with fixed charset. */
944 if (!ret)
945 memcpy (_monetary_locale, _C_monetary_locale, sizeof (struct lc_monetary_T));
946 else
948 /* int_curr_symbol */
949 _monetary_locale->wint_curr_symbol = getlocaleinfo (monetary,
950 LOCALE_SINTLSYMBOL);
951 /* No spacing char means space. */
952 if (!_monetary_locale->wint_curr_symbol[3])
954 wchar_t *wc = (wchar_t *) _monetary_locale->wint_curr_symbol + 3;
955 *wc++ = L' ';
956 *wc++ = L'\0';
957 lc_monetary_ptr = (char *) wc;
959 _monetary_locale->int_curr_symbol = charfromwchar (monetary,
960 wint_curr_symbol);
961 /* currency_symbol */
962 _monetary_locale->wcurrency_symbol = getlocaleinfo (monetary,
963 LOCALE_SCURRENCY);
964 /* As on Linux: If the currency_symbol can't be represented in the
965 given charset, use int_curr_symbol. */
966 if (lc_wcstombs (f_wctomb, NULL, _monetary_locale->wcurrency_symbol,
967 0, true) == (size_t) -1)
968 _monetary_locale->currency_symbol = _monetary_locale->int_curr_symbol;
969 else
970 _monetary_locale->currency_symbol = charfromwchar (monetary,
971 wcurrency_symbol);
972 /* mon_decimal_point and mon_thousands_sep */
973 /* fa_IR or ps_AF. Windows mon_decimal_point is slash and comma,
974 mon_thousands_sep is comma and dot, correct
975 are arabic separators. */
976 if (!wcscmp (win_locale, L"fa-IR")
977 || !wcscmp (win_locale, L"ps-AF"))
979 _monetary_locale->wmon_decimal_point = setlocaleinfo (monetary,
980 0x066b);
981 _monetary_locale->wmon_thousands_sep = setlocaleinfo (monetary,
982 0x066c);
984 else
986 _monetary_locale->wmon_decimal_point = getlocaleinfo (monetary,
987 LOCALE_SMONDECIMALSEP);
988 _monetary_locale->wmon_thousands_sep = getlocaleinfo (monetary,
989 LOCALE_SMONTHOUSANDSEP);
991 _monetary_locale->mon_decimal_point = charfromwchar (monetary,
992 wmon_decimal_point);
993 _monetary_locale->mon_thousands_sep = charfromwchar (monetary,
994 wmon_thousands_sep);
995 /* mon_grouping */
996 _monetary_locale->mon_grouping = conv_grouping (win_locale,
997 LOCALE_SMONGROUPING,
998 &lc_monetary_ptr);
999 /* positive_sign */
1000 _monetary_locale->wpositive_sign = getlocaleinfo (monetary,
1001 LOCALE_SPOSITIVESIGN);
1002 _monetary_locale->positive_sign = charfromwchar (monetary, wpositive_sign);
1003 /* negative_sign */
1004 _monetary_locale->wnegative_sign = getlocaleinfo (monetary,
1005 LOCALE_SNEGATIVESIGN);
1006 _monetary_locale->negative_sign = charfromwchar (monetary, wnegative_sign);
1007 /* int_frac_digits */
1008 *lc_monetary_ptr = (char) getlocaleint (LOCALE_IINTLCURRDIGITS);
1009 _monetary_locale->int_frac_digits = lc_monetary_ptr++;
1010 /* frac_digits */
1011 *lc_monetary_ptr = (char) getlocaleint (LOCALE_ICURRDIGITS);
1012 _monetary_locale->frac_digits = lc_monetary_ptr++;
1013 /* p_cs_precedes and int_p_cs_precedes */
1014 *lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSYMPRECEDES);
1015 _monetary_locale->p_cs_precedes
1016 = _monetary_locale->int_p_cs_precedes = lc_monetary_ptr++;
1017 /* p_sep_by_space and int_p_sep_by_space */
1018 *lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSEPBYSPACE);
1019 _monetary_locale->p_sep_by_space
1020 = _monetary_locale->int_p_sep_by_space = lc_monetary_ptr++;
1021 /* n_cs_precedes and int_n_cs_precedes */
1022 *lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSYMPRECEDES);
1023 _monetary_locale->n_cs_precedes
1024 = _monetary_locale->int_n_cs_precedes = lc_monetary_ptr++;
1025 /* n_sep_by_space and int_n_sep_by_space */
1026 *lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSEPBYSPACE);
1027 _monetary_locale->n_sep_by_space
1028 = _monetary_locale->int_n_sep_by_space = lc_monetary_ptr++;
1029 /* p_sign_posn and int_p_sign_posn */
1030 *lc_monetary_ptr = (char) getlocaleint (LOCALE_IPOSSIGNPOSN);
1031 _monetary_locale->p_sign_posn
1032 = _monetary_locale->int_p_sign_posn = lc_monetary_ptr++;
1033 /* n_sign_posn and int_n_sign_posn */
1034 *lc_monetary_ptr = (char) getlocaleint (LOCALE_INEGSIGNPOSN);
1035 _monetary_locale->n_sign_posn
1036 = _monetary_locale->int_n_sign_posn = lc_monetary_ptr++;
1038 /* codeset */
1039 _monetary_locale->codeset = lc_monetary_ptr;
1040 lc_monetary_ptr = stpcpy (lc_monetary_ptr, charset) + 1;
1042 char *tmp = (char *) realloc (new_lc_monetary_buf,
1043 lc_monetary_ptr - new_lc_monetary_buf);
1044 if (!tmp)
1046 free (new_lc_monetary_buf);
1047 return -1;
1049 if (tmp != new_lc_monetary_buf)
1050 rebase_locale_buf (_monetary_locale, _monetary_locale + 1, tmp,
1051 new_lc_monetary_buf, lc_monetary_ptr);
1052 *lc_monetary_buf = tmp;
1053 return 1;
1056 extern "C" int
1057 __set_lc_messages_from_win (const char *name,
1058 const struct lc_messages_T *_C_messages_locale,
1059 struct lc_messages_T *_messages_locale,
1060 char **lc_messages_buf,
1061 wctomb_p f_wctomb, const char *charset)
1063 wchar_t win_locale[ENCODING_LEN + 1];
1064 int ret = __get_rfc5646_from_locale (name, win_locale);
1065 if (ret < 0)
1066 return ret;
1067 if (!ret && !strcmp (charset, "ASCII"))
1068 return 0;
1070 char locale[ENCODING_LEN + 1];
1071 char *c, *c2;
1072 lc_msg_t *msg = NULL;
1074 /* C.foo is just a copy of "C" with fixed charset. */
1075 if (!ret)
1076 memcpy (_messages_locale, _C_messages_locale, sizeof (struct lc_messages_T));
1077 else
1079 strcpy (locale, name);
1080 /* Removes the charset from the locale and attach the modifer to the
1081 language_TERRITORY part. */
1082 c = strchr (locale, '.');
1083 if (c)
1085 *c = '\0';
1086 c2 = strchr (c + 1, '@');
1087 /* Ignore @cjk* modifiers, they are newlib specials. */
1088 if (c2 && !is_cjk_modifier (c2))
1089 memmove (c, c2, strlen (c2) + 1);
1091 /* Now search in the alphabetically order lc_msg array for the
1092 locale. */
1093 lc_msg_t locale_key = { locale, NULL, NULL, NULL, NULL };
1094 msg = (lc_msg_t *) bsearch ((void *) &locale_key, (void *) lc_msg,
1095 sizeof lc_msg / sizeof *lc_msg,
1096 sizeof *lc_msg, locale_cmp);
1097 if (!msg)
1098 return 0;
1101 /* Evaluate string length in target charset. Characters invalid in the
1102 target charset are simply ignored, as on Linux. */
1103 size_t len = 0;
1104 len += (strlen (charset) + 1);
1105 if (ret)
1107 len += lc_wcstombs (f_wctomb, NULL, msg->yesexpr, 0) + 1;
1108 len += lc_wcstombs (f_wctomb, NULL, msg->noexpr, 0) + 1;
1109 len += lc_wcstombs (f_wctomb, NULL, msg->yesstr, 0) + 1;
1110 len += lc_wcstombs (f_wctomb, NULL, msg->nostr, 0) + 1;
1111 len += (wcslen (msg->yesexpr) + 1) * sizeof (wchar_t);
1112 len += (wcslen (msg->noexpr) + 1) * sizeof (wchar_t);
1113 len += (wcslen (msg->yesstr) + 1) * sizeof (wchar_t);
1114 len += (wcslen (msg->nostr) + 1) * sizeof (wchar_t);
1115 if (len % 1)
1116 ++len;
1118 /* Allocate. */
1119 char *new_lc_messages_buf = (char *) malloc (len);
1120 const char *lc_messages_end = new_lc_messages_buf + len;
1122 if (!new_lc_messages_buf)
1123 return -1;
1124 /* Copy over. */
1125 c = new_lc_messages_buf;
1126 /* codeset */
1127 _messages_locale->codeset = c;
1128 c = stpcpy (c, charset) + 1;
1129 if (ret)
1131 _messages_locale->yesexpr = (const char *) c;
1132 len = lc_wcstombs (f_wctomb, c, msg->yesexpr, lc_messages_end - c);
1133 _messages_locale->noexpr = (const char *) (c += len + 1);
1134 len = lc_wcstombs (f_wctomb, c, msg->noexpr, lc_messages_end - c);
1135 _messages_locale->yesstr = (const char *) (c += len + 1);
1136 len = lc_wcstombs (f_wctomb, c, msg->yesstr, lc_messages_end - c);
1137 _messages_locale->nostr = (const char *) (c += len + 1);
1138 len = lc_wcstombs (f_wctomb, c, msg->nostr, lc_messages_end - c);
1139 c += len + 1;
1140 if ((uintptr_t) c % 1)
1141 ++c;
1142 wchar_t *wc = (wchar_t *) c;
1143 _messages_locale->wyesexpr = (const wchar_t *) wc;
1144 wc = wcpcpy (wc, msg->yesexpr) + 1;
1145 _messages_locale->wnoexpr = (const wchar_t *) wc;
1146 wc = wcpcpy (wc, msg->noexpr) + 1;
1147 _messages_locale->wyesstr = (const wchar_t *) wc;
1148 wc = wcpcpy (wc, msg->yesstr) + 1;
1149 _messages_locale->wnostr = (const wchar_t *) wc;
1150 wcpcpy (wc, msg->nostr);
1152 *lc_messages_buf = new_lc_messages_buf;
1153 return 1;
1156 const struct lc_collate_T _C_collate_locale =
1158 L"",
1159 __ascii_mbtowc,
1160 "ASCII"
1163 /* Called from newlib's setlocale() if category is LC_COLLATE. Stores
1164 LC_COLLATE locale information. This is subsequently accessed by the
1165 below functions strcoll, strxfrm, wcscoll, wcsxfrm. */
1166 extern "C" int
1167 __collate_load_locale (struct __locale_t *locale, const char *name,
1168 void *f_mbtowc, const char *charset)
1170 char *bufp = NULL;
1171 struct lc_collate_T *cop = NULL;
1173 wchar_t win_locale[ENCODING_LEN + 1];
1174 int ret = __get_rfc5646_from_locale (name, win_locale);
1175 if (ret < 0)
1176 return ret;
1177 if (ret)
1179 bufp = (char *) malloc (1); /* dummy */
1180 if (!bufp)
1181 return -1;
1182 cop = (struct lc_collate_T *) calloc (1, sizeof (struct lc_collate_T));
1183 if (!cop)
1185 free (bufp);
1186 return -1;
1188 wcscpy (cop->win_locale, win_locale);
1189 cop->mbtowc = (mbtowc_p) f_mbtowc;
1190 stpcpy (cop->codeset, charset);
1192 struct __lc_cats tmp = locale->lc_cat[LC_COLLATE];
1193 locale->lc_cat[LC_COLLATE].ptr = !win_locale[0] ? &_C_collate_locale : cop;
1194 locale->lc_cat[LC_COLLATE].buf = bufp;
1195 /* If buf is not NULL, both pointers have been alloc'ed */
1196 if (tmp.buf)
1198 free ((void *) tmp.ptr);
1199 free (tmp.buf);
1201 return 0;
1204 /* We use the Windows functions for locale-specific string comparison and
1205 transformation. The advantage is that we don't need any files with
1206 collation information. */
1208 extern "C" int
1209 wcscoll_l (const wchar_t *__restrict ws1, const wchar_t *__restrict ws2,
1210 struct __locale_t *locale)
1212 int ret;
1213 const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
1215 if (!collate_locale[0])
1216 return wcscmp (ws1, ws2);
1217 ret = CompareStringEx (collate_locale, 0, ws1, -1, ws2, -1, NULL, NULL, 0);
1218 if (!ret)
1219 set_errno (EINVAL);
1220 return ret - CSTR_EQUAL;
1223 extern "C" int
1224 wcscoll (const wchar_t *__restrict ws1, const wchar_t *__restrict ws2)
1226 return wcscoll_l (ws1, ws2, __get_current_locale ());
1229 extern "C" int
1230 strcoll_l (const char *__restrict s1, const char *__restrict s2,
1231 struct __locale_t *locale)
1233 size_t n1, n2;
1234 wchar_t *ws1, *ws2;
1235 tmp_pathbuf tp;
1236 int ret;
1237 const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
1239 if (!collate_locale[0])
1240 return strcmp (s1, s2);
1241 mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc;
1242 n1 = lc_mbstowcs (collate_mbtowc, NULL, s1, 0) + 1;
1243 ws1 = (n1 > NT_MAX_PATH ? (wchar_t *) malloc (n1 * sizeof (wchar_t))
1244 : tp.w_get ());
1245 lc_mbstowcs (collate_mbtowc, ws1, s1, n1);
1246 n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1;
1247 ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1248 : tp.w_get ());
1249 lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
1250 ret = CompareStringEx (collate_locale, 0, ws1, -1, ws2, -1, NULL, NULL, 0);
1251 if (n1 > NT_MAX_PATH)
1252 free (ws1);
1253 if (n2 > NT_MAX_PATH)
1254 free (ws2);
1255 if (!ret)
1256 set_errno (EINVAL);
1257 return ret - CSTR_EQUAL;
1260 extern "C" int
1261 strcoll (const char *__restrict s1, const char *__restrict s2)
1263 return strcoll_l (s1, s2, __get_current_locale ());
/* BSD.  Used from glob.cc, fnmatch.c and regcomp.c.

   Compare the two single characters C1 and C2 with wcscoll.  Each
   character is turned into a NUL-terminated wide-char string first;
   values above 0xffff are stored as a surrogate pair. */
extern "C" int
__wcollate_range_cmp (wint_t c1, wint_t c2)
{
  const wint_t in[2] = { c1, c2 };
  wchar_t str[2][3] = { { L'\0', L'\0', L'\0' }, { L'\0', L'\0', L'\0' } };

  for (int i = 0; i < 2; ++i)
    {
      if (in[i] > 0xffff)
	{
	  /* Unicode value >= 0x10000, convert to surrogate pair */
	  str[i][0] = ((in[i] - 0x10000) >> 10) + 0xd800;
	  str[i][1] = ((in[i] - 0x10000) & 0x3ff) + 0xdc00;
	}
      else
	str[i][0] = (wchar_t) in[i];
    }
  return wcscoll (str[0], str[1]);
}
1287 /* Not so much BSD. Used from glob.cc, fnmatch.c and regcomp.c.
1289 The args are pointers to wint_t strings. This allows to compare
1290 against collating symbols. */
1291 extern "C" int
1292 __wscollate_range_cmp (wint_t *c1, wint_t *c2,
1293 size_t c1len, size_t c2len)
1295 wchar_t s1[c1len * 2 + 1] = { 0 }; /* # of chars if all are surrogates */
1296 wchar_t s2[c2len * 2 + 1] = { 0 };
1298 /* wcscoll() ignores case in many locales. but we don't want that
1299 for filenames... */
1300 if ((iswupper (*c1) && !iswupper (*c2))
1301 || (iswlower (*c1) && !iswlower (*c2)))
1302 return *c1 - *c2;
1304 wcintowcs (s1, c1, c1len);
1305 wcintowcs (s2, c2, c2len);
1306 return wcscoll_l (s1, s2, __get_current_locale ());
1309 const size_t ce_size = sizeof collating_element / sizeof *collating_element;
1310 const size_t ce_e_size = sizeof *collating_element;
1312 /* Check if UTF-32 input character `test' is in the same equivalence class
1313 as UTF-32 character 'eqv'.
1314 Note that we only recognize input in Unicode normalization form C, that
1315 is, we expect all letters to be composed. A single character is all we
1316 look at.
1317 To check equivalence, decompose pattern letter and input letter into
1318 normalization form KD and check the base character for equality. Also,
1319 convert all digits to the ASCII digits 0 - 9 and compare. */
1320 extern "C" int
1321 is_unicode_equiv (wint_t test, wint_t eqv)
1323 wchar_t decomp_testc[24] = { 0 };
1324 wchar_t decomp_eqvc[24] = { 0 };
1325 wchar_t testc[3] = { 0 };
1326 wchar_t eqvc[3] = { 0 };
1328 /* For equivalence classes, case doesn't matter. However, be careful.
1329 Only convert chars which have a "upper" to "lower". */
1330 if (iswupper (eqv))
1331 eqv = towlower (eqv);
1332 if (iswupper (test))
1333 test = towlower (test);
1334 /* Convert to UTF-16 string */
1335 if (eqv > 0x10000) {
1336 eqvc[0] = ((eqv - 0x10000) >> 10) + 0xd800;
1337 eqvc[1] = ((eqv - 0x10000) & 0x3ff) + 0xdc00;
1338 } else
1339 eqvc[0] = eqv;
1340 if (test > 0x10000) {
1341 testc[0] = ((test - 0x10000) >> 10) + 0xd800;
1342 testc[1] = ((test - 0x10000) & 0x3ff) + 0xdc00;
1343 } else
1344 testc[0] = test;
1345 /* Convert to decomposed form */
1346 FoldStringW (MAP_COMPOSITE | MAP_FOLDCZONE | MAP_FOLDDIGITS,
1347 eqvc, -1, decomp_eqvc, 24);
1348 FoldStringW (MAP_COMPOSITE | MAP_FOLDCZONE | MAP_FOLDDIGITS,
1349 testc, -1, decomp_testc, 24);
1350 /* If they are equivalent, the base char must be the same. */
1351 if (decomp_eqvc[0] != decomp_testc[0])
1352 return 0;
1353 /* If it's a surrogate pair, check the second char, too */
1354 if (decomp_eqvc[0] >= 0xd800 && decomp_eqvc[0] <= 0xdbff &&
1355 decomp_eqvc[1] != decomp_testc[1])
1356 return 0;
1357 return 1;
1360 static int
1361 comp_coll_elem (const void *key, const void *array_member)
1363 collating_element_t *ckey = (collating_element_t *) key;
1364 collating_element_t *carray_member = (collating_element_t *) array_member;
1366 int ret = wcicmp ((const wint_t *) ckey->element,
1367 (const wint_t *) carray_member->element);
1368 /* The locale in the collating_element array never has a codeset
1369 attached. So the length of the collating_element locale is
1370 always <= length of the key locale, and that's all we need to
1371 check. Also, if the collating_element locale is empty, we're
1372 all set. */
1373 if (ret == 0 && carray_member->locale[0])
1374 ret = strncmp (ckey->locale, carray_member->locale,
1375 strlen (carray_member->locale));
1376 return ret;
1379 extern "C" int
1380 is_unicode_coll_elem (const wint_t *test)
1382 collating_element_t ct = {
1383 (const char32_t *) test,
1384 __get_current_locale ()->categories[LC_COLLATE]
1386 collating_element_t *cmatch;
1388 if (wcilen (test) == 1)
1389 return 1;
1390 cmatch = (collating_element_t *)
1391 bsearch (&ct, collating_element, ce_size, ce_e_size, comp_coll_elem);
1392 return !!cmatch;
1395 static int
1396 comp_coll_elem_n (const void *key, const void *array_member)
1398 collating_element_t *ckey = (collating_element_t *) key;
1399 collating_element_t *carray_member = (collating_element_t *) array_member;
1401 int ret = wcincmp ((const wint_t *) ckey->element,
1402 (const wint_t *) carray_member->element,
1403 wcilen ((const wint_t *) carray_member->element));
1404 /* The locale in the collating_element array never has a codeset
1405 attached. So the length of the collating_element locale is
1406 always <= length of the key locale, and that's all we need to
1407 check. Also, if the collating_element locale is empty, we're
1408 all set. */
1409 if (ret == 0 && carray_member->locale[0])
1410 ret = strncmp (ckey->locale, carray_member->locale,
1411 strlen (carray_member->locale));
1412 return ret;
1415 /* Return the number of UTF-32 chars making up the next full character in
1416 inp, taking valid collation elements in the current locale into account. */
1417 extern "C" size_t
1418 next_unicode_char (wint_t *inp)
1420 collating_element_t ct = {
1421 (const char32_t *) inp,
1422 __get_current_locale ()->categories[LC_COLLATE]
1424 collating_element_t *cmatch;
1426 if (wcilen (inp) > 1)
1428 cmatch = (collating_element_t *)
1429 bsearch (&ct, collating_element, ce_size, ce_e_size,
1430 comp_coll_elem_n);
1431 if (cmatch)
1432 return wcilen ((const wint_t *) cmatch->element);
1434 return 1;
1437 extern "C" size_t
1438 wcsxfrm_l (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn,
1439 struct __locale_t *locale)
1441 size_t ret;
1442 const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
1444 if (!collate_locale[0])
1445 return wcslcpy (ws1, ws2, wsn);
1446 /* Don't use LCMAP_SORTKEY in conjunction with LCMAP_BYTEREV. The cchDest
1447 parameter is used as byte count with LCMAP_SORTKEY but as char count with
1448 LCMAP_BYTEREV. */
1449 ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1, ws1,
1450 wsn * sizeof (wchar_t), NULL, NULL, 0);
1451 if (ret)
1453 ret /= sizeof (wchar_t);
1454 if (wsn)
1456 /* Byte swap the array ourselves here. */
1457 for (size_t idx = 0; idx < ret; ++idx)
1458 ws1[idx] = __builtin_bswap16 (ws1[idx]);
1459 /* LCMapStringW returns byte count including the terminating NUL char.
1460 wcsxfrm is supposed to return length in wchar_t excluding the NUL.
1461 Since the array is only single byte NUL-terminated yet, make sure
1462 the result is wchar_t-NUL terminated. */
1463 if (ret < wsn)
1464 ws1[ret] = L'\0';
1466 return ret;
1468 if (GetLastError () != ERROR_INSUFFICIENT_BUFFER)
1469 set_errno (EINVAL);
1470 else
1472 ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
1473 NULL, 0, NULL, NULL, 0);
1474 if (ret)
1475 wsn = ret / sizeof (wchar_t);
1477 return wsn;
1480 extern "C" size_t
1481 wcsxfrm (wchar_t *__restrict ws1, const wchar_t *__restrict ws2, size_t wsn)
1483 return wcsxfrm_l (ws1, ws2, wsn, __get_current_locale ());
1486 extern "C" size_t
1487 strxfrm_l (char *__restrict s1, const char *__restrict s2, size_t sn,
1488 struct __locale_t *locale)
1490 size_t ret = 0;
1491 size_t n2;
1492 wchar_t *ws2;
1493 tmp_pathbuf tp;
1494 const wchar_t *collate_locale = __get_collate_locale (locale)->win_locale;
1496 if (!collate_locale[0])
1497 return strlcpy (s1, s2, sn);
1498 mbtowc_p collate_mbtowc = __get_collate_locale (locale)->mbtowc;
1499 n2 = lc_mbstowcs (collate_mbtowc, NULL, s2, 0) + 1;
1500 ws2 = (n2 > NT_MAX_PATH ? (wchar_t *) malloc (n2 * sizeof (wchar_t))
1501 : tp.w_get ());
1502 if (ws2)
1504 lc_mbstowcs (collate_mbtowc, ws2, s2, n2);
1505 /* The sort key is a NUL-terminated byte string. */
1506 ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
1507 (PWCHAR) s1, sn, NULL, NULL, 0);
1509 if (ret == 0)
1511 ret = sn + 1;
1512 if (!ws2 || GetLastError () != ERROR_INSUFFICIENT_BUFFER)
1513 set_errno (EINVAL);
1514 else
1515 ret = LCMapStringEx (collate_locale, LCMAP_SORTKEY, ws2, -1,
1516 NULL, 0, NULL, NULL, 0);
1518 if (ws2 && n2 > NT_MAX_PATH)
1519 free (ws2);
1520 /* LCMapStringW returns byte count including the terminating NUL character.
1521 strxfrm is supposed to return length excluding the NUL. */
1522 return ret - 1;
1525 extern "C" size_t
1526 strxfrm (char *__restrict s1, const char *__restrict s2, size_t sn)
1528 return strxfrm_l (s1, s2, sn, __get_current_locale ());
1531 /* Fetch default ANSI codepage from locale info and generate a setlocale
1532 compatible character set code. Called from newlib's setlocale(), if the
1533 charset isn't given explicitely in the POSIX compatible locale specifier. */
1534 extern "C" void
1535 __set_charset_from_locale (const char *loc, char *charset)
1537 wchar_t win_locale[ENCODING_LEN + 1];
1538 char locale[ENCODING_LEN + 1];
1539 char *modifier;
1540 char *c;
1541 UINT cp;
1543 /* Cut out explicit codeset */
1544 stpcpy (locale, loc);
1545 modifier = strchr (loc, '@');
1546 if ((c = strchr (locale, '.')))
1547 stpcpy (c, modifier ?: "");
1548 /* Ignore @cjk* modifiers, they are newlib specials. */
1549 modifier = strchr (locale, '@');
1550 if (modifier && is_cjk_modifier (modifier))
1551 *modifier = '\0';
1553 default_codeset_t srch_dc = { locale, NULL };
1554 default_codeset_t *dc = (default_codeset_t *)
1555 bsearch ((void *) &srch_dc, (void *) default_codeset,
1556 sizeof default_codeset / sizeof *default_codeset,
1557 sizeof *default_codeset, locale_cmp);
1558 if (dc)
1560 stpcpy (charset, dc->codeset);
1561 return;
1564 /* "C" locale, or invalid locale? */
1565 if (__get_rfc5646_from_locale (locale, win_locale) <= 0)
1566 cp = 20127;
1567 else if (GetLocaleInfoEx (win_locale,
1568 LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
1569 (PWCHAR) &cp, sizeof cp))
1570 cp = 0;
1571 /* Translate codepage and lcid to a charset closely aligned with the default
1572 charsets defined in Glibc. */
1573 const char *cs;
1574 switch (cp)
1576 case 20127:
1577 cs = "ASCII";
1578 break;
1579 case 874:
1580 cs = "CP874";
1581 break;
1582 case 932:
1583 cs = "EUCJP";
1584 break;
1585 case 936:
1586 cs = "GB2312";
1587 break;
1588 case 949:
1589 cs = "EUCKR";
1590 break;
1591 case 950:
1592 cs = "BIG5";
1593 break;
1594 case 1250:
1595 cs = "ISO-8859-2";
1596 break;
1597 case 1251:
1598 cs = "ISO-8859-5";
1599 break;
1600 case 1252:
1601 cs = "ISO-8859-1";
1602 break;
1603 case 1253:
1604 cs = "ISO-8859-7";
1605 break;
1606 case 1254:
1607 cs = "ISO-8859-9";
1608 break;
1609 case 1255:
1610 cs = "ISO-8859-8";
1611 break;
1612 case 1256:
1613 cs = "ISO-8859-6";
1614 break;
1615 case 1257:
1616 cs = "ISO-8859-13";
1617 break;
1618 case 1258:
1619 cs = "UTF-8";
1620 break;
1621 default:
1622 /* Some (pretty new) EU locales don't exist in GLibc and haven't been
1623 catched above. Check for @euro modifier again and make these locales
1624 always use ISO-8859-15. */
1625 if (modifier && !strcmp (modifier + 1, "euro"))
1626 cs = "ISO-8859-15";
1627 else
1628 cs = "UTF-8";
1629 break;
1631 stpcpy (charset, cs);
1634 /* Called from fhandler_tty::setup_locale. Set a codepage which reflects the
1635 internal charset setting. This is *not* necessarily the Windows
1636 codepage connected to a locale by default, so we have to set this
1637 up explicitely. */
1638 UINT
1639 __eval_codepage_from_internal_charset ()
1641 const char *charset = __locale_charset (__get_global_locale ());
1642 UINT codepage = CP_UTF8; /* Default UTF8 */
1644 /* The internal charset names are well defined, so we can use shortcuts. */
1645 switch (charset[0])
1647 case 'B': /* BIG5 */
1648 codepage = 950;
1649 break;
1650 case 'C': /* CPxxx */
1651 codepage = strtoul (charset + 2, NULL, 10);
1652 break;
1653 case 'E': /* EUCxx */
1654 switch (charset[3])
1656 case 'J': /* EUCJP */
1657 codepage = 20932;
1658 break;
1659 case 'K': /* EUCKR */
1660 codepage = 949;
1661 break;
1662 case 'C': /* EUCCN */
1663 codepage = 936;
1664 break;
1666 break;
1667 case 'G': /* GBK/GB2312/GB18030 */
1668 codepage = (charset[2] == '1') ? 54936 : 936;
1669 break;
1670 case 'I': /* ISO-8859-x */
1671 codepage = strtoul (charset + 9, NULL, 10) + 28590;
1672 break;
1673 case 'S': /* SJIS */
1674 codepage = 932;
1675 break;
1676 default: /* All set to UTF8 already */
1677 break;
1679 return codepage;
1682 /* This function is called from newlib's loadlocale if the locale identifier
1683 was invalid, one way or the other. It looks for the file
1685 /usr/share/locale/locale.alias
1687 which is part of the gettext package, and if it finds the locale alias
1688 in that file, it replaces the locale with the correct locale string from
1689 that file.
1691 If successful, it returns a pointer to new_locale, NULL otherwise.*/
1692 extern "C" char *
1693 __set_locale_from_locale_alias (const char *locale, char *new_locale)
1695 wchar_t wlocale[ENCODING_LEN + 1];
1696 wchar_t walias[ENCODING_LEN + 1];
1697 #define LOCALE_ALIAS_LINE_LEN 255
1698 char alias_buf[LOCALE_ALIAS_LINE_LEN + 1], *c;
1699 wchar_t *wc;
1700 const char *alias, *replace;
1701 char *ret = NULL;
1703 FILE *fp = fopen ("/usr/share/locale/locale.alias", "rt");
1704 if (!fp)
1705 return NULL;
1706 /* The incoming locale is given in the application charset, or in
1707 the Cygwin internal charset. We try both. */
1708 if (mbstowcs (wlocale, locale, ENCODING_LEN + 1) == (size_t) -1)
1709 sys_mbstowcs (wlocale, ENCODING_LEN + 1, locale);
1710 wlocale[ENCODING_LEN] = L'\0';
1711 /* Ignore @cjk* modifiers, they are newlib specials. */
1712 wc = wcschr (wlocale, L'@');
1713 if (wc && w_is_cjk_modifier (wc))
1714 *wc = L'\0';
1715 while (fgets (alias_buf, LOCALE_ALIAS_LINE_LEN + 1, fp))
1717 alias_buf[LOCALE_ALIAS_LINE_LEN] = '\0';
1718 c = strrchr (alias_buf, '\n');
1719 if (c)
1720 *c = '\0';
1721 c = alias_buf;
1722 c += strspn (c, " \t");
1723 if (!*c || *c == '#')
1724 continue;
1725 alias = c;
1726 c += strcspn (c, " \t");
1727 *c++ = '\0';
1728 c += strspn (c, " \t");
1729 if (*c == '#')
1730 continue;
1731 replace = c;
1732 c += strcspn (c, " \t");
1733 *c++ = '\0';
1734 if (strlen (replace) > ENCODING_LEN)
1735 continue;
1736 /* The file is latin1 encoded */
1737 lc_mbstowcs (__iso_mbtowc (1), walias, alias, ENCODING_LEN + 1);
1738 walias[ENCODING_LEN] = L'\0';
1739 if (!wcscmp (wlocale, walias))
1741 ret = strcpy (new_locale, replace);
1742 break;
1745 fclose (fp);
1746 return ret;
1749 /* Can be called via cygwin_internal (CW_INTERNAL_SETLOCALE) for application
1750 which really (think they) know what they are doing. */
1751 extern "C" void
1752 internal_setlocale ()
1754 /* Each setlocale from the environment potentially changes the
1755 multibyte representation of the CWD. Therefore we have to
1756 reevaluate the CWD's posix path and store in the new charset.
1757 Same for the PATH environment variable. */
1758 /* FIXME: Other buffered paths might be affected as well. */
1759 /* FIXME: It could be necessary to convert the entire environment,
1760 not just PATH. */
1761 tmp_pathbuf tp;
1762 char *path;
1763 wchar_t *w_path = NULL, *w_cwd;
1765 /* Don't do anything if the charset hasn't actually changed. */
1766 if (cygheap->locale.mbtowc == __get_global_locale ()->mbtowc)
1767 return;
1769 debug_printf ("Global charset set to %s",
1770 __locale_charset (__get_global_locale ()));
1771 /* Fetch PATH and CWD and convert to wchar_t in previous charset. */
1772 path = getenv ("PATH");
1773 if (path && *path) /* $PATH can be potentially unset. */
1775 w_path = tp.w_get ();
1776 _sys_mbstowcs (cygheap->locale.mbtowc, w_path, 32768, path);
1778 w_cwd = tp.w_get ();
1779 cwdstuff::acquire_write ();
1780 _sys_mbstowcs (cygheap->locale.mbtowc, w_cwd, 32768,
1781 cygheap->cwd.get_posix ());
1782 /* Set charset for internal conversion functions. */
1783 cygheap->locale.mbtowc = __get_global_locale ()->mbtowc;
1784 if (cygheap->locale.mbtowc == __ascii_mbtowc)
1785 cygheap->locale.mbtowc = __utf8_mbtowc;
1786 /* Restore CWD and PATH in new charset. */
1787 cygheap->cwd.reset_posix (w_cwd);
1788 cwdstuff::release_write ();
1789 if (w_path)
1791 char *c_path = tp.c_get ();
1792 sys_wcstombs (c_path, 32768, w_path);
1793 setenv ("PATH", c_path, 1);
1797 /* Called from dll_crt0_1, before fetching the command line from Windows.
1798 Set the internal charset according to the environment locale settings.
1799 Check if a required codepage is available, and only switch internal
1800 charset if so.
1801 Make sure to reset the application locale to "C" per POSIX. */
1802 void
1803 initial_setlocale ()
1805 char *ret = _setlocale_r (_REENT, LC_CTYPE, "");
1806 if (ret)
1807 internal_setlocale ();