1 /* nlsfuncs.cc: NLS helper functions
3 This file is part of Cygwin.
5 This software is a copyrighted work licensed under the terms of the
6 Cygwin license. Please consult the file "CYGWIN_LICENSE" for
23 #include "lc_collelem.h"
24 #include "lc_def_codesets.h"
26 #define _LC(x) &lc_##x##_ptr,lc_##x##_end-lc_##x##_ptr
28 #define getlocaleinfo(category,type) \
29 __getlocaleinfo(win_locale,(type),_LC(category))
30 #define getlocaleint(type) \
31 __getlocaleint(win_locale,(type))
32 #define setlocaleinfo(category,val) \
33 __setlocaleinfo(_LC(category),(val))
34 #define eval_datetimefmt(type,flags) \
35 __eval_datetimefmt(win_locale,(type),(flags),&lc_time_ptr,\
36 lc_time_end-lc_time_ptr)
37 #define charfromwchar(category,in) \
38 __charfromwchar (_##category##_locale->in,_LC(category),f_wctomb)
40 /* Check for @cjk* modifier. Try to be as fast as possible */
41 #define __is_cjk_modifier(_in, _cmp, _L) ({ \
45 && (_cmp (_in + 4, _L##"narrow") == 0 \
46 || _cmp (_in + 4, _L##"wide") == 0 \
47 || _cmp (_in + 4, _L##"single") == 0); \
49 #define is_cjk_modifier(_in) __is_cjk_modifier(_in, strcmp, )
50 #define w_is_cjk_modifier(_in) __is_cjk_modifier(_in, wcscmp, L)
52 /* ResolveLocaleName does not do what we want. It converts anything which
53 vaguely resembles a locale into some other locale it supports. Bad
54 examples are: "en-XY" gets converted to "en-US", and worse, "ff-BF" gets
55 converted to "ff-Latn-SN", even though "ff-Adlm-BF" exists! Useless.
56 To check if a locale is supported, we have to enumerate all valid
57 Windows locales, and return the match, even if the locale in Windows
60 const wchar_t *search_iso639
;
61 const wchar_t *search_iso3166
;
62 wchar_t *resolved_locale
;
67 resolve_locale_proc (LPWSTR win_locale
, DWORD info
, LPARAM param
)
69 res_loc_t
*loc
= (res_loc_t
*) param
;
70 wchar_t *iso639
, *iso639_end
;
74 iso639_end
= wcschr (iso639
, L
'-');
77 if (wcsncmp (loc
->search_iso639
, iso639
, iso639_end
- iso639
) != 0)
79 iso3166
= ++iso639_end
;
80 /* Territory is all upper case */
81 while (!iswupper (iso3166
[0]) || !iswupper (iso3166
[1]))
83 iso3166
= wcschr (iso3166
, L
'-');
88 if (wcsncmp (loc
->search_iso3166
, iso3166
, wcslen (loc
->search_iso3166
)))
90 wcsncat (loc
->resolved_locale
, win_locale
, loc
->res_len
- 1);
95 resolve_locale_name (const wchar_t *search
, wchar_t *result
, int rlen
)
99 loc
.search_iso639
= search
;
100 loc
.search_iso3166
= wcschr (search
, L
'-') + 1;
101 loc
.resolved_locale
= result
;
104 EnumSystemLocalesEx (resolve_locale_proc
,
105 LOCALE_WINDOWS
| LOCALE_SUPPLEMENTAL
,
106 (LPARAM
) &loc
, NULL
);
107 return wcslen (result
);
110 /* Fetch Windows RFC 5646 locale from POSIX locale specifier.
118 __get_rfc5646_from_locale (const char *name
, wchar_t *win_locale
)
120 wchar_t wlocale
[ENCODING_LEN
+ 1] = { 0 };
121 wchar_t locale
[ENCODING_LEN
+ 1];
124 win_locale
[0] = L
'\0';
125 mbstowcs (locale
, name
, ENCODING_LEN
+ 1);
126 /* Remember modifier for later use. */
127 const char *modifier
= strchr (name
, '@') ? : "";
128 /* Drop charset and modifier */
129 c
= wcschr (locale
, L
'.');
131 c
= wcschr (locale
, L
'@');
134 /* "POSIX" already converted to "C" in loadlocale. */
135 if (!wcscmp (locale
, L
"C"))
137 c
= wcschr (locale
, '_');
140 /* Check whether the locale can be resolved from the language tag alone.
141 Fix up the Linux-only locale first. */
142 if (!wcscmp (locale
, L
"ber"))
143 wcscpy (locale
, L
"tzm");
144 if (ResolveLocaleName (locale
, wlocale
, ENCODING_LEN
+ 1) <= 1)
149 wcpcpy (win_locale
, wlocale
);
153 /* Convert to RFC 5646 syntax. */
155 /* Override a few locales with a different default script as used
156 on Linux. Linux also supports no_NO which is equivalent to nb_NO,
157 but Windows can resolve that nicely. Also, "tzm" and "zgh" are
158 subsumed under "ber" on Linux. */
162 } override_locale
[] = {
163 { L
"ber-DZ" , L
"tzm-Latn-DZ" },
164 { L
"ber-MA" , L
"zgh-Tfng-MA" },
165 { L
"mn-CN" , L
"mn-Mong-CN" },
166 { L
"mn-MN" , L
"mn-Mong-MN" },
167 { L
"pa-PK" , L
"pa-Arab-PK" },
168 { L
"sd-IN" , L
"sd-Deva-IN" },
169 { L
"sr-BA" , L
"sr-Cyrl-BA" },
170 { L
"sr-ME" , L
"sr-Cyrl-ME" },
171 { L
"sr-RS" , L
"sr-Cyrl-RS" },
172 { L
"sr-XK" , L
"sr-Cyrl-XK" },
173 { L
"tzm-MA", L
"tzm-Tfng-MA" },
177 for (int i
= 0; override_locale
[i
].loc
178 && override_locale
[i
].loc
[0] <= locale
[0]; ++i
)
180 if (!wcscmp (locale
, override_locale
[i
].loc
))
182 wcscpy (wlocale
, override_locale
[i
].wloc
);
186 /* If resolve_locale_name returns with error, or if it returns a
187 locale other than the input locale, we don't support this locale. */
189 && !resolve_locale_name (locale
, wlocale
, ENCODING_LEN
+ 1))
195 /* Check for modifiers changing the script */
196 const wchar_t *iso15924_script
[] = { L
"Latn-", L
"Cyrl-", L
"Deva-", L
"Adlm-" };
201 if (!strcmp (++modifier
, "latin"))
203 else if (!strcmp (modifier
, "cyrillic"))
205 else if (!strcmp (modifier
, "devanagari"))
207 else if (!strcmp (modifier
, "adlam"))
212 wchar_t *iso3166
= wcschr (wlocale
, L
'-') + 1;
215 /* Copy iso639 language part including dash */
216 wlp
= wcpncpy (win_locale
, wlocale
, iso3166
- wlocale
);
217 /* Concat new iso15924 script */
218 wlp
= wcpcpy (wlp
, iso15924_script
[idx
]);
219 /* Concat iso3166 territory. Skip script, if already in the locale */
220 wchar_t *skip_script
= wcschr (iso3166
, L
'-');
222 iso3166
= skip_script
+ 1;
223 wcpcpy (wlp
, iso3166
);
226 wcpcpy (win_locale
, wlocale
);
230 /* Never returns -1. Just skips invalid chars instead. Only if return_invalid
231 is set, s==NULL returns -1 since then it's used to recognize invalid strings
232 in the used charset. */
234 lc_wcstombs (wctomb_p f_wctomb
, char *s
, const wchar_t *pwcs
, size_t n
,
235 bool return_invalid
= false)
240 size_t i
, bytes
, num_to_copy
;
243 memset (&state
, 0, sizeof state
);
246 size_t num_bytes
= 0;
249 bytes
= f_wctomb (_REENT
, buf
, *pwcs
++, &state
);
250 if (bytes
!= (size_t) -1)
252 else if (return_invalid
)
259 bytes
= f_wctomb (_REENT
, buf
, *pwcs
, &state
);
260 if (bytes
== (size_t) -1)
262 memset (&state
, 0, sizeof state
);
266 num_to_copy
= (n
> bytes
? bytes
: n
);
267 for (i
= 0; i
< num_to_copy
; ++i
)
271 return ptr
- s
- (n
>= bytes
);
278 /* Never returns -1. Invalid sequences are translated to replacement
281 lc_mbstowcs (mbtowc_p f_mbtowc
, wchar_t *pwcs
, const char *s
, size_t n
)
284 char *t
= (char *) s
;
288 memset (&state
, 0, sizeof state
);
293 bytes
= f_mbtowc (_REENT
, pwcs
, t
, 6 /* fake, always enough */, &state
);
294 if (bytes
== (size_t) -1)
315 locale_cmp (const void *a
, const void *b
)
317 char **la
= (char **) a
;
318 char **lb
= (char **) b
;
319 return strcmp (*la
, *lb
);
322 /* Helper function to workaround reallocs which move blocks even if they shrink.
323 Cygwin's realloc does not do this, but tcsh's does, for instance. All lc_foo
324 structures consist entirely of pointers so they are practically pointer
325 arrays. What we do here is just treat the lc_foo pointers as char ** and
326 rebase all char * pointers within, up to the given size of the structure. */
328 rebase_locale_buf (const void *ptrv
, const void *ptrvend
, const char *newbase
,
329 const char *oldbase
, const char *oldend
)
331 const char **ptrsend
= (const char **) ptrvend
;
332 for (const char **ptrs
= (const char **) ptrv
; ptrs
< ptrsend
; ++ptrs
)
333 if (*ptrs
>= oldbase
&& *ptrs
< oldend
)
334 *ptrs
+= newbase
- oldbase
;
338 __getlocaleinfo (wchar_t *loc
, LCTYPE type
, char **ptr
, size_t size
)
/* NOTE(review): `x % 1` is always 0, so this condition can never be true.
   The LC_CTYPE code in this file tests `% 2` to keep wchar_t data 2-byte
   aligned; presumably the same alignment test was intended here -- confirm
   before changing. */
343 if ((uintptr_t) *ptr
% 1)
/* The result is written in place at *ptr, reinterpreted as a wchar_t
   buffer. */
345 ret
= (wchar_t *) *ptr
;
/* size is divided by sizeof (wchar_t) before being passed as the buffer
   length argument of GetLocaleInfoEx. */
346 num
= GetLocaleInfoEx (loc
, type
, ret
, size
/ sizeof (wchar_t));
/* Advance the caller's cursor by the count GetLocaleInfoEx returned. */
347 *ptr
= (char *) (ret
+ num
);
352 __setlocaleinfo (char **ptr
, size_t size
, wchar_t val
)
/* NOTE(review): `x % 1` is always 0, so this condition can never be true.
   The LC_CTYPE code in this file tests `% 2` to keep wchar_t data 2-byte
   aligned; presumably the same alignment test was intended here -- confirm
   before changing. */
356 if ((uintptr_t) *ptr
% 1)
/* The value is stored in place at *ptr, reinterpreted as a wchar_t
   buffer. */
358 ret
= (wchar_t *) *ptr
;
/* Advance the caller's cursor by two wchar_t slots past ret. */
361 *ptr
= (char *) (ret
+ 2);
366 __charfromwchar (const wchar_t *in
, char **ptr
, size_t size
, wctomb_p f_wctomb
)
371 num
= lc_wcstombs (f_wctomb
, ret
= *ptr
, in
, size
);
377 __getlocaleint (wchar_t *loc
, LCTYPE type
)
380 return GetLocaleInfoEx (loc
, type
| LOCALE_RETURN_NUMBER
, (PWCHAR
) &val
,
381 sizeof val
) ? val
: 0;
386 DT_AMPM
= 0x01, /* Enforce 12 hour time format. */
387 DT_ABBREV
= 0x02, /* Enforce abbreviated month and day names. */
391 __eval_datetimefmt (wchar_t *loc
, LCTYPE type
, dt_flags flags
, char **ptr
,
397 const wchar_t *day_str
= L
"edaA";
398 const wchar_t *mon_str
= L
"mmbB";
399 const wchar_t *year_str
= L
"yyyY";
400 const wchar_t *hour12_str
= L
"lI";
401 const wchar_t *hour24_str
= L
"kH";
402 const wchar_t *t_str
;
404 if ((uintptr_t) *ptr
% 1)
406 wchar_t *ret
= (wchar_t *) *ptr
;
407 wchar_t *p
= (wchar_t *) *ptr
;
408 GetLocaleInfoEx (loc
, type
, buf
, 80);
409 for (wchar_t *fmt
= buf
; *fmt
; ++fmt
)
416 while (fmt
[1] && *++fmt
!= L
'\'')
422 t_str
= (fc
== L
'd' ? day_str
: fc
== L
'M' ? mon_str
: year_str
);
423 for (idx
= 0; fmt
[1] == fc
; ++idx
, ++fmt
);
426 if ((flags
& DT_ABBREV
) && fc
!= L
'y' && idx
== 3)
436 t_str
= (fc
== L
'h' || (flags
& DT_AMPM
) ? hour12_str
: hour24_str
);
452 *p
++ = (fc
== L
'm' ? L
'M' : fc
== L
's' ? L
'S' : L
'p');
469 /* Convert Windows grouping format into POSIX grouping format. */
471 conv_grouping (wchar_t *loc
, LCTYPE type
, char **lc_ptr
)
473 wchar_t buf
[10]; /* Per MSDN max size of LOCALE_SGROUPING element incl. NUL */
478 GetLocaleInfoEx (loc
, type
, buf
, 10);
479 /* Convert Windows grouping format into POSIX grouping format. Note that
480 only ASCII chars are used in the grouping format. */
481 for (wchar_t *c
= buf
; *c
; ++c
)
483 if (*c
< L
'0' || *c
> L
'9')
485 char val
= *c
- L
'0';
500 /* Called from newlib's setlocale() via __time_load_locale() if category
501 is LC_TIME. Returns LC_TIME values fetched from Windows locale data
502 in the structure pointed to by _time_locale. This is subsequently
503 accessed by functions like nl_langinfo, strftime, strptime. */
505 __set_lc_time_from_win (const char *name
,
506 const struct lc_time_T
*_C_time_locale
,
507 struct lc_time_T
*_time_locale
,
508 char **lc_time_buf
, wctomb_p f_wctomb
,
511 wchar_t win_locale
[ENCODING_LEN
+ 1];
512 int ret
= __get_rfc5646_from_locale (name
, win_locale
);
515 if (!ret
&& !strcmp (charset
, "ASCII"))
518 # define MAX_TIME_BUFFER_SIZE 4096
520 char *new_lc_time_buf
= (char *) malloc (MAX_TIME_BUFFER_SIZE
);
521 const char *lc_time_end
= new_lc_time_buf
+ MAX_TIME_BUFFER_SIZE
;
523 if (!new_lc_time_buf
)
525 char *lc_time_ptr
= new_lc_time_buf
;
527 /* C.foo is just a copy of "C" with fixed charset. */
529 memcpy (_time_locale
, _C_time_locale
, sizeof (struct lc_time_T
));
531 _time_locale
->codeset
= lc_time_ptr
;
532 lc_time_ptr
= stpcpy (lc_time_ptr
, charset
) + 1;
536 char locale
[ENCODING_LEN
+ 1];
537 strcpy (locale
, name
);
538 /* Remove the charset from the locale and attach the modifier to the
539 language_TERRITORY part. */
540 char *c
= strchr (locale
, '.');
544 char *c2
= strchr (c
+ 1, '@');
545 /* Ignore @cjk* modifiers, they are newlib specials. */
546 if (c2
&& !is_cjk_modifier (c2
))
547 memmove (c
, c2
, strlen (c2
) + 1);
549 /* Now search in the alphabetically ordered lc_era array for the
551 lc_era_t locale_key
= { locale
, NULL
, NULL
, NULL
, NULL
, NULL
,
552 NULL
, NULL
, NULL
, NULL
, NULL
};
553 lc_era_t
*era
= (lc_era_t
*) bsearch ((void *) &locale_key
,
555 sizeof lc_era
/ sizeof *lc_era
,
556 sizeof *lc_era
, locale_cmp
);
559 /* Windows has a bug in "ja-JP" and "ko-KR" (but not in "ko-KP").
560 In these locales, strings returned for LOCALE_SABBREVMONTHNAME*
561 are missing the suffix representing a month.
563 A Japanese article describing the problem was
564 https://msdn.microsoft.com/ja-jp/library/cc422084.aspx, which is
566 https://web.archive.org/web/20110922195821/https://msdn.microsoft.com/ja-jp/library/cc422084.aspx
567 these days. Testing indicates that this problem is still present
570 The workaround is to use LOCALE_SMONTHNAME* in these locales,
571 even for the abbreviated month name. */
572 const LCTYPE mon_base
= !wcscmp (win_locale
, L
"ja-JP")
573 || !wcscmp (win_locale
, L
"ko-KR")
574 ? LOCALE_SMONTHNAME1
: LOCALE_SABBREVMONTHNAME1
;
575 for (int i
= 0; i
< 12; ++i
)
577 _time_locale
->wmon
[i
] = getlocaleinfo (time
, mon_base
+ i
);
578 _time_locale
->mon
[i
] = charfromwchar (time
, wmon
[i
]);
580 /* month and alt_month */
581 for (int i
= 0; i
< 12; ++i
)
583 _time_locale
->wmonth
[i
] = getlocaleinfo (time
,
584 LOCALE_SMONTHNAME1
+ i
);
585 _time_locale
->month
[i
] = _time_locale
->alt_month
[i
]
586 = charfromwchar (time
, wmonth
[i
]);
589 _time_locale
->wwday
[0] = getlocaleinfo (time
, LOCALE_SABBREVDAYNAME7
);
590 _time_locale
->wday
[0] = charfromwchar (time
, wwday
[0]);
591 for (int i
= 0; i
< 6; ++i
)
593 _time_locale
->wwday
[i
+ 1] = getlocaleinfo (time
,
594 LOCALE_SABBREVDAYNAME1
+ i
);
595 _time_locale
->wday
[i
+ 1] = charfromwchar (time
, wwday
[i
+ 1]);
598 _time_locale
->wweekday
[0] = getlocaleinfo (time
, LOCALE_SDAYNAME7
);
599 _time_locale
->weekday
[0] = charfromwchar (time
, wweekday
[0]);
600 for (int i
= 0; i
< 6; ++i
)
602 _time_locale
->wweekday
[i
+ 1] = getlocaleinfo (time
,
603 LOCALE_SDAYNAME1
+ i
);
604 _time_locale
->weekday
[i
+ 1] = charfromwchar (time
, wweekday
[i
+ 1]);
608 if (era
&& *era
->t_fmt
)
610 _time_locale
->wX_fmt
= (const wchar_t *) lc_time_ptr
;
611 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->wX_fmt
,
615 _time_locale
->wX_fmt
= eval_datetimefmt (LOCALE_STIMEFORMAT
, DT_DEFAULT
);
616 _time_locale
->X_fmt
= charfromwchar (time
, wX_fmt
);
618 if (era
&& *era
->d_fmt
)
620 _time_locale
->wx_fmt
= (const wchar_t *) lc_time_ptr
;
621 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->wx_fmt
,
625 _time_locale
->wx_fmt
= eval_datetimefmt (LOCALE_SSHORTDATE
, DT_DEFAULT
);
626 _time_locale
->x_fmt
= charfromwchar (time
, wx_fmt
);
628 if (era
&& *era
->d_t_fmt
)
630 _time_locale
->wc_fmt
= (const wchar_t *) lc_time_ptr
;
631 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->wc_fmt
,
636 _time_locale
->wc_fmt
= eval_datetimefmt (LOCALE_SLONGDATE
, DT_ABBREV
);
637 ((wchar_t *) lc_time_ptr
)[-1] = L
' ';
638 eval_datetimefmt (LOCALE_STIMEFORMAT
, DT_DEFAULT
);
640 _time_locale
->c_fmt
= charfromwchar (time
, wc_fmt
);
642 _time_locale
->wam_pm
[0] = getlocaleinfo (time
, LOCALE_S1159
);
643 _time_locale
->wam_pm
[1] = getlocaleinfo (time
, LOCALE_S2359
);
644 _time_locale
->am_pm
[0] = charfromwchar (time
, wam_pm
[0]);
645 _time_locale
->am_pm
[1] = charfromwchar (time
, wam_pm
[1]);
647 if (era
&& *era
->date_fmt
)
649 _time_locale
->wdate_fmt
= (const wchar_t *) lc_time_ptr
;
650 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->wdate_fmt
,
654 _time_locale
->wdate_fmt
= _time_locale
->wc_fmt
;
655 _time_locale
->date_fmt
= charfromwchar (time
, wdate_fmt
);
659 GetLocaleInfoEx (win_locale
, LOCALE_IDATE
, buf
, 80);
660 _time_locale
->md_order
= (const char *) lc_time_ptr
;
661 lc_time_ptr
= stpcpy (lc_time_ptr
, *buf
== L
'1' ? "dm" : "md") + 1;
666 _time_locale
->wampm_fmt
= (const wchar_t *) lc_time_ptr
;
667 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->wampm_fmt
,
668 era
->t_fmt_ampm
) + 1);
671 _time_locale
->wampm_fmt
= eval_datetimefmt (LOCALE_STIMEFORMAT
, DT_AMPM
);
672 _time_locale
->ampm_fmt
= charfromwchar (time
, wampm_fmt
);
676 /* Evaluate string length in target charset. Characters invalid in the
677 target charset are simply ignored, as on Linux. */
679 len
+= lc_wcstombs (f_wctomb
, NULL
, era
->era
, 0) + 1;
680 len
+= lc_wcstombs (f_wctomb
, NULL
, era
->era_d_fmt
, 0) + 1;
681 len
+= lc_wcstombs (f_wctomb
, NULL
, era
->era_d_t_fmt
, 0) + 1;
682 len
+= lc_wcstombs (f_wctomb
, NULL
, era
->era_t_fmt
, 0) + 1;
683 len
+= lc_wcstombs (f_wctomb
, NULL
, era
->alt_digits
, 0) + 1;
684 len
+= (wcslen (era
->era
) + 1) * sizeof (wchar_t);
685 len
+= (wcslen (era
->era_d_fmt
) + 1) * sizeof (wchar_t);
686 len
+= (wcslen (era
->era_d_t_fmt
) + 1) * sizeof (wchar_t);
687 len
+= (wcslen (era
->era_t_fmt
) + 1) * sizeof (wchar_t);
688 len
+= (wcslen (era
->alt_digits
) + 1) * sizeof (wchar_t);
690 /* Make sure data fits into the buffer */
691 if (lc_time_ptr
+ len
> lc_time_end
)
693 len
= lc_time_ptr
+ len
- new_lc_time_buf
;
694 char *tmp
= (char *) realloc (new_lc_time_buf
, len
);
699 if (tmp
!= new_lc_time_buf
)
700 rebase_locale_buf (_time_locale
, _time_locale
+ 1, tmp
,
701 new_lc_time_buf
, lc_time_ptr
);
702 lc_time_ptr
= tmp
+ (lc_time_ptr
- new_lc_time_buf
);
703 new_lc_time_buf
= tmp
;
704 lc_time_end
= new_lc_time_buf
+ len
;
711 _time_locale
->wera
= (const wchar_t *) lc_time_ptr
;
712 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->wera
,
714 _time_locale
->era
= charfromwchar (time
, wera
);
716 _time_locale
->wera_d_fmt
= (const wchar_t *) lc_time_ptr
;
717 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->wera_d_fmt
,
718 era
->era_d_fmt
) + 1);
719 _time_locale
->era_d_fmt
= charfromwchar (time
, wera_d_fmt
);
721 _time_locale
->wera_d_t_fmt
= (const wchar_t *) lc_time_ptr
;
722 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->wera_d_t_fmt
,
723 era
->era_d_t_fmt
) + 1);
724 _time_locale
->era_d_t_fmt
= charfromwchar (time
, wera_d_t_fmt
);
726 _time_locale
->wera_t_fmt
= (const wchar_t *) lc_time_ptr
;
727 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->wera_t_fmt
,
728 era
->era_t_fmt
) + 1);
729 _time_locale
->era_t_fmt
= charfromwchar (time
, wera_t_fmt
);
731 _time_locale
->walt_digits
= (const wchar_t *) lc_time_ptr
;
732 lc_time_ptr
= (char *) (wcpcpy ((wchar_t *) _time_locale
->walt_digits
,
733 era
->alt_digits
) + 1);
734 _time_locale
->alt_digits
= charfromwchar (time
, walt_digits
);
740 _time_locale
->wera_d_fmt
=
741 _time_locale
->wera_d_t_fmt
=
742 _time_locale
->wera_t_fmt
=
743 _time_locale
->walt_digits
= (const wchar_t *) lc_time_ptr
;
745 _time_locale
->era_d_fmt
=
746 _time_locale
->era_d_t_fmt
=
747 _time_locale
->era_t_fmt
=
748 _time_locale
->alt_digits
= (const char *) lc_time_ptr
;
749 /* Twice, to make sure wide char strings are correctly terminated. */
750 *lc_time_ptr
++ = '\0';
751 *lc_time_ptr
++ = '\0';
755 char *tmp
= (char *) realloc (new_lc_time_buf
, lc_time_ptr
- new_lc_time_buf
);
758 free (new_lc_time_buf
);
761 if (tmp
!= new_lc_time_buf
)
762 rebase_locale_buf (_time_locale
, _time_locale
+ 1, tmp
,
763 new_lc_time_buf
, lc_time_ptr
);
768 /* Called from newlib's setlocale() via __ctype_load_locale() if category
769 is LC_CTYPE. Returns LC_CTYPE values fetched from Windows locale data
770 in the structure pointed to by _ctype_locale. This is subsequently
771 accessed by functions like nl_langinfo, localeconv, printf, etc. */
773 __set_lc_ctype_from_win (const char *name
,
774 const struct lc_ctype_T
*_C_ctype_locale
,
775 struct lc_ctype_T
*_ctype_locale
,
776 char **lc_ctype_buf
, wctomb_p f_wctomb
,
777 const char *charset
, int mb_cur_max
)
779 wchar_t win_locale
[ENCODING_LEN
+ 1];
780 int ret
= __get_rfc5646_from_locale (name
, win_locale
);
783 if (!ret
&& !strcmp (charset
, "ASCII"))
786 # define MAX_CTYPE_BUFFER_SIZE 256
788 char *new_lc_ctype_buf
= (char *) malloc (MAX_CTYPE_BUFFER_SIZE
);
790 if (!new_lc_ctype_buf
)
792 char *lc_ctype_ptr
= new_lc_ctype_buf
;
793 /* C.foo is just a copy of "C" with fixed charset. */
795 memcpy (_ctype_locale
, _C_ctype_locale
, sizeof (struct lc_ctype_T
));
797 _ctype_locale
->codeset
= lc_ctype_ptr
;
798 lc_ctype_ptr
= stpcpy (lc_ctype_ptr
, charset
) + 1;
800 _ctype_locale
->mb_cur_max
= lc_ctype_ptr
;
801 *lc_ctype_ptr
++ = mb_cur_max
;
802 *lc_ctype_ptr
++ = '\0';
805 /* outdigits and woutdigits */
807 GetLocaleInfoEx (win_locale
, LOCALE_SNATIVEDIGITS
, digits
, 11);
808 for (int i
= 0; i
<= 9; ++i
)
812 /* Make sure the wchar_t's are always 2 byte aligned. */
813 if ((uintptr_t) lc_ctype_ptr
% 2)
815 wchar_t *woutdig
= (wchar_t *) lc_ctype_ptr
;
816 _ctype_locale
->woutdigits
[i
] = (const wchar_t *) woutdig
;
817 *woutdig
++ = digits
[i
];
819 lc_ctype_ptr
= (char *) woutdig
;
820 _ctype_locale
->outdigits
[i
] = lc_ctype_ptr
;
821 memset (&state
, 0, sizeof state
);
822 lc_ctype_ptr
+= f_wctomb (_REENT
, lc_ctype_ptr
, digits
[i
], &state
);
823 *lc_ctype_ptr
++ = '\0';
827 char *tmp
= (char *) realloc (new_lc_ctype_buf
,
828 lc_ctype_ptr
- new_lc_ctype_buf
);
831 free (new_lc_ctype_buf
);
834 if (tmp
!= new_lc_ctype_buf
)
835 rebase_locale_buf (_ctype_locale
, _ctype_locale
+ 1, tmp
,
836 new_lc_ctype_buf
, lc_ctype_ptr
);
841 /* Called from newlib's setlocale() via __numeric_load_locale() if category
842 is LC_NUMERIC. Returns LC_NUMERIC values fetched from Windows locale data
843 in the structure pointed to by _numeric_locale. This is subsequently
844 accessed by functions like nl_langinfo, localeconv, printf, etc. */
846 __set_lc_numeric_from_win (const char *name
,
847 const struct lc_numeric_T
*_C_numeric_locale
,
848 struct lc_numeric_T
*_numeric_locale
,
849 char **lc_numeric_buf
, wctomb_p f_wctomb
,
852 wchar_t win_locale
[ENCODING_LEN
+ 1];
853 int ret
= __get_rfc5646_from_locale (name
, win_locale
);
856 if (!ret
&& !strcmp (charset
, "ASCII"))
859 # define MAX_NUMERIC_BUFFER_SIZE 256
861 char *new_lc_numeric_buf
= (char *) malloc (MAX_NUMERIC_BUFFER_SIZE
);
862 const char *lc_numeric_end
= new_lc_numeric_buf
+ MAX_NUMERIC_BUFFER_SIZE
;
864 if (!new_lc_numeric_buf
)
866 char *lc_numeric_ptr
= new_lc_numeric_buf
;
867 /* C.foo is just a copy of "C" with fixed charset. */
869 memcpy (_numeric_locale
, _C_numeric_locale
, sizeof (struct lc_numeric_T
));
872 /* decimal_point and thousands_sep */
873 /* fa_IR. Windows decimal_point is slash, correct is dot */
874 if (!wcscmp (win_locale
, L
"fa-IR"))
876 _numeric_locale
->wdecimal_point
= setlocaleinfo (numeric
, L
'.');
877 _numeric_locale
->wthousands_sep
= setlocaleinfo (numeric
, L
',');
879 /* ps_AF. Windows decimal_point is dot, thousands_sep is comma,
880 correct are arabic separators. */
881 else if (!wcscmp (win_locale
, L
"ps-AF"))
883 _numeric_locale
->wdecimal_point
= setlocaleinfo (numeric
, 0x066b);
884 _numeric_locale
->wthousands_sep
= setlocaleinfo (numeric
, 0x066c);
888 _numeric_locale
->wdecimal_point
= getlocaleinfo (numeric
,
890 _numeric_locale
->wthousands_sep
= getlocaleinfo (numeric
,
893 _numeric_locale
->decimal_point
= charfromwchar (numeric
, wdecimal_point
);
894 _numeric_locale
->thousands_sep
= charfromwchar (numeric
, wthousands_sep
);
896 _numeric_locale
->grouping
= conv_grouping (win_locale
, LOCALE_SGROUPING
,
900 _numeric_locale
->codeset
= lc_numeric_ptr
;
901 lc_numeric_ptr
= stpcpy (lc_numeric_ptr
, charset
) + 1;
903 char *tmp
= (char *) realloc (new_lc_numeric_buf
,
904 lc_numeric_ptr
- new_lc_numeric_buf
);
907 free (new_lc_numeric_buf
);
910 if (tmp
!= new_lc_numeric_buf
)
911 rebase_locale_buf (_numeric_locale
, _numeric_locale
+ 1, tmp
,
912 new_lc_numeric_buf
, lc_numeric_ptr
);
913 *lc_numeric_buf
= tmp
;
917 /* Called from newlib's setlocale() via __monetary_load_locale() if category
918 is LC_MONETARY. Returns LC_MONETARY values fetched from Windows locale data
919 in the structure pointed to by _monetary_locale. This is subsequently
920 accessed by functions like nl_langinfo, localeconv, printf, etc. */
922 __set_lc_monetary_from_win (const char *name
,
923 const struct lc_monetary_T
*_C_monetary_locale
,
924 struct lc_monetary_T
*_monetary_locale
,
925 char **lc_monetary_buf
, wctomb_p f_wctomb
,
928 wchar_t win_locale
[ENCODING_LEN
+ 1];
929 int ret
= __get_rfc5646_from_locale (name
, win_locale
);
932 if (!ret
&& !strcmp (charset
, "ASCII"))
935 # define MAX_MONETARY_BUFFER_SIZE 512
937 char *new_lc_monetary_buf
= (char *) malloc (MAX_MONETARY_BUFFER_SIZE
);
938 const char *lc_monetary_end
= new_lc_monetary_buf
+ MAX_MONETARY_BUFFER_SIZE
;
940 if (!new_lc_monetary_buf
)
942 char *lc_monetary_ptr
= new_lc_monetary_buf
;
943 /* C.foo is just a copy of "C" with fixed charset. */
945 memcpy (_monetary_locale
, _C_monetary_locale
, sizeof (struct lc_monetary_T
));
948 /* int_curr_symbol */
949 _monetary_locale
->wint_curr_symbol
= getlocaleinfo (monetary
,
951 /* No spacing char means space. */
952 if (!_monetary_locale
->wint_curr_symbol
[3])
954 wchar_t *wc
= (wchar_t *) _monetary_locale
->wint_curr_symbol
+ 3;
957 lc_monetary_ptr
= (char *) wc
;
959 _monetary_locale
->int_curr_symbol
= charfromwchar (monetary
,
961 /* currency_symbol */
962 _monetary_locale
->wcurrency_symbol
= getlocaleinfo (monetary
,
964 /* As on Linux: If the currency_symbol can't be represented in the
965 given charset, use int_curr_symbol. */
966 if (lc_wcstombs (f_wctomb
, NULL
, _monetary_locale
->wcurrency_symbol
,
967 0, true) == (size_t) -1)
968 _monetary_locale
->currency_symbol
= _monetary_locale
->int_curr_symbol
;
970 _monetary_locale
->currency_symbol
= charfromwchar (monetary
,
972 /* mon_decimal_point and mon_thousands_sep */
973 /* fa_IR or ps_AF. Windows mon_decimal_point is slash and comma,
974 mon_thousands_sep is comma and dot, correct
975 are arabic separators. */
976 if (!wcscmp (win_locale
, L
"fa-IR")
977 || !wcscmp (win_locale
, L
"ps-AF"))
979 _monetary_locale
->wmon_decimal_point
= setlocaleinfo (monetary
,
981 _monetary_locale
->wmon_thousands_sep
= setlocaleinfo (monetary
,
986 _monetary_locale
->wmon_decimal_point
= getlocaleinfo (monetary
,
987 LOCALE_SMONDECIMALSEP
);
988 _monetary_locale
->wmon_thousands_sep
= getlocaleinfo (monetary
,
989 LOCALE_SMONTHOUSANDSEP
);
991 _monetary_locale
->mon_decimal_point
= charfromwchar (monetary
,
993 _monetary_locale
->mon_thousands_sep
= charfromwchar (monetary
,
996 _monetary_locale
->mon_grouping
= conv_grouping (win_locale
,
1000 _monetary_locale
->wpositive_sign
= getlocaleinfo (monetary
,
1001 LOCALE_SPOSITIVESIGN
);
1002 _monetary_locale
->positive_sign
= charfromwchar (monetary
, wpositive_sign
);
1004 _monetary_locale
->wnegative_sign
= getlocaleinfo (monetary
,
1005 LOCALE_SNEGATIVESIGN
);
1006 _monetary_locale
->negative_sign
= charfromwchar (monetary
, wnegative_sign
);
1007 /* int_frac_digits */
1008 *lc_monetary_ptr
= (char) getlocaleint (LOCALE_IINTLCURRDIGITS
);
1009 _monetary_locale
->int_frac_digits
= lc_monetary_ptr
++;
1011 *lc_monetary_ptr
= (char) getlocaleint (LOCALE_ICURRDIGITS
);
1012 _monetary_locale
->frac_digits
= lc_monetary_ptr
++;
1013 /* p_cs_precedes and int_p_cs_precedes */
1014 *lc_monetary_ptr
= (char) getlocaleint (LOCALE_IPOSSYMPRECEDES
);
1015 _monetary_locale
->p_cs_precedes
1016 = _monetary_locale
->int_p_cs_precedes
= lc_monetary_ptr
++;
1017 /* p_sep_by_space and int_p_sep_by_space */
1018 *lc_monetary_ptr
= (char) getlocaleint (LOCALE_IPOSSEPBYSPACE
);
1019 _monetary_locale
->p_sep_by_space
1020 = _monetary_locale
->int_p_sep_by_space
= lc_monetary_ptr
++;
1021 /* n_cs_precedes and int_n_cs_precedes */
1022 *lc_monetary_ptr
= (char) getlocaleint (LOCALE_INEGSYMPRECEDES
);
1023 _monetary_locale
->n_cs_precedes
1024 = _monetary_locale
->int_n_cs_precedes
= lc_monetary_ptr
++;
1025 /* n_sep_by_space and int_n_sep_by_space */
1026 *lc_monetary_ptr
= (char) getlocaleint (LOCALE_INEGSEPBYSPACE
);
1027 _monetary_locale
->n_sep_by_space
1028 = _monetary_locale
->int_n_sep_by_space
= lc_monetary_ptr
++;
1029 /* p_sign_posn and int_p_sign_posn */
1030 *lc_monetary_ptr
= (char) getlocaleint (LOCALE_IPOSSIGNPOSN
);
1031 _monetary_locale
->p_sign_posn
1032 = _monetary_locale
->int_p_sign_posn
= lc_monetary_ptr
++;
1033 /* n_sign_posn and int_n_sign_posn */
1034 *lc_monetary_ptr
= (char) getlocaleint (LOCALE_INEGSIGNPOSN
);
1035 _monetary_locale
->n_sign_posn
1036 = _monetary_locale
->int_n_sign_posn
= lc_monetary_ptr
++;
1039 _monetary_locale
->codeset
= lc_monetary_ptr
;
1040 lc_monetary_ptr
= stpcpy (lc_monetary_ptr
, charset
) + 1;
1042 char *tmp
= (char *) realloc (new_lc_monetary_buf
,
1043 lc_monetary_ptr
- new_lc_monetary_buf
);
1046 free (new_lc_monetary_buf
);
1049 if (tmp
!= new_lc_monetary_buf
)
1050 rebase_locale_buf (_monetary_locale
, _monetary_locale
+ 1, tmp
,
1051 new_lc_monetary_buf
, lc_monetary_ptr
);
1052 *lc_monetary_buf
= tmp
;
1057 __set_lc_messages_from_win (const char *name
,
1058 const struct lc_messages_T
*_C_messages_locale
,
1059 struct lc_messages_T
*_messages_locale
,
1060 char **lc_messages_buf
,
1061 wctomb_p f_wctomb
, const char *charset
)
1063 wchar_t win_locale
[ENCODING_LEN
+ 1];
1064 int ret
= __get_rfc5646_from_locale (name
, win_locale
);
1067 if (!ret
&& !strcmp (charset
, "ASCII"))
1070 char locale
[ENCODING_LEN
+ 1];
1072 lc_msg_t
*msg
= NULL
;
1074 /* C.foo is just a copy of "C" with fixed charset. */
1076 memcpy (_messages_locale
, _C_messages_locale
, sizeof (struct lc_messages_T
));
1079 strcpy (locale
, name
);
1080 /* Remove the charset from the locale and attach the modifier to the
1081 language_TERRITORY part. */
1082 c
= strchr (locale
, '.');
1086 c2
= strchr (c
+ 1, '@');
1087 /* Ignore @cjk* modifiers, they are newlib specials. */
1088 if (c2
&& !is_cjk_modifier (c2
))
1089 memmove (c
, c2
, strlen (c2
) + 1);
1091 /* Now search in the alphabetically ordered lc_msg array for the
1093 lc_msg_t locale_key
= { locale
, NULL
, NULL
, NULL
, NULL
};
1094 msg
= (lc_msg_t
*) bsearch ((void *) &locale_key
, (void *) lc_msg
,
1095 sizeof lc_msg
/ sizeof *lc_msg
,
1096 sizeof *lc_msg
, locale_cmp
);
1101 /* Evaluate string length in target charset. Characters invalid in the
1102 target charset are simply ignored, as on Linux. */
1104 len
+= (strlen (charset
) + 1);
1107 len
+= lc_wcstombs (f_wctomb
, NULL
, msg
->yesexpr
, 0) + 1;
1108 len
+= lc_wcstombs (f_wctomb
, NULL
, msg
->noexpr
, 0) + 1;
1109 len
+= lc_wcstombs (f_wctomb
, NULL
, msg
->yesstr
, 0) + 1;
1110 len
+= lc_wcstombs (f_wctomb
, NULL
, msg
->nostr
, 0) + 1;
1111 len
+= (wcslen (msg
->yesexpr
) + 1) * sizeof (wchar_t);
1112 len
+= (wcslen (msg
->noexpr
) + 1) * sizeof (wchar_t);
1113 len
+= (wcslen (msg
->yesstr
) + 1) * sizeof (wchar_t);
1114 len
+= (wcslen (msg
->nostr
) + 1) * sizeof (wchar_t);
1119 char *new_lc_messages_buf
= (char *) malloc (len
);
1120 const char *lc_messages_end
= new_lc_messages_buf
+ len
;
1122 if (!new_lc_messages_buf
)
1125 c
= new_lc_messages_buf
;
1127 _messages_locale
->codeset
= c
;
1128 c
= stpcpy (c
, charset
) + 1;
1131 _messages_locale
->yesexpr
= (const char *) c
;
1132 len
= lc_wcstombs (f_wctomb
, c
, msg
->yesexpr
, lc_messages_end
- c
);
1133 _messages_locale
->noexpr
= (const char *) (c
+= len
+ 1);
1134 len
= lc_wcstombs (f_wctomb
, c
, msg
->noexpr
, lc_messages_end
- c
);
1135 _messages_locale
->yesstr
= (const char *) (c
+= len
+ 1);
1136 len
= lc_wcstombs (f_wctomb
, c
, msg
->yesstr
, lc_messages_end
- c
);
1137 _messages_locale
->nostr
= (const char *) (c
+= len
+ 1);
1138 len
= lc_wcstombs (f_wctomb
, c
, msg
->nostr
, lc_messages_end
- c
);
1140 if ((uintptr_t) c
% 1)
1142 wchar_t *wc
= (wchar_t *) c
;
1143 _messages_locale
->wyesexpr
= (const wchar_t *) wc
;
1144 wc
= wcpcpy (wc
, msg
->yesexpr
) + 1;
1145 _messages_locale
->wnoexpr
= (const wchar_t *) wc
;
1146 wc
= wcpcpy (wc
, msg
->noexpr
) + 1;
1147 _messages_locale
->wyesstr
= (const wchar_t *) wc
;
1148 wc
= wcpcpy (wc
, msg
->yesstr
) + 1;
1149 _messages_locale
->wnostr
= (const wchar_t *) wc
;
1150 wcpcpy (wc
, msg
->nostr
);
1152 *lc_messages_buf
= new_lc_messages_buf
;
1156 const struct lc_collate_T _C_collate_locale
=
1163 /* Called from newlib's setlocale() if category is LC_COLLATE. Stores
1164 LC_COLLATE locale information. This is subsequently accessed by the
1165 below functions strcoll, strxfrm, wcscoll, wcsxfrm. */
/* Install LC_COLLATE data for NAME into LOCALE.

   LOCALE   locale object whose lc_cat[LC_COLLATE] slot is replaced.
   NAME     locale name handed to __get_rfc5646_from_locale.
   F_MBTOWC multibyte-to-wide conversion function, stored as mbtowc_p.
   CHARSET  codeset name, copied into the new collate record.

   A freshly calloc'ed struct lc_collate_T receives the Windows locale
   name, the conversion function and the codeset.  The slot's ptr is set
   to that record, or to _C_collate_locale if the Windows locale name is
   empty; the slot's buf is set to a dummy one-byte allocation.
   NOTE(review): the error handling around the allocations and the
   return values are not visible here -- confirm before relying on them. */
1167 __collate_load_locale (struct __locale_t
*locale
, const char *name
,
1168 void *f_mbtowc
, const char *charset
)
1171 struct lc_collate_T
*cop
= NULL
;
1173 wchar_t win_locale
[ENCODING_LEN
+ 1];
/* Presumably fills win_locale with the Windows name matching NAME. */
1174 int ret
= __get_rfc5646_from_locale (name
, win_locale
);
1179 bufp
= (char *) malloc (1); /* dummy */
1182 cop
= (struct lc_collate_T
*) calloc (1, sizeof (struct lc_collate_T
));
/* Record the Windows locale name, the converter and the codeset. */
1188 wcscpy (cop
->win_locale
, win_locale
);
1189 cop
->mbtowc
= (mbtowc_p
) f_mbtowc
;
1190 stpcpy (cop
->codeset
, charset
);
/* Keep a copy of the previous slot so its ptr can be freed once the new
   data has been installed. */
1192 struct __lc_cats tmp
= locale
->lc_cat
[LC_COLLATE
];
1193 locale
->lc_cat
[LC_COLLATE
].ptr
= !win_locale
[0] ? &_C_collate_locale
: cop
;
1194 locale
->lc_cat
[LC_COLLATE
].buf
= bufp
;
1195 /* If buf is not NULL, both pointers have been alloc'ed */
1198 free ((void *) tmp
.ptr
);
1204 /* We use the Windows functions for locale-specific string comparison and
1205 transformation. The advantage is that we don't need any files with
1206 collation information. */
/* Compare wide strings WS1 and WS2 using the collation of LOCALE.

   If the Windows locale name of LOCALE's collate data is empty, this is
   a plain wcscmp.  Otherwise both NUL-terminated strings are handed to
   CompareStringEx and the result is rebased around CSTR_EQUAL, so equal
   strings yield 0. */
1209 wcscoll_l (const wchar_t *__restrict ws1
, const wchar_t *__restrict ws2
,
1210 struct __locale_t
*locale
)
1213 const wchar_t *collate_locale
= __get_collate_locale (locale
)->win_locale
;
1215 if (!collate_locale
[0])
1216 return wcscmp (ws1
, ws2
);
1217 ret
= CompareStringEx (collate_locale
, 0, ws1
, -1, ws2
, -1, NULL
, NULL
, 0);
1220 return ret
- CSTR_EQUAL
;
/* Compare WS1 and WS2 using the collation of the current locale;
   forwards to wcscoll_l. */
1224 wcscoll (const wchar_t *__restrict ws1
, const wchar_t *__restrict ws2
)
1226 return wcscoll_l (ws1
, ws2
, __get_current_locale ());
/* Compare multibyte strings S1 and S2 using the collation of LOCALE.

   If the Windows locale name of LOCALE's collate data is empty, this is
   a plain strcmp.  Otherwise both strings are converted to wide strings
   with the collate data's mbtowc function, compared with
   CompareStringEx, and the result is rebased around CSTR_EQUAL, so equal
   strings yield 0. */
1230 strcoll_l (const char *__restrict s1
, const char *__restrict s2
,
1231 struct __locale_t
*locale
)
1237 const wchar_t *collate_locale
= __get_collate_locale (locale
)->win_locale
;
1239 if (!collate_locale
[0])
1240 return strcmp (s1
, s2
);
1241 mbtowc_p collate_mbtowc
= __get_collate_locale (locale
)->mbtowc
;
/* The call with a NULL destination presumably returns the required
   length; one is added for the terminator.  Buffers of more than
   NT_MAX_PATH wide chars are taken from malloc.
   NOTE(review): no NULL check on the malloc results is visible here --
   confirm. */
1242 n1
= lc_mbstowcs (collate_mbtowc
, NULL
, s1
, 0) + 1;
1243 ws1
= (n1
> NT_MAX_PATH
? (wchar_t *) malloc (n1
* sizeof (wchar_t))
1245 lc_mbstowcs (collate_mbtowc
, ws1
, s1
, n1
);
1246 n2
= lc_mbstowcs (collate_mbtowc
, NULL
, s2
, 0) + 1;
1247 ws2
= (n2
> NT_MAX_PATH
? (wchar_t *) malloc (n2
* sizeof (wchar_t))
1249 lc_mbstowcs (collate_mbtowc
, ws2
, s2
, n2
);
1250 ret
= CompareStringEx (collate_locale
, 0, ws1
, -1, ws2
, -1, NULL
, NULL
, 0);
/* The same size tests as above select the buffers that came from
   malloc. */
1251 if (n1
> NT_MAX_PATH
)
1253 if (n2
> NT_MAX_PATH
)
1257 return ret
- CSTR_EQUAL
;
/* Compare S1 and S2 using the collation of the current locale;
   forwards to strcoll_l. */
1261 strcoll (const char *__restrict s1
, const char *__restrict s2
)
1263 return strcoll_l (s1
, s2
, __get_current_locale ());
1266 /* BSD. Used from glob.cc, fnmatch.c and regcomp.c. */
/* Compare the single characters C1 and C2 with wcscoll.

   Each character is placed in its own NUL-terminated three-element
   wchar_t buffer; the surrogate pair computation below is used for
   values that do not fit into one UTF-16 unit. */
1268 __wcollate_range_cmp (wint_t c1
, wint_t c2
)
1270 wchar_t s1
[3] = { (wchar_t) c1
, L
'\0', L
'\0' };
1271 wchar_t s2
[3] = { (wchar_t) c2
, L
'\0', L
'\0' };
1273 /* Handle Unicode values >= 0x10000, convert to surrogate pair */
/* High surrogate: top 10 bits of the offset from 0x10000, plus 0xd800.
   Low surrogate: bottom 10 bits of that offset, plus 0xdc00. */
1276 s1
[0] = ((c1
- 0x10000) >> 10) + 0xd800;
1277 s1
[1] = ((c1
- 0x10000) & 0x3ff) + 0xdc00;
1281 s2
[0] = ((c2
- 0x10000) >> 10) + 0xd800;
1282 s2
[1] = ((c2
- 0x10000) & 0x3ff) + 0xdc00;
1284 return wcscoll (s1
, s2
);
1287 /* Not so much BSD. Used from glob.cc, fnmatch.c and regcomp.c.
1289 The args are pointers to wint_t strings. This allows comparing
1290 against collating symbols. */
/* Compare the wint_t sequences C1 (C1LEN elements) and C2 (C2LEN
   elements) in the current locale.

   Both sequences are converted with wcintowcs into zero-initialized
   wchar_t buffers holding two units per input element plus a
   terminator, then compared with wcscoll_l.  The case of the first
   element of each sequence is examined before the conversion. */
1292 __wscollate_range_cmp (wint_t *c1
, wint_t *c2
,
1293 size_t c1len
, size_t c2len
)
1295 wchar_t s1
[c1len
* 2 + 1] = { 0 }; /* # of chars if all are surrogates */
1296 wchar_t s2
[c2len
* 2 + 1] = { 0 };
1298 /* wcscoll() ignores case in many locales, but we don't want that
1300 if ((iswupper (*c1
) && !iswupper (*c2
))
1301 || (iswlower (*c1
) && !iswlower (*c2
)))
1304 wcintowcs (s1
, c1
, c1len
);
1305 wcintowcs (s2
, c2
, c2len
);
1306 return wcscoll_l (s1
, s2
, __get_current_locale ());
/* Number of entries in the collating_element table and size of one
   entry; both are passed to bsearch when the table is searched. */
1309 const size_t ce_size
= sizeof collating_element
/ sizeof *collating_element
;
1310 const size_t ce_e_size
= sizeof *collating_element
;
1312 /* Check if UTF-32 input character `test' is in the same equivalence class
1313 as UTF-32 character 'eqv'.
1314 Note that we only recognize input in Unicode normalization form C, that
1315 is, we expect all letters to be composed. A single character is all we
1317 To check equivalence, decompose pattern letter and input letter into
1318 normalization form KD and check the base character for equality. Also,
1319 convert all digits to the ASCII digits 0 - 9 and compare. */
1321 is_unicode_equiv (wint_t test
, wint_t eqv
)
1323 wchar_t decomp_testc
[24] = { 0 };
1324 wchar_t decomp_eqvc
[24] = { 0 };
1325 wchar_t testc
[3] = { 0 };
1326 wchar_t eqvc
[3] = { 0 };
1328 /* For equivalence classes, case doesn't matter. However, be careful.
1329 Only convert chars which have a "upper" to "lower". */
1331 eqv
= towlower (eqv
);
1332 if (iswupper (test
))
1333 test
= towlower (test
);
1334 /* Convert to UTF-16 string */
1335 if (eqv
> 0x10000) {
1336 eqvc
[0] = ((eqv
- 0x10000) >> 10) + 0xd800;
1337 eqvc
[1] = ((eqv
- 0x10000) & 0x3ff) + 0xdc00;
1340 if (test
> 0x10000) {
1341 testc
[0] = ((test
- 0x10000) >> 10) + 0xd800;
1342 testc
[1] = ((test
- 0x10000) & 0x3ff) + 0xdc00;
1345 /* Convert to decomposed form */
1346 FoldStringW (MAP_COMPOSITE
| MAP_FOLDCZONE
| MAP_FOLDDIGITS
,
1347 eqvc
, -1, decomp_eqvc
, 24);
1348 FoldStringW (MAP_COMPOSITE
| MAP_FOLDCZONE
| MAP_FOLDDIGITS
,
1349 testc
, -1, decomp_testc
, 24);
1350 /* If they are equivalent, the base char must be the same. */
1351 if (decomp_eqvc
[0] != decomp_testc
[0])
1353 /* If it's a surrogate pair, check the second char, too */
1354 if (decomp_eqvc
[0] >= 0xd800 && decomp_eqvc
[0] <= 0xdbff &&
1355 decomp_eqvc
[1] != decomp_testc
[1])
/* Comparison function handed to bsearch by is_unicode_coll_elem.

   KEY and ARRAY_MEMBER both point to collating_element_t objects.  The
   element strings are compared with wcicmp first.  If they are equal
   and the array member names a locale, that locale is compared against
   the beginning of the key's locale, over the length of the array
   member's locale only. */
1361 comp_coll_elem (const void *key
, const void *array_member
)
1363 collating_element_t
*ckey
= (collating_element_t
*) key
;
1364 collating_element_t
*carray_member
= (collating_element_t
*) array_member
;
1366 int ret
= wcicmp ((const wint_t *) ckey
->element
,
1367 (const wint_t *) carray_member
->element
);
1368 /* The locale in the collating_element array never has a codeset
1369 attached. So the length of the collating_element locale is
1370 always <= length of the key locale, and that's all we need to
1371 check. Also, if the collating_element locale is empty, we're
1373 if (ret
== 0 && carray_member
->locale
[0])
1374 ret
= strncmp (ckey
->locale
, carray_member
->locale
,
1375 strlen (carray_member
->locale
));
/* Look up the wint_t string TEST in the collating_element table.

   The search key consists of TEST and the LC_COLLATE entry of the
   current locale's categories array.  The table is searched with
   bsearch, using comp_coll_elem as comparison function.  A TEST of
   length 1 (per wcilen) is special-cased before the search. */
1380 is_unicode_coll_elem (const wint_t *test
)
1382 collating_element_t ct
= {
1383 (const char32_t
*) test
,
1384 __get_current_locale ()->categories
[LC_COLLATE
]
1386 collating_element_t
*cmatch
;
1388 if (wcilen (test
) == 1)
1390 cmatch
= (collating_element_t
*)
1391 bsearch (&ct
, collating_element
, ce_size
, ce_e_size
, comp_coll_elem
);
/* Prefix variant of comp_coll_elem.

   The element strings are compared with wcincmp over the length of the
   array member's element only, so a key which merely starts with a
   table element compares equal to it.  If the elements match and the
   array member names a locale, that locale is compared against the
   beginning of the key's locale.
   NOTE(review): presumably the comparison function passed to bsearch in
   next_unicode_char -- confirm. */
1396 comp_coll_elem_n (const void *key
, const void *array_member
)
1398 collating_element_t
*ckey
= (collating_element_t
*) key
;
1399 collating_element_t
*carray_member
= (collating_element_t
*) array_member
;
1401 int ret
= wcincmp ((const wint_t *) ckey
->element
,
1402 (const wint_t *) carray_member
->element
,
1403 wcilen ((const wint_t *) carray_member
->element
));
1404 /* The locale in the collating_element array never has a codeset
1405 attached. So the length of the collating_element locale is
1406 always <= length of the key locale, and that's all we need to
1407 check. Also, if the collating_element locale is empty, we're
1409 if (ret
== 0 && carray_member
->locale
[0])
1410 ret
= strncmp (ckey
->locale
, carray_member
->locale
,
1411 strlen (carray_member
->locale
));
1415 /* Return the number of UTF-32 chars making up the next full character in
1416 inp, taking valid collation elements in the current locale into account. */
/* INP points to a wint_t string.  If it is longer than one element, the
   collating_element table is searched with a key built from INP and the
   LC_COLLATE entry of the current locale's categories array.  For a
   matching table entry, the length of that entry's element string is
   returned. */
1418 next_unicode_char (wint_t *inp
)
1420 collating_element_t ct
= {
1421 (const char32_t
*) inp
,
1422 __get_current_locale ()->categories
[LC_COLLATE
]
1424 collating_element_t
*cmatch
;
1426 if (wcilen (inp
) > 1)
1428 cmatch
= (collating_element_t
*)
1429 bsearch (&ct
, collating_element
, ce_size
, ce_e_size
,
1432 return wcilen ((const wint_t *) cmatch
->element
);
/* Transform wide string WS2 into a sort key for LOCALE and store it in
   WS1, which has room for WSN wide chars.

   If the Windows locale name of LOCALE's collate data is empty, WS2 is
   just copied with wcslcpy.  Otherwise LCMapStringEx creates the sort
   key in WS1; the buffer size is passed as byte count.  The returned
   count is converted to wchar_t units and every 16 bit unit of the
   result is byte swapped.  When the first call did not succeed with
   ERROR_INSUFFICIENT_BUFFER, LCMapStringEx is called again without a
   destination buffer to get the required size. */
1438 wcsxfrm_l (wchar_t *__restrict ws1
, const wchar_t *__restrict ws2
, size_t wsn
,
1439 struct __locale_t
*locale
)
1442 const wchar_t *collate_locale
= __get_collate_locale (locale
)->win_locale
;
1444 if (!collate_locale
[0])
1445 return wcslcpy (ws1
, ws2
, wsn
);
1446 /* Don't use LCMAP_SORTKEY in conjunction with LCMAP_BYTEREV. The cchDest
1447 parameter is used as byte count with LCMAP_SORTKEY but as char count with
1449 ret
= LCMapStringEx (collate_locale
, LCMAP_SORTKEY
, ws2
, -1, ws1
,
1450 wsn
* sizeof (wchar_t), NULL
, NULL
, 0);
1453 ret
/= sizeof (wchar_t);
1456 /* Byte swap the array ourselves here. */
1457 for (size_t idx
= 0; idx
< ret
; ++idx
)
1458 ws1
[idx
] = __builtin_bswap16 (ws1
[idx
]);
1459 /* LCMapStringW returns byte count including the terminating NUL char.
1460 wcsxfrm is supposed to return length in wchar_t excluding the NUL.
1461 Since the array is only single byte NUL-terminated yet, make sure
1462 the result is wchar_t-NUL terminated. */
1468 if (GetLastError () != ERROR_INSUFFICIENT_BUFFER
)
1472 ret
= LCMapStringEx (collate_locale
, LCMAP_SORTKEY
, ws2
, -1,
1473 NULL
, 0, NULL
, NULL
, 0);
1475 wsn
= ret
/ sizeof (wchar_t);
/* Transform WS2 into WS1 (room for WSN wide chars) for the current
   locale; forwards to wcsxfrm_l. */
1481 wcsxfrm (wchar_t *__restrict ws1
, const wchar_t *__restrict ws2
, size_t wsn
)
1483 return wcsxfrm_l (ws1
, ws2
, wsn
, __get_current_locale ());
/* Transform multibyte string S2 into a sort key for LOCALE and store it
   in S1, which has room for SN bytes.

   If the Windows locale name of LOCALE's collate data is empty, S2 is
   just copied with strlcpy.  Otherwise S2 is converted to a wide string
   with the collate data's mbtowc function and LCMapStringEx writes the
   sort key directly into S1.  When the first call did not succeed with
   ERROR_INSUFFICIENT_BUFFER, LCMapStringEx is called again without a
   destination buffer to get the required size. */
1487 strxfrm_l (char *__restrict s1
, const char *__restrict s2
, size_t sn
,
1488 struct __locale_t
*locale
)
1494 const wchar_t *collate_locale
= __get_collate_locale (locale
)->win_locale
;
1496 if (!collate_locale
[0])
1497 return strlcpy (s1
, s2
, sn
);
1498 mbtowc_p collate_mbtowc
= __get_collate_locale (locale
)->mbtowc
;
/* The call with a NULL destination presumably returns the required
   length; one is added for the terminator.  A buffer of more than
   NT_MAX_PATH wide chars is taken from malloc. */
1499 n2
= lc_mbstowcs (collate_mbtowc
, NULL
, s2
, 0) + 1;
1500 ws2
= (n2
> NT_MAX_PATH
? (wchar_t *) malloc (n2
* sizeof (wchar_t))
1504 lc_mbstowcs (collate_mbtowc
, ws2
, s2
, n2
);
1505 /* The sort key is a NUL-terminated byte string. */
1506 ret
= LCMapStringEx (collate_locale
, LCMAP_SORTKEY
, ws2
, -1,
1507 (PWCHAR
) s1
, sn
, NULL
, NULL
, 0);
1512 if (!ws2
|| GetLastError () != ERROR_INSUFFICIENT_BUFFER
)
1515 ret
= LCMapStringEx (collate_locale
, LCMAP_SORTKEY
, ws2
, -1,
1516 NULL
, 0, NULL
, NULL
, 0);
/* Same size test as for the allocation above, guarded against a failed
   allocation. */
1518 if (ws2
&& n2
> NT_MAX_PATH
)
1520 /* LCMapStringW returns byte count including the terminating NUL character.
1521 strxfrm is supposed to return length excluding the NUL. */
/* Transform S2 into S1 (room for SN bytes) for the current locale;
   forwards to strxfrm_l. */
1526 strxfrm (char *__restrict s1
, const char *__restrict s2
, size_t sn
)
1528 return strxfrm_l (s1
, s2
, sn
, __get_current_locale ());
1531 /* Fetch default ANSI codepage from locale info and generate a setlocale
1532 compatible character set code. Called from newlib's setlocale(), if the
1533 charset isn't given explicitly in the POSIX compatible locale specifier. */
/* Store the default charset name for locale LOC in CHARSET.

   LOC is copied into a local buffer, with an explicit codeset (the part
   from '.' on) replaced by the "@modifier" part of LOC, if any.  The
   default_codeset table is searched for the result with bsearch and
   locale_cmp; a match provides the charset.  Otherwise the locale is
   converted with __get_rfc5646_from_locale and GetLocaleInfoEx is asked
   for the default ANSI codepage of the resulting Windows locale, which
   is then translated into a charset name.
   NOTE(review): assumes LOC fits into ENCODING_LEN + 1 bytes, since it
   is copied with stpcpy -- confirm against the caller. */
1535 __set_charset_from_locale (const char *loc
, char *charset
)
1537 wchar_t win_locale
[ENCODING_LEN
+ 1];
1538 char locale
[ENCODING_LEN
+ 1];
1543 /* Cut out explicit codeset */
1544 stpcpy (locale
, loc
);
1545 modifier
= strchr (loc
, '@');
1546 if ((c
= strchr (locale
, '.')))
1547 stpcpy (c
, modifier
?: "");
1548 /* Ignore @cjk* modifiers, they are newlib specials. */
1549 modifier
= strchr (locale
, '@');
1550 if (modifier
&& is_cjk_modifier (modifier
))
/* Only the locale name is set in the search key. */
1553 default_codeset_t srch_dc
= { locale
, NULL
};
1554 default_codeset_t
*dc
= (default_codeset_t
*)
1555 bsearch ((void *) &srch_dc
, (void *) default_codeset
,
1556 sizeof default_codeset
/ sizeof *default_codeset
,
1557 sizeof *default_codeset
, locale_cmp
);
1560 stpcpy (charset
, dc
->codeset
);
1564 /* "C" locale, or invalid locale? */
1565 if (__get_rfc5646_from_locale (locale
, win_locale
) <= 0)
1567 else if (GetLocaleInfoEx (win_locale
,
1568 LOCALE_IDEFAULTANSICODEPAGE
| LOCALE_RETURN_NUMBER
,
1569 (PWCHAR
) &cp
, sizeof cp
))
1571 /* Translate codepage and lcid to a charset closely aligned with the default
1572 charsets defined in Glibc. */
1622 /* Some (pretty new) EU locales don't exist in GLibc and haven't been
1623 caught above. Check for @euro modifier again and make these locales
1624 always use ISO-8859-15. */
1625 if (modifier
&& !strcmp (modifier
+ 1, "euro"))
1631 stpcpy (charset
, cs
);
1634 /* Called from fhandler_tty::setup_locale. Set a codepage which reflects the
1635 internal charset setting. This is *not* necessarily the Windows
1636 codepage connected to a locale by default, so we have to set this
/* Derive a Windows codepage number from the charset name of the global
   locale.  The codepage starts out as CP_UTF8 and is only changed for
   the charset names handled in the switch below, which is keyed on
   single characters of the name. */
1639 __eval_codepage_from_internal_charset ()
1641 const char *charset
= __locale_charset (__get_global_locale ());
1642 UINT codepage
= CP_UTF8
; /* Default UTF8 */
1644 /* The internal charset names are well defined, so we can use shortcuts. */
1647 case 'B': /* BIG5 */
/* The digits following the two-character "CP" prefix are the codepage
   number. */
1650 case 'C': /* CPxxx */
1651 codepage
= strtoul (charset
+ 2, NULL
, 10);
1653 case 'E': /* EUCxx */
1656 case 'J': /* EUCJP */
1659 case 'K': /* EUCKR */
1662 case 'C': /* EUCCN */
/* Of the three names, only GB18030 has '1' as third character. */
1667 case 'G': /* GBK/GB2312/GB18030 */
1668 codepage
= (charset
[2] == '1') ? 54936 : 936;
/* The number following the nine-character "ISO-8859-" prefix is added
   to 28590. */
1670 case 'I': /* ISO-8859-x */
1671 codepage
= strtoul (charset
+ 9, NULL
, 10) + 28590;
1673 case 'S': /* SJIS */
1676 default: /* All set to UTF8 already */
1682 /* This function is called from newlib's loadlocale if the locale identifier
1683 was invalid, one way or the other. It looks for the file
1685 /usr/share/locale/locale.alias
1687 which is part of the gettext package, and if it finds the locale alias
1688 in that file, it replaces the locale with the correct locale string from
1691 If successful, it returns a pointer to new_locale, NULL otherwise.*/
/* Search /usr/share/locale/locale.alias for the alias LOCALE.

   LOCALE is converted to a wide string and compared with the alias of
   every line in the file.  Each line is split into two fields separated
   by blanks or tabs, the alias and its replacement.  On a match, the
   replacement is copied to NEW_LOCALE.  Replacements longer than
   ENCODING_LEN are checked for before the comparison. */
1693 __set_locale_from_locale_alias (const char *locale
, char *new_locale
)
1695 wchar_t wlocale
[ENCODING_LEN
+ 1];
1696 wchar_t walias
[ENCODING_LEN
+ 1];
1697 #define LOCALE_ALIAS_LINE_LEN 255
1698 char alias_buf
[LOCALE_ALIAS_LINE_LEN
+ 1], *c
;
1700 const char *alias
, *replace
;
1703 FILE *fp
= fopen ("/usr/share/locale/locale.alias", "rt");
1706 /* The incoming locale is given in the application charset, or in
1707 the Cygwin internal charset. We try both. */
1708 if (mbstowcs (wlocale
, locale
, ENCODING_LEN
+ 1) == (size_t) -1)
1709 sys_mbstowcs (wlocale
, ENCODING_LEN
+ 1, locale
);
/* Make sure the wide string is terminated even if the conversion filled
   the entire buffer. */
1710 wlocale
[ENCODING_LEN
] = L
'\0';
1711 /* Ignore @cjk* modifiers, they are newlib specials. */
1712 wc
= wcschr (wlocale
, L
'@');
1713 if (wc
&& w_is_cjk_modifier (wc
))
1715 while (fgets (alias_buf
, LOCALE_ALIAS_LINE_LEN
+ 1, fp
))
1717 alias_buf
[LOCALE_ALIAS_LINE_LEN
] = '\0';
1718 c
= strrchr (alias_buf
, '\n');
/* Skip leading whitespace; ignore empty lines and '#' comment lines. */
1722 c
+= strspn (c
, " \t");
1723 if (!*c
|| *c
== '#')
/* Step over the first field, the whitespace following it, and then the
   second field. */
1726 c
+= strcspn (c
, " \t");
1728 c
+= strspn (c
, " \t");
1732 c
+= strcspn (c
, " \t");
1734 if (strlen (replace
) > ENCODING_LEN
)
1736 /* The file is latin1 encoded */
1737 lc_mbstowcs (__iso_mbtowc (1), walias
, alias
, ENCODING_LEN
+ 1);
1738 walias
[ENCODING_LEN
] = L
'\0';
1739 if (!wcscmp (wlocale
, walias
))
1741 ret
= strcpy (new_locale
, replace
);
1749 /* Can be called via cygwin_internal (CW_INTERNAL_SETLOCALE) for application
1750 which really (think they) know what they are doing. */
/* Switch the charset used by the internal conversion functions to the
   charset of the global locale.

   The current PATH value and the CWD's POSIX path are converted to wide
   strings with the previously active mbtowc function.  Then
   cygheap->locale.mbtowc is set to the global locale's mbtowc function,
   and the CWD and PATH are stored again, converted with the new
   function.  The CWD is handled between cwdstuff::acquire_write and
   cwdstuff::release_write. */
1752 internal_setlocale ()
1754 /* Each setlocale from the environment potentially changes the
1755 multibyte representation of the CWD. Therefore we have to
1756 reevaluate the CWD's posix path and store in the new charset.
1757 Same for the PATH environment variable. */
1758 /* FIXME: Other buffered paths might be affected as well. */
1759 /* FIXME: It could be necessary to convert the entire environment,
1763 wchar_t *w_path
= NULL
, *w_cwd
;
1765 /* Don't do anything if the charset hasn't actually changed. */
1766 if (cygheap
->locale
.mbtowc
== __get_global_locale ()->mbtowc
)
1769 debug_printf ("Global charset set to %s",
1770 __locale_charset (__get_global_locale ()));
1771 /* Fetch PATH and CWD and convert to wchar_t in previous charset. */
1772 path
= getenv ("PATH");
1773 if (path
&& *path
) /* $PATH can be potentially unset. */
1775 w_path
= tp
.w_get ();
1776 _sys_mbstowcs (cygheap
->locale
.mbtowc
, w_path
, 32768, path
);
1778 w_cwd
= tp
.w_get ();
1779 cwdstuff::acquire_write ();
1780 _sys_mbstowcs (cygheap
->locale
.mbtowc
, w_cwd
, 32768,
1781 cygheap
->cwd
.get_posix ());
1782 /* Set charset for internal conversion functions. */
1783 cygheap
->locale
.mbtowc
= __get_global_locale ()->mbtowc
;
/* The ASCII conversion function is never used internally; it is
   replaced by the UTF-8 one. */
1784 if (cygheap
->locale
.mbtowc
== __ascii_mbtowc
)
1785 cygheap
->locale
.mbtowc
= __utf8_mbtowc
;
1786 /* Restore CWD and PATH in new charset. */
1787 cygheap
->cwd
.reset_posix (w_cwd
);
1788 cwdstuff::release_write ();
1791 char *c_path
= tp
.c_get ();
1792 sys_wcstombs (c_path
, 32768, w_path
);
1793 setenv ("PATH", c_path
, 1);
1797 /* Called from dll_crt0_1, before fetching the command line from Windows.
1798 Set the internal charset according to the environment locale settings.
1799 Check if a required codepage is available, and only switch internal
1801 Make sure to reset the application locale to "C" per POSIX. */
1803 initial_setlocale ()
1805 char *ret
= _setlocale_r (_REENT
, LC_CTYPE
, "");
1807 internal_setlocale ();