Cygwin: mmap: allow remapping part of an existing anonymous mapping
[newlib-cygwin.git] / newlib / libc / locale / locale.c
blobb16ec15114a591e89dd17a23179104226feea8c4
1 /*
2 FUNCTION
3 <<setlocale>>, <<localeconv>>---select or query locale
5 INDEX
6 setlocale
7 INDEX
8 localeconv
9 INDEX
10 _setlocale_r
11 INDEX
12 _localeconv_r
14 SYNOPSIS
15 #include <locale.h>
16 char *setlocale(int <[category]>, const char *<[locale]>);
17 lconv *localeconv(void);
19 char *_setlocale_r(void *<[reent]>,
20 int <[category]>, const char *<[locale]>);
21 lconv *_localeconv_r(void *<[reent]>);
23 DESCRIPTION
24 <<setlocale>> is the facility defined by ANSI C to condition the
25 execution environment for international collating and formatting
26 information; <<localeconv>> reports on the settings of the current
27 locale.
29 This is a minimal implementation, supporting only the required <<"POSIX">>
30 and <<"C">> values for <[locale]>; strings representing other locales are not
31 honored unless _MB_CAPABLE is defined.
33 If _MB_CAPABLE is defined, POSIX locale strings are allowed, following
34 the form
36 language[_TERRITORY][.charset][@@modifier]
38 <<"language">> is a two character string per ISO 639, or, if not available
39 for a given language, a three character string per ISO 639-3.
40 <<"TERRITORY">> is a country code per ISO 3166. For <<"charset">> and
41 <<"modifier">> see below.
43 In addition to the POSIX specifier, the following extension is supported
44 for backward compatibility with older implementations using newlib:
45 <<"C-charset">>.
46 Instead of <<"C-">>, you can also specify <<"C.">>. Both variations allow
47 specifying language-neutral locales while using charsets other than ASCII,
48 for instance <<"C.UTF-8">>, which keeps all settings as in the C locale,
49 but uses the UTF-8 charset.
51 The following charsets are recognized:
52 <<"UTF-8">>, <<"JIS">>, <<"EUCJP">>, <<"SJIS">>, <<"KOI8-R">>, <<"KOI8-U">>,
53 <<"KOI8-T">>, <<"GEORGIAN-PS">>, <<"PT154">>, <<"TIS-620">>, <<"ISO-8859-x">>
54 with 1 <= x <= 16, or <<"CPxxx">> with xxx in [437, 720, 737, 775, 850, 852,
55 855, 857, 858, 862, 866, 874, 932, 1125, 1250, 1251, 1252, 1253, 1254, 1255,
56 1256, 1257, 1258].
58 Charsets are case insensitive. For instance, <<"EUCJP">> and <<"eucJP">>
59 are equivalent. Charset names with dashes can also be written without
60 dashes, as in <<"UTF8">>, <<"iso88591">> or <<"koi8r">>. <<"EUCJP">> and
61 <<"EUCKR">> are also recognized with dash, <<"EUC-JP">> and <<"EUC-KR">>.
63 Full support for all of the above charsets requires that newlib has been
64 built with multibyte support and support for all ISO and Windows codepages.
65 Otherwise all singlebyte charsets are simply mapped to ASCII. Right now,
66 only newlib for Cygwin is built with full charset support by default.
67 Under Cygwin, this implementation additionally supports the charsets
68 <<"GB18030">>, <<"GBK">>, <<"GB2312">>, <<"eucCN">>, <<"eucKR">>, and
69 <<"Big5">>. Cygwin does not support <<"JIS">>.
71 Cygwin additionally supports locales from the file
72 /usr/share/locale/locale.alias.
74 (<<"">> is also accepted; if given, the settings are read from the
75 corresponding LC_* environment variables and $LANG according to POSIX rules.)
77 This implementation also supports the modifiers <<"cjknarrow">> and
78 <<"cjkwide">>, which affect how the functions <<wcwidth>> and <<wcswidth>>
79 handle characters from the "CJK Ambiguous Width" category of characters
80 described at http://www.unicode.org/reports/tr11/#Ambiguous.
81 These characters have a width of 1 for singlebyte charsets and UTF-8,
82 and a width of 2 for multibyte charsets other than UTF-8. Specifying
83 <<"cjknarrow">> or <<"cjkwide">> forces a width of 1 or 2, respectively.
85 This implementation also supports the modifier <<"cjksingle">>
86 to enforce single-width character properties.
88 If you use <<NULL>> as the <[locale]> argument, <<setlocale>> returns a
89 pointer to the string representing the current locale. The acceptable
90 values for <[category]> are defined in `<<locale.h>>' as macros
91 beginning with <<"LC_">>.
93 <<localeconv>> returns a pointer to a structure (also defined in
94 `<<locale.h>>') describing the locale-specific conventions currently
95 in effect.
97 <<_localeconv_r>> and <<_setlocale_r>> are reentrant versions of
98 <<localeconv>> and <<setlocale>> respectively. The extra argument
99 <[reent]> is a pointer to a reentrancy structure.
101 RETURNS
102 A successful call to <<setlocale>> returns a pointer to a string
103 associated with the specified category for the new locale. The string
104 returned by <<setlocale>> is such that a subsequent call using that
105 string will restore that category (or all categories in case of LC_ALL),
106 to that state. The application shall not modify the string returned
107 which may be overwritten by a subsequent call to <<setlocale>>.
108 On error, <<setlocale>> returns <<NULL>>.
110 <<localeconv>> returns a pointer to a structure of type <<lconv>>,
111 which describes the formatting and collating conventions in effect (in
112 this implementation, always those of the C locale).
114 PORTABILITY
115 ANSI C requires <<setlocale>>, but the only locale required across all
116 implementations is the C locale.
118 NOTES
119 There is no ISO-8859-12 codepage. It's also refused by this implementation.
121 No supporting OS subroutines are required.
124 /* Parts of this code are originally taken from FreeBSD. */
126 * Copyright (c) 1996 - 2002 FreeBSD Project
127 * Copyright (c) 1991, 1993
128 * The Regents of the University of California. All rights reserved.
130 * This code is derived from software contributed to Berkeley by
131 * Paul Borman at Krystal Technologies.
133 * Redistribution and use in source and binary forms, with or without
134 * modification, are permitted provided that the following conditions
135 * are met:
136 * 1. Redistributions of source code must retain the above copyright
137 * notice, this list of conditions and the following disclaimer.
138 * 2. Redistributions in binary form must reproduce the above copyright
139 * notice, this list of conditions and the following disclaimer in the
140 * documentation and/or other materials provided with the distribution.
141 * 4. Neither the name of the University nor the names of its contributors
142 * may be used to endorse or promote products derived from this software
143 * without specific prior written permission.
145 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
146 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
147 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
148 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
149 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
150 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
151 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
152 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
153 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
154 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
155 * SUCH DAMAGE.
158 #include <newlib.h>
159 #include <errno.h>
160 #include <string.h>
161 #include <limits.h>
162 #include <reent.h>
163 #include <stdlib.h>
164 #include <wchar.h>
165 #include "setlocale.h"
166 #include "../ctype/ctype_.h"
167 #include "../stdlib/local.h"
#ifdef _REENT_THREAD_LOCAL
/* Thread-local locale pointer; only defined when _REENT_THREAD_LOCAL is set. */
_Thread_local struct __locale_t *_tls_locale;
#endif

#ifdef __CYGWIN__
/* Has to be kept available as exported symbol for backward compatibility.
   It is updated when LC_CTYPE is loaded, but otherwise ignored.
   Applications compiled after 2010 don't use it anymore. */
int __EXPORT __mb_cur_max = 6;
#endif

char *_PathLocale = NULL;
#ifdef _MB_CAPABLE
/*
 * Category names for getenv().
 *
 * Indexed by category number.  Entry 0 ("LC_ALL") is the variable that
 * __get_locale_env consults first.  The table is only ever read, so both
 * the pointers and the strings are const.
 */
static const char *const categories[_LC_LAST] = {
  "LC_ALL",
  "LC_COLLATE",
  "LC_CTYPE",
  "LC_MONETARY",
  "LC_NUMERIC",
  "LC_TIME",
  "LC_MESSAGES",
};
#endif /* _MB_CAPABLE */
/* NOTE(review): every line of this listing starts with what looks like a
   source line number, and the numbering skips exactly where brace-only,
   comment-delimiter or other very short lines would sit.  The tokens below
   are left untouched; any elided initializer entries cannot be recovered
   from this view -- compare against the pristine file before building. */
198 * Default locale per POSIX. Can be overridden on a per-target base.
/* DEFAULT_LOCALE is only defined here when nothing defined it earlier. */
200 #ifndef DEFAULT_LOCALE
201 #define DEFAULT_LOCALE "C"
202 #endif
204 #ifdef _MB_CAPABLE
206 * This variable can be changed by any outside mechanism. This allows,
207 * for instance, to load the default locale from a file.
/* Mutable buffer with the default locale name.  __get_locale_env returns it
   when neither LC_ALL, the category variable nor LANG is set. */
209 char __default_locale[ENCODING_LEN + 1] = DEFAULT_LOCALE;
/* Constant "C" locale object: all seven category names are "C" and the
   ASCII wctomb/mbtowc converters are installed. */
211 const struct __locale_t __C_locale =
213 { "C", "C", "C", "C", "C", "C", "C", },
214 __ascii_wctomb,
215 __ascii_mbtowc,
217 DEFAULT_CTYPE_PTR,
/* Presumably the struct lconv defaults ("." first, then empty strings, then
   CHAR_MAX for the numeric fields) -- TODO confirm against setlocale.h. */
219 ".", "", "", "", "", "", "", "", "", "",
220 CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
221 CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
222 CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
223 CHAR_MAX, CHAR_MAX
/* Without __HAVE_LOCALE_INFO__ three inline values follow; presumably they
   initialize mb_cur_max, ctype_codeset and message_codeset, the members
   __loadlocale writes in that configuration -- TODO confirm.  With
   __HAVE_LOCALE_INFO__ there is one { pointer, NULL } pair per category. */
225 #ifndef __HAVE_LOCALE_INFO__
226 "\1",
227 "ASCII",
228 "ASCII",
229 #else /* __HAVE_LOCALE_INFO__ */
231 { NULL, NULL }, /* LC_ALL */
/* Only Cygwin supplies a collate table for the "C" locale. */
232 #ifdef __CYGWIN__
233 { &_C_collate_locale, NULL }, /* LC_COLLATE */
234 #else
235 { NULL, NULL }, /* LC_COLLATE */
236 #endif
237 { &_C_ctype_locale, NULL }, /* LC_CTYPE */
238 { &_C_monetary_locale, NULL }, /* LC_MONETARY */
239 { &_C_numeric_locale, NULL }, /* LC_NUMERIC */
240 { &_C_time_locale, NULL }, /* LC_TIME */
241 { &_C_messages_locale, NULL }, /* LC_MESSAGES */
243 #endif /* __HAVE_LOCALE_INFO__ */
245 #endif /* _MB_CAPABLE */
/* NOTE(review): every line of this listing starts with what looks like a
   source line number, and the numbering skips where brace-only or other
   very short lines would sit.  Tokens are left untouched; any elided
   initializer entries cannot be recovered from this view -- compare against
   the pristine file before building. */
/* Mutable global locale object.  The third category name is DEFAULT_LOCALE
   rather than "C"; going by the per-category comments further down, that
   slot is LC_CTYPE. */
247 struct __locale_t __global_locale =
249 { "C", "C", DEFAULT_LOCALE, "C", "C", "C", "C", },
/* Initial multibyte converters: UTF-8 on Cygwin, ASCII elsewhere. */
250 #ifdef __CYGWIN__
251 __utf8_wctomb,
252 __utf8_mbtowc,
253 #else
254 __ascii_wctomb,
255 __ascii_mbtowc,
256 #endif
258 DEFAULT_CTYPE_PTR,
/* Presumably the struct lconv defaults ("." first, then empty strings, then
   CHAR_MAX for the numeric fields) -- TODO confirm against setlocale.h. */
260 ".", "", "", "", "", "", "", "", "", "",
261 CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
262 CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
263 CHAR_MAX, CHAR_MAX, CHAR_MAX, CHAR_MAX,
264 CHAR_MAX, CHAR_MAX
/* Without __HAVE_LOCALE_INFO__ three inline values follow; presumably they
   initialize mb_cur_max, ctype_codeset and message_codeset -- TODO confirm.
   With __HAVE_LOCALE_INFO__ there is one { pointer, NULL } pair per
   category. */
266 #ifndef __HAVE_LOCALE_INFO__
267 "\1",
268 "ASCII",
269 "ASCII",
270 #else /* __HAVE_LOCALE_INFO__ */
272 { NULL, NULL }, /* LC_ALL */
/* Cygwin starts with its collate table and the UTF-8 ctype table; other
   targets have no collate table and use the plain "C" ctype table. */
273 #ifdef __CYGWIN__
274 { &_C_collate_locale, NULL }, /* LC_COLLATE */
275 { &_C_utf8_ctype_locale, NULL }, /* LC_CTYPE */
276 #else
277 { NULL, NULL }, /* LC_COLLATE */
278 { &_C_ctype_locale, NULL }, /* LC_CTYPE */
279 #endif
280 { &_C_monetary_locale, NULL }, /* LC_MONETARY */
281 { &_C_numeric_locale, NULL }, /* LC_NUMERIC */
282 { &_C_time_locale, NULL }, /* LC_TIME */
283 { &_C_messages_locale, NULL }, /* LC_MESSAGES */
285 #endif /* __HAVE_LOCALE_INFO__ */
#ifdef _MB_CAPABLE
/* Renamed from current_locale_string to make clear this is only the
   *global* string for setlocale (LC_ALL, NULL).  There's no equivalent
   functionality for uselocale.

   The buffer has room for one name plus one "/" separator per category,
   because currentlocale () joins the category names with "/" whenever they
   are not all identical. */
static char global_locale_string[_LC_LAST * (ENCODING_LEN + 1/*"/"*/ + 1)]
  = "C";
static char *currentlocale (void);

#endif /* _MB_CAPABLE */
/*
 * Reentrant worker behind setlocale.
 *
 *   p         reentrancy structure; its errno is set to EINVAL on bad input
 *   category  category to set or query (LC_ALL up to _LC_LAST - 1)
 *   locale    NULL to query, "" to take the names from the environment,
 *             a single locale name, or (LC_ALL only) a "/"-separated list
 *             giving one name per category
 *
 * Returns the name now in effect for CATEGORY (for LC_ALL the combined
 * global string), or NULL on failure.  Without _MB_CAPABLE only "C",
 * "POSIX" and "" are accepted and the result is always "C".
 *
 * The work buffers are static, so the returned string and the intermediate
 * state are shared between calls.
 */
char *
_setlocale_r (struct _reent *p,
              int category,
              const char *locale)
{
#ifndef _MB_CAPABLE
  if (locale)
    {
      if (strcmp (locale, "POSIX") && strcmp (locale, "C")
          && strcmp (locale, ""))
        return NULL;
    }
  return "C";
#else /* _MB_CAPABLE */
  static char new_categories[_LC_LAST][ENCODING_LEN + 1];
  static char saved_categories[_LC_LAST][ENCODING_LEN + 1];
  int i, j, len, saverr;
  const char *env, *r;
  char *ret;

  if (category < LC_ALL || category >= _LC_LAST)
    {
      _REENT_ERRNO(p) = EINVAL;
      return NULL;
    }

  /* Pure query: hand back the stored name without touching anything. */
  if (locale == NULL)
    return category != LC_ALL ? __get_global_locale ()->categories[category]
                              : global_locale_string;

  /*
   * Default to the current locale for everything.
   */
  for (i = 1; i < _LC_LAST; ++i)
    strcpy (new_categories[i], __get_global_locale ()->categories[i]);

  /*
   * Now go fill up new_categories from the locale argument
   */
  if (!*locale)
    {
      /* Empty string: take the name(s) from the environment. */
      if (category == LC_ALL)
        {
          for (i = 1; i < _LC_LAST; ++i)
            {
              env = __get_locale_env (p, i);
              if (strlen (env) > ENCODING_LEN)
                {
                  _REENT_ERRNO(p) = EINVAL;
                  return NULL;
                }
              strcpy (new_categories[i], env);
            }
        }
      else
        {
          env = __get_locale_env (p, category);
          if (strlen (env) > ENCODING_LEN)
            {
              _REENT_ERRNO(p) = EINVAL;
              return NULL;
            }
          strcpy (new_categories[category], env);
        }
    }
  else if (category != LC_ALL)
    {
      if (strlen (locale) > ENCODING_LEN)
        {
          _REENT_ERRNO(p) = EINVAL;
          return NULL;
        }
      strcpy (new_categories[category], locale);
    }
  else
    {
      if ((r = strchr (locale, '/')) == NULL)
        {
          /* One name for all categories. */
          if (strlen (locale) > ENCODING_LEN)
            {
              _REENT_ERRNO(p) = EINVAL;
              return NULL;
            }
          for (i = 1; i < _LC_LAST; ++i)
            strcpy (new_categories[i], locale);
        }
      else
        {
          /* "/"-separated list.  First skip over a run of slashes to find
             out whether anything follows the first one at all. */
          for (i = 1; r[1] == '/'; ++r)
            ;
          if (!r[1])
            {
              _REENT_ERRNO(p) = EINVAL;
              return NULL;      /* Hmm, just slashes... */
            }
          do
            {
              if (i == _LC_LAST)
                break;          /* Too many slashes... */
              if ((len = r - locale) > ENCODING_LEN)
                {
                  _REENT_ERRNO(p) = EINVAL;
                  return NULL;
                }
              strlcpy (new_categories[i], locale, len + 1);
              i++;
              while (*r == '/')
                r++;
              locale = r;
              while (*r && *r != '/')
                r++;
            }
          while (*locale);
          /* Fewer names than categories: repeat the last one given. */
          while (i < _LC_LAST)
            {
              strcpy (new_categories[i], new_categories[i-1]);
              i++;
            }
        }
    }

  if (category != LC_ALL)
    {
      ret = __loadlocale (__get_global_locale (), category,
                          new_categories[category]);
      /* Refresh the combined LC_ALL string. */
      currentlocale ();
      return ret;
    }

  for (i = 1; i < _LC_LAST; ++i)
    {
      strcpy (saved_categories[i], __get_global_locale ()->categories[i]);
      if (__loadlocale (__get_global_locale (), i, new_categories[i]) == NULL)
        {
          /* Roll back the categories already switched; if even the old
             name cannot be reloaded, fall back to "C".  The errno of the
             original failure is preserved across the rollback. */
          saverr = _REENT_ERRNO(p);
          for (j = 1; j < i; j++)
            {
              strcpy (new_categories[j], saved_categories[j]);
              if (__loadlocale (__get_global_locale (), j, new_categories[j])
                  == NULL)
                {
                  strcpy (new_categories[j], "C");
                  __loadlocale (__get_global_locale (), j, new_categories[j]);
                }
            }
          _REENT_ERRNO(p) = saverr;
          return NULL;
        }
    }
  return currentlocale ();
#endif /* _MB_CAPABLE */
}
451 #ifdef _MB_CAPABLE
452 static char *
453 currentlocale ()
455 int i;
457 strcpy (global_locale_string, __get_global_locale ()->categories[1]);
459 for (i = 2; i < _LC_LAST; ++i)
460 if (strcmp (__get_global_locale ()->categories[1],
461 __get_global_locale ()->categories[i]))
463 for (i = 2; i < _LC_LAST; ++i)
465 (void)strcat(global_locale_string, "/");
466 (void)strcat(global_locale_string,
467 __get_global_locale ()->categories[i]);
469 break;
471 return global_locale_string;
474 extern void __set_ctype (struct __locale_t *, const char *charset);
476 char *
477 __loadlocale (struct __locale_t *loc, int category, char *new_locale)
479 /* At this point a full-featured system would just load the locale
480 specific data from the locale files.
481 What we do here for now is to check the incoming string for correctness.
482 The string must be in one of the allowed locale strings, either
483 one in POSIX-style, or one in the old newlib style to maintain
484 backward compatibility. If the local string is correct, the charset
485 is extracted and stored in ctype_codeset or message_charset
486 dependent on the cateogry. */
487 char *locale = NULL;
488 char charset[ENCODING_LEN + 1];
489 long val = 0;
490 char *end, *c = NULL;
491 int mbc_max;
492 wctomb_p l_wctomb;
493 mbtowc_p l_mbtowc;
494 int cjksingle = 0;
495 int cjknarrow = 0;
496 int cjkwide = 0;
498 /* Avoid doing everything twice if nothing has changed.
500 duplocale relies on this test to go wrong so the locale is actually
501 duplicated when required. Any change here has to be synced with a
502 matching change in duplocale. */
503 if (!strcmp (new_locale, loc->categories[category]))
504 return loc->categories[category];
506 #ifdef __CYGWIN__
507 /* This additional code handles the case that the incoming locale string
508 is not valid. If so, it calls the function __set_locale_from_locale_alias,
509 which is only available on Cygwin right now. The function reads the
510 file /usr/share/locale/locale.alias. The file contains locale aliases
511 and their replacement locale. For instance, the alias "french" is
512 translated to "fr_FR.ISO-8859-1", the alias "thai" is translated to
513 "th_TH.TIS-620". If successful, the function returns with a pointer
514 to the second argument, which is a buffer in which the replacement locale
515 gets stored. Otherwise the function returns NULL. */
516 char tmp_locale[ENCODING_LEN + 1];
517 int ret = 0;
519 restart:
520 if (!locale)
521 locale = new_locale;
522 else if (locale != tmp_locale)
524 locale = __set_locale_from_locale_alias (locale, tmp_locale);
525 if (!locale)
526 return NULL;
528 # define FAIL goto restart
529 #else
530 locale = new_locale;
531 # define FAIL return NULL
532 #endif
534 /* "POSIX" is translated to "C", as on Linux. */
535 if (!strcmp (locale, "POSIX"))
536 strcpy (locale, "C");
537 if (!strcmp (locale, "C")) /* Default "C" locale */
538 strcpy (charset, "ASCII");
539 else if (locale[0] == 'C'
540 && (locale[1] == '-' /* Old newlib style */
541 || locale[1] == '.')) /* Extension for the C locale to allow
542 specifying different charsets while
543 sticking to the C locale in terms
544 of sort order, etc. Proposed in
545 the Debian project. */
547 char *chp;
549 c = locale + 2;
550 strcpy (charset, c);
551 if ((chp = strchr (charset, '@')))
552 /* Strip off modifier */
553 *chp = '\0';
554 c += strlen (charset);
556 else /* POSIX style */
558 c = locale;
560 /* Don't use ctype macros here, they might be localized. */
561 /* Language */
562 if (c[0] < 'a' || c[0] > 'z'
563 || c[1] < 'a' || c[1] > 'z')
564 FAIL;
565 c += 2;
566 /* Allow three character Language per ISO 639-3 */
567 if (c[0] >= 'a' && c[0] <= 'z')
568 ++c;
569 if (c[0] == '_')
571 /* Territory */
572 ++c;
573 if (c[0] < 'A' || c[0] > 'Z'
574 || c[1] < 'A' || c[1] > 'Z')
575 FAIL;
576 c += 2;
578 if (c[0] == '.')
580 /* Charset */
581 char *chp;
583 ++c;
584 strcpy (charset, c);
585 if ((chp = strchr (charset, '@')))
586 /* Strip off modifier */
587 *chp = '\0';
588 c += strlen (charset);
590 else if (c[0] == '\0' || c[0] == '@')
591 /* End of string or just a modifier */
592 #ifdef __CYGWIN__
593 /* The Cygwin-only function __set_charset_from_locale checks
594 for the default charset which is connected to the given locale.
595 The function uses Windows functions in turn so it can't be easily
596 adapted to other targets. However, if any other target provides
597 equivalent functionality, preferrably using the same function name
598 it would be sufficient to change the guarding #ifdef. */
599 __set_charset_from_locale (locale, charset);
600 #else
601 strcpy (charset, "ISO-8859-1");
602 #endif
603 else
604 /* Invalid string */
605 FAIL;
607 if (c && c[0] == '@')
609 /* Modifier "cjksingle" is recognized to enforce single-width mode. */
610 /* Modifiers "cjknarrow" or "cjkwide" are recognized to modify the
611 behaviour of wcwidth() and wcswidth() for East Asian languages.
612 For details see the comment at the end of this function. */
613 if (!strcmp (c + 1, "cjksingle"))
614 cjksingle = 1;
615 else if (!strcmp (c + 1, "cjknarrow"))
616 cjknarrow = 1;
617 else if (!strcmp (c + 1, "cjkwide"))
618 cjkwide = 1;
620 /* We only support this subset of charsets. */
621 switch (charset[0])
623 case 'U':
624 case 'u':
625 if (strcasecmp (charset, "UTF-8") && strcasecmp (charset, "UTF8"))
626 FAIL;
627 strcpy (charset, "UTF-8");
628 mbc_max = 6;
629 l_wctomb = __utf8_wctomb;
630 l_mbtowc = __utf8_mbtowc;
631 break;
632 #ifndef __CYGWIN__
633 /* Cygwin does not support JIS at all. */
634 case 'J':
635 case 'j':
636 if (strcasecmp (charset, "JIS"))
637 FAIL;
638 strcpy (charset, "JIS");
639 mbc_max = 8;
640 l_wctomb = __jis_wctomb;
641 l_mbtowc = __jis_mbtowc;
642 break;
643 #endif /* !__CYGWIN__ */
644 case 'E':
645 case 'e':
646 if (strncasecmp (charset, "EUC", 3))
647 FAIL;
648 c = charset + 3;
649 if (*c == '-')
650 ++c;
651 if (!strcasecmp (c, "JP"))
653 strcpy (charset, "EUCJP");
654 mbc_max = 3;
655 l_wctomb = __eucjp_wctomb;
656 l_mbtowc = __eucjp_mbtowc;
658 #ifdef __CYGWIN__
659 /* Newlib does neither provide EUC-KR nor EUC-CN, and Cygwin's
660 implementation requires Windows support. */
661 else if (!strcasecmp (c, "KR"))
663 strcpy (charset, "EUCKR");
664 mbc_max = 2;
665 l_wctomb = __kr_wctomb;
666 l_mbtowc = __kr_mbtowc;
668 else if (!strcasecmp (c, "CN"))
670 strcpy (charset, "EUCCN");
671 mbc_max = 2;
672 l_wctomb = __gbk_wctomb;
673 l_mbtowc = __gbk_mbtowc;
675 #endif /* __CYGWIN__ */
676 else
677 FAIL;
678 break;
679 case 'S':
680 case 's':
681 if (strcasecmp (charset, "SJIS"))
682 FAIL;
683 strcpy (charset, "SJIS");
684 mbc_max = 2;
685 l_wctomb = __sjis_wctomb;
686 l_mbtowc = __sjis_mbtowc;
687 break;
688 case 'I':
689 case 'i':
690 /* Must be exactly one of ISO-8859-1, [...] ISO-8859-16, except for
691 ISO-8859-12. This code also recognizes the aliases without dashes. */
692 if (strncasecmp (charset, "ISO", 3))
693 FAIL;
694 c = charset + 3;
695 if (*c == '-')
696 ++c;
697 if (strncasecmp (c, "8859", 4))
698 FAIL;
699 c += 4;
700 if (*c == '-')
701 ++c;
702 val = strtol (c, &end, 10);
703 if (val < 1 || val > 16 || val == 12 || *end)
704 FAIL;
705 strcpy (charset, "ISO-8859-");
706 c = charset + 9;
707 if (val > 10)
708 *c++ = '1';
709 *c++ = val % 10 + '0';
710 *c = '\0';
711 mbc_max = 1;
712 #ifdef _MB_EXTENDED_CHARSETS_ISO
713 l_wctomb = __iso_wctomb (val);
714 l_mbtowc = __iso_mbtowc (val);
715 #else /* !_MB_EXTENDED_CHARSETS_ISO */
716 l_wctomb = __ascii_wctomb;
717 l_mbtowc = __ascii_mbtowc;
718 #endif /* _MB_EXTENDED_CHARSETS_ISO */
719 break;
720 case 'C':
721 case 'c':
722 if (charset[1] != 'P' && charset[1] != 'p')
723 FAIL;
724 strncpy (charset, "CP", 2);
725 val = strtol (charset + 2, &end, 10);
726 if (*end)
727 FAIL;
728 switch (val)
730 case 437:
731 case 720:
732 case 737:
733 case 775:
734 case 850:
735 case 852:
736 case 855:
737 case 857:
738 case 858:
739 case 862:
740 case 866:
741 case 874:
742 case 1125:
743 case 1250:
744 case 1251:
745 case 1252:
746 case 1253:
747 case 1254:
748 case 1255:
749 case 1256:
750 case 1257:
751 case 1258:
752 mbc_max = 1;
753 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
754 l_wctomb = __cp_wctomb (val);
755 l_mbtowc = __cp_mbtowc (val);
756 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
757 l_wctomb = __ascii_wctomb;
758 l_mbtowc = __ascii_mbtowc;
759 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
760 break;
761 case 932:
762 mbc_max = 2;
763 l_wctomb = __sjis_wctomb;
764 l_mbtowc = __sjis_mbtowc;
765 break;
766 default:
767 FAIL;
769 break;
770 case 'K':
771 case 'k':
772 /* KOI8-R, KOI8-U, KOI8-T and the aliases without dash */
773 if (strncasecmp (charset, "KOI8", 4))
774 FAIL;
775 c = charset + 4;
776 if (*c == '-')
777 ++c;
778 if (*c == 'R' || *c == 'r')
780 val = 20866;
781 strcpy (charset, "CP20866");
783 else if (*c == 'U' || *c == 'u')
785 val = 21866;
786 strcpy (charset, "CP21866");
788 else if (*c == 'T' || *c == 't')
790 val = 103;
791 strcpy (charset, "CP103");
793 else
794 FAIL;
795 mbc_max = 1;
796 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
797 l_wctomb = __cp_wctomb (val);
798 l_mbtowc = __cp_mbtowc (val);
799 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
800 l_wctomb = __ascii_wctomb;
801 l_mbtowc = __ascii_mbtowc;
802 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
803 break;
804 case 'A':
805 case 'a':
806 if (strcasecmp (charset, "ASCII"))
807 FAIL;
808 strcpy (charset, "ASCII");
809 mbc_max = 1;
810 l_wctomb = __ascii_wctomb;
811 l_mbtowc = __ascii_mbtowc;
812 break;
813 case 'G':
814 case 'g':
815 #ifdef __CYGWIN__
816 /* Newlib does not provide GBK/GB2312 and Cygwin's implementation
817 requires Windows support. */
818 if (!strcasecmp (charset, "GBK")
819 || !strcasecmp (charset, "GB2312"))
821 strcpy (charset, charset[2] == '2' ? "GB2312" : "GBK");
822 mbc_max = 2;
823 l_wctomb = __gbk_wctomb;
824 l_mbtowc = __gbk_mbtowc;
826 else if (!strcasecmp (charset, "GB18030"))
828 strcpy (charset, "GB18030");
829 mbc_max = 4;
830 l_wctomb = __gb18030_wctomb;
831 l_mbtowc = __gb18030_mbtowc;
833 else
834 #endif /* __CYGWIN__ */
835 /* GEORGIAN-PS and the alias without dash */
836 if (!strncasecmp (charset, "GEORGIAN", 8))
838 c = charset + 8;
839 if (*c == '-')
840 ++c;
841 if (strcasecmp (c, "PS"))
842 FAIL;
843 val = 101;
844 strcpy (charset, "CP101");
845 mbc_max = 1;
846 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
847 l_wctomb = __cp_wctomb (val);
848 l_mbtowc = __cp_mbtowc (val);
849 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
850 l_wctomb = __ascii_wctomb;
851 l_mbtowc = __ascii_mbtowc;
852 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
854 else
855 FAIL;
856 break;
857 case 'P':
858 case 'p':
859 /* PT154 */
860 if (strcasecmp (charset, "PT154"))
861 FAIL;
862 val = 102;
863 strcpy (charset, "CP102");
864 mbc_max = 1;
865 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
866 l_wctomb = __cp_wctomb (val);
867 l_mbtowc = __cp_mbtowc (val);
868 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
869 l_wctomb = __ascii_wctomb;
870 l_mbtowc = __ascii_mbtowc;
871 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
872 break;
873 case 'T':
874 case 't':
875 if (strncasecmp (charset, "TIS", 3))
876 FAIL;
877 c = charset + 3;
878 if (*c == '-')
879 ++c;
880 if (strcmp (c, "620"))
881 FAIL;
882 val = 874;
883 strcpy (charset, "CP874");
884 mbc_max = 1;
885 #ifdef _MB_EXTENDED_CHARSETS_WINDOWS
886 l_wctomb = __cp_wctomb (val);
887 l_mbtowc = __cp_mbtowc (val);
888 #else /* !_MB_EXTENDED_CHARSETS_WINDOWS */
889 l_wctomb = __ascii_wctomb;
890 l_mbtowc = __ascii_mbtowc;
891 #endif /* _MB_EXTENDED_CHARSETS_WINDOWS */
892 break;
893 #ifdef __CYGWIN__
894 /* Newlib does not provide Big5 and Cygwin's implementation
895 requires Windows support. */
896 case 'B':
897 case 'b':
898 if (strcasecmp (charset, "BIG5"))
899 FAIL;
900 strcpy (charset, "BIG5");
901 mbc_max = 2;
902 l_wctomb = __big5_wctomb;
903 l_mbtowc = __big5_mbtowc;
904 break;
905 #endif /* __CYGWIN__ */
906 default:
907 FAIL;
909 switch (category)
911 case LC_CTYPE:
912 #ifndef __HAVE_LOCALE_INFO__
913 strcpy (loc->ctype_codeset, charset);
914 loc->mb_cur_max[0] = mbc_max;
915 #endif
916 #ifdef __CYGWIN__
917 __mb_cur_max = mbc_max; /* Only for backward compat */
918 #endif
919 loc->wctomb = l_wctomb;
920 loc->mbtowc = l_mbtowc;
921 __set_ctype (loc, charset);
922 /* Set CJK width mode (1: ambiguous-wide, 0: normal, -1: disabled). */
923 /* Determine the width for the "CJK Ambiguous Width" category of
924 characters. This is used in wcwidth(). Assume single width for
925 single-byte charsets, and double width for multi-byte charsets
926 other than UTF-8. For UTF-8, use single width.
927 Single width can also be forced with the "@cjknarrow" modifier.
928 Double width can also be forced with the "@cjkwide" modifier.
930 loc->cjk_lang = cjkwide ||
931 (!cjknarrow && mbc_max > 1 && charset[0] != 'U');
932 if (cjksingle)
933 loc->cjk_lang = -1; /* Disable CJK dual-width */
934 #ifdef __HAVE_LOCALE_INFO__
935 ret = __ctype_load_locale (loc, locale, (void *) l_wctomb, charset,
936 mbc_max);
937 #endif /* __HAVE_LOCALE_INFO__ */
938 break;
939 case LC_MESSAGES:
940 #ifdef __HAVE_LOCALE_INFO__
941 ret = __messages_load_locale (loc, locale, (void *) l_wctomb, charset);
942 if (!ret)
943 #else
944 strcpy (loc->message_codeset, charset);
945 #endif /* __HAVE_LOCALE_INFO__ */
946 break;
947 #ifdef __HAVE_LOCALE_INFO__
948 #ifdef __CYGWIN__
949 /* Right now only Cygwin supports a __collate_load_locale function at all. */
950 case LC_COLLATE:
951 ret = __collate_load_locale (loc, locale, (void *) l_mbtowc, charset);
952 break;
953 #endif
954 case LC_MONETARY:
955 ret = __monetary_load_locale (loc, locale, (void *) l_wctomb, charset);
956 break;
957 case LC_NUMERIC:
958 ret = __numeric_load_locale (loc, locale, (void *) l_wctomb, charset);
959 break;
960 case LC_TIME:
961 ret = __time_load_locale (loc, locale, (void *) l_wctomb, charset);
962 break;
963 #endif /* __HAVE_LOCALE_INFO__ */
964 default:
965 break;
967 #ifdef __HAVE_LOCALE_INFO__
968 if (ret)
969 FAIL;
970 #endif /* __HAVE_LOCALE_INFO__ */
971 return strcpy(loc->categories[category], new_locale);
974 const char *
975 __get_locale_env (struct _reent *p, int category)
977 const char *env;
979 /* 1. check LC_ALL. */
980 env = _getenv_r (p, categories[0]);
982 /* 2. check LC_* */
983 if (env == NULL || !*env)
984 env = _getenv_r (p, categories[category]);
986 /* 3. check LANG */
987 if (env == NULL || !*env)
988 env = _getenv_r (p, "LANG");
990 /* 4. if none is set, fall to default locale */
991 if (env == NULL || !*env)
992 env = __default_locale;
994 return env;
996 #endif /* _MB_CAPABLE */
999 __locale_mb_cur_max (void)
1001 #ifdef __HAVE_LOCALE_INFO__
1002 return __get_current_ctype_locale ()->mb_cur_max[0];
1003 #else
1004 return __get_current_locale ()->mb_cur_max[0];
1005 #endif
#ifdef __HAVE_LOCALE_INFO__
/* Return the ctype_ptr member of LOCALE. */
const char *
__locale_ctype_ptr_l (struct __locale_t *locale)
{
  return locale->ctype_ptr;
}

/* Return the ctype_ptr member of the current locale. */
const char *
__locale_ctype_ptr (void)
{
  return __get_current_locale ()->ctype_ptr;
}
#endif /* __HAVE_LOCALE_INFO__ */
1022 #ifndef _REENT_ONLY
1024 char *
1025 setlocale (int category,
1026 const char *locale)
1028 return _setlocale_r (_REENT, category, locale);
1031 #endif