2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
13 * Copyright 2014 Garrett D'Amore <garrett@damore.org>
17 * This file implements the 2008 newlocale and friends handling.
27 #include <sys/types.h>
34 #include "localeimpl.h"
38 * Big Theory of Locales:
40 * (It is recommended that readers familiarize themselves with the POSIX
41 * 2008 (XPG Issue 7) specifications for locales, first.)
43 * Historically, we had a bunch of global variables that stored locale
44 * data. While this worked well, it limited applications to a single locale
45 * at a time. This doesn't work well in certain server applications.
47 * In Issue 7, X/Open introduced the concept of a locale_t object, along with
48 * versions of functions that can take this object as a parameter, along
49 * with functions to clone and manipulate these locale objects. The new
50 * functions are named with a _l() suffix.
52 * Additionally, uselocale() is introduced, which can change the locale
53 * of a single thread. However, setlocale() can still be used to change
56 * In our implementation, we use libc's TSD to store the locale data that
57 * was previously global. We still have global data because some applications
58 * have had those global objects compiled into them. (Such applications will
59 * be unable to benefit from uselocale(), btw.) The legacy routines are
60 * reimplemented as wrappers that use the appropriate locale object by
61 * calling uselocale(). uselocale() when passed a NULL pointer returns the
62 * thread-specific locale object if one is present, or the global locale
63 * object otherwise. Note that once the TSD data is set, the only way
64 * to revert to the global locale is to pass the global locale LC_GLOBAL_LOCALE
67 * We are careful to minimize performance impact of multiple calls to
68 * uselocale() or setlocale() by using a cache of locale data whenever possible.
69 * As a consequence of this, applications that iterate over all possible
70 * locales will burn through a lot of virtual memory, but we find such
71 * applications rare. (locale -a might be an exception, but it is short lived.)
73 * Category data is never released (although enclosing locale objects might be),
74 * in order to guarantee thread-safety. Calling freelocale() on an object
75 * while it is in use by another thread is a programmer error (use-after-free)
76 * and we don't bother to note it further.
78 * Locale objects (global locales) established by setlocale() are also
79 * never freed (for MT safety), but we will save previous locale objects
80 * and reuse them when we can.
/*
 * Signature shared by the per-category loader routines stored in loaders[]:
 * each takes a locale name and returns a pointer to the locdata for it.
 * NOTE(review): presumably returns NULL when the load fails -- confirm
 * against the individual loaders.
 */
83 typedef struct locdata
*(*loadfn_t
)(const char *);
85 static const loadfn_t loaders
[LC_ALL
] = {
/*
 * Built-in POSIX category data, defined outside this file.  The
 * posix_locale initializer in this file points its per-category fields
 * (.monetary, .numeric, .messages, .time, .ctype, .collate, .runelocale)
 * at these objects.
 */
94 extern struct lc_monetary lc_monetary_posix
;
95 extern struct lc_numeric lc_numeric_posix
;
96 extern struct lc_messages lc_messages_posix
;
97 extern struct lc_time lc_time_posix
;
98 extern struct lc_ctype lc_ctype_posix
;
99 extern struct lc_collate lc_collate_posix
;
100 extern struct _RuneLocale _DefaultRuneLocale
;
102 static struct _locale posix_locale
= {
105 &__posix_ctype_locdata
,
106 &__posix_numeric_locdata
,
107 &__posix_time_locdata
,
108 &__posix_collate_locdata
,
109 &__posix_monetary_locdata
,
110 &__posix_messages_locdata
,
113 .ctype
= &lc_ctype_posix
,
114 .numeric
= &lc_numeric_posix
,
115 .collate
= &lc_collate_posix
,
116 .monetary
= &lc_monetary_posix
,
117 .messages
= &lc_messages_posix
,
118 .time
= &lc_time_posix
,
119 .runelocale
= &_DefaultRuneLocale
,
/*
 * Pointer to the global locale object.  It starts out pointing at the
 * static posix_locale; uselocale(), duplocale() and newlocale() in this
 * file fall back to it when no other locale is supplied.
 */
122 locale_t ___global_locale
= &posix_locale
;
125 __global_locale(void)
127 return (___global_locale
);
131 * Category names for getenv().  Note that this was modified
132 * for Solaris. See <iso/locale_iso.h>.
135 static char *categories
[7] = {
/*
 * Forward declarations for file-local helpers defined later in this file.
 * The locdata_get() prototype takes a plain "const char *", matching its
 * definition (the qualifier was previously written twice).
 */
148 static const char *get_locale_env(int);
149 static struct locdata
*locdata_get(int, const char *);
150 static struct locdata
*locdata_get_cache(int, const char *);
151 static locale_t
mklocname(locale_t
);
154 * Some utility routines.
158 __locdata_alloc(const char *name
, size_t memsz
)
160 struct locdata
*ldata
;
162 if ((ldata
= lmalloc(sizeof (*ldata
))) == NULL
) {
165 if ((ldata
->l_data
[0] = libc_malloc(memsz
)) == NULL
) {
166 lfree(ldata
, sizeof (*ldata
));
170 (void) strlcpy(ldata
->l_lname
, name
, sizeof (ldata
->l_lname
));
176 * Normally we never free locale data truly, but if we failed to load it
177 * for some reason, this routine is used to clean up the partial mess.
180 __locdata_free(struct locdata
*ldata
)
182 for (int i
= 0; i
< NLOCDATA
; i
++)
183 libc_free(ldata
->l_data
[i
]);
184 if (ldata
->l_map
!= NULL
&& ldata
->l_map_len
)
185 (void) munmap(ldata
->l_map
, ldata
->l_map_len
);
186 lfree(ldata
, sizeof (*ldata
));
190 * It turns out that for performance reasons we would really like to
191 * cache the most recently referenced locale data to avoid wasteful
192 * loading from files.
/*
 * cache_data[category] holds the locdata most recently returned for that
 * category and is the first thing locdata_get_cache() checks.
 * cat_data[category] is the head of the l_next-linked list of locdata
 * that locdata_get_cache() has inserted for that category; new entries
 * are pushed at the head.  cache_lock is taken by locdata_get_cache()
 * around its accesses to both arrays.
 */
195 static struct locdata
*cache_data
[LC_ALL
];
196 static struct locdata
*cat_data
[LC_ALL
];
197 static mutex_t cache_lock
= DEFAULTMUTEX
;
200 * Returns the cached data if the locale name is the same. If not,
201 * returns NULL (cache miss). The locdata is returned with a hold on
202 * it, taken on behalf of the caller. The caller should drop the hold
203 * when it is finished.
205 static struct locdata
*
206 locdata_get_cache(int category
, const char *locname
)
210 if (category
< 0 || category
>= LC_ALL
)
213 /* Try cache first. */
214 lmutex_lock(&cache_lock
);
215 loc
= cache_data
[category
];
217 if ((loc
!= NULL
) && (strcmp(loc
->l_lname
, locname
) == 0)) {
218 lmutex_unlock(&cache_lock
);
223 * Failing that try previously loaded locales (linear search) --
224 * this could be optimized to a hash, but it's unlikely that a single
225 * application will ever need to work with more than a few locales.
227 for (loc
= cat_data
[category
]; loc
!= NULL
; loc
= loc
->l_next
) {
228 if (strcmp(locname
, loc
->l_lname
) == 0) {
234 * Finally, if we still don't have one, try loading the locale
235 * data from the actual on-disk data.
237 * We drop the lock (libc wants to ensure no internal locks
238 * are held when we call other routines required to read from
239 * files, allocate memory, etc.) There is a small race here,
240 * but the consequences of the race are benign -- if multiple
241 * threads hit this at precisely the same point, we could
242 * wind up with duplicates of the locale data in the cache.
244 * This wastes the memory for an extra copy of the locale
245 * data, but there is no further harm beyond that. It's not
246 * worth the effort to recode this to something "safe"
247 * (which would require rescanning the list, etc.), given
248 * that this race will probably never actually occur.
251 lmutex_unlock(&cache_lock
);
252 loc
= (*loaders
[category
])(locname
);
253 lmutex_lock(&cache_lock
);
255 (void) strlcpy(loc
->l_lname
, locname
,
256 sizeof (loc
->l_lname
));
260 * Assuming we got one, update the cache, and stick us on the list
261 * of loaded locale data. We insert into the head (more recent
262 * use is likely to win.)
265 cache_data
[category
] = loc
;
266 if (!loc
->l_cached
) {
268 loc
->l_next
= cat_data
[category
];
269 cat_data
[category
] = loc
;
273 lmutex_unlock(&cache_lock
);
278 * Routine to get the locdata for a given category and locale.
279 * This includes retrieving it from cache, retrieving it from
282 static struct locdata
*
283 locdata_get(int category
, const char *locname
)
285 char scratch
[ENCODING_LEN
+ 1];
290 if (locname
== NULL
|| *locname
== 0) {
291 locname
= get_locale_env(category
);
295 * Extract the locale name for the category if it is a composite
298 if ((slash
= strchr(locname
, '/')) != NULL
) {
299 for (cnt
= category
; cnt
&& slash
!= NULL
; cnt
--) {
301 slash
= strchr(locname
, '/');
304 len
= slash
- locname
+ 1;
305 if (len
>= sizeof (scratch
)) {
306 len
= sizeof (scratch
);
309 len
= sizeof (scratch
);
311 (void) strlcpy(scratch
, locname
, len
);
315 if ((strcmp(locname
, "C") == 0) || (strcmp(locname
, "POSIX") == 0))
316 return (posix_locale
.locdata
[category
]);
318 return (locdata_get_cache(category
, locname
));
323 freelocptr(void *arg
)
325 locale_t
*locptr
= arg
;
331 get_locale_env(int category
)
335 /* 1. check LC_ALL. */
336 env
= getenv(categories
[LC_ALL
]);
339 if (env
== NULL
|| *env
== '\0')
340 env
= getenv(categories
[category
]);
343 if (env
== NULL
|| *env
== '\0')
344 env
= getenv("LANG");
346 /* 4. if none is set, fall to "C" */
347 if (env
== NULL
|| *env
== '\0')
355 * This routine is exposed via the MB_CUR_MAX macro. Note that legacy
356 * code will continue to use _ctype[520], but we prefer this function as
357 * it is the only way to get thread-specific information.
360 __mb_cur_max_l(locale_t loc
)
362 return (loc
->ctype
->lc_max_mblen
);
368 return (__mb_cur_max_l(uselocale(NULL
)));
376 duplocale(locale_t src
)
381 loc
= lmalloc(sizeof (*loc
));
386 /* illumos extension: POSIX says LC_GLOBAL_LOCALE here */
387 src
= ___global_locale
;
389 for (i
= 0; i
< LC_ALL
; i
++) {
390 loc
->locdata
[i
] = src
->locdata
[i
];
393 loc
->collate
= loc
->locdata
[LC_COLLATE
]->l_data
[0];
394 loc
->ctype
= loc
->locdata
[LC_CTYPE
]->l_data
[0];
395 loc
->runelocale
= loc
->locdata
[LC_CTYPE
]->l_data
[1];
396 loc
->messages
= loc
->locdata
[LC_MESSAGES
]->l_data
[0];
397 loc
->monetary
= loc
->locdata
[LC_MONETARY
]->l_data
[0];
398 loc
->numeric
= loc
->locdata
[LC_NUMERIC
]->l_data
[0];
399 loc
->time
= loc
->locdata
[LC_TIME
]->l_data
[0];
404 freelocale(locale_t loc
)
407 * We take extra care never to free a saved locale created by
408 * setlocale(). This shouldn't be strictly necessary, but a little
409 * extra safety doesn't hurt here.
411 if ((loc
!= NULL
) && (loc
!= &posix_locale
) && (!loc
->on_list
))
412 lfree(loc
, sizeof (*loc
));
416 newlocale(int catmask
, const char *locname
, locale_t base
)
421 if (catmask
& ~(LC_ALL_MASK
)) {
427 * Technically passing LC_GLOBAL_LOCALE here is illegal,
430 if (base
== NULL
|| base
== ___global_locale
) {
431 loc
= duplocale(___global_locale
);
433 loc
= duplocale(base
);
439 for (i
= 0; i
< LC_ALL
; i
++) {
440 struct locdata
*ldata
;
442 if (((1 << i
) & catmask
) == 0) {
443 /* Default to base locale if not overriding */
446 ldata
= locdata_get(i
, locname
);
453 loc
->locdata
[i
] = ldata
;
455 loc
->collate
= loc
->locdata
[LC_COLLATE
]->l_data
[0];
456 loc
->ctype
= loc
->locdata
[LC_CTYPE
]->l_data
[0];
457 loc
->runelocale
= loc
->locdata
[LC_CTYPE
]->l_data
[1];
458 loc
->messages
= loc
->locdata
[LC_MESSAGES
]->l_data
[0];
459 loc
->monetary
= loc
->locdata
[LC_MONETARY
]->l_data
[0];
460 loc
->numeric
= loc
->locdata
[LC_NUMERIC
]->l_data
[0];
461 loc
->time
= loc
->locdata
[LC_TIME
]->l_data
[0];
464 return (mklocname(loc
));
468 uselocale(locale_t loc
)
470 locale_t lastloc
= ___global_locale
;
473 locptr
= tsdalloc(_T_SETLOCALE
, sizeof (locale_t
), freelocptr
);
474 /* Should never occur */
475 if (locptr
== NULL
) {
483 /* Argument loc is NULL if we are just querying. */
486 * Set it to LC_GLOBAL_LOCALE to return to using
487 * the global locale (setlocale).
489 if (loc
== ___global_locale
) {
492 /* No validation of the provided locale at present */
498 * The caller is responsible for freeing; of course, it would be a
499 * gross error to call freelocale() on a locale object that is still
506 mklocname(locale_t loc
)
510 /* Look to see if any category is different */
511 for (int i
= 1; i
< LC_ALL
; ++i
) {
512 if (strcmp(loc
->locdata
[0]->l_lname
,
513 loc
->locdata
[i
]->l_lname
) != 0) {
521 * Note ordering of these follows the numeric order,
522 * if the order is changed, then setlocale() will need
523 * to be changed as well.
525 (void) snprintf(loc
->locname
, sizeof (loc
->locname
),
527 loc
->locdata
[LC_CTYPE
]->l_lname
,
528 loc
->locdata
[LC_NUMERIC
]->l_lname
,
529 loc
->locdata
[LC_TIME
]->l_lname
,
530 loc
->locdata
[LC_COLLATE
]->l_lname
,
531 loc
->locdata
[LC_MONETARY
]->l_lname
,
532 loc
->locdata
[LC_MESSAGES
]->l_lname
);
534 (void) strlcpy(loc
->locname
, loc
->locdata
[LC_CTYPE
]->l_lname
,
535 sizeof (loc
->locname
));