1 /***********************************************************************
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
13 * Information and Software Systems Research *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
21 ***********************************************************************/
25 * locale state implementation
33 static Lc_numeric_t default_numeric
= { '.', -1 };
35 static Lc_t default_lc
=
43 LC_default
|LC_checked
|LC_local
,
46 { &default_lc
, 0, 0 },
47 { &default_lc
, 0, 0 },
48 { &default_lc
, 0, 0 },
49 { &default_lc
, 0, 0 },
50 { &default_lc
, 0, 0 },
51 { &default_lc
, 0, (void*)&default_numeric
},
52 { &default_lc
, 0, 0 },
53 { &default_lc
, 0, 0 },
54 { &default_lc
, 0, 0 },
55 { &default_lc
, 0, 0 },
56 { &default_lc
, 0, 0 },
57 { &default_lc
, 0, 0 },
58 { &default_lc
, 0, 0 },
63 static Lc_numeric_t debug_numeric
= { ',', '.' };
65 static Lc_t debug_lc
=
73 LC_debug
|LC_checked
|LC_local
,
81 { &debug_lc
, 0, (void*)&debug_numeric
},
94 static Lc_t
* lcs
= &debug_lc
;
115 * return the internal category index for category
119 lcindex(int category
, int min
)
123 case LC_ALL
: return min
? -1 : AST_LC_ALL
;
124 case LC_ADDRESS
: return AST_LC_ADDRESS
;
125 case LC_COLLATE
: return AST_LC_COLLATE
;
126 case LC_CTYPE
: return AST_LC_CTYPE
;
127 case LC_IDENTIFICATION
: return AST_LC_IDENTIFICATION
;
128 case LC_LANG
: return AST_LC_LANG
;
129 case LC_MEASUREMENT
: return AST_LC_MEASUREMENT
;
130 case LC_MESSAGES
: return AST_LC_MESSAGES
;
131 case LC_MONETARY
: return AST_LC_MONETARY
;
132 case LC_NAME
: return AST_LC_NAME
;
133 case LC_NUMERIC
: return AST_LC_NUMERIC
;
134 case LC_PAPER
: return AST_LC_PAPER
;
135 case LC_TELEPHONE
: return AST_LC_TELEPHONE
;
136 case LC_TIME
: return AST_LC_TIME
;
137 case LC_XLITERATE
: return AST_LC_XLITERATE
;
143 * return the first category table entry
149 return (Lc_category_t
*)&lc_categories
[0];
153 * return the current info for category
157 lcinfo(register int category
)
159 if ((category
= lcindex(category
, 0)) < 0)
161 return LCINFO(category
);
165 * return 1 if s matches the alternation pattern p
166 * if minimum!=0 then at least that many chars must match
167 * if standard!=0 and s[0] is a digit leading non-digits are ignored in p
171 match(const char* s
, register const char* p
, int minimum
, int standard
)
173 register const char* t
;
185 while (*p
&& !isdigit(*p
))
187 else if (isdigit(*p
))
188 while (*t
&& !isdigit(*t
))
195 while (*p
&& *p
!= '|')
197 if (!*t
|| *t
== ',')
203 if (standard
&& isdigit(*p
))
208 while (*p
&& *p
!= '-')
215 if (standard
&& isdigit(*t
))
221 while (*t
&& *t
!= '-')
231 if ((!*t
|| *t
== ',') && (!*p
|| *p
== '|' || w
))
233 if (minimum
&& z
< (p
- x
) && (p
- x
) >= minimum
)
236 while (*p
&& *p
!= '|')
243 * return 1 if s matches the charset names in cp
247 match_charset(register const char* s
, register const Lc_charset_t
* cp
)
249 return match(s
, cp
->code
, 0, 1) || match(s
, cp
->alternates
, 3, 1) || cp
->ms
&& match(s
, cp
->ms
, 0, 1);
253 * low level for lccanon
257 canonical(const Lc_language_t
* lp
, const Lc_territory_t
* tp
, const Lc_charset_t
* cp
, const Lc_attribute_list_t
* ap
, unsigned long flags
, char* buf
, size_t siz
)
263 register const char* t
;
265 if (!(flags
& (LC_abbreviated
|LC_default
|LC_local
|LC_qualified
|LC_verbose
)))
266 flags
|= LC_abbreviated
;
271 if (lp
->flags
& (LC_debug
|LC_default
))
273 for (t
= lp
->code
; s
< e
&& (*s
= *t
++); s
++);
277 if (flags
& LC_verbose
)
281 while (s
< e
&& (c
= *t
++))
288 else if (!isalnum(c
))
294 for (t
= lp
->code
; s
< e
&& (*s
= *t
++); s
++);
298 if (tp
&& tp
!= &lc_territories
[0] && (!(flags
& (LC_abbreviated
|LC_default
)) || !lp
|| !streq(lp
->code
, tp
->code
)))
302 if (flags
& LC_verbose
)
306 while (s
< e
&& (c
= *t
++) && c
!= '|')
313 else if (!isalnum(c
))
319 for (t
= tp
->code
; s
< e
&& (*s
= toupper(*t
++)); s
++);
321 if (lp
&& (!(flags
& (LC_abbreviated
|LC_default
)) || cp
!= lp
->charset
) && s
< e
)
324 for (t
= cp
->code
; s
< e
&& (c
= *t
++); s
++)
331 for (c
= '@'; ap
&& s
< e
; ap
= ap
->next
)
332 if (!(flags
& (LC_abbreviated
|LC_default
|LC_verbose
)) || !(ap
->attribute
->flags
& LC_default
))
336 for (t
= ap
->attribute
->name
; s
< e
&& (*s
= *t
++); s
++);
344 * generate a canonical locale name in buf
348 lccanon(Lc_t
* lc
, unsigned long flags
, char* buf
, size_t siz
)
350 if ((flags
& LC_local
) && (!lc
->language
|| !(lc
->language
->flags
& (LC_debug
|LC_default
))))
358 GetLocaleInfo(lc
->index
, LOCALE_SENGLANGUAGE
, lang
, sizeof(lang
)) &&
359 GetLocaleInfo(lc
->index
, LOCALE_SENGCOUNTRY
, ctry
, sizeof(ctry
)))
361 if (!GetLocaleInfo(lc
->index
, LOCALE_IDEFAULTANSICODEPAGE
, code
, sizeof(code
)))
363 if (!lc
->charset
|| !lc
->charset
->ms
)
364 return sfsprintf(buf
, siz
, "%s_%s", lang
, ctry
);
365 else if (streq(lc
->charset
->ms
, code
))
366 return sfsprintf(buf
, siz
, "%s_%s.%s", lang
, ctry
, code
);
368 return sfsprintf(buf
, siz
, "%s_%s.%s,%s", lang
, ctry
, code
, lc
->charset
->ms
);
375 return canonical(lc
->language
, lc
->territory
, lc
->charset
, lc
->attributes
, flags
, buf
, siz
);
379 * make an Lc_t from a locale name
383 lcmake(const char* name
)
388 register const char* t
;
392 char* territory_name
;
394 char* attributes_name
;
397 const Lc_language_t
* lp
;
398 const Lc_territory_t
* tp
;
399 const Lc_territory_t
* tpb
;
400 const Lc_territory_t
* primary
;
401 const Lc_charset_t
* cp
;
402 const Lc_charset_t
* ppa
;
403 const Lc_attribute_t
* ap
;
404 Lc_attribute_list_t
* ai
;
405 Lc_attribute_list_t
* al
;
409 char buf
[PATH_MAX
/ 2];
410 char tmp
[PATH_MAX
/ 2];
412 if (!(t
= name
) || !*t
)
414 for (lc
= lcs
; lc
; lc
= lc
->next
)
415 if (!strcasecmp(t
, lc
->code
) || !strcasecmp(t
, lc
->name
))
417 for (mp
= lc_maps
; mp
->code
; mp
++)
418 if (streq(t
, mp
->code
))
425 else if (al
= newof(0, Lc_attribute_list_t
, 1, 0))
426 al
->attribute
= mp
->attribute
;
430 territory_name
= charset_name
= attributes_name
= 0;
432 e
= &buf
[sizeof(buf
)-2];
435 while (s
< e
&& (c
= *t
++))
437 if (isspace(c
) || (c
== '(' || c
== '-' && *t
== '-') && ++n
)
439 while ((c
= *t
++) && (isspace(c
) || (c
== '-' || c
== '(' || c
== ')') && ++n
))
442 if (isalnum(c
) && !n
)
450 while (c
&& c
!= '_' && c
!= '.' && c
!= '@')
457 if (c
== '_' && !territory_name
)
462 else if (c
== '.' && !charset_name
)
467 else if (c
== '@' && !attributes_name
)
479 if ((t
= a
) && s
< e
)
488 while (s
< e
&& (c
= *t
++))
490 if (isspace(c
) || (c
== '(' || c
== ')' || c
== '-' && *t
== '-') && ++n
)
492 while ((c
= *t
++) && (isspace(c
) || (c
== '-' || c
== '(' || c
== ')') && ++n
))
495 if (isalnum(c
) && !n
)
500 if (c
== '_' || c
== '.' || c
== '@')
516 n
= strlen(s
= language_name
);
518 for (lp
= lc_languages
; lp
->code
&& !streq(s
, lp
->code
); lp
++);
521 for (lp
= lc_languages
; lp
->code
&& (!lp
->alternates
|| !match(s
, lp
->alternates
, n
, 0)); lp
++);
526 for (lp
= lc_languages
; lp
->code
&& !streq(s
, lp
->code
); lp
++);
534 if (!lp
|| !lp
->code
)
536 for (lp
= lc_languages
; lp
->code
&& !match(s
, lp
->name
, 0, 0); lp
++);
537 if (!lp
|| !lp
->code
)
542 for (tp
= lc_territories
; tp
->code
&& !streq(s
, tp
->code
); tp
++);
547 for (tp
= lc_territories
; tp
->name
; tp
++)
548 if ((i
= match(s
, tp
->name
, 3, 0)) > z
)
558 lp
= tp
->languages
[0];
560 if (!lp
|| !lp
->code
)
563 * name not in the tables so let
564 * _ast_setlocale() and/or setlocale()
565 * handle the validity checks
570 if (!(lp
= newof(0, Lc_language_t
, 1, z
)))
572 name
= ((Lc_language_t
*)lp
)->code
= ((Lc_language_t
*)lp
)->name
= (const char*)(lp
+ 1);
573 memcpy((char*)lp
->code
, s
, z
- 1);
574 tp
= &lc_territories
[0];
575 cp
= ((Lc_language_t
*)lp
)->charset
= &lc_charsets
[0];
586 if (!tp
|| !tp
->code
)
588 if (!(s
= territory_name
))
592 for (tp
= lc_territories
; tp
->code
; tp
++)
593 if (tp
->languages
[0] == lp
)
595 if (tp
->flags
& LC_primary
)
608 if (!tp
|| !tp
->code
)
613 for (tp
= lc_territories
; tp
->code
; tp
++)
614 if (streq(s
, tp
->code
))
616 for (i
= 0; i
< elementsof(tp
->languages
) && lp
!= tp
->languages
[i
]; i
++);
617 if (i
>= elementsof(tp
->languages
))
624 for (tp
= lc_territories
; tp
->code
; tp
++)
625 if (match(s
, tp
->name
, 3, 0))
627 for (i
= 0; i
< elementsof(tp
->languages
) && lp
!= tp
->languages
[i
]; i
++);
628 if (i
< elementsof(tp
->languages
))
638 * attributes -- done here to catch misplaced charset references
641 if (s
= attributes_name
)
645 for (w
= s
; *s
&& *s
!= ','; s
++);
648 if (!(cp
= lp
->charset
) || !match_charset(w
, cp
))
649 for (cp
= lc_charsets
; cp
->code
; cp
++)
650 if (match_charset(w
, cp
))
657 for (i
= 0; i
< elementsof(lp
->attributes
) && (ap
= lp
->attributes
[i
]); i
++)
658 if (match(w
, ap
->name
, 5, 0))
660 if (ai
= newof(0, Lc_attribute_list_t
, 1, 0))
668 if (i
>= elementsof(lp
->attributes
) && (ap
= newof(0, Lc_attribute_t
, 1, sizeof(Lc_attribute_list_t
) + s
- w
+ 1)))
670 ai
= (Lc_attribute_list_t
*)(ap
+ 1);
671 strcpy((char*)(((Lc_attribute_t
*)ap
)->name
= (const char*)(ai
+ 1)), w
);
685 if (s
= charset_name
)
686 for (cp
= lc_charsets
; cp
->code
; cp
++)
687 if (match_charset(s
, cp
))
689 if (!cp
|| !cp
->code
)
690 cp
= ppa
? ppa
: lp
->charset
;
692 z
= canonical(lp
, tp
, cp
, al
, 0, s
= tmp
, sizeof(tmp
));
695 * add to the list of possibly active locales
699 n
= strlen(name
) + 1;
700 if (!(lc
= newof(0, Lc_t
, 1, n
+ z
)))
702 strcpy((char*)(lc
->name
= (const char*)(lc
+ 1)), name
);
703 strcpy((char*)(lc
->code
= lc
->name
+ n
), s
);
704 lc
->language
= lp
? lp
: &lc_languages
[0];
705 lc
->territory
= tp
? tp
: &lc_territories
[0];
706 lc
->charset
= cp
? cp
: &lc_charsets
[0];
707 if (!strcmp(lc
->charset
->code
, "utf8"))
708 lc
->flags
|= LC_utf8
;
710 for (i
= 0; i
< elementsof(lc
->info
); i
++)
715 for (i
= 0; i
< elementsof(tp
->languages
); i
++)
716 if (lp
== tp
->languages
[i
])
721 lc
->index
= MAKELCID(MAKELANGID(lp
->index
, n
), SORT_DEFAULT
);
729 * return an Lc_t* for each locale in the tables
730 * one Lc_t is allocated on the first call with lc==0
731 * this is freed when 0 returned
732 * the return value is not part of the lcmake() cache
735 typedef struct Lc_scan_s
738 Lc_attribute_list_t list
;
748 register Lc_scan_t
* ls
;
750 if (!(ls
= (Lc_scan_t
*)lc
))
752 if (!(ls
= newof(0, Lc_scan_t
, 1, 0)))
754 ls
->lc
.code
= ls
->lc
.name
= ls
->buf
;
756 ls
->language
= elementsof(ls
->lc
.territory
->languages
);
757 ls
->attribute
= elementsof(ls
->lc
.language
->attributes
);
759 if (++ls
->attribute
>= elementsof(ls
->lc
.language
->attributes
) || !(ls
->list
.attribute
= ls
->lc
.language
->attributes
[ls
->attribute
]))
761 if (++ls
->language
>= elementsof(ls
->lc
.territory
->languages
) || !(ls
->lc
.language
= ls
->lc
.territory
->languages
[ls
->language
]))
763 if (!lc_territories
[++ls
->territory
].code
)
768 ls
->lc
.territory
= &lc_territories
[ls
->territory
];
769 ls
->lc
.language
= ls
->lc
.territory
->languages
[ls
->language
= 0];
773 ls
->lc
.charset
= ls
->lc
.language
->charset
? ls
->lc
.language
->charset
: &lc_charsets
[0];
774 ls
->list
.attribute
= ls
->lc
.language
->attributes
[ls
->attribute
= 0];
778 ls
->lc
.charset
= &lc_charsets
[0];
779 ls
->list
.attribute
= 0;
782 ls
->lc
.attributes
= ls
->list
.attribute
? &ls
->list
: (Lc_attribute_list_t
*)0;
784 if (!ls
->lc
.language
|| !ls
->lc
.language
->index
)
788 if ((!ls
->list
.attribute
|| !(ls
->lc
.index
= ls
->list
.attribute
->index
)) &&
789 (!ls
->lc
.territory
|| !(ls
->lc
.index
= ls
->lc
.territory
->indices
[ls
->language
])))
790 ls
->lc
.index
= SUBLANG_DEFAULT
;
791 ls
->lc
.index
= MAKELCID(MAKELANGID(ls
->lc
.language
->index
, ls
->lc
.index
), SORT_DEFAULT
);
794 canonical(ls
->lc
.language
, ls
->lc
.territory
, ls
->lc
.charset
, ls
->lc
.attributes
, 0, ls
->buf
, sizeof(ls
->buf
));