8354 sync regcomp(3C) with upstream (fix make catalog)
[unleashed/tickless.git] / usr / src / lib / libast / common / port / lc.c
blobdb94a6acbb5d4bea368607c2ba8614536dde44ba
1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
20 * *
21 ***********************************************************************/
22 #pragma prototyped
25 * locale state implementation
28 #include "lclib.h"
29 #include "lclang.h"
31 #include <ctype.h>
/*
 * numeric data referenced from the default locale table below;
 * the two initializers are '.' and -1 -- presumably the decimal point and
 * "no thousands separator"; confirm against the Lc_numeric_t declaration
 */
33 static Lc_numeric_t default_numeric = { '.', -1 };
/*
 * built-in default locale: initialized with the strings "C" and "POSIX",
 * entry 0 of lc_languages, lc_territories and lc_charsets, and the flags
 * LC_default|LC_checked|LC_local
 * lcmake() returns this object for a null or empty name
 */
35 static Lc_t default_lc =
37 "C",
38 "POSIX",
39 &lc_languages[0],
40 &lc_territories[0],
41 &lc_charsets[0],
42 0,
43 LC_default|LC_checked|LC_local,
/*
 * 14 per-category info entries, each pointing back at default_lc;
 * only the sixth carries data (&default_numeric) -- presumably the
 * numeric category slot; confirm against the AST_LC_* indices
 */
46 { &default_lc, 0, 0 },
47 { &default_lc, 0, 0 },
48 { &default_lc, 0, 0 },
49 { &default_lc, 0, 0 },
50 { &default_lc, 0, 0 },
51 { &default_lc, 0, (void*)&default_numeric },
52 { &default_lc, 0, 0 },
53 { &default_lc, 0, 0 },
54 { &default_lc, 0, 0 },
55 { &default_lc, 0, 0 },
56 { &default_lc, 0, 0 },
57 { &default_lc, 0, 0 },
58 { &default_lc, 0, 0 },
59 { &default_lc, 0, 0 }
/*
 * numeric data for the debug locale: initialized with ',' and '.'
 */
63 static Lc_numeric_t debug_numeric = { ',', '.' };
/*
 * built-in "debug" locale: entry 1 of lc_languages and lc_territories,
 * entry 0 of lc_charsets, flags LC_debug|LC_checked|LC_local
 * same info table shape as default_lc, with the sixth entry carrying
 * &debug_numeric
 */
65 static Lc_t debug_lc =
67 "debug",
68 "debug",
69 &lc_languages[1],
70 &lc_territories[1],
71 &lc_charsets[0],
72 0,
73 LC_debug|LC_checked|LC_local,
76 { &debug_lc, 0, 0 },
77 { &debug_lc, 0, 0 },
78 { &debug_lc, 0, 0 },
79 { &debug_lc, 0, 0 },
80 { &debug_lc, 0, 0 },
81 { &debug_lc, 0, (void*)&debug_numeric },
82 { &debug_lc, 0, 0 },
83 { &debug_lc, 0, 0 },
84 { &debug_lc, 0, 0 },
85 { &debug_lc, 0, 0 },
86 { &debug_lc, 0, 0 },
87 { &debug_lc, 0, 0 },
88 { &debug_lc, 0, 0 },
89 { &debug_lc, 0, 0 }
/* final initializer -- presumably the `next' link, chaining debug_lc to default_lc; confirm */
91 &default_lc
/*
 * head of the list of known locales: lcmake() searches it through ->next
 * and pushes every locale it creates onto the front
 */
94 static Lc_t* lcs = &debug_lc;
/*
 * 14 locale pointers, all starting out at default_lc
 * (presumably one per AST_LC_* category and read through LCINFO(); confirm)
 */
96 Lc_t* locales[] =
98 &default_lc,
99 &default_lc,
100 &default_lc,
101 &default_lc,
102 &default_lc,
103 &default_lc,
104 &default_lc,
105 &default_lc,
106 &default_lc,
107 &default_lc,
108 &default_lc,
109 &default_lc,
110 &default_lc,
111 &default_lc
115 * return the internal category index for category
119 lcindex(int category, int min)
121 switch (category)
123 case LC_ALL: return min ? -1 : AST_LC_ALL;
124 case LC_ADDRESS: return AST_LC_ADDRESS;
125 case LC_COLLATE: return AST_LC_COLLATE;
126 case LC_CTYPE: return AST_LC_CTYPE;
127 case LC_IDENTIFICATION: return AST_LC_IDENTIFICATION;
128 case LC_LANG: return AST_LC_LANG;
129 case LC_MEASUREMENT: return AST_LC_MEASUREMENT;
130 case LC_MESSAGES: return AST_LC_MESSAGES;
131 case LC_MONETARY: return AST_LC_MONETARY;
132 case LC_NAME: return AST_LC_NAME;
133 case LC_NUMERIC: return AST_LC_NUMERIC;
134 case LC_PAPER: return AST_LC_PAPER;
135 case LC_TELEPHONE: return AST_LC_TELEPHONE;
136 case LC_TIME: return AST_LC_TIME;
137 case LC_XLITERATE: return AST_LC_XLITERATE;
139 return -1;
143 * return the first category table entry
146 Lc_category_t*
147 lccategories(void)
149 return (Lc_category_t*)&lc_categories[0];
153 * return the current info for category
156 Lc_info_t*
157 lcinfo(register int category)
159 if ((category = lcindex(category, 0)) < 0)
160 return 0;
161 return LCINFO(category);
/*
 * match s against the '|' separated alternation pattern p
 *
 * s ends at its terminating 0 or at the first ','
 * on a complete match the number of pattern chars consumed in the
 * matching alternative is returned (non-zero for non-empty alternatives)
 * if minimum!=0 then a leading partial match that consumed at least
 * minimum pattern chars also counts and the longest such count is returned
 * 0 is returned when nothing matches
 *
 * a '-' in one string that has no counterpart in the other skips ahead to
 * the next '-' in the other string
 * if standard!=0 then
 *	if s starts with a digit leading non-digits are ignored in p
 *	if p starts with a digit leading non-digits are ignored in s
 *	a '-' facing a digit in the other string is ignored
 *
 * all <ctype.h> arguments are converted to unsigned char first so that
 * bytes >= 0x80 are not passed as negative values (undefined behavior)
 */

static int
match(const char* s, register const char* p, int minimum, int standard)
{
	register const char*	t;
	const char*		x;
	int			w;
	int			z;

	z = 0;
	do
	{
		t = s;
		if (standard)
		{
			if (isdigit((unsigned char)*t))
				while (*p && !isdigit((unsigned char)*p))
					p++;
			else if (isdigit((unsigned char)*p))
				while (*t && !isdigit((unsigned char)*t))
					t++;
		}
		if (*p)
		{
			/* w is set once a pattern '-' was matched by skipping ahead in s */
			w = 0;
			x = p;
			while (*p && *p != '|')
			{
				if (!*t || *t == ',')
					break;
				else if (*t == *p)
					/*ok*/;
				else if (*t == '-')
				{
					if (standard && isdigit((unsigned char)*p))
					{
						t++;
						continue;
					}
					while (*p && *p != '-')
						p++;
					if (!*p)
						break;
				}
				else if (*p == '-')
				{
					if (standard && isdigit((unsigned char)*t))
					{
						p++;
						continue;
					}
					w = 1;
					while (*t && *t != '-')
						t++;
					if (!*t)
						break;
				}
				else
					break;
				t++;
				p++;
			}
			/* s exhausted and the alternative is exhausted too (or w allows a prefix) */
			if ((!*t || *t == ',') && (!*p || *p == '|' || w))
				return (int)(p - x);
			/* remember the longest partial match that satisfies minimum */
			if (minimum && z < (p - x) && (p - x) >= minimum)
				z = (int)(p - x);
		}
		/* advance to the next alternative */
		while (*p && *p != '|')
			p++;
	} while (*p++);
	return z;
}
243 * return 1 if s matches the charset names in cp
246 static int
247 match_charset(register const char* s, register const Lc_charset_t* cp)
249 return match(s, cp->code, 0, 1) || match(s, cp->alternates, 3, 1) || cp->ms && match(s, cp->ms, 0, 1);
253 * low level for lccanon
256 static size_t
257 canonical(const Lc_language_t* lp, const Lc_territory_t* tp, const Lc_charset_t* cp, const Lc_attribute_list_t* ap, unsigned long flags, char* buf, size_t siz)
259 register int c;
260 register int u;
261 register char* s;
262 register char* e;
263 register const char* t;
265 if (!(flags & (LC_abbreviated|LC_default|LC_local|LC_qualified|LC_verbose)))
266 flags |= LC_abbreviated;
267 s = buf;
268 e = &buf[siz - 3];
269 if (lp)
271 if (lp->flags & (LC_debug|LC_default))
273 for (t = lp->code; s < e && (*s = *t++); s++);
274 *s++ = 0;
275 return s - buf;
277 if (flags & LC_verbose)
279 u = 1;
280 t = lp->name;
281 while (s < e && (c = *t++))
283 if (u)
285 u = 0;
286 c = toupper(c);
288 else if (!isalnum(c))
289 u = 1;
290 *s++ = c;
293 else
294 for (t = lp->code; s < e && (*s = *t++); s++);
296 if (s < e)
298 if (tp && tp != &lc_territories[0] && (!(flags & (LC_abbreviated|LC_default)) || !lp || !streq(lp->code, tp->code)))
300 if (lp)
301 *s++ = '_';
302 if (flags & LC_verbose)
304 u = 1;
305 t = tp->name;
306 while (s < e && (c = *t++) && c != '|')
308 if (u)
310 u = 0;
311 c = toupper(c);
313 else if (!isalnum(c))
314 u = 1;
315 *s++ = c;
318 else
319 for (t = tp->code; s < e && (*s = toupper(*t++)); s++);
321 if (lp && (!(flags & (LC_abbreviated|LC_default)) || cp != lp->charset) && s < e)
323 *s++ = '.';
324 for (t = cp->code; s < e && (c = *t++); s++)
326 if (islower(c))
327 c = toupper(c);
328 *s = c;
331 for (c = '@'; ap && s < e; ap = ap->next)
332 if (!(flags & (LC_abbreviated|LC_default|LC_verbose)) || !(ap->attribute->flags & LC_default))
334 *s++ = c;
335 c = ',';
336 for (t = ap->attribute->name; s < e && (*s = *t++); s++);
339 *s++ = 0;
340 return s - buf;
344 * generate a canonical locale name in buf
347 size_t
348 lccanon(Lc_t* lc, unsigned long flags, char* buf, size_t siz)
350 if ((flags & LC_local) && (!lc->language || !(lc->language->flags & (LC_debug|LC_default))))
352 #if _WINIX
353 char lang[64];
354 char code[64];
355 char ctry[64];
357 if (lc->index &&
358 GetLocaleInfo(lc->index, LOCALE_SENGLANGUAGE, lang, sizeof(lang)) &&
359 GetLocaleInfo(lc->index, LOCALE_SENGCOUNTRY, ctry, sizeof(ctry)))
361 if (!GetLocaleInfo(lc->index, LOCALE_IDEFAULTANSICODEPAGE, code, sizeof(code)))
362 code[0] = 0;
363 if (!lc->charset || !lc->charset->ms)
364 return sfsprintf(buf, siz, "%s_%s", lang, ctry);
365 else if (streq(lc->charset->ms, code))
366 return sfsprintf(buf, siz, "%s_%s.%s", lang, ctry, code);
367 else
368 return sfsprintf(buf, siz, "%s_%s.%s,%s", lang, ctry, code, lc->charset->ms);
370 #endif
371 buf[0] = '-';
372 buf[1] = 0;
373 return 0;
375 return canonical(lc->language, lc->territory, lc->charset, lc->attributes, flags, buf, siz);
379 * make an Lc_t from a locale name
382 Lc_t*
383 lcmake(const char* name)
385 register int c;
386 register char* s;
387 register char* e;
388 register const char* t;
389 const char* a;
390 char* w;
391 char* language_name;
392 char* territory_name;
393 char* charset_name;
394 char* attributes_name;
395 Lc_t* lc;
396 const Lc_map_t* mp;
397 const Lc_language_t* lp;
398 const Lc_territory_t* tp;
399 const Lc_territory_t* tpb;
400 const Lc_territory_t* primary;
401 const Lc_charset_t* cp;
402 const Lc_charset_t* ppa;
403 const Lc_attribute_t* ap;
404 Lc_attribute_list_t* ai;
405 Lc_attribute_list_t* al;
406 int i;
407 int n;
408 int z;
409 char buf[PATH_MAX / 2];
410 char tmp[PATH_MAX / 2];
412 if (!(t = name) || !*t)
413 return &default_lc;
414 for (lc = lcs; lc; lc = lc->next)
415 if (!strcasecmp(t, lc->code) || !strcasecmp(t, lc->name))
416 return lc;
417 for (mp = lc_maps; mp->code; mp++)
418 if (streq(t, mp->code))
420 lp = mp->language;
421 tp = mp->territory;
422 cp = mp->charset;
423 if (!mp->attribute)
424 al = 0;
425 else if (al = newof(0, Lc_attribute_list_t, 1, 0))
426 al->attribute = mp->attribute;
427 goto mapped;
429 language_name = buf;
430 territory_name = charset_name = attributes_name = 0;
431 s = buf;
432 e = &buf[sizeof(buf)-2];
433 a = 0;
434 n = 0;
435 while (s < e && (c = *t++))
437 if (isspace(c) || (c == '(' || c == '-' && *t == '-') && ++n)
439 while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n))
440 if (!c)
441 break;
442 if (isalnum(c) && !n)
443 *s++ = '-';
444 else
446 n = 0;
447 if (!a)
449 a = t - 1;
450 while (c && c != '_' && c != '.' && c != '@')
451 c = *t++;
452 if (!c)
453 break;
457 if (c == '_' && !territory_name)
459 *s++ = 0;
460 territory_name = s;
462 else if (c == '.' && !charset_name)
464 *s++ = 0;
465 charset_name = s;
467 else if (c == '@' && !attributes_name)
469 *s++ = 0;
470 attributes_name = s;
472 else
474 if (isupper(c))
475 c = tolower(c);
476 *s++ = c;
479 if ((t = a) && s < e)
481 if (attributes_name)
482 *s++ = ',';
483 else
485 *s++ = 0;
486 attributes_name = s;
488 while (s < e && (c = *t++))
490 if (isspace(c) || (c == '(' || c == ')' || c == '-' && *t == '-') && ++n)
492 while ((c = *t++) && (isspace(c) || (c == '-' || c == '(' || c == ')') && ++n))
493 if (!c)
494 break;
495 if (isalnum(c) && !n)
496 *s++ = '-';
497 else
498 n = 0;
500 if (c == '_' || c == '.' || c == '@')
501 break;
502 if (isupper(c))
503 c = tolower(c);
504 *s++ = c;
507 *s = 0;
508 tp = 0;
509 cp = ppa = 0;
510 al = 0;
513 * language
516 n = strlen(s = language_name);
517 if (n == 2)
518 for (lp = lc_languages; lp->code && !streq(s, lp->code); lp++);
519 else if (n == 3)
521 for (lp = lc_languages; lp->code && (!lp->alternates || !match(s, lp->alternates, n, 0)); lp++);
522 if (!lp->code)
524 c = s[2];
525 s[2] = 0;
526 for (lp = lc_languages; lp->code && !streq(s, lp->code); lp++);
527 s[2] = c;
528 if (lp->code)
529 n = 1;
532 else
533 lp = 0;
534 if (!lp || !lp->code)
536 for (lp = lc_languages; lp->code && !match(s, lp->name, 0, 0); lp++);
537 if (!lp || !lp->code)
539 if (!territory_name)
541 if (n == 2)
542 for (tp = lc_territories; tp->code && !streq(s, tp->code); tp++);
543 else
545 z = 0;
546 tpb = 0;
547 for (tp = lc_territories; tp->name; tp++)
548 if ((i = match(s, tp->name, 3, 0)) > z)
550 tpb = tp;
551 if ((z = i) == n)
552 break;
554 if (tpb)
555 tp = tpb;
557 if (tp->code)
558 lp = tp->languages[0];
560 if (!lp || !lp->code)
563 * name not in the tables so let
564 * _ast_setlocale() and/or setlocale()
565 * handle the validity checks
568 s = (char*)name;
569 z = strlen(s) + 1;
570 if (!(lp = newof(0, Lc_language_t, 1, z)))
571 return 0;
572 name = ((Lc_language_t*)lp)->code = ((Lc_language_t*)lp)->name = (const char*)(lp + 1);
573 memcpy((char*)lp->code, s, z - 1);
574 tp = &lc_territories[0];
575 cp = ((Lc_language_t*)lp)->charset = &lc_charsets[0];
576 al = 0;
577 goto override;
583 * territory
586 if (!tp || !tp->code)
588 if (!(s = territory_name))
590 n = 0;
591 primary = 0;
592 for (tp = lc_territories; tp->code; tp++)
593 if (tp->languages[0] == lp)
595 if (tp->flags & LC_primary)
597 n = 1;
598 primary = tp;
599 break;
601 n++;
602 primary = tp;
604 if (n == 1)
605 tp = primary;
606 s = (char*)lp->code;
608 if (!tp || !tp->code)
610 n = strlen(s);
611 if (n == 2)
613 for (tp = lc_territories; tp->code; tp++)
614 if (streq(s, tp->code))
616 for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++);
617 if (i >= elementsof(tp->languages))
618 tp = 0;
619 break;
622 else
624 for (tp = lc_territories; tp->code; tp++)
625 if (match(s, tp->name, 3, 0))
627 for (i = 0; i < elementsof(tp->languages) && lp != tp->languages[i]; i++);
628 if (i < elementsof(tp->languages))
629 break;
632 if (tp && !tp->code)
633 tp = 0;
638 * attributes -- done here to catch misplaced charset references
641 if (s = attributes_name)
645 for (w = s; *s && *s != ','; s++);
646 c = *s;
647 *s = 0;
648 if (!(cp = lp->charset) || !match_charset(w, cp))
649 for (cp = lc_charsets; cp->code; cp++)
650 if (match_charset(w, cp))
652 ppa = cp;
653 break;
655 if (!cp->code)
657 for (i = 0; i < elementsof(lp->attributes) && (ap = lp->attributes[i]); i++)
658 if (match(w, ap->name, 5, 0))
660 if (ai = newof(0, Lc_attribute_list_t, 1, 0))
662 ai->attribute = ap;
663 ai->next = al;
664 al = ai;
666 break;
668 if (i >= elementsof(lp->attributes) && (ap = newof(0, Lc_attribute_t, 1, sizeof(Lc_attribute_list_t) + s - w + 1)))
670 ai = (Lc_attribute_list_t*)(ap + 1);
671 strcpy((char*)(((Lc_attribute_t*)ap)->name = (const char*)(ai + 1)), w);
672 ai->attribute = ap;
673 ai->next = al;
674 al = ai;
677 *s = c;
678 } while (*s++);
682 * charset
685 if (s = charset_name)
686 for (cp = lc_charsets; cp->code; cp++)
687 if (match_charset(s, cp))
688 break;
689 if (!cp || !cp->code)
690 cp = ppa ? ppa : lp->charset;
691 mapped:
692 z = canonical(lp, tp, cp, al, 0, s = tmp, sizeof(tmp));
695 * add to the list of possibly active locales
698 override:
699 n = strlen(name) + 1;
700 if (!(lc = newof(0, Lc_t, 1, n + z)))
701 return 0;
702 strcpy((char*)(lc->name = (const char*)(lc + 1)), name);
703 strcpy((char*)(lc->code = lc->name + n), s);
704 lc->language = lp ? lp : &lc_languages[0];
705 lc->territory = tp ? tp : &lc_territories[0];
706 lc->charset = cp ? cp : &lc_charsets[0];
707 if (!strcmp(lc->charset->code, "utf8"))
708 lc->flags |= LC_utf8;
709 lc->attributes = al;
710 for (i = 0; i < elementsof(lc->info); i++)
711 lc->info[i].lc = lc;
712 #if _WINIX
713 n = SUBLANG_DEFAULT;
714 if (tp)
715 for (i = 0; i < elementsof(tp->languages); i++)
716 if (lp == tp->languages[i])
718 n = tp->indices[i];
719 break;
721 lc->index = MAKELCID(MAKELANGID(lp->index, n), SORT_DEFAULT);
722 #endif
723 lc->next = lcs;
724 lcs = lc;
725 return lc;
729 * return an Lc_t* for each locale in the tables
730 * one Lc_t is allocated on the first call with lc==0
731 * this is freed when 0 returned
732 * the return value is not part of the lcmake() cache
735 typedef struct Lc_scan_s
737 Lc_t lc;
738 Lc_attribute_list_t list;
739 int territory;
740 int language;
741 int attribute;
742 char buf[256];
743 } Lc_scan_t;
745 Lc_t*
746 lcscan(Lc_t* lc)
748 register Lc_scan_t* ls;
750 if (!(ls = (Lc_scan_t*)lc))
752 if (!(ls = newof(0, Lc_scan_t, 1, 0)))
753 return 0;
754 ls->lc.code = ls->lc.name = ls->buf;
755 ls->territory = -1;
756 ls->language = elementsof(ls->lc.territory->languages);
757 ls->attribute = elementsof(ls->lc.language->attributes);
759 if (++ls->attribute >= elementsof(ls->lc.language->attributes) || !(ls->list.attribute = ls->lc.language->attributes[ls->attribute]))
761 if (++ls->language >= elementsof(ls->lc.territory->languages) || !(ls->lc.language = ls->lc.territory->languages[ls->language]))
763 if (!lc_territories[++ls->territory].code)
765 free(ls);
766 return 0;
768 ls->lc.territory = &lc_territories[ls->territory];
769 ls->lc.language = ls->lc.territory->languages[ls->language = 0];
771 if (ls->lc.language)
773 ls->lc.charset = ls->lc.language->charset ? ls->lc.language->charset : &lc_charsets[0];
774 ls->list.attribute = ls->lc.language->attributes[ls->attribute = 0];
776 else
778 ls->lc.charset = &lc_charsets[0];
779 ls->list.attribute = 0;
782 ls->lc.attributes = ls->list.attribute ? &ls->list : (Lc_attribute_list_t*)0;
783 #if _WINIX
784 if (!ls->lc.language || !ls->lc.language->index)
785 ls->lc.index = 0;
786 else
788 if ((!ls->list.attribute || !(ls->lc.index = ls->list.attribute->index)) &&
789 (!ls->lc.territory || !(ls->lc.index = ls->lc.territory->indices[ls->language])))
790 ls->lc.index = SUBLANG_DEFAULT;
791 ls->lc.index = MAKELCID(MAKELANGID(ls->lc.language->index, ls->lc.index), SORT_DEFAULT);
793 #endif
794 canonical(ls->lc.language, ls->lc.territory, ls->lc.charset, ls->lc.attributes, 0, ls->buf, sizeof(ls->buf));
795 return (Lc_t*)ls;