3 The module provides low-level access to the C lib's locale APIs
4 and adds high level number formatting APIs as well as a locale
5 aliasing engine to complement these.
7 The aliasing engine includes support for many commonly used locale
8 names and maps them to values suitable for passing to the C lib's
9 setlocale() function. It also includes default encodings for all
10 supported locale names.
16 # Try importing the _locale module.
18 # If this fails, fall back on a basic 'C' locale emulation.
20 # Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
21 # trying the import. So __all__ is also fiddled at the end of the file.
# Public names of the module.  LC_MESSAGES is non-standard, so it is not
# listed here; it is appended to __all__ at the end of the file instead.
__all__ = [
    "setlocale",
    "Error",
    "localeconv",
    "strcoll",
    "strxfrm",
    "format",
    "str",
    "atof",
    "atoi",
    "LC_CTYPE",
    "LC_COLLATE",
    "LC_TIME",
    "LC_MONETARY",
    "LC_NUMERIC",
    "LC_ALL",
    "CHAR_MAX",
]
45 """ localeconv() -> dict.
46 Returns numeric and monetary locale-specific parameters.
48 # 'C' locale default values
49 return {'grouping': [127],
50 'currency_symbol': '',
55 'n_sep_by_space': 127,
59 'p_sep_by_space': 127,
60 'int_curr_symbol': '',
63 'mon_thousands_sep': '',
65 'mon_decimal_point': '',
66 'int_frac_digits': 127}
68 def setlocale(category
, value
=None):
69 """ setlocale(integer,string=None) -> string.
70 Activates/queries locale processing.
72 if value
is not None and \
74 raise Error
, '_locale emulation only supports "C" locale'
78 """ strcoll(string,string) -> int.
79 Compares two strings according to the locale.
84 """ strxfrm(string) -> string.
85 Returns a string that can be compared with cmp() to get a locale-aware ordering.
89 ### Number formatting APIs
91 # Author: Martin von Loewis
93 #perform the grouping from right to left
96 grouping
=conv
['grouping']
97 if not grouping
:return s
100 # if the group size is CHAR_MAX, we are done (no further grouping)
101 if grouping
[0]==CHAR_MAX
:
103 # 0: re-use last group ad infinitum
107 grouping
=grouping
[1:]
109 result
=s
[-group
:]+conv
['thousands_sep']+result
116 result
=s
+conv
['thousands_sep']+result
119 def format(f
,val
,grouping
=0):
120 """Formats a value in the same way that the % formatting would use,
121 but takes the current locale into account.
122 Grouping is applied if the third parameter is true."""
123 result
= f
% abs(val
)
124 fields
= result
.split(".")
126 fields
[0]=_group(fields
[0])
128 res
= fields
[0]+localeconv()['decimal_point']+fields
[1]
132 raise Error
, "Too many decimal points in result string"
140 """Convert float to string, taking the locale into account."""
141 return format("%.12g",val
)
143 def atof(str,func
=float):
144 "Parses a string as a float according to the locale settings."
145 #First, get rid of the grouping
146 ts
= localeconv()['thousands_sep']
150 #next, replace the decimal point with a dot
151 dd
= localeconv()['decimal_point']
155 #finally, parse the string
159 "Converts a string to an integer according to the locale settings."
160 return atof(str, int)
163 setlocale(LC_ALL
, "")
165 s1
=format("%d", 123456789,1)
166 print s1
, "is", atoi(s1
)
169 print s1
, "is", atof(s1
)
171 ### Locale name aliasing engine
173 # Author: Marc-Andre Lemburg, mal@lemburg.com
174 # Various tweaks by Fredrik Lundh <effbot@telia.com>
# Keep a reference to the low-level setlocale() under a private name,
# because the public name setlocale is rebound further down to a wrapper
# that also accepts locale tuples.
_setlocale = setlocale
180 def normalize(localename
):
182 """ Returns a normalized locale code for the given locale name.
185 The returned locale code is formatted for use with setlocale().
188 If normalization fails, the original name is returned unchanged.
191 If the given encoding is not known, the function defaults to
192 the default encoding for the locale code just like setlocale() does.
196 # Normalize the locale name and extract the encoding
197 fullname
= localename
.lower()
199 # ':' is sometimes used as encoding delimiter.
200 fullname
= fullname
.replace(':', '.')
202 langname
, encoding
= fullname
.split('.')[:2]
203 fullname
= langname
+ '.' + encoding
208 # First lookup: fullname (possibly with encoding)
209 code
= locale_alias
.get(fullname
, None)
213 # Second try: langname (without encoding)
214 code
= locale_alias
.get(langname
, None)
217 langname
, defenc
= code
.split('.')
222 encoding
= encoding_alias
.get(encoding
, encoding
)
226 return langname
+ '.' + encoding
233 def _parse_localename(localename
):
235 """ Parses the locale code for localename and returns the
236 result as tuple (language code, encoding).
238 The localename is normalized and passed through the locale
239 alias engine. A ValueError is raised in case the locale name cannot be parsed.
242 The language code corresponds to RFC 1766. code and encoding
243 can be None in case the values cannot be determined or are
244 unknown to this implementation.
247 code
= normalize(localename
)
249 return code
.split('.')[:2]
253 raise ValueError, 'unknown locale: %s' % localename
256 def _build_localename(localetuple
):
258 """ Builds a locale code from the given tuple (language code, encoding).
261 No aliasing or normalizing takes place.
264 language
, encoding
= localetuple
270 return language
+ '.' + encoding
272 def getdefaultlocale(envvars
=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):
274 """ Tries to determine the default locale settings and returns
275 them as tuple (language code, encoding).
277 According to POSIX, a program which has not called
278 setlocale(LC_ALL, "") runs using the portable 'C' locale.
279 Calling setlocale(LC_ALL, "") lets it use the default locale as
280 defined by the LANG variable. Since we don't want to interfere
281 with the current locale setting we thus emulate the behavior
282 in the way described above.
284 To maintain compatibility with other platforms, not only the
285 LANG variable is tested, but a list of variables given as
286 envvars parameter. The first found to be defined will be
287 used. envvars defaults to the search path used in GNU gettext;
288 it must always contain the variable name 'LANG'.
290 Except for the code 'C', the language code corresponds to RFC
291 1766. code and encoding can be None in case the values cannot be determined.
297 # check if it's supported by the _locale module
299 code
, encoding
= _locale
._getdefaultlocale
()
300 except (ImportError, AttributeError):
303 # make sure the code/encoding values are valid
304 if sys
.platform
== "win32" and code
and code
[:2] == "0x":
305 # map windows language identifier to language name
306 code
= windows_locale
.get(int(code
, 0))
307 # ...add other platform-specific processing here, if necessary...
309 return code
, encoding
311 # fall back on POSIX behaviour
313 lookup
= os
.environ
.get
314 for variable
in envvars
:
315 localename
= lookup(variable
,None)
316 if localename
is not None:
320 return _parse_localename(localename
)
def getlocale(category=LC_CTYPE):
    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        The setting is obtained by querying the low-level setlocale()
        for category and parsing the name it reports.

        category may be one of the LC_* values except LC_ALL. It
        defaults to LC_CTYPE. A TypeError is raised when category is
        LC_ALL and the reported name contains a ';' (presumably a
        composite of per-category settings, which cannot be parsed as
        a single locale name).

        Except for the code 'C', the language code corresponds to RFC
        1766. code and encoding can be None in case the values cannot
        be determined.

    """
    current = _setlocale(category)
    is_composite = category == LC_ALL and ';' in current
    if not is_composite:
        return _parse_localename(current)
    raise TypeError('category LC_ALL is not supported')
def setlocale(category, locale=None):
    """ Sets the locale for the given category and returns the result
        of the low-level setlocale() call.

        locale can be a string, a locale tuple (language code,
        encoding), or None. Locale tuples are converted to strings
        using the locale aliasing engine. Locale strings (and any
        false value such as None) are passed directly to the C lib.

        category may be given as one of the LC_* values.

    """
    # Plain strings and false values need no conversion.  The string
    # test goes through type("") because the name "str" is exported by
    # this module for its own locale-aware function.
    if not locale or type(locale) is type(""):
        return _setlocale(category, locale)
    # Anything else is taken to be a (language code, encoding) tuple.
    return _setlocale(category, normalize(_build_localename(locale)))
def resetlocale(category=LC_ALL):
    """ Resets the locale for category to the default setting.

        The default setting is whatever getdefaultlocale() reports,
        turned back into a locale string. category defaults to LC_ALL.

    """
    default_name = _build_localename(getdefaultlocale())
    _setlocale(category, default_name)
369 # The following data was extracted from the locale.alias file which
370 # comes with X11 and then hand edited removing the explicit encoding
371 # definitions and adding some more aliases. The file is usually
372 # available as /usr/lib/X11/locale/locale.alias.
376 # The encoding_alias table maps lowercase encoding alias names to C
377 # locale encoding names (case-sensitive).
382 'iso8859': 'ISO8859-1',
384 '88591': 'ISO8859-1',
385 'ascii': 'ISO8859-1',
387 'iso88591': 'ISO8859-1',
388 'iso_8859-1': 'ISO8859-1',
389 '885915': 'ISO8859-15',
390 'iso885915': 'ISO8859-15',
391 'iso_8859-15': 'ISO8859-15',
392 'iso8859-2': 'ISO8859-2',
393 'iso88592': 'ISO8859-2',
394 'iso_8859-2': 'ISO8859-2',
395 'iso88595': 'ISO8859-5',
396 'iso88596': 'ISO8859-6',
397 'iso88597': 'ISO8859-7',
398 'iso88598': 'ISO8859-8',
399 'iso88599': 'ISO8859-9',
400 'iso-2022-jp': 'JIS7',
414 # The locale_alias table maps lowercase alias names to C locale names
415 # (case-sensitive). Encodings are always separated from the locale
416 # name using a dot ('.'); they should only be given in case the
417 # language name is needed to interpret the given encoding alias
418 # correctly (CJK codes often have this need).
421 'american': 'en_US.ISO8859-1',
422 'ar': 'ar_AA.ISO8859-6',
423 'ar_aa': 'ar_AA.ISO8859-6',
424 'ar_sa': 'ar_SA.ISO8859-6',
425 'arabic': 'ar_AA.ISO8859-6',
426 'bg': 'bg_BG.ISO8859-5',
427 'bg_bg': 'bg_BG.ISO8859-5',
428 'bulgarian': 'bg_BG.ISO8859-5',
429 'c-french': 'fr_CA.ISO8859-1',
432 'cextend': 'en_US.ISO8859-1',
433 'chinese-s': 'zh_CN.eucCN',
434 'chinese-t': 'zh_TW.eucTW',
435 'croatian': 'hr_HR.ISO8859-2',
436 'cs': 'cs_CZ.ISO8859-2',
437 'cs_cs': 'cs_CZ.ISO8859-2',
438 'cs_cz': 'cs_CZ.ISO8859-2',
439 'cz': 'cz_CZ.ISO8859-2',
440 'cz_cz': 'cz_CZ.ISO8859-2',
441 'czech': 'cs_CS.ISO8859-2',
442 'da': 'da_DK.ISO8859-1',
443 'da_dk': 'da_DK.ISO8859-1',
444 'danish': 'da_DK.ISO8859-1',
445 'de': 'de_DE.ISO8859-1',
446 'de_at': 'de_AT.ISO8859-1',
447 'de_ch': 'de_CH.ISO8859-1',
448 'de_de': 'de_DE.ISO8859-1',
449 'dutch': 'nl_BE.ISO8859-1',
450 'ee': 'ee_EE.ISO8859-4',
451 'el': 'el_GR.ISO8859-7',
452 'el_gr': 'el_GR.ISO8859-7',
453 'en': 'en_US.ISO8859-1',
454 'en_au': 'en_AU.ISO8859-1',
455 'en_ca': 'en_CA.ISO8859-1',
456 'en_gb': 'en_GB.ISO8859-1',
457 'en_ie': 'en_IE.ISO8859-1',
458 'en_nz': 'en_NZ.ISO8859-1',
459 'en_uk': 'en_GB.ISO8859-1',
460 'en_us': 'en_US.ISO8859-1',
461 'eng_gb': 'en_GB.ISO8859-1',
462 'english': 'en_EN.ISO8859-1',
463 'english_uk': 'en_GB.ISO8859-1',
464 'english_united-states': 'en_US.ISO8859-1',
465 'english_us': 'en_US.ISO8859-1',
466 'es': 'es_ES.ISO8859-1',
467 'es_ar': 'es_AR.ISO8859-1',
468 'es_bo': 'es_BO.ISO8859-1',
469 'es_cl': 'es_CL.ISO8859-1',
470 'es_co': 'es_CO.ISO8859-1',
471 'es_cr': 'es_CR.ISO8859-1',
472 'es_ec': 'es_EC.ISO8859-1',
473 'es_es': 'es_ES.ISO8859-1',
474 'es_gt': 'es_GT.ISO8859-1',
475 'es_mx': 'es_MX.ISO8859-1',
476 'es_ni': 'es_NI.ISO8859-1',
477 'es_pa': 'es_PA.ISO8859-1',
478 'es_pe': 'es_PE.ISO8859-1',
479 'es_py': 'es_PY.ISO8859-1',
480 'es_sv': 'es_SV.ISO8859-1',
481 'es_uy': 'es_UY.ISO8859-1',
482 'es_ve': 'es_VE.ISO8859-1',
483 'et': 'et_EE.ISO8859-4',
484 'et_ee': 'et_EE.ISO8859-4',
485 'fi': 'fi_FI.ISO8859-1',
486 'fi_fi': 'fi_FI.ISO8859-1',
487 'finnish': 'fi_FI.ISO8859-1',
488 'fr': 'fr_FR.ISO8859-1',
489 'fr_be': 'fr_BE.ISO8859-1',
490 'fr_ca': 'fr_CA.ISO8859-1',
491 'fr_ch': 'fr_CH.ISO8859-1',
492 'fr_fr': 'fr_FR.ISO8859-1',
493 'fre_fr': 'fr_FR.ISO8859-1',
494 'french': 'fr_FR.ISO8859-1',
495 'french_france': 'fr_FR.ISO8859-1',
496 'ger_de': 'de_DE.ISO8859-1',
497 'german': 'de_DE.ISO8859-1',
498 'german_germany': 'de_DE.ISO8859-1',
499 'greek': 'el_GR.ISO8859-7',
500 'hebrew': 'iw_IL.ISO8859-8',
501 'hr': 'hr_HR.ISO8859-2',
502 'hr_hr': 'hr_HR.ISO8859-2',
503 'hu': 'hu_HU.ISO8859-2',
504 'hu_hu': 'hu_HU.ISO8859-2',
505 'hungarian': 'hu_HU.ISO8859-2',
506 'icelandic': 'is_IS.ISO8859-1',
507 'id': 'id_ID.ISO8859-1',
508 'id_id': 'id_ID.ISO8859-1',
509 'is': 'is_IS.ISO8859-1',
510 'is_is': 'is_IS.ISO8859-1',
511 'iso-8859-1': 'en_US.ISO8859-1',
512 'iso-8859-15': 'en_US.ISO8859-15',
513 'iso8859-1': 'en_US.ISO8859-1',
514 'iso8859-15': 'en_US.ISO8859-15',
515 'iso_8859_1': 'en_US.ISO8859-1',
516 'iso_8859_15': 'en_US.ISO8859-15',
517 'it': 'it_IT.ISO8859-1',
518 'it_ch': 'it_CH.ISO8859-1',
519 'it_it': 'it_IT.ISO8859-1',
520 'italian': 'it_IT.ISO8859-1',
521 'iw': 'iw_IL.ISO8859-8',
522 'iw_il': 'iw_IL.ISO8859-8',
524 'ja.jis': 'ja_JP.JIS7',
525 'ja.sjis': 'ja_JP.SJIS',
526 'ja_jp': 'ja_JP.eucJP',
527 'ja_jp.ajec': 'ja_JP.eucJP',
528 'ja_jp.euc': 'ja_JP.eucJP',
529 'ja_jp.eucjp': 'ja_JP.eucJP',
530 'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
531 'ja_jp.jis': 'ja_JP.JIS7',
532 'ja_jp.jis7': 'ja_JP.JIS7',
533 'ja_jp.mscode': 'ja_JP.SJIS',
534 'ja_jp.sjis': 'ja_JP.SJIS',
535 'ja_jp.ujis': 'ja_JP.eucJP',
536 'japan': 'ja_JP.eucJP',
537 'japanese': 'ja_JP.SJIS',
538 'japanese-euc': 'ja_JP.eucJP',
539 'japanese.euc': 'ja_JP.eucJP',
540 'jp_jp': 'ja_JP.eucJP',
542 'ko_kr': 'ko_KR.eucKR',
543 'ko_kr.euc': 'ko_KR.eucKR',
544 'korean': 'ko_KR.eucKR',
545 'lt': 'lt_LT.ISO8859-4',
546 'lv': 'lv_LV.ISO8859-4',
547 'mk': 'mk_MK.ISO8859-5',
548 'mk_mk': 'mk_MK.ISO8859-5',
549 'nl': 'nl_NL.ISO8859-1',
550 'nl_be': 'nl_BE.ISO8859-1',
551 'nl_nl': 'nl_NL.ISO8859-1',
552 'no': 'no_NO.ISO8859-1',
553 'no_no': 'no_NO.ISO8859-1',
554 'norwegian': 'no_NO.ISO8859-1',
555 'pl': 'pl_PL.ISO8859-2',
556 'pl_pl': 'pl_PL.ISO8859-2',
557 'polish': 'pl_PL.ISO8859-2',
558 'portuguese': 'pt_PT.ISO8859-1',
559 'portuguese_brazil': 'pt_BR.ISO8859-1',
562 'pt': 'pt_PT.ISO8859-1',
563 'pt_br': 'pt_BR.ISO8859-1',
564 'pt_pt': 'pt_PT.ISO8859-1',
565 'ro': 'ro_RO.ISO8859-2',
566 'ro_ro': 'ro_RO.ISO8859-2',
567 'ru': 'ru_RU.ISO8859-5',
568 'ru_ru': 'ru_RU.ISO8859-5',
569 'rumanian': 'ro_RO.ISO8859-2',
570 'russian': 'ru_RU.ISO8859-5',
571 'serbocroatian': 'sh_YU.ISO8859-2',
572 'sh': 'sh_YU.ISO8859-2',
573 'sh_hr': 'sh_HR.ISO8859-2',
574 'sh_sp': 'sh_YU.ISO8859-2',
575 'sh_yu': 'sh_YU.ISO8859-2',
576 'sk': 'sk_SK.ISO8859-2',
577 'sk_sk': 'sk_SK.ISO8859-2',
578 'sl': 'sl_CS.ISO8859-2',
579 'sl_cs': 'sl_CS.ISO8859-2',
580 'sl_si': 'sl_SI.ISO8859-2',
581 'slovak': 'sk_SK.ISO8859-2',
582 'slovene': 'sl_CS.ISO8859-2',
583 'sp': 'sp_YU.ISO8859-5',
584 'sp_yu': 'sp_YU.ISO8859-5',
585 'spanish': 'es_ES.ISO8859-1',
586 'spanish_spain': 'es_ES.ISO8859-1',
587 'sr_sp': 'sr_SP.ISO8859-2',
588 'sv': 'sv_SE.ISO8859-1',
589 'sv_se': 'sv_SE.ISO8859-1',
590 'swedish': 'sv_SE.ISO8859-1',
591 'th_th': 'th_TH.TACTIS',
592 'tr': 'tr_TR.ISO8859-9',
593 'tr_tr': 'tr_TR.ISO8859-9',
594 'turkish': 'tr_TR.ISO8859-9',
596 'universal': 'en_US.utf',
598 'zh_cn': 'zh_CN.eucCN',
599 'zh_cn.big5': 'zh_TW.eucTW',
600 'zh_cn.euc': 'zh_CN.eucCN',
601 'zh_tw': 'zh_TW.eucTW',
602 'zh_tw.euc': 'zh_TW.eucTW',
#
# This maps Windows language identifiers (as used on Windows 95 and
# earlier) to locale strings.  getdefaultlocale() consults it on win32
# when the reported code is a hexadecimal identifier ("0x....").
#
# The values are language_COUNTRY codes without an encoding part; the
# country part uses the ISO 3166 code (GB for the United Kingdom, JP
# for Japan) so that the results agree with the locale_alias table.
#
# NOTE: this mapping is incomplete.  If your language is missing, send
# a note with the missing language identifier and the suggested locale
# code to Fredrik Lundh <effbot@telia.com>.  Thanks /F
#
windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom)
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    0x0411: "ja_JP", # Japanese
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}
650 def _init_categories(categories
=categories
):
651 for k
,v
in globals().items():
655 del categories
['LC_ALL']
657 print 'Locale defaults as determined by getdefaultlocale():'
659 lang
, enc
= getdefaultlocale()
660 print 'Language: ', lang
or '(undefined)'
661 print 'Encoding: ', enc
or '(undefined)'
664 print 'Locale settings on startup:'
666 for name
,category
in categories
.items():
668 lang
, enc
= getlocale(category
)
669 print ' Language: ', lang
or '(undefined)'
670 print ' Encoding: ', enc
or '(undefined)'
674 print 'Locale settings after calling resetlocale():'
677 for name
,category
in categories
.items():
679 lang
, enc
= getlocale(category
)
680 print ' Language: ', lang
or '(undefined)'
681 print ' Encoding: ', enc
or '(undefined)'
685 setlocale(LC_ALL
, "")
688 print 'setlocale(LC_ALL, "") does not support the default locale'
689 print 'given in the OS environment variables.'
692 print 'Locale settings after calling setlocale(LC_ALL, ""):'
694 for name
,category
in categories
.items():
696 lang
, enc
= getlocale(category
)
697 print ' Language: ', lang
or '(undefined)'
698 print ' Encoding: ', enc
or '(undefined)'
708 __all__
.append("LC_MESSAGES")
710 if __name__
=='__main__':
711 print 'Locale aliasing:'
715 print 'Number formatting:'