3 The module provides low-level access to the C lib's locale APIs
4 and adds high level number formatting APIs as well as a locale
5 aliasing engine to complement these.
7 The aliasing engine includes support for many commonly used locale
8 names and maps them to values suitable for passing to the C lib's
9 setlocale() function. It also includes default encodings for all
10 supported locale names.
16 # Try importing the _locale module.
18 # If this fails, fall back on a basic 'C' locale emulation.
20 # Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
21 # trying the import. So __all__ is also fiddled at the end of the file.
22 __all__
= ["setlocale","Error","localeconv","strcoll","strxfrm",
23 "format","str","atof","atoi","LC_CTYPE","LC_COLLATE",
24 "LC_TIME","LC_MONETARY","LC_NUMERIC", "LC_ALL","CHAR_MAX"]
45 """ localeconv() -> dict.
46 Returns numeric and monetary locale-specific parameters.
48 # 'C' locale default values
49 return {'grouping': [127],
50 'currency_symbol': '',
55 'n_sep_by_space': 127,
59 'p_sep_by_space': 127,
60 'int_curr_symbol': '',
63 'mon_thousands_sep': '',
65 'mon_decimal_point': '',
66 'int_frac_digits': 127}
68 def setlocale(category
, value
=None):
69 """ setlocale(integer,string=None) -> string.
70 Activates/queries locale processing.
72 if value
is not None and value
!= 'C':
73 raise Error
, '_locale emulation only supports "C" locale'
77 """ strcoll(string,string) -> int.
78 Compares two strings according to the locale.
83 """ strxfrm(string) -> string.
84 Returns a string that can be compared with cmp() in a locale-aware way.
88 ### Number formatting APIs
90 # Author: Martin von Loewis
92 #perform the grouping from right to left
95 grouping
=conv
['grouping']
96 if not grouping
:return (s
, 0)
104 while s
and grouping
:
105 # if the current group size is CHAR_MAX, we are done
106 if grouping
[0]==CHAR_MAX
:
108 # 0: re-use last group ad infinitum
112 grouping
=grouping
[1:]
114 result
=s
[-group
:]+conv
['thousands_sep']+result
119 if s
and s
[-1] not in "0123456789":
120 # the leading string is only spaces and signs
121 return s
+result
+spaces
,seps
125 result
=s
+conv
['thousands_sep']+result
127 return result
+spaces
,seps
129 def format(f
,val
,grouping
=0):
130 """Formats a value in the same way that the % formatting would use,
131 but takes the current locale into account.
132 Grouping is applied if the third parameter is true."""
134 fields
= result
.split(".")
137 fields
[0],seps
=_group(fields
[0])
139 result
= fields
[0]+localeconv()['decimal_point']+fields
[1]
143 raise Error
, "Too many decimal points in result string"
146 # If the number was formatted for a specific width, then it
147 # might have been filled with spaces to the left or right. If
148 # so, remove as many spaces as there were separators.
149 # Leading zeroes as fillers are not yet dealt with, as it is
150 # not clear how they should interact with grouping.
151 sp
= result
.find(" ")
153 result
= result
[:sp
]+result
[sp
+1:]
159 """Convert float to string, taking the locale into account."""
160 return format("%.12g",val
)
162 def atof(str,func
=float):
163 "Parses a string as a float according to the locale settings."
164 #First, get rid of the grouping
165 ts
= localeconv()['thousands_sep']
169 #next, replace the decimal point with a dot
170 dd
= localeconv()['decimal_point']
174 #finally, parse the string
178 "Converts a string to an integer according to the locale settings."
179 return atof(str, int)
182 setlocale(LC_ALL
, "")
184 s1
=format("%d", 123456789,1)
185 print s1
, "is", atoi(s1
)
188 print s1
, "is", atof(s1
)
190 ### Locale name aliasing engine
192 # Author: Marc-Andre Lemburg, mal@lemburg.com
193 # Various tweaks by Fredrik Lundh <effbot@telia.com>
195 # store away the low-level version of setlocale (it's
197 _setlocale
= setlocale
199 def normalize(localename
):
201 """ Returns a normalized locale code for the given locale
204 The returned locale code is formatted for use with
207 If normalization fails, the original name is returned
210 If the given encoding is not known, the function defaults to
211 the default encoding for the locale code just like setlocale()
215 # Normalize the locale name and extract the encoding
216 fullname
= localename
.lower()
218 # ':' is sometimes used as encoding delimiter.
219 fullname
= fullname
.replace(':', '.')
221 langname
, encoding
= fullname
.split('.')[:2]
222 fullname
= langname
+ '.' + encoding
227 # First lookup: fullname (possibly with encoding)
228 code
= locale_alias
.get(fullname
, None)
232 # Second try: langname (without encoding)
233 code
= locale_alias
.get(langname
, None)
236 langname
, defenc
= code
.split('.')
241 encoding
= encoding_alias
.get(encoding
, encoding
)
245 return langname
+ '.' + encoding
252 def _parse_localename(localename
):
254 """ Parses the locale code for localename and returns the
255 result as tuple (language code, encoding).
257 The localename is normalized and passed through the locale
258 alias engine. A ValueError is raised in case the locale name
261 The language code corresponds to RFC 1766. code and encoding
262 can be None in case the values cannot be determined or are
263 unknown to this implementation.
266 code
= normalize(localename
)
268 return code
.split('.')[:2]
271 raise ValueError, 'unknown locale: %s' % localename
273 def _build_localename(localetuple
):
275 """ Builds a locale code from the given tuple (language code,
278 No aliasing or normalizing takes place.
281 language
, encoding
= localetuple
287 return language
+ '.' + encoding
289 def getdefaultlocale(envvars
=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):
291 """ Tries to determine the default locale settings and returns
292 them as tuple (language code, encoding).
294 According to POSIX, a program which has not called
295 setlocale(LC_ALL, "") runs using the portable 'C' locale.
296 Calling setlocale(LC_ALL, "") lets it use the default locale as
297 defined by the LANG variable. Since we don't want to interfere
298 with the current locale setting we thus emulate the behavior
299 in the way described above.
301 To maintain compatibility with other platforms, not only the
302 LANG variable is tested, but a list of variables given as
303 envvars parameter. The first found to be defined will be
304 used. envvars defaults to the search path used in GNU gettext;
305 it must always contain the variable name 'LANG'.
307 Except for the code 'C', the language code corresponds to RFC
308 1766. code and encoding can be None in case the values cannot
314 # check if it's supported by the _locale module
316 code
, encoding
= _locale
._getdefaultlocale
()
317 except (ImportError, AttributeError):
320 # make sure the code/encoding values are valid
321 if sys
.platform
== "win32" and code
and code
[:2] == "0x":
322 # map windows language identifier to language name
323 code
= windows_locale
.get(int(code
, 0))
324 # ...add other platform-specific processing here, if
326 return code
, encoding
328 # fall back on POSIX behaviour
330 lookup
= os
.environ
.get
331 for variable
in envvars
:
332 localename
= lookup(variable
,None)
333 if localename
is not None:
337 return _parse_localename(localename
)
340 def getlocale(category
=LC_CTYPE
):
342 """ Returns the current setting for the given locale category as
343 tuple (language code, encoding).
345 category may be one of the LC_* values except LC_ALL. It
346 defaults to LC_CTYPE.
348 Except for the code 'C', the language code corresponds to RFC
349 1766. code and encoding can be None in case the values cannot
353 localename
= _setlocale(category
)
354 if category
== LC_ALL
and ';' in localename
:
355 raise TypeError, 'category LC_ALL is not supported'
356 return _parse_localename(localename
)
358 def setlocale(category
, locale
=None):
360 """ Set the locale for the given category. The locale can be
361 a string, a locale tuple (language code, encoding), or None.
363 Locale tuples are converted to strings using the locale aliasing
364 engine. Locale strings are passed directly to the C lib.
366 category may be given as one of the LC_* values.
369 if locale
and type(locale
) is not type(""):
371 locale
= normalize(_build_localename(locale
))
372 return _setlocale(category
, locale
)
374 def resetlocale(category
=LC_ALL
):
376 """ Sets the locale for category to the default setting.
378 The default setting is determined by calling
379 getdefaultlocale(). category defaults to LC_ALL.
382 _setlocale(category
, _build_localename(getdefaultlocale()))
386 # The following data was extracted from the locale.alias file which
387 # comes with X11 and then hand edited removing the explicit encoding
388 # definitions and adding some more aliases. The file is usually
389 # available as /usr/lib/X11/locale/locale.alias.
393 # The encoding_alias table maps lowercase encoding alias names to C
394 # locale encoding names (case-sensitive).
399 'iso8859': 'ISO8859-1',
401 '88591': 'ISO8859-1',
402 'ascii': 'ISO8859-1',
404 'iso88591': 'ISO8859-1',
405 'iso_8859-1': 'ISO8859-1',
406 '885915': 'ISO8859-15',
407 'iso885915': 'ISO8859-15',
408 'iso_8859-15': 'ISO8859-15',
409 'iso8859-2': 'ISO8859-2',
410 'iso88592': 'ISO8859-2',
411 'iso_8859-2': 'ISO8859-2',
412 'iso88595': 'ISO8859-5',
413 'iso88596': 'ISO8859-6',
414 'iso88597': 'ISO8859-7',
415 'iso88598': 'ISO8859-8',
416 'iso88599': 'ISO8859-9',
417 'iso-2022-jp': 'JIS7',
431 # The locale_alias table maps lowercase alias names to C locale names
432 # (case-sensitive). Encodings are always separated from the locale
433 # name using a dot ('.'); they should only be given in case the
434 # language name is needed to interpret the given encoding alias
435 # correctly (CJK codes often have this need).
438 'american': 'en_US.ISO8859-1',
439 'ar': 'ar_AA.ISO8859-6',
440 'ar_aa': 'ar_AA.ISO8859-6',
441 'ar_sa': 'ar_SA.ISO8859-6',
442 'arabic': 'ar_AA.ISO8859-6',
443 'bg': 'bg_BG.ISO8859-5',
444 'bg_bg': 'bg_BG.ISO8859-5',
445 'bulgarian': 'bg_BG.ISO8859-5',
446 'c-french': 'fr_CA.ISO8859-1',
449 'cextend': 'en_US.ISO8859-1',
450 'chinese-s': 'zh_CN.eucCN',
451 'chinese-t': 'zh_TW.eucTW',
452 'croatian': 'hr_HR.ISO8859-2',
453 'cs': 'cs_CZ.ISO8859-2',
454 'cs_cs': 'cs_CZ.ISO8859-2',
455 'cs_cz': 'cs_CZ.ISO8859-2',
456 'cz': 'cz_CZ.ISO8859-2',
457 'cz_cz': 'cz_CZ.ISO8859-2',
458 'czech': 'cs_CS.ISO8859-2',
459 'da': 'da_DK.ISO8859-1',
460 'da_dk': 'da_DK.ISO8859-1',
461 'danish': 'da_DK.ISO8859-1',
462 'de': 'de_DE.ISO8859-1',
463 'de_at': 'de_AT.ISO8859-1',
464 'de_ch': 'de_CH.ISO8859-1',
465 'de_de': 'de_DE.ISO8859-1',
466 'dutch': 'nl_BE.ISO8859-1',
467 'ee': 'ee_EE.ISO8859-4',
468 'el': 'el_GR.ISO8859-7',
469 'el_gr': 'el_GR.ISO8859-7',
470 'en': 'en_US.ISO8859-1',
471 'en_au': 'en_AU.ISO8859-1',
472 'en_ca': 'en_CA.ISO8859-1',
473 'en_gb': 'en_GB.ISO8859-1',
474 'en_ie': 'en_IE.ISO8859-1',
475 'en_nz': 'en_NZ.ISO8859-1',
476 'en_uk': 'en_GB.ISO8859-1',
477 'en_us': 'en_US.ISO8859-1',
478 'eng_gb': 'en_GB.ISO8859-1',
479 'english': 'en_EN.ISO8859-1',
480 'english_uk': 'en_GB.ISO8859-1',
481 'english_united-states': 'en_US.ISO8859-1',
482 'english_us': 'en_US.ISO8859-1',
483 'es': 'es_ES.ISO8859-1',
484 'es_ar': 'es_AR.ISO8859-1',
485 'es_bo': 'es_BO.ISO8859-1',
486 'es_cl': 'es_CL.ISO8859-1',
487 'es_co': 'es_CO.ISO8859-1',
488 'es_cr': 'es_CR.ISO8859-1',
489 'es_ec': 'es_EC.ISO8859-1',
490 'es_es': 'es_ES.ISO8859-1',
491 'es_gt': 'es_GT.ISO8859-1',
492 'es_mx': 'es_MX.ISO8859-1',
493 'es_ni': 'es_NI.ISO8859-1',
494 'es_pa': 'es_PA.ISO8859-1',
495 'es_pe': 'es_PE.ISO8859-1',
496 'es_py': 'es_PY.ISO8859-1',
497 'es_sv': 'es_SV.ISO8859-1',
498 'es_uy': 'es_UY.ISO8859-1',
499 'es_ve': 'es_VE.ISO8859-1',
500 'et': 'et_EE.ISO8859-4',
501 'et_ee': 'et_EE.ISO8859-4',
502 'fi': 'fi_FI.ISO8859-1',
503 'fi_fi': 'fi_FI.ISO8859-1',
504 'finnish': 'fi_FI.ISO8859-1',
505 'fr': 'fr_FR.ISO8859-1',
506 'fr_be': 'fr_BE.ISO8859-1',
507 'fr_ca': 'fr_CA.ISO8859-1',
508 'fr_ch': 'fr_CH.ISO8859-1',
509 'fr_fr': 'fr_FR.ISO8859-1',
510 'fre_fr': 'fr_FR.ISO8859-1',
511 'french': 'fr_FR.ISO8859-1',
512 'french_france': 'fr_FR.ISO8859-1',
513 'ger_de': 'de_DE.ISO8859-1',
514 'german': 'de_DE.ISO8859-1',
515 'german_germany': 'de_DE.ISO8859-1',
516 'greek': 'el_GR.ISO8859-7',
517 'hebrew': 'iw_IL.ISO8859-8',
518 'hr': 'hr_HR.ISO8859-2',
519 'hr_hr': 'hr_HR.ISO8859-2',
520 'hu': 'hu_HU.ISO8859-2',
521 'hu_hu': 'hu_HU.ISO8859-2',
522 'hungarian': 'hu_HU.ISO8859-2',
523 'icelandic': 'is_IS.ISO8859-1',
524 'id': 'id_ID.ISO8859-1',
525 'id_id': 'id_ID.ISO8859-1',
526 'is': 'is_IS.ISO8859-1',
527 'is_is': 'is_IS.ISO8859-1',
528 'iso-8859-1': 'en_US.ISO8859-1',
529 'iso-8859-15': 'en_US.ISO8859-15',
530 'iso8859-1': 'en_US.ISO8859-1',
531 'iso8859-15': 'en_US.ISO8859-15',
532 'iso_8859_1': 'en_US.ISO8859-1',
533 'iso_8859_15': 'en_US.ISO8859-15',
534 'it': 'it_IT.ISO8859-1',
535 'it_ch': 'it_CH.ISO8859-1',
536 'it_it': 'it_IT.ISO8859-1',
537 'italian': 'it_IT.ISO8859-1',
538 'iw': 'iw_IL.ISO8859-8',
539 'iw_il': 'iw_IL.ISO8859-8',
541 'ja.jis': 'ja_JP.JIS7',
542 'ja.sjis': 'ja_JP.SJIS',
543 'ja_jp': 'ja_JP.eucJP',
544 'ja_jp.ajec': 'ja_JP.eucJP',
545 'ja_jp.euc': 'ja_JP.eucJP',
546 'ja_jp.eucjp': 'ja_JP.eucJP',
547 'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
548 'ja_jp.jis': 'ja_JP.JIS7',
549 'ja_jp.jis7': 'ja_JP.JIS7',
550 'ja_jp.mscode': 'ja_JP.SJIS',
551 'ja_jp.sjis': 'ja_JP.SJIS',
552 'ja_jp.ujis': 'ja_JP.eucJP',
553 'japan': 'ja_JP.eucJP',
554 'japanese': 'ja_JP.SJIS',
555 'japanese-euc': 'ja_JP.eucJP',
556 'japanese.euc': 'ja_JP.eucJP',
557 'jp_jp': 'ja_JP.eucJP',
559 'ko_kr': 'ko_KR.eucKR',
560 'ko_kr.euc': 'ko_KR.eucKR',
561 'korean': 'ko_KR.eucKR',
562 'lt': 'lt_LT.ISO8859-4',
563 'lv': 'lv_LV.ISO8859-4',
564 'mk': 'mk_MK.ISO8859-5',
565 'mk_mk': 'mk_MK.ISO8859-5',
566 'nl': 'nl_NL.ISO8859-1',
567 'nl_be': 'nl_BE.ISO8859-1',
568 'nl_nl': 'nl_NL.ISO8859-1',
569 'no': 'no_NO.ISO8859-1',
570 'no_no': 'no_NO.ISO8859-1',
571 'norwegian': 'no_NO.ISO8859-1',
572 'pl': 'pl_PL.ISO8859-2',
573 'pl_pl': 'pl_PL.ISO8859-2',
574 'polish': 'pl_PL.ISO8859-2',
575 'portuguese': 'pt_PT.ISO8859-1',
576 'portuguese_brazil': 'pt_BR.ISO8859-1',
579 'pt': 'pt_PT.ISO8859-1',
580 'pt_br': 'pt_BR.ISO8859-1',
581 'pt_pt': 'pt_PT.ISO8859-1',
582 'ro': 'ro_RO.ISO8859-2',
583 'ro_ro': 'ro_RO.ISO8859-2',
584 'ru': 'ru_RU.ISO8859-5',
585 'ru_ru': 'ru_RU.ISO8859-5',
586 'rumanian': 'ro_RO.ISO8859-2',
587 'russian': 'ru_RU.ISO8859-5',
588 'serbocroatian': 'sh_YU.ISO8859-2',
589 'sh': 'sh_YU.ISO8859-2',
590 'sh_hr': 'sh_HR.ISO8859-2',
591 'sh_sp': 'sh_YU.ISO8859-2',
592 'sh_yu': 'sh_YU.ISO8859-2',
593 'sk': 'sk_SK.ISO8859-2',
594 'sk_sk': 'sk_SK.ISO8859-2',
595 'sl': 'sl_CS.ISO8859-2',
596 'sl_cs': 'sl_CS.ISO8859-2',
597 'sl_si': 'sl_SI.ISO8859-2',
598 'slovak': 'sk_SK.ISO8859-2',
599 'slovene': 'sl_CS.ISO8859-2',
600 'sp': 'sp_YU.ISO8859-5',
601 'sp_yu': 'sp_YU.ISO8859-5',
602 'spanish': 'es_ES.ISO8859-1',
603 'spanish_spain': 'es_ES.ISO8859-1',
604 'sr_sp': 'sr_SP.ISO8859-2',
605 'sv': 'sv_SE.ISO8859-1',
606 'sv_se': 'sv_SE.ISO8859-1',
607 'swedish': 'sv_SE.ISO8859-1',
608 'th_th': 'th_TH.TACTIS',
609 'tr': 'tr_TR.ISO8859-9',
610 'tr_tr': 'tr_TR.ISO8859-9',
611 'turkish': 'tr_TR.ISO8859-9',
613 'universal': 'en_US.utf',
615 'zh_cn': 'zh_CN.eucCN',
616 'zh_cn.big5': 'zh_TW.eucTW',
617 'zh_cn.euc': 'zh_CN.eucCN',
618 'zh_tw': 'zh_TW.eucTW',
619 'zh_tw.euc': 'zh_TW.eucTW',
623 # this maps windows language identifiers (as used on Windows 95 and
624 # earlier) to locale strings.
626 # NOTE: this mapping is incomplete. If your language is missing, send
627 # a note with the missing language identifier and the suggested locale
628 # code to Fredrik Lundh <effbot@telia.com>. Thanks /F
631 0x0404: "zh_TW", # Chinese (Taiwan)
632 0x0804: "zh_CN", # Chinese (PRC)
633 0x0406: "da_DK", # Danish
634 0x0413: "nl_NL", # Dutch (Netherlands)
635 0x0409: "en_US", # English (United States)
636 0x0809: "en_UK", # English (United Kingdom)
637 0x0c09: "en_AU", # English (Australian)
638 0x1009: "en_CA", # English (Canadian)
639 0x1409: "en_NZ", # English (New Zealand)
640 0x1809: "en_IE", # English (Ireland)
641 0x1c09: "en_ZA", # English (South Africa)
642 0x040b: "fi_FI", # Finnish
643 0x040c: "fr_FR", # French (Standard)
644 0x080c: "fr_BE", # French (Belgian)
645 0x0c0c: "fr_CA", # French (Canadian)
646 0x100c: "fr_CH", # French (Switzerland)
647 0x0407: "de_DE", # German (Standard)
648 0x0408: "el_GR", # Greek
649 0x040d: "iw_IL", # Hebrew
650 0x040f: "is_IS", # Icelandic
651 0x0410: "it_IT", # Italian (Standard)
652 0x0411: "ja_JA", # Japanese
653 0x0414: "no_NO", # Norwegian (Bokmal)
654 0x0816: "pt_PT", # Portuguese (Standard)
655 0x0c0a: "es_ES", # Spanish (Modern Sort)
656 0x0441: "sw_KE", # Swahili (Kenya)
657 0x041d: "sv_SE", # Swedish
658 0x081d: "sv_FI", # Swedish (Finland)
659 0x041f: "tr_TR", # Turkish
667 def _init_categories(categories
=categories
):
668 for k
,v
in globals().items():
672 del categories
['LC_ALL']
674 print 'Locale defaults as determined by getdefaultlocale():'
676 lang
, enc
= getdefaultlocale()
677 print 'Language: ', lang
or '(undefined)'
678 print 'Encoding: ', enc
or '(undefined)'
681 print 'Locale settings on startup:'
683 for name
,category
in categories
.items():
685 lang
, enc
= getlocale(category
)
686 print ' Language: ', lang
or '(undefined)'
687 print ' Encoding: ', enc
or '(undefined)'
691 print 'Locale settings after calling resetlocale():'
694 for name
,category
in categories
.items():
696 lang
, enc
= getlocale(category
)
697 print ' Language: ', lang
or '(undefined)'
698 print ' Encoding: ', enc
or '(undefined)'
702 setlocale(LC_ALL
, "")
705 print 'setlocale(LC_ALL, "") does not support the default locale'
706 print 'given in the OS environment variables.'
709 print 'Locale settings after calling setlocale(LC_ALL, ""):'
711 for name
,category
in categories
.items():
713 lang
, enc
= getlocale(category
)
714 print ' Language: ', lang
or '(undefined)'
715 print ' Encoding: ', enc
or '(undefined)'
725 __all__
.append("LC_MESSAGES")
727 if __name__
=='__main__':
728 print 'Locale aliasing:'
732 print 'Number formatting:'