3 The module provides low-level access to the C lib's locale APIs
4 and adds high level number formatting APIs as well as a locale
5 aliasing engine to complement these.
7 The aliasing engine includes support for many commonly used locale
8 names and maps them to values suitable for passing to the C lib's
9 setlocale() function. It also includes default encodings for all
10 supported locale names.
16 # Try importing the _locale module.
18 # If this fails, fall back on a basic 'C' locale emulation.
20 # Yuck: LC_MESSAGES is non-standard: can't tell whether it exists before
21 # trying the import. So __all__ is also fiddled at the end of the file.
22 __all__
= ["setlocale","Error","localeconv","strcoll","strxfrm",
23 "format","str","atof","atoi","LC_CTYPE","LC_COLLATE",
24 "LC_TIME","LC_MONETARY","LC_NUMERIC", "LC_ALL","CHAR_MAX"]
45 """ localeconv() -> dict.
46 Returns numeric and monetary locale-specific parameters.
48 # 'C' locale default values
49 return {'grouping': [127],
50 'currency_symbol': '',
55 'n_sep_by_space': 127,
59 'p_sep_by_space': 127,
60 'int_curr_symbol': '',
63 'mon_thousands_sep': '',
65 'mon_decimal_point': '',
66 'int_frac_digits': 127}
68 def setlocale(category
, value
=None):
69 """ setlocale(integer,string=None) -> string.
70 Activates/queries locale processing.
72 if value
not in (None, '', 'C'):
73 raise Error
, '_locale emulation only supports "C" locale'
77 """ strcoll(string,string) -> int.
78 Compares two strings according to the locale.
83 """ strxfrm(string) -> string.
84 Returns a string that compares in a locale-aware way when used with cmp().
88 ### Number formatting APIs
90 # Author: Martin von Loewis
92 #perform the grouping from right to left
95 grouping
=conv
['grouping']
96 if not grouping
:return (s
, 0)
104 while s
and grouping
:
105 # if grouping[0] is CHAR_MAX, we are done
106 if grouping
[0]==CHAR_MAX
:
108 # 0: re-use last group ad infinitum
112 grouping
=grouping
[1:]
114 result
=s
[-group
:]+conv
['thousands_sep']+result
119 if s
and s
[-1] not in "0123456789":
120 # the leading string is only spaces and signs
121 return s
+result
+spaces
,seps
125 result
=s
+conv
['thousands_sep']+result
127 return result
+spaces
,seps
129 def format(f
,val
,grouping
=0):
130 """Formats a value in the same way that the % formatting would use,
131 but takes the current locale into account.
132 Grouping is applied if the third parameter is true."""
134 fields
= result
.split(".")
137 fields
[0],seps
=_group(fields
[0])
139 result
= fields
[0]+localeconv()['decimal_point']+fields
[1]
143 raise Error
, "Too many decimal points in result string"
146 # If the number was formatted for a specific width, then it
147 # might have been filled with spaces to the left or right. If
148 # so, remove as many spaces as there were separators.
149 # Leading zeroes as fillers are not yet dealt with, as it is
150 # not clear how they should interact with grouping.
151 sp
= result
.find(" ")
153 result
= result
[:sp
]+result
[sp
+1:]
159 """Convert float to string, taking the locale into account."""
160 return format("%.12g",val
)
162 def atof(string
,func
=float):
163 "Parses a string as a float according to the locale settings."
164 #First, get rid of the grouping
165 ts
= localeconv()['thousands_sep']
167 string
= string
.replace(ts
, '')
168 #next, replace the decimal point with a dot
169 dd
= localeconv()['decimal_point']
171 string
= string
.replace(dd
, '.')
172 #finally, parse the string
176 "Converts a string to an integer according to the locale settings."
177 return atof(str, int)
180 setlocale(LC_ALL
, "")
182 s1
=format("%d", 123456789,1)
183 print s1
, "is", atoi(s1
)
186 print s1
, "is", atof(s1
)
188 ### Locale name aliasing engine
190 # Author: Marc-Andre Lemburg, mal@lemburg.com
191 # Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
193 # store away the low-level version of setlocale (it's
195 _setlocale
= setlocale
197 def normalize(localename
):
199 """ Returns a normalized locale code for the given locale
202 The returned locale code is formatted for use with
205 If normalization fails, the original name is returned
208 If the given encoding is not known, the function defaults to
209 the default encoding for the locale code just like setlocale()
213 # Normalize the locale name and extract the encoding
214 fullname
= localename
.lower()
216 # ':' is sometimes used as encoding delimiter.
217 fullname
= fullname
.replace(':', '.')
219 langname
, encoding
= fullname
.split('.')[:2]
220 fullname
= langname
+ '.' + encoding
225 # First lookup: fullname (possibly with encoding)
226 code
= locale_alias
.get(fullname
, None)
230 # Second try: langname (without encoding)
231 code
= locale_alias
.get(langname
, None)
234 langname
, defenc
= code
.split('.')
239 encoding
= encoding_alias
.get(encoding
, encoding
)
243 return langname
+ '.' + encoding
250 def _parse_localename(localename
):
252 """ Parses the locale code for localename and returns the
253 result as tuple (language code, encoding).
255 The localename is normalized and passed through the locale
256 alias engine. A ValueError is raised in case the locale name
259 The language code corresponds to RFC 1766. code and encoding
260 can be None in case the values cannot be determined or are
261 unknown to this implementation.
264 code
= normalize(localename
)
265 if '@' in localename
:
266 # Deal with locale modifiers
267 code
, modifier
= code
.split('@')
268 if modifier
== 'euro' and '.' not in code
:
269 # Assume Latin-9 for @euro locales. This is bogus,
270 # since some systems may use other encodings for these
271 # locales. Also, we ignore other modifiers.
272 return code
, 'iso-8859-15'
275 return code
.split('.')[:2]
278 raise ValueError, 'unknown locale: %s' % localename
280 def _build_localename(localetuple
):
282 """ Builds a locale code from the given tuple (language code,
285 No aliasing or normalizing takes place.
288 language
, encoding
= localetuple
294 return language
+ '.' + encoding
296 def getdefaultlocale(envvars
=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):
298 """ Tries to determine the default locale settings and returns
299 them as tuple (language code, encoding).
301 According to POSIX, a program which has not called
302 setlocale(LC_ALL, "") runs using the portable 'C' locale.
303 Calling setlocale(LC_ALL, "") lets it use the default locale as
304 defined by the LANG variable. Since we don't want to interfere
305 with the current locale setting we thus emulate the behavior
306 in the way described above.
308 To maintain compatibility with other platforms, not only the
309 LANG variable is tested, but a list of variables given as
310 envvars parameter. The first found to be defined will be
311 used. envvars defaults to the search path used in GNU gettext;
312 it must always contain the variable name 'LANG'.
314 Except for the code 'C', the language code corresponds to RFC
315 1766. code and encoding can be None in case the values cannot
321 # check if it's supported by the _locale module
323 code
, encoding
= _locale
._getdefaultlocale
()
324 except (ImportError, AttributeError):
327 # make sure the code/encoding values are valid
328 if sys
.platform
== "win32" and code
and code
[:2] == "0x":
329 # map windows language identifier to language name
330 code
= windows_locale
.get(int(code
, 0))
331 # ...add other platform-specific processing here, if
333 return code
, encoding
335 # fall back on POSIX behaviour
337 lookup
= os
.environ
.get
338 for variable
in envvars
:
339 localename
= lookup(variable
,None)
340 if localename
is not None:
344 return _parse_localename(localename
)
def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* values except LC_ALL. It
        defaults to LC_CTYPE.

        The current setting is queried through the low-level
        setlocale() saved as _setlocale and then parsed by
        _parse_localename().

        Except for the code 'C', the language code corresponds to RFC
        1766. code and encoding can be None in case the values cannot
        be determined.

        Raises TypeError if category is LC_ALL and the reported
        setting contains a ';'.

    """
    localename = _setlocale(category)
    if category == LC_ALL and ';' in localename:
        # NOTE(review): presumably a ';' marks a composite setting with
        # different values per category, which cannot be expressed as
        # one (language code, encoding) tuple -- confirm against the
        # C lib's setlocale() output format.
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(localename)
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine (_build_localename() followed by normalize()).
        Locale strings, including instances of str subclasses, are
        passed directly to the C lib.

        category may be given as one of the LC_* values.

        Returns the result of the low-level setlocale() call.

    """
    # Only truthy non-string values are converted; None, '' and other
    # falsy values are handed to the low-level function unchanged.
    if locale and not isinstance(locale, str):
        # convert the locale tuple to a normalized locale string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)
def resetlocale(category=LC_ALL):

    """ Resets the locale for category to the default setting.

        The default is obtained from getdefaultlocale(), turned into
        a locale string by _build_localename() and then handed to the
        low-level setlocale() saved as _setlocale.  category defaults
        to LC_ALL.

    """
    default_tuple = getdefaultlocale()
    default_name = _build_localename(default_tuple)
    _setlocale(category, default_name)
391 if sys
.platform
in ('win32', 'darwin', 'mac'):
392 # On Win32, this will return the ANSI code page
393 # On the Mac, it should return the system encoding;
394 # it might return "ascii" instead
395 def getpreferredencoding(do_setlocale
= True):
396 """Return the charset that the user is likely using."""
398 return _locale
._getdefaultlocale
()[1]
400 # On Unix, if CODESET is available, use that.
404 # Fall back to parsing environment variables :-(
405 def getpreferredencoding(do_setlocale
= True):
406 """Return the charset that the user is likely using,
407 by looking at environment variables."""
408 return getdefaultlocale()[1]
410 def getpreferredencoding(do_setlocale
= True):
411 """Return the charset that the user is likely using,
412 according to the system configuration."""
414 oldloc
= setlocale(LC_CTYPE
)
415 setlocale(LC_CTYPE
, "")
416 result
= nl_langinfo(CODESET
)
417 setlocale(LC_CTYPE
, oldloc
)
420 return nl_langinfo(CODESET
)
425 # The following data was extracted from the locale.alias file which
426 # comes with X11 and then hand edited removing the explicit encoding
427 # definitions and adding some more aliases. The file is usually
428 # available as /usr/lib/X11/locale/locale.alias.
432 # The encoding_alias table maps lowercase encoding alias names to C
433 # locale encoding names (case-sensitive).
438 'iso8859': 'ISO8859-1',
440 '88591': 'ISO8859-1',
441 'ascii': 'ISO8859-1',
443 'iso88591': 'ISO8859-1',
444 'iso_8859-1': 'ISO8859-1',
445 '885915': 'ISO8859-15',
446 'iso885915': 'ISO8859-15',
447 'iso_8859-15': 'ISO8859-15',
448 'iso8859-2': 'ISO8859-2',
449 'iso88592': 'ISO8859-2',
450 'iso_8859-2': 'ISO8859-2',
451 'iso88595': 'ISO8859-5',
452 'iso88596': 'ISO8859-6',
453 'iso88597': 'ISO8859-7',
454 'iso88598': 'ISO8859-8',
455 'iso88599': 'ISO8859-9',
456 'iso-2022-jp': 'JIS7',
470 # The locale_alias table maps lowercase alias names to C locale names
471 # (case-sensitive). Encodings are always separated from the locale
472 # name using a dot ('.'); they should only be given in case the
473 # language name is needed to interpret the given encoding alias
474 # correctly (CJK codes often have this need).
477 'american': 'en_US.ISO8859-1',
478 'ar': 'ar_AA.ISO8859-6',
479 'ar_aa': 'ar_AA.ISO8859-6',
480 'ar_sa': 'ar_SA.ISO8859-6',
481 'arabic': 'ar_AA.ISO8859-6',
482 'bg': 'bg_BG.ISO8859-5',
483 'bg_bg': 'bg_BG.ISO8859-5',
484 'bulgarian': 'bg_BG.ISO8859-5',
485 'c-french': 'fr_CA.ISO8859-1',
488 'cextend': 'en_US.ISO8859-1',
489 'chinese-s': 'zh_CN.eucCN',
490 'chinese-t': 'zh_TW.eucTW',
491 'croatian': 'hr_HR.ISO8859-2',
492 'cs': 'cs_CZ.ISO8859-2',
493 'cs_cs': 'cs_CZ.ISO8859-2',
494 'cs_cz': 'cs_CZ.ISO8859-2',
495 'cz': 'cz_CZ.ISO8859-2',
496 'cz_cz': 'cz_CZ.ISO8859-2',
497 'czech': 'cs_CS.ISO8859-2',
498 'da': 'da_DK.ISO8859-1',
499 'da_dk': 'da_DK.ISO8859-1',
500 'danish': 'da_DK.ISO8859-1',
501 'de': 'de_DE.ISO8859-1',
502 'de_at': 'de_AT.ISO8859-1',
503 'de_ch': 'de_CH.ISO8859-1',
504 'de_de': 'de_DE.ISO8859-1',
505 'dutch': 'nl_BE.ISO8859-1',
506 'ee': 'ee_EE.ISO8859-4',
507 'el': 'el_GR.ISO8859-7',
508 'el_gr': 'el_GR.ISO8859-7',
509 'en': 'en_US.ISO8859-1',
510 'en_au': 'en_AU.ISO8859-1',
511 'en_ca': 'en_CA.ISO8859-1',
512 'en_gb': 'en_GB.ISO8859-1',
513 'en_ie': 'en_IE.ISO8859-1',
514 'en_nz': 'en_NZ.ISO8859-1',
515 'en_uk': 'en_GB.ISO8859-1',
516 'en_us': 'en_US.ISO8859-1',
517 'eng_gb': 'en_GB.ISO8859-1',
518 'english': 'en_EN.ISO8859-1',
519 'english_uk': 'en_GB.ISO8859-1',
520 'english_united-states': 'en_US.ISO8859-1',
521 'english_us': 'en_US.ISO8859-1',
522 'es': 'es_ES.ISO8859-1',
523 'es_ar': 'es_AR.ISO8859-1',
524 'es_bo': 'es_BO.ISO8859-1',
525 'es_cl': 'es_CL.ISO8859-1',
526 'es_co': 'es_CO.ISO8859-1',
527 'es_cr': 'es_CR.ISO8859-1',
528 'es_ec': 'es_EC.ISO8859-1',
529 'es_es': 'es_ES.ISO8859-1',
530 'es_gt': 'es_GT.ISO8859-1',
531 'es_mx': 'es_MX.ISO8859-1',
532 'es_ni': 'es_NI.ISO8859-1',
533 'es_pa': 'es_PA.ISO8859-1',
534 'es_pe': 'es_PE.ISO8859-1',
535 'es_py': 'es_PY.ISO8859-1',
536 'es_sv': 'es_SV.ISO8859-1',
537 'es_uy': 'es_UY.ISO8859-1',
538 'es_ve': 'es_VE.ISO8859-1',
539 'et': 'et_EE.ISO8859-4',
540 'et_ee': 'et_EE.ISO8859-4',
541 'fi': 'fi_FI.ISO8859-1',
542 'fi_fi': 'fi_FI.ISO8859-1',
543 'finnish': 'fi_FI.ISO8859-1',
544 'fr': 'fr_FR.ISO8859-1',
545 'fr_be': 'fr_BE.ISO8859-1',
546 'fr_ca': 'fr_CA.ISO8859-1',
547 'fr_ch': 'fr_CH.ISO8859-1',
548 'fr_fr': 'fr_FR.ISO8859-1',
549 'fre_fr': 'fr_FR.ISO8859-1',
550 'french': 'fr_FR.ISO8859-1',
551 'french_france': 'fr_FR.ISO8859-1',
552 'ger_de': 'de_DE.ISO8859-1',
553 'german': 'de_DE.ISO8859-1',
554 'german_germany': 'de_DE.ISO8859-1',
555 'greek': 'el_GR.ISO8859-7',
556 'hebrew': 'iw_IL.ISO8859-8',
557 'hr': 'hr_HR.ISO8859-2',
558 'hr_hr': 'hr_HR.ISO8859-2',
559 'hu': 'hu_HU.ISO8859-2',
560 'hu_hu': 'hu_HU.ISO8859-2',
561 'hungarian': 'hu_HU.ISO8859-2',
562 'icelandic': 'is_IS.ISO8859-1',
563 'id': 'id_ID.ISO8859-1',
564 'id_id': 'id_ID.ISO8859-1',
565 'is': 'is_IS.ISO8859-1',
566 'is_is': 'is_IS.ISO8859-1',
567 'iso-8859-1': 'en_US.ISO8859-1',
568 'iso-8859-15': 'en_US.ISO8859-15',
569 'iso8859-1': 'en_US.ISO8859-1',
570 'iso8859-15': 'en_US.ISO8859-15',
571 'iso_8859_1': 'en_US.ISO8859-1',
572 'iso_8859_15': 'en_US.ISO8859-15',
573 'it': 'it_IT.ISO8859-1',
574 'it_ch': 'it_CH.ISO8859-1',
575 'it_it': 'it_IT.ISO8859-1',
576 'italian': 'it_IT.ISO8859-1',
577 'iw': 'iw_IL.ISO8859-8',
578 'iw_il': 'iw_IL.ISO8859-8',
580 'ja.jis': 'ja_JP.JIS7',
581 'ja.sjis': 'ja_JP.SJIS',
582 'ja_jp': 'ja_JP.eucJP',
583 'ja_jp.ajec': 'ja_JP.eucJP',
584 'ja_jp.euc': 'ja_JP.eucJP',
585 'ja_jp.eucjp': 'ja_JP.eucJP',
586 'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
587 'ja_jp.jis': 'ja_JP.JIS7',
588 'ja_jp.jis7': 'ja_JP.JIS7',
589 'ja_jp.mscode': 'ja_JP.SJIS',
590 'ja_jp.sjis': 'ja_JP.SJIS',
591 'ja_jp.ujis': 'ja_JP.eucJP',
592 'japan': 'ja_JP.eucJP',
593 'japanese': 'ja_JP.SJIS',
594 'japanese-euc': 'ja_JP.eucJP',
595 'japanese.euc': 'ja_JP.eucJP',
596 'jp_jp': 'ja_JP.eucJP',
598 'ko_kr': 'ko_KR.eucKR',
599 'ko_kr.euc': 'ko_KR.eucKR',
600 'korean': 'ko_KR.eucKR',
601 'lt': 'lt_LT.ISO8859-4',
602 'lv': 'lv_LV.ISO8859-4',
603 'mk': 'mk_MK.ISO8859-5',
604 'mk_mk': 'mk_MK.ISO8859-5',
605 'nl': 'nl_NL.ISO8859-1',
606 'nl_be': 'nl_BE.ISO8859-1',
607 'nl_nl': 'nl_NL.ISO8859-1',
608 'no': 'no_NO.ISO8859-1',
609 'no_no': 'no_NO.ISO8859-1',
610 'norwegian': 'no_NO.ISO8859-1',
611 'pl': 'pl_PL.ISO8859-2',
612 'pl_pl': 'pl_PL.ISO8859-2',
613 'polish': 'pl_PL.ISO8859-2',
614 'portuguese': 'pt_PT.ISO8859-1',
615 'portuguese_brazil': 'pt_BR.ISO8859-1',
618 'pt': 'pt_PT.ISO8859-1',
619 'pt_br': 'pt_BR.ISO8859-1',
620 'pt_pt': 'pt_PT.ISO8859-1',
621 'ro': 'ro_RO.ISO8859-2',
622 'ro_ro': 'ro_RO.ISO8859-2',
623 'ru': 'ru_RU.ISO8859-5',
624 'ru_ru': 'ru_RU.ISO8859-5',
625 'rumanian': 'ro_RO.ISO8859-2',
626 'russian': 'ru_RU.ISO8859-5',
627 'serbocroatian': 'sh_YU.ISO8859-2',
628 'sh': 'sh_YU.ISO8859-2',
629 'sh_hr': 'sh_HR.ISO8859-2',
630 'sh_sp': 'sh_YU.ISO8859-2',
631 'sh_yu': 'sh_YU.ISO8859-2',
632 'sk': 'sk_SK.ISO8859-2',
633 'sk_sk': 'sk_SK.ISO8859-2',
634 'sl': 'sl_CS.ISO8859-2',
635 'sl_cs': 'sl_CS.ISO8859-2',
636 'sl_si': 'sl_SI.ISO8859-2',
637 'slovak': 'sk_SK.ISO8859-2',
638 'slovene': 'sl_CS.ISO8859-2',
639 'sp': 'sp_YU.ISO8859-5',
640 'sp_yu': 'sp_YU.ISO8859-5',
641 'spanish': 'es_ES.ISO8859-1',
642 'spanish_spain': 'es_ES.ISO8859-1',
643 'sr_sp': 'sr_SP.ISO8859-2',
644 'sv': 'sv_SE.ISO8859-1',
645 'sv_se': 'sv_SE.ISO8859-1',
646 'swedish': 'sv_SE.ISO8859-1',
647 'th_th': 'th_TH.TACTIS',
648 'tr': 'tr_TR.ISO8859-9',
649 'tr_tr': 'tr_TR.ISO8859-9',
650 'turkish': 'tr_TR.ISO8859-9',
652 'universal': 'en_US.utf',
654 'zh_cn': 'zh_CN.eucCN',
655 'zh_cn.big5': 'zh_TW.eucTW',
656 'zh_cn.euc': 'zh_CN.eucCN',
657 'zh_tw': 'zh_TW.eucTW',
658 'zh_tw.euc': 'zh_TW.eucTW',
662 # this maps windows language identifiers (as used on Windows 95 and
663 # earlier) to locale strings.
665 # NOTE: this mapping is incomplete. If your language is missing, please
666 # submit a bug report to the Python bug manager, which you can find via:
667 # http://www.python.org/dev/
668 # Make sure you include the missing language identifier and the suggested
673 0x0404: "zh_TW", # Chinese (Taiwan)
674 0x0804: "zh_CN", # Chinese (PRC)
675 0x0406: "da_DK", # Danish
676 0x0413: "nl_NL", # Dutch (Netherlands)
677 0x0409: "en_US", # English (United States)
678 0x0809: "en_UK", # English (United Kingdom)
679 0x0c09: "en_AU", # English (Australian)
680 0x1009: "en_CA", # English (Canadian)
681 0x1409: "en_NZ", # English (New Zealand)
682 0x1809: "en_IE", # English (Ireland)
683 0x1c09: "en_ZA", # English (South Africa)
684 0x040b: "fi_FI", # Finnish
685 0x040c: "fr_FR", # French (Standard)
686 0x080c: "fr_BE", # French (Belgian)
687 0x0c0c: "fr_CA", # French (Canadian)
688 0x100c: "fr_CH", # French (Switzerland)
689 0x0407: "de_DE", # German (Standard)
690 0x0408: "el_GR", # Greek
691 0x040d: "iw_IL", # Hebrew
692 0x040f: "is_IS", # Icelandic
693 0x0410: "it_IT", # Italian (Standard)
694 0x0411: "ja_JA", # Japanese
695 0x0414: "no_NO", # Norwegian (Bokmal)
696 0x0816: "pt_PT", # Portuguese (Standard)
697 0x0c0a: "es_ES", # Spanish (Modern Sort)
698 0x0441: "sw_KE", # Swahili (Kenya)
699 0x041d: "sv_SE", # Swedish
700 0x081d: "sv_FI", # Swedish (Finland)
701 0x041f: "tr_TR", # Turkish
709 def _init_categories(categories
=categories
):
710 for k
,v
in globals().items():
714 del categories
['LC_ALL']
716 print 'Locale defaults as determined by getdefaultlocale():'
718 lang
, enc
= getdefaultlocale()
719 print 'Language: ', lang
or '(undefined)'
720 print 'Encoding: ', enc
or '(undefined)'
723 print 'Locale settings on startup:'
725 for name
,category
in categories
.items():
727 lang
, enc
= getlocale(category
)
728 print ' Language: ', lang
or '(undefined)'
729 print ' Encoding: ', enc
or '(undefined)'
733 print 'Locale settings after calling resetlocale():'
736 for name
,category
in categories
.items():
738 lang
, enc
= getlocale(category
)
739 print ' Language: ', lang
or '(undefined)'
740 print ' Encoding: ', enc
or '(undefined)'
744 setlocale(LC_ALL
, "")
747 print 'setlocale(LC_ALL, "") does not support the default locale'
748 print 'given in the OS environment variables.'
751 print 'Locale settings after calling setlocale(LC_ALL, ""):'
753 for name
,category
in categories
.items():
755 lang
, enc
= getlocale(category
)
756 print ' Language: ', lang
or '(undefined)'
757 print ' Encoding: ', enc
or '(undefined)'
767 __all__
.append("LC_MESSAGES")
769 if __name__
=='__main__':
770 print 'Locale aliasing:'
774 print 'Number formatting:'