3 The module provides low-level access to the C lib's locale APIs
4 and adds high level number formatting APIs as well as a locale
5 aliasing engine to complement these.
7 The aliasing engine includes support for many commonly used locale
8 names and maps them to values suitable for passing to the C lib's
9 setlocale() function. It also includes default encodings for all
10 supported locale names.
16 # Try importing the _locale module.
18 # If this fails, fall back on a basic 'C' locale emulation.
40 """ localeconv() -> dict.
41 Returns numeric and monetary locale-specific parameters.
43 # 'C' locale default values
44 return {'grouping': [127],
45 'currency_symbol': '',
50 'n_sep_by_space': 127,
54 'p_sep_by_space': 127,
55 'int_curr_symbol': '',
58 'mon_thousands_sep': '',
60 'mon_decimal_point': '',
61 'int_frac_digits': 127}
63 def setlocale(category
, value
=None):
64 """ setlocale(integer,string=None) -> string.
65 Activates/queries locale processing.
67 if value
is not None and \
69 raise Error
, '_locale emulation only supports "C" locale'
73 """ strcoll(string,string) -> int.
74 Compares two strings according to the locale.
79 """ strxfrm(string) -> string.
80 Returns a string that can be compared with cmp() in a locale-aware way.
84 ### Number formatting APIs
86 # Author: Martin von Loewis
88 #perform the grouping from right to left
91 grouping
=conv
['grouping']
92 if not grouping
:return s
95 # if grouping is CHAR_MAX, we are done
96 if grouping
[0]==CHAR_MAX
:
98 # 0: re-use last group ad infinitum
102 grouping
=grouping
[1:]
104 result
=s
[-group
:]+conv
['thousands_sep']+result
111 result
=s
+conv
['thousands_sep']+result
114 def format(f
,val
,grouping
=0):
115 """Formats a value in the same way that the % formatting would use,
116 but takes the current locale into account.
117 Grouping is applied if the third parameter is true."""
119 fields
= string
.split(result
, ".")
121 fields
[0]=_group(fields
[0])
123 return fields
[0]+localeconv()['decimal_point']+fields
[1]
127 raise Error
, "Too many decimal points in result string"
130 """Convert float to string, taking the locale into account."""
131 return format("%.12g",val
)
133 def atof(str,func
=string
.atof
):
134 "Parses a string as a float according to the locale settings."
135 #First, get rid of the grouping
136 ts
= localeconv()['thousands_sep']
138 s
=string
.split(str,ts
)
139 str=string
.join(s
, "")
140 #next, replace the decimal point with a dot
141 dd
= localeconv()['decimal_point']
143 s
=string
.split(str,dd
)
144 str=string
.join(s
, '.')
145 #finally, parse the string
149 "Converts a string to an integer according to the locale settings."
150 return atof(str,string
.atoi
)
153 setlocale(LC_ALL
, "")
155 s1
=format("%d", 123456789,1)
156 print s1
, "is", atoi(s1
)
159 print s1
, "is", atof(s1
)
161 ### Locale name aliasing engine
163 # Author: Marc-Andre Lemburg, mal@lemburg.com
164 # Various tweaks by Fredrik Lundh <effbot@telia.com>
166 # store away the low-level version of setlocale (it's
168 _setlocale
= setlocale
170 def normalize(localename
):
172 """ Returns a normalized locale code for the given locale
175 The returned locale code is formatted for use with
178 If normalization fails, the original name is returned
181 If the given encoding is not known, the function defaults to
182 the default encoding for the locale code just like setlocale()
186 # Normalize the locale name and extract the encoding
187 fullname
= string
.lower(localename
)
189 # ':' is sometimes used as encoding delimiter.
190 fullname
= string
.replace(fullname
, ':', '.')
192 langname
, encoding
= string
.split(fullname
, '.')[:2]
193 fullname
= langname
+ '.' + encoding
198 # First lookup: fullname (possibly with encoding)
199 code
= locale_alias
.get(fullname
, None)
203 # Second try: langname (without encoding)
204 code
= locale_alias
.get(langname
, None)
207 langname
, defenc
= string
.split(code
, '.')
212 encoding
= encoding_alias
.get(encoding
, encoding
)
216 return langname
+ '.' + encoding
223 def _parse_localename(localename
):
225 """ Parses the locale code for localename and returns the
226 result as tuple (language code, encoding).
228 The localename is normalized and passed through the locale
229 alias engine. A ValueError is raised in case the locale name
232 The language code corresponds to RFC 1766. code and encoding
233 can be None in case the values cannot be determined or are
234 unknown to this implementation.
237 code
= normalize(localename
)
239 return string
.split(code
, '.')[:2]
243 raise ValueError, 'unknown locale: %s' % localename
246 def _build_localename(localetuple
):
248 """ Builds a locale code from the given tuple (language code,
251 No aliasing or normalizing takes place.
254 language
, encoding
= localetuple
260 return language
+ '.' + encoding
262 def getdefaultlocale(envvars
=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):
264 """ Tries to determine the default locale settings and returns
265 them as tuple (language code, encoding).
267 According to POSIX, a program which has not called
268 setlocale(LC_ALL, "") runs using the portable 'C' locale.
269 Calling setlocale(LC_ALL, "") lets it use the default locale as
270 defined by the LANG variable. Since we don't want to interfere
271 with the current locale setting we thus emulate the behavior
272 in the way described above.
274 To maintain compatibility with other platforms, not only the
275 LANG variable is tested, but a list of variables given as
276 envvars parameter. The first found to be defined will be
277 used. envvars defaults to the search path used in GNU gettext;
278 it must always contain the variable name 'LANG'.
280 Except for the code 'C', the language code corresponds to RFC
281 1766. code and encoding can be None in case the values cannot
287 # check if it's supported by the _locale module
289 code
, encoding
= _locale
._getdefaultlocale
()
290 except (ImportError, AttributeError):
293 # make sure the code/encoding values are valid
294 if sys
.platform
== "win32" and code
and code
[:2] == "0x":
295 # map windows language identifier to language name
296 code
= windows_locale
.get(int(code
, 0))
297 # ...add other platform-specific processing here, if
299 return code
, encoding
301 # fall back on POSIX behaviour
303 lookup
= os
.environ
.get
304 for variable
in envvars
:
305 localename
= lookup(variable
,None)
306 if localename
is not None:
310 return _parse_localename(localename
)
313 def getlocale(category
=LC_CTYPE
):
315 """ Returns the current setting for the given locale category as
316 tuple (language code, encoding).
318 category may be one of the LC_* values except LC_ALL. It
319 defaults to LC_CTYPE.
321 Except for the code 'C', the language code corresponds to RFC
322 1766. code and encoding can be None in case the values cannot
326 localename
= _setlocale(category
)
327 if category
== LC_ALL
and ';' in localename
:
328 raise TypeError, 'category LC_ALL is not supported'
329 return _parse_localename(localename
)
331 def setlocale(category
, locale
=None):
333 """ Set the locale for the given category. The locale can be
334 a string, a locale tuple (language code, encoding), or None.
336 Locale tuples are converted to strings using the locale aliasing
337 engine. Locale strings are passed directly to the C lib.
339 category may be given as one of the LC_* values.
342 if locale
and type(locale
) is not type(""):
344 locale
= normalize(_build_localename(locale
))
345 return _setlocale(category
, locale
)
347 def resetlocale(category
=LC_ALL
):
349 """ Sets the locale for category to the default setting.
351 The default setting is determined by calling
352 getdefaultlocale(). category defaults to LC_ALL.
355 _setlocale(category
, _build_localename(getdefaultlocale()))
359 # The following data was extracted from the locale.alias file which
360 # comes with X11 and then hand edited removing the explicit encoding
361 # definitions and adding some more aliases. The file is usually
362 # available as /usr/lib/X11/locale/locale.alias.
366 # The encoding_alias table maps lowercase encoding alias names to C
367 # locale encoding names (case-sensitive).
372 'iso8859': 'ISO8859-1',
374 '88591': 'ISO8859-1',
375 'ascii': 'ISO8859-1',
377 'iso88591': 'ISO8859-1',
378 'iso_8859-1': 'ISO8859-1',
379 '885915': 'ISO8859-15',
380 'iso885915': 'ISO8859-15',
381 'iso_8859-15': 'ISO8859-15',
382 'iso8859-2': 'ISO8859-2',
383 'iso88592': 'ISO8859-2',
384 'iso_8859-2': 'ISO8859-2',
385 'iso88595': 'ISO8859-5',
386 'iso88596': 'ISO8859-6',
387 'iso88597': 'ISO8859-7',
388 'iso88598': 'ISO8859-8',
389 'iso88599': 'ISO8859-9',
390 'iso-2022-jp': 'JIS7',
404 # The locale_alias table maps lowercase alias names to C locale names
405 # (case-sensitive). Encodings are always separated from the locale
406 # name using a dot ('.'); they should only be given in case the
407 # language name is needed to interpret the given encoding alias
408 # correctly (CJK codes often have this need).
411 'american': 'en_US.ISO8859-1',
412 'ar': 'ar_AA.ISO8859-6',
413 'ar_aa': 'ar_AA.ISO8859-6',
414 'ar_sa': 'ar_SA.ISO8859-6',
415 'arabic': 'ar_AA.ISO8859-6',
416 'bg': 'bg_BG.ISO8859-5',
417 'bg_bg': 'bg_BG.ISO8859-5',
418 'bulgarian': 'bg_BG.ISO8859-5',
419 'c-french': 'fr_CA.ISO8859-1',
422 'cextend': 'en_US.ISO8859-1',
423 'chinese-s': 'zh_CN.eucCN',
424 'chinese-t': 'zh_TW.eucTW',
425 'croatian': 'hr_HR.ISO8859-2',
426 'cs': 'cs_CZ.ISO8859-2',
427 'cs_cs': 'cs_CZ.ISO8859-2',
428 'cs_cz': 'cs_CZ.ISO8859-2',
429 'cz': 'cz_CZ.ISO8859-2',
430 'cz_cz': 'cz_CZ.ISO8859-2',
431 'czech': 'cs_CS.ISO8859-2',
432 'da': 'da_DK.ISO8859-1',
433 'da_dk': 'da_DK.ISO8859-1',
434 'danish': 'da_DK.ISO8859-1',
435 'de': 'de_DE.ISO8859-1',
436 'de_at': 'de_AT.ISO8859-1',
437 'de_ch': 'de_CH.ISO8859-1',
438 'de_de': 'de_DE.ISO8859-1',
439 'dutch': 'nl_BE.ISO8859-1',
440 'ee': 'ee_EE.ISO8859-4',
441 'el': 'el_GR.ISO8859-7',
442 'el_gr': 'el_GR.ISO8859-7',
443 'en': 'en_US.ISO8859-1',
444 'en_au': 'en_AU.ISO8859-1',
445 'en_ca': 'en_CA.ISO8859-1',
446 'en_gb': 'en_GB.ISO8859-1',
447 'en_ie': 'en_IE.ISO8859-1',
448 'en_nz': 'en_NZ.ISO8859-1',
449 'en_uk': 'en_GB.ISO8859-1',
450 'en_us': 'en_US.ISO8859-1',
451 'eng_gb': 'en_GB.ISO8859-1',
452 'english': 'en_EN.ISO8859-1',
453 'english_uk': 'en_GB.ISO8859-1',
454 'english_united-states': 'en_US.ISO8859-1',
455 'english_us': 'en_US.ISO8859-1',
456 'es': 'es_ES.ISO8859-1',
457 'es_ar': 'es_AR.ISO8859-1',
458 'es_bo': 'es_BO.ISO8859-1',
459 'es_cl': 'es_CL.ISO8859-1',
460 'es_co': 'es_CO.ISO8859-1',
461 'es_cr': 'es_CR.ISO8859-1',
462 'es_ec': 'es_EC.ISO8859-1',
463 'es_es': 'es_ES.ISO8859-1',
464 'es_gt': 'es_GT.ISO8859-1',
465 'es_mx': 'es_MX.ISO8859-1',
466 'es_ni': 'es_NI.ISO8859-1',
467 'es_pa': 'es_PA.ISO8859-1',
468 'es_pe': 'es_PE.ISO8859-1',
469 'es_py': 'es_PY.ISO8859-1',
470 'es_sv': 'es_SV.ISO8859-1',
471 'es_uy': 'es_UY.ISO8859-1',
472 'es_ve': 'es_VE.ISO8859-1',
473 'et': 'et_EE.ISO8859-4',
474 'et_ee': 'et_EE.ISO8859-4',
475 'fi': 'fi_FI.ISO8859-1',
476 'fi_fi': 'fi_FI.ISO8859-1',
477 'finnish': 'fi_FI.ISO8859-1',
478 'fr': 'fr_FR.ISO8859-1',
479 'fr_be': 'fr_BE.ISO8859-1',
480 'fr_ca': 'fr_CA.ISO8859-1',
481 'fr_ch': 'fr_CH.ISO8859-1',
482 'fr_fr': 'fr_FR.ISO8859-1',
483 'fre_fr': 'fr_FR.ISO8859-1',
484 'french': 'fr_FR.ISO8859-1',
485 'french_france': 'fr_FR.ISO8859-1',
486 'ger_de': 'de_DE.ISO8859-1',
487 'german': 'de_DE.ISO8859-1',
488 'german_germany': 'de_DE.ISO8859-1',
489 'greek': 'el_GR.ISO8859-7',
490 'hebrew': 'iw_IL.ISO8859-8',
491 'hr': 'hr_HR.ISO8859-2',
492 'hr_hr': 'hr_HR.ISO8859-2',
493 'hu': 'hu_HU.ISO8859-2',
494 'hu_hu': 'hu_HU.ISO8859-2',
495 'hungarian': 'hu_HU.ISO8859-2',
496 'icelandic': 'is_IS.ISO8859-1',
497 'id': 'id_ID.ISO8859-1',
498 'id_id': 'id_ID.ISO8859-1',
499 'is': 'is_IS.ISO8859-1',
500 'is_is': 'is_IS.ISO8859-1',
501 'iso-8859-1': 'en_US.ISO8859-1',
502 'iso-8859-15': 'en_US.ISO8859-15',
503 'iso8859-1': 'en_US.ISO8859-1',
504 'iso8859-15': 'en_US.ISO8859-15',
505 'iso_8859_1': 'en_US.ISO8859-1',
506 'iso_8859_15': 'en_US.ISO8859-15',
507 'it': 'it_IT.ISO8859-1',
508 'it_ch': 'it_CH.ISO8859-1',
509 'it_it': 'it_IT.ISO8859-1',
510 'italian': 'it_IT.ISO8859-1',
511 'iw': 'iw_IL.ISO8859-8',
512 'iw_il': 'iw_IL.ISO8859-8',
514 'ja.jis': 'ja_JP.JIS7',
515 'ja.sjis': 'ja_JP.SJIS',
516 'ja_jp': 'ja_JP.eucJP',
517 'ja_jp.ajec': 'ja_JP.eucJP',
518 'ja_jp.euc': 'ja_JP.eucJP',
519 'ja_jp.eucjp': 'ja_JP.eucJP',
520 'ja_jp.iso-2022-jp': 'ja_JP.JIS7',
521 'ja_jp.jis': 'ja_JP.JIS7',
522 'ja_jp.jis7': 'ja_JP.JIS7',
523 'ja_jp.mscode': 'ja_JP.SJIS',
524 'ja_jp.sjis': 'ja_JP.SJIS',
525 'ja_jp.ujis': 'ja_JP.eucJP',
526 'japan': 'ja_JP.eucJP',
527 'japanese': 'ja_JP.SJIS',
528 'japanese-euc': 'ja_JP.eucJP',
529 'japanese.euc': 'ja_JP.eucJP',
530 'jp_jp': 'ja_JP.eucJP',
532 'ko_kr': 'ko_KR.eucKR',
533 'ko_kr.euc': 'ko_KR.eucKR',
534 'korean': 'ko_KR.eucKR',
535 'lt': 'lt_LT.ISO8859-4',
536 'lv': 'lv_LV.ISO8859-4',
537 'mk': 'mk_MK.ISO8859-5',
538 'mk_mk': 'mk_MK.ISO8859-5',
539 'nl': 'nl_NL.ISO8859-1',
540 'nl_be': 'nl_BE.ISO8859-1',
541 'nl_nl': 'nl_NL.ISO8859-1',
542 'no': 'no_NO.ISO8859-1',
543 'no_no': 'no_NO.ISO8859-1',
544 'norwegian': 'no_NO.ISO8859-1',
545 'pl': 'pl_PL.ISO8859-2',
546 'pl_pl': 'pl_PL.ISO8859-2',
547 'polish': 'pl_PL.ISO8859-2',
548 'portuguese': 'pt_PT.ISO8859-1',
549 'portuguese_brazil': 'pt_BR.ISO8859-1',
552 'pt': 'pt_PT.ISO8859-1',
553 'pt_br': 'pt_BR.ISO8859-1',
554 'pt_pt': 'pt_PT.ISO8859-1',
555 'ro': 'ro_RO.ISO8859-2',
556 'ro_ro': 'ro_RO.ISO8859-2',
557 'ru': 'ru_RU.ISO8859-5',
558 'ru_ru': 'ru_RU.ISO8859-5',
559 'rumanian': 'ro_RO.ISO8859-2',
560 'russian': 'ru_RU.ISO8859-5',
561 'serbocroatian': 'sh_YU.ISO8859-2',
562 'sh': 'sh_YU.ISO8859-2',
563 'sh_hr': 'sh_HR.ISO8859-2',
564 'sh_sp': 'sh_YU.ISO8859-2',
565 'sh_yu': 'sh_YU.ISO8859-2',
566 'sk': 'sk_SK.ISO8859-2',
567 'sk_sk': 'sk_SK.ISO8859-2',
568 'sl': 'sl_CS.ISO8859-2',
569 'sl_cs': 'sl_CS.ISO8859-2',
570 'sl_si': 'sl_SI.ISO8859-2',
571 'slovak': 'sk_SK.ISO8859-2',
572 'slovene': 'sl_CS.ISO8859-2',
573 'sp': 'sp_YU.ISO8859-5',
574 'sp_yu': 'sp_YU.ISO8859-5',
575 'spanish': 'es_ES.ISO8859-1',
576 'spanish_spain': 'es_ES.ISO8859-1',
577 'sr_sp': 'sr_SP.ISO8859-2',
578 'sv': 'sv_SE.ISO8859-1',
579 'sv_se': 'sv_SE.ISO8859-1',
580 'swedish': 'sv_SE.ISO8859-1',
581 'th_th': 'th_TH.TACTIS',
582 'tr': 'tr_TR.ISO8859-9',
583 'tr_tr': 'tr_TR.ISO8859-9',
584 'turkish': 'tr_TR.ISO8859-9',
586 'universal': 'en_US.utf',
588 'zh_cn': 'zh_CN.eucCN',
589 'zh_cn.big5': 'zh_TW.eucTW',
590 'zh_cn.euc': 'zh_CN.eucCN',
591 'zh_tw': 'zh_TW.eucTW',
592 'zh_tw.euc': 'zh_TW.eucTW',
596 # this maps windows language identifiers (as used on Windows 95 and
597 # earlier) to locale strings.
599 # NOTE: this mapping is incomplete. If your language is missing, send
600 # a note with the missing language identifier and the suggested locale
601 # code to Fredrik Lundh <effbot@telia.com>. Thanks /F
604 0x0404: "zh_TW", # Chinese (Taiwan)
605 0x0804: "zh_CN", # Chinese (PRC)
606 0x0406: "da_DK", # Danish
607 0x0413: "nl_NL", # Dutch (Netherlands)
608 0x0409: "en_US", # English (United States)
609 0x0809: "en_UK", # English (United Kingdom)
610 0x0c09: "en_AU", # English (Australian)
611 0x1009: "en_CA", # English (Canadian)
612 0x1409: "en_NZ", # English (New Zealand)
613 0x1809: "en_IE", # English (Ireland)
614 0x1c09: "en_ZA", # English (South Africa)
615 0x040b: "fi_FI", # Finnish
616 0x040c: "fr_FR", # French (Standard)
617 0x080c: "fr_BE", # French (Belgian)
618 0x0c0c: "fr_CA", # French (Canadian)
619 0x100c: "fr_CH", # French (Switzerland)
620 0x0407: "de_DE", # German (Standard)
621 0x0408: "el_GR", # Greek
622 0x040d: "iw_IL", # Hebrew
623 0x040f: "is_IS", # Icelandic
624 0x0410: "it_IT", # Italian (Standard)
625 0x0411: "ja_JA", # Japanese
626 0x0414: "no_NO", # Norwegian (Bokmal)
627 0x0816: "pt_PT", # Portuguese (Standard)
628 0x0c0a: "es_ES", # Spanish (Modern Sort)
629 0x0441: "sw_KE", # Swahili (Kenya)
630 0x041d: "sv_SE", # Swedish
631 0x081d: "sv_FI", # Swedish (Finland)
632 0x041f: "tr_TR", # Turkish
640 def _init_categories(categories
=categories
):
641 for k
,v
in globals().items():
645 del categories
['LC_ALL']
647 print 'Locale defaults as determined by getdefaultlocale():'
649 lang
, enc
= getdefaultlocale()
650 print 'Language: ', lang
or '(undefined)'
651 print 'Encoding: ', enc
or '(undefined)'
654 print 'Locale settings on startup:'
656 for name
,category
in categories
.items():
658 lang
, enc
= getlocale(category
)
659 print ' Language: ', lang
or '(undefined)'
660 print ' Encoding: ', enc
or '(undefined)'
664 print 'Locale settings after calling resetlocale():'
667 for name
,category
in categories
.items():
669 lang
, enc
= getlocale(category
)
670 print ' Language: ', lang
or '(undefined)'
671 print ' Encoding: ', enc
or '(undefined)'
675 setlocale(LC_ALL
, "")
678 print 'setlocale(LC_ALL, "") does not support the default locale'
679 print 'given in the OS environment variables.'
682 print 'Locale settings after calling setlocale(LC_ALL, ""):'
684 for name
,category
in categories
.items():
686 lang
, enc
= getlocale(category
)
687 print ' Language: ', lang
or '(undefined)'
688 print ' Encoding: ', enc
or '(undefined)'
693 if __name__
=='__main__':
694 print 'Locale aliasing:'
698 print 'Number formatting:'