Lib/locale.py

   1 """ Locale support.
   2
   3     The module provides low-level access to the C lib's locale APIs
   4     and adds high level number formatting APIs as well as a locale
   5     aliasing engine to complement these.
   6
   7     The aliasing engine includes support for many commonly used locale
   8     names and maps them to values suitable for passing to the C lib's
   9     setlocale() function. It also includes default encodings for all
  10     supported locale names.
  11
  12 """
  13
  14 import sys
  15
  16 # Try importing the _locale module.
  17 #
  18 # If this fails, fall back on a basic 'C' locale emulation.
  19
  20 # Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
  21 # trying the import.  So __all__ is also fiddled at the end of the file.
  22 __all__ = ["setlocale","Error","localeconv","strcoll","strxfrm",
  23            "format","str","atof","atoi","LC_CTYPE","LC_COLLATE",
  24            "LC_TIME","LC_MONETARY","LC_NUMERIC", "LC_ALL","CHAR_MAX"]
  25
  26 try:
  27
  28     from _locale import *
  29
  30 except ImportError:
  31
  32     # Locale emulation
  33
  34     CHAR_MAX = 127
  35     LC_ALL = 6
  36     LC_COLLATE = 3
  37     LC_CTYPE = 0
  38     LC_MESSAGES = 5
  39     LC_MONETARY = 4
  40     LC_NUMERIC = 1
  41     LC_TIME = 2
  42     Error = ValueError
  43
  44     def localeconv():
  45         """ localeconv() -> dict.
  46             Returns numeric and monetary locale-specific parameters.
  47         """
  48         # 'C' locale default values
  49         return {'grouping': [127],
  50                 'currency_symbol': '',
  51                 'n_sign_posn': 127,
  52                 'p_cs_precedes': 127,
  53                 'n_cs_precedes': 127,
  54                 'mon_grouping': [],
  55                 'n_sep_by_space': 127,
  56                 'decimal_point': '.',
  57                 'negative_sign': '',
  58                 'positive_sign': '',
  59                 'p_sep_by_space': 127,
  60                 'int_curr_symbol': '',
  61                 'p_sign_posn': 127,
  62                 'thousands_sep': '',
  63                 'mon_thousands_sep': '',
  64                 'frac_digits': 127,
  65                 'mon_decimal_point': '',
  66                 'int_frac_digits': 127}
  67
  68     def setlocale(category, value=None):
  69         """ setlocale(integer,string=None) -> string.
  70             Activates/queries locale processing.
  71         """
  72         if value not in (None, '', 'C'):
  73             raise Error, '_locale emulation only supports "C" locale'
  74         return 'C'
  75
  76     def strcoll(a,b):
  77         """ strcoll(string,string) -> int.
  78             Compares two strings according to the locale.
  79         """
  80         return cmp(a,b)
  81
  82     def strxfrm(s):
  83         """ strxfrm(string) -> string.
  84             Returns a string that behaves for cmp locale-aware.
  85         """
  86         return s
  87
  88 ### Number formatting APIs
  89
  90 # Author: Martin von Loewis
  91
  92 #perform the grouping from right to left
  93 def _group(s):
  94     conv=localeconv()
  95     grouping=conv['grouping']
  96     if not grouping:return (s, 0)
  97     result=""
  98     seps = 0
  99     spaces = ""
 100     if s[-1] == ' ':
 101         sp = s.find(' ')
 102         spaces = s[sp:]
 103         s = s[:sp]
 104     while s and grouping:
 105         # if grouping is -1, we are done
 106         if grouping[0]==CHAR_MAX:
 107             break
 108         # 0: re-use last group ad infinitum
 109         elif grouping[0]!=0:
 110             #process last group
 111             group=grouping[0]
 112             grouping=grouping[1:]
 113         if result:
 114             result=s[-group:]+conv['thousands_sep']+result
 115             seps += 1
 116         else:
 117             result=s[-group:]
 118         s=s[:-group]
 119         if s and s[-1] not in "0123456789":
 120             # the leading string is only spaces and signs
 121             return s+result+spaces,seps
 122     if not result:
 123         return s+spaces,seps
 124     if s:
 125         result=s+conv['thousands_sep']+result
 126         seps += 1
 127     return result+spaces,seps
 128
 129 def format(f,val,grouping=0):
 130     """Formats a value in the same way that the % formatting would use,
 131     but takes the current locale into account.
 132     Grouping is applied if the third parameter is true."""
 133     result = f % val
 134     fields = result.split(".")
 135     seps = 0
 136     if grouping:
 137         fields[0],seps=_group(fields[0])
 138     if len(fields)==2:
 139         result = fields[0]+localeconv()['decimal_point']+fields[1]
 140     elif len(fields)==1:
 141         result = fields[0]
 142     else:
 143         raise Error, "Too many decimal points in result string"
 144
 145     while seps:
 146         # If the number was formatted for a specific width, then it
 147         # might have been filled with spaces to the left or right. If
 148         # so, kill as much spaces as there where separators.
 149         # Leading zeroes as fillers are not yet dealt with, as it is
 150         # not clear how they should interact with grouping.
 151         sp = result.find(" ")
 152         if sp==-1:break
 153         result = result[:sp]+result[sp+1:]
 154         seps -= 1
 155
 156     return result
 157
 158 def str(val):
 159     """Convert float to integer, taking the locale into account."""
 160     return format("%.12g",val)
 161
 162 def atof(string,func=float):
 163     "Parses a string as a float according to the locale settings."
 164     #First, get rid of the grouping
 165     ts = localeconv()['thousands_sep']
 166     if ts:
 167         string = string.replace(ts, '')
 168     #next, replace the decimal point with a dot
 169     dd = localeconv()['decimal_point']
 170     if dd:
 171         string = string.replace(dd, '.')
 172     #finally, parse the string
 173     return func(string)
 174
 175 def atoi(str):
 176     "Converts a string to an integer according to the locale settings."
 177     return atof(str, int)
 178
 179 def _test():
 180     setlocale(LC_ALL, "")
 181     #do grouping
 182     s1=format("%d", 123456789,1)
 183     print s1, "is", atoi(s1)
 184     #standard formatting
 185     s1=str(3.14)
 186     print s1, "is", atof(s1)
 187
 188 ### Locale name aliasing engine
 189
 190 # Author: Marc-Andre Lemburg, mal@lemburg.com
 191 # Various tweaks by Fredrik Lundh <fredrik@pythonware.com>
 192
 193 # store away the low-level version of setlocale (it's
 194 # overridden below)
 195 _setlocale = setlocale
 196
 197 def normalize(localename):
 198
 199     """ Returns a normalized locale code for the given locale
 200         name.
 201
 202         The returned locale code is formatted for use with
 203         setlocale().
 204
 205         If normalization fails, the original name is returned
 206         unchanged.
 207
 208         If the given encoding is not known, the function defaults to
 209         the default encoding for the locale code just like setlocale()
 210         does.
 211
 212     """
 213     # Normalize the locale name and extract the encoding
 214     fullname = localename.lower()
 215     if ':' in fullname:
 216         # ':' is sometimes used as encoding delimiter.
 217         fullname = fullname.replace(':', '.')
 218     if '.' in fullname:
 219         langname, encoding = fullname.split('.')[:2]
 220         fullname = langname + '.' + encoding
 221     else:
 222         langname = fullname
 223         encoding = ''
 224
 225     # First lookup: fullname (possibly with encoding)
 226     code = locale_alias.get(fullname, None)
 227     if code is not None:
 228         return code
 229
 230     # Second try: langname (without encoding)
 231     code = locale_alias.get(langname, None)
 232     if code is not None:
 233         if '.' in code:
 234             langname, defenc = code.split('.')
 235         else:
 236             langname = code
 237             defenc = ''
 238         if encoding:
 239             encoding = encoding_alias.get(encoding, encoding)
 240         else:
 241             encoding = defenc
 242         if encoding:
 243             return langname + '.' + encoding
 244         else:
 245             return langname
 246
 247     else:
 248         return localename
 249
 250 def _parse_localename(localename):
 251
 252     """ Parses the locale code for localename and returns the
 253         result as tuple (language code, encoding).
 254
 255         The localename is normalized and passed through the locale
 256         alias engine. A ValueError is raised in case the locale name
 257         cannot be parsed.
 258
 259         The language code corresponds to RFC 1766.  code and encoding
 260         can be None in case the values cannot be determined or are
 261         unknown to this implementation.
 262
 263     """
 264     code = normalize(localename)
 265     if '@' in localename:
 266         # Deal with locale modifiers
 267         code, modifier = code.split('@')
 268         if modifier == 'euro' and '.' not in code:
 269             # Assume Latin-9 for @euro locales. This is bogus,
 270             # since some systems may use other encodings for these
 271             # locales. Also, we ignore other modifiers.
 272             return code, 'iso-8859-15'
 273
 274     if '.' in code:
 275         return code.split('.')[:2]
 276     elif code == 'C':
 277         return None, None
 278     raise ValueError, 'unknown locale: %s' % localename
 279
 280 def _build_localename(localetuple):
 281
 282     """ Builds a locale code from the given tuple (language code,
 283         encoding).
 284
 285         No aliasing or normalizing takes place.
 286
 287     """
 288     language, encoding = localetuple
 289     if language is None:
 290         language = 'C'
 291     if encoding is None:
 292         return language
 293     else:
 294         return language + '.' + encoding
 295
 296 def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):
 297
 298     """ Tries to determine the default locale settings and returns
 299         them as tuple (language code, encoding).
 300
 301         According to POSIX, a program which has not called
 302         setlocale(LC_ALL, "") runs using the portable 'C' locale.
 303         Calling setlocale(LC_ALL, "") lets it use the default locale as
 304         defined by the LANG variable. Since we don't want to interfere
 305         with the current locale setting we thus emulate the behavior
 306         in the way described above.
 307
 308         To maintain compatibility with other platforms, not only the
 309         LANG variable is tested, but a list of variables given as
 310         envvars parameter. The first found to be defined will be
 311         used. envvars defaults to the search path used in GNU gettext;
 312         it must always contain the variable name 'LANG'.
 313
 314         Except for the code 'C', the language code corresponds to RFC
 315         1766.  code and encoding can be None in case the values cannot
 316         be determined.
 317
 318     """
 319
 320     try:
 321         # check if it's supported by the _locale module
 322         import _locale
 323         code, encoding = _locale._getdefaultlocale()
 324     except (ImportError, AttributeError):
 325         pass
 326     else:
 327         # make sure the code/encoding values are valid
 328         if sys.platform == "win32" and code and code[:2] == "0x":
 329             # map windows language identifier to language name
 330             code = windows_locale.get(int(code, 0))
 331         # ...add other platform-specific processing here, if
 332         # necessary...
 333         return code, encoding
 334
 335     # fall back on POSIX behaviour
 336     import os
 337     lookup = os.environ.get
 338     for variable in envvars:
 339         localename = lookup(variable,None)
 340         if localename is not None:
 341             break
 342     else:
 343         localename = 'C'
 344     return _parse_localename(localename)
 345
 346
 347 def getlocale(category=LC_CTYPE):
 348
 349     """ Returns the current setting for the given locale category as
 350         tuple (language code, encoding).
 351
 352         category may be one of the LC_* value except LC_ALL. It
 353         defaults to LC_CTYPE.
 354
 355         Except for the code 'C', the language code corresponds to RFC
 356         1766.  code and encoding can be None in case the values cannot
 357         be determined.
 358
 359     """
 360     localename = _setlocale(category)
 361     if category == LC_ALL and ';' in localename:
 362         raise TypeError, 'category LC_ALL is not supported'
 363     return _parse_localename(localename)
 364
 365 def setlocale(category, locale=None):
 366
 367     """ Set the locale for the given category.  The locale can be
 368         a string, a locale tuple (language code, encoding), or None.
 369
 370         Locale tuples are converted to strings the locale aliasing
 371         engine.  Locale strings are passed directly to the C lib.
 372
 373         category may be given as one of the LC_* values.
 374
 375     """
 376     if locale and type(locale) is not type(""):
 377         # convert to string
 378         locale = normalize(_build_localename(locale))
 379     return _setlocale(category, locale)
 380
 381 def resetlocale(category=LC_ALL):
 382
 383     """ Sets the locale for category to the default setting.
 384
 385         The default setting is determined by calling
 386         getdefaultlocale(). category defaults to LC_ALL.
 387
 388     """
 389     _setlocale(category, _build_localename(getdefaultlocale()))
 390
 391 if sys.platform in ('win32', 'darwin', 'mac'):
 392     # On Win32, this will return the ANSI code page
 393     # On the Mac, it should return the system encoding;
 394     # it might return "ascii" instead
 395     def getpreferredencoding(do_setlocale = True):
 396         """Return the charset that the user is likely using."""
 397         import _locale
 398         return _locale._getdefaultlocale()[1]
 399 else:
 400     # On Unix, if CODESET is available, use that.
 401     try:
 402         CODESET
 403     except NameError:
 404         # Fall back to parsing environment variables :-(
 405         def getpreferredencoding(do_setlocale = True):
 406             """Return the charset that the user is likely using,
 407             by looking at environment variables."""
 408             return getdefaultlocale()[1]
 409     else:
 410         def getpreferredencoding(do_setlocale = True):
 411             """Return the charset that the user is likely using,
 412             according to the system configuration."""
 413             if do_setlocale:
 414                 oldloc = setlocale(LC_CTYPE)
 415                 setlocale(LC_CTYPE, "")
 416                 result = nl_langinfo(CODESET)
 417                 setlocale(LC_CTYPE, oldloc)
 418                 return result
 419             else:
 420                 return nl_langinfo(CODESET)
 421
 422
 423 ### Database
 424 #
 425 # The following data was extracted from the locale.alias file which
 426 # comes with X11 and then hand edited removing the explicit encoding
 427 # definitions and adding some more aliases. The file is usually
 428 # available as /usr/lib/X11/locale/locale.alias.
 429 #
 430
 431 #
 432 # The encoding_alias table maps lowercase encoding alias names to C
 433 # locale encoding names (case-sensitive).
 434 #
 435 encoding_alias = {
 436         '437':                          'C',
 437         'c':                            'C',
 438         'iso8859':                      'ISO8859-1',
 439         '8859':                         'ISO8859-1',
 440         '88591':                        'ISO8859-1',
 441         'ascii':                        'ISO8859-1',
 442         'en':                           'ISO8859-1',
 443         'iso88591':                     'ISO8859-1',
 444         'iso_8859-1':                   'ISO8859-1',
 445         '885915':                       'ISO8859-15',
 446         'iso885915':                    'ISO8859-15',
 447         'iso_8859-15':                  'ISO8859-15',
 448         'iso8859-2':                    'ISO8859-2',
 449         'iso88592':                     'ISO8859-2',
 450         'iso_8859-2':                   'ISO8859-2',
 451         'iso88595':                     'ISO8859-5',
 452         'iso88596':                     'ISO8859-6',
 453         'iso88597':                     'ISO8859-7',
 454         'iso88598':                     'ISO8859-8',
 455         'iso88599':                     'ISO8859-9',
 456         'iso-2022-jp':                  'JIS7',
 457         'jis':                          'JIS7',
 458         'jis7':                         'JIS7',
 459         'sjis':                         'SJIS',
 460         'tis620':                       'TACTIS',
 461         'ajec':                         'eucJP',
 462         'eucjp':                        'eucJP',
 463         'ujis':                         'eucJP',
 464         'utf-8':                        'utf',
 465         'utf8':                         'utf',
 466         'utf8@ucs4':                    'utf',
 467 }
 468
 469 #
 470 # The locale_alias table maps lowercase alias names to C locale names
 471 # (case-sensitive). Encodings are always separated from the locale
 472 # name using a dot ('.'); they should only be given in case the
 473 # language name is needed to interpret the given encoding alias
 474 # correctly (CJK codes often have this need).
 475 #
 476 locale_alias = {
 477         'american':                      'en_US.ISO8859-1',
 478         'ar':                            'ar_AA.ISO8859-6',
 479         'ar_aa':                         'ar_AA.ISO8859-6',
 480         'ar_sa':                         'ar_SA.ISO8859-6',
 481         'arabic':                        'ar_AA.ISO8859-6',
 482         'bg':                            'bg_BG.ISO8859-5',
 483         'bg_bg':                         'bg_BG.ISO8859-5',
 484         'bulgarian':                     'bg_BG.ISO8859-5',
 485         'c-french':                      'fr_CA.ISO8859-1',
 486         'c':                             'C',
 487         'c_c':                           'C',
 488         'cextend':                       'en_US.ISO8859-1',
 489         'chinese-s':                     'zh_CN.eucCN',
 490         'chinese-t':                     'zh_TW.eucTW',
 491         'croatian':                      'hr_HR.ISO8859-2',
 492         'cs':                            'cs_CZ.ISO8859-2',
 493         'cs_cs':                         'cs_CZ.ISO8859-2',
 494         'cs_cz':                         'cs_CZ.ISO8859-2',
 495         'cz':                            'cz_CZ.ISO8859-2',
 496         'cz_cz':                         'cz_CZ.ISO8859-2',
 497         'czech':                         'cs_CS.ISO8859-2',
 498         'da':                            'da_DK.ISO8859-1',
 499         'da_dk':                         'da_DK.ISO8859-1',
 500         'danish':                        'da_DK.ISO8859-1',
 501         'de':                            'de_DE.ISO8859-1',
 502         'de_at':                         'de_AT.ISO8859-1',
 503         'de_ch':                         'de_CH.ISO8859-1',
 504         'de_de':                         'de_DE.ISO8859-1',
 505         'dutch':                         'nl_BE.ISO8859-1',
 506         'ee':                            'ee_EE.ISO8859-4',
 507         'el':                            'el_GR.ISO8859-7',
 508         'el_gr':                         'el_GR.ISO8859-7',
 509         'en':                            'en_US.ISO8859-1',
 510         'en_au':                         'en_AU.ISO8859-1',
 511         'en_ca':                         'en_CA.ISO8859-1',
 512         'en_gb':                         'en_GB.ISO8859-1',
 513         'en_ie':                         'en_IE.ISO8859-1',
 514         'en_nz':                         'en_NZ.ISO8859-1',
 515         'en_uk':                         'en_GB.ISO8859-1',
 516         'en_us':                         'en_US.ISO8859-1',
 517         'eng_gb':                        'en_GB.ISO8859-1',
 518         'english':                       'en_EN.ISO8859-1',
 519         'english_uk':                    'en_GB.ISO8859-1',
 520         'english_united-states':         'en_US.ISO8859-1',
 521         'english_us':                    'en_US.ISO8859-1',
 522         'es':                            'es_ES.ISO8859-1',
 523         'es_ar':                         'es_AR.ISO8859-1',
 524         'es_bo':                         'es_BO.ISO8859-1',
 525         'es_cl':                         'es_CL.ISO8859-1',
 526         'es_co':                         'es_CO.ISO8859-1',
 527         'es_cr':                         'es_CR.ISO8859-1',
 528         'es_ec':                         'es_EC.ISO8859-1',
 529         'es_es':                         'es_ES.ISO8859-1',
 530         'es_gt':                         'es_GT.ISO8859-1',
 531         'es_mx':                         'es_MX.ISO8859-1',
 532         'es_ni':                         'es_NI.ISO8859-1',
 533         'es_pa':                         'es_PA.ISO8859-1',
 534         'es_pe':                         'es_PE.ISO8859-1',
 535         'es_py':                         'es_PY.ISO8859-1',
 536         'es_sv':                         'es_SV.ISO8859-1',
 537         'es_uy':                         'es_UY.ISO8859-1',
 538         'es_ve':                         'es_VE.ISO8859-1',
 539         'et':                            'et_EE.ISO8859-4',
 540         'et_ee':                         'et_EE.ISO8859-4',
 541         'fi':                            'fi_FI.ISO8859-1',
 542         'fi_fi':                         'fi_FI.ISO8859-1',
 543         'finnish':                       'fi_FI.ISO8859-1',
 544         'fr':                            'fr_FR.ISO8859-1',
 545         'fr_be':                         'fr_BE.ISO8859-1',
 546         'fr_ca':                         'fr_CA.ISO8859-1',
 547         'fr_ch':                         'fr_CH.ISO8859-1',
 548         'fr_fr':                         'fr_FR.ISO8859-1',
 549         'fre_fr':                        'fr_FR.ISO8859-1',
 550         'french':                        'fr_FR.ISO8859-1',
 551         'french_france':                 'fr_FR.ISO8859-1',
 552         'ger_de':                        'de_DE.ISO8859-1',
 553         'german':                        'de_DE.ISO8859-1',
 554         'german_germany':                'de_DE.ISO8859-1',
 555         'greek':                         'el_GR.ISO8859-7',
 556         'hebrew':                        'iw_IL.ISO8859-8',
 557         'hr':                            'hr_HR.ISO8859-2',
 558         'hr_hr':                         'hr_HR.ISO8859-2',
 559         'hu':                            'hu_HU.ISO8859-2',
 560         'hu_hu':                         'hu_HU.ISO8859-2',
 561         'hungarian':                     'hu_HU.ISO8859-2',
 562         'icelandic':                     'is_IS.ISO8859-1',
 563         'id':                            'id_ID.ISO8859-1',
 564         'id_id':                         'id_ID.ISO8859-1',
 565         'is':                            'is_IS.ISO8859-1',
 566         'is_is':                         'is_IS.ISO8859-1',
 567         'iso-8859-1':                    'en_US.ISO8859-1',
 568         'iso-8859-15':                   'en_US.ISO8859-15',
 569         'iso8859-1':                     'en_US.ISO8859-1',
 570         'iso8859-15':                    'en_US.ISO8859-15',
 571         'iso_8859_1':                    'en_US.ISO8859-1',
 572         'iso_8859_15':                   'en_US.ISO8859-15',
 573         'it':                            'it_IT.ISO8859-1',
 574         'it_ch':                         'it_CH.ISO8859-1',
 575         'it_it':                         'it_IT.ISO8859-1',
 576         'italian':                       'it_IT.ISO8859-1',
 577         'iw':                            'iw_IL.ISO8859-8',
 578         'iw_il':                         'iw_IL.ISO8859-8',
 579         'ja':                            'ja_JP.eucJP',
 580         'ja.jis':                        'ja_JP.JIS7',
 581         'ja.sjis':                       'ja_JP.SJIS',
 582         'ja_jp':                         'ja_JP.eucJP',
 583         'ja_jp.ajec':                    'ja_JP.eucJP',
 584         'ja_jp.euc':                     'ja_JP.eucJP',
 585         'ja_jp.eucjp':                   'ja_JP.eucJP',
 586         'ja_jp.iso-2022-jp':             'ja_JP.JIS7',
 587         'ja_jp.jis':                     'ja_JP.JIS7',
 588         'ja_jp.jis7':                    'ja_JP.JIS7',
 589         'ja_jp.mscode':                  'ja_JP.SJIS',
 590         'ja_jp.sjis':                    'ja_JP.SJIS',
 591         'ja_jp.ujis':                    'ja_JP.eucJP',
 592         'japan':                         'ja_JP.eucJP',
 593         'japanese':                      'ja_JP.SJIS',
 594         'japanese-euc':                  'ja_JP.eucJP',
 595         'japanese.euc':                  'ja_JP.eucJP',
 596         'jp_jp':                         'ja_JP.eucJP',
 597         'ko':                            'ko_KR.eucKR',
 598         'ko_kr':                         'ko_KR.eucKR',
 599         'ko_kr.euc':                     'ko_KR.eucKR',
 600         'korean':                        'ko_KR.eucKR',
 601         'lt':                            'lt_LT.ISO8859-4',
 602         'lv':                            'lv_LV.ISO8859-4',
 603         'mk':                            'mk_MK.ISO8859-5',
 604         'mk_mk':                         'mk_MK.ISO8859-5',
 605         'nl':                            'nl_NL.ISO8859-1',
 606         'nl_be':                         'nl_BE.ISO8859-1',
 607         'nl_nl':                         'nl_NL.ISO8859-1',
 608         'no':                            'no_NO.ISO8859-1',
 609         'no_no':                         'no_NO.ISO8859-1',
 610         'norwegian':                     'no_NO.ISO8859-1',
 611         'pl':                            'pl_PL.ISO8859-2',
 612         'pl_pl':                         'pl_PL.ISO8859-2',
 613         'polish':                        'pl_PL.ISO8859-2',
 614         'portuguese':                    'pt_PT.ISO8859-1',
 615         'portuguese_brazil':             'pt_BR.ISO8859-1',
 616         'posix':                         'C',
 617         'posix-utf2':                    'C',
 618         'pt':                            'pt_PT.ISO8859-1',
 619         'pt_br':                         'pt_BR.ISO8859-1',
 620         'pt_pt':                         'pt_PT.ISO8859-1',
 621         'ro':                            'ro_RO.ISO8859-2',
 622         'ro_ro':                         'ro_RO.ISO8859-2',
 623         'ru':                            'ru_RU.ISO8859-5',
 624         'ru_ru':                         'ru_RU.ISO8859-5',
 625         'rumanian':                      'ro_RO.ISO8859-2',
 626         'russian':                       'ru_RU.ISO8859-5',
 627         'serbocroatian':                 'sh_YU.ISO8859-2',
 628         'sh':                            'sh_YU.ISO8859-2',
 629         'sh_hr':                         'sh_HR.ISO8859-2',
 630         'sh_sp':                         'sh_YU.ISO8859-2',
 631         'sh_yu':                         'sh_YU.ISO8859-2',
 632         'sk':                            'sk_SK.ISO8859-2',
 633         'sk_sk':                         'sk_SK.ISO8859-2',
 634         'sl':                            'sl_CS.ISO8859-2',
 635         'sl_cs':                         'sl_CS.ISO8859-2',
 636         'sl_si':                         'sl_SI.ISO8859-2',
 637         'slovak':                        'sk_SK.ISO8859-2',
 638         'slovene':                       'sl_CS.ISO8859-2',
 639         'sp':                            'sp_YU.ISO8859-5',
 640         'sp_yu':                         'sp_YU.ISO8859-5',
 641         'spanish':                       'es_ES.ISO8859-1',
 642         'spanish_spain':                 'es_ES.ISO8859-1',
 643         'sr_sp':                         'sr_SP.ISO8859-2',
 644         'sv':                            'sv_SE.ISO8859-1',
 645         'sv_se':                         'sv_SE.ISO8859-1',
 646         'swedish':                       'sv_SE.ISO8859-1',
 647         'th_th':                         'th_TH.TACTIS',
 648         'tr':                            'tr_TR.ISO8859-9',
 649         'tr_tr':                         'tr_TR.ISO8859-9',
 650         'turkish':                       'tr_TR.ISO8859-9',
 651         'univ':                          'en_US.utf',
 652         'universal':                     'en_US.utf',
 653         'zh':                            'zh_CN.eucCN',
 654         'zh_cn':                         'zh_CN.eucCN',
 655         'zh_cn.big5':                    'zh_TW.eucTW',
 656         'zh_cn.euc':                     'zh_CN.eucCN',
 657         'zh_tw':                         'zh_TW.eucTW',
 658         'zh_tw.euc':                     'zh_TW.eucTW',
 659 }
 660
 661 #
 662 # this maps windows language identifiers (as used on Windows 95 and
 663 # earlier) to locale strings.
 664 #
 665 # NOTE: this mapping is incomplete.  If your language is missing, please
 666 # submit a bug report to Python bug manager, which you can find via:
 667 #     http://www.python.org/dev/
 668 # Make sure you include the missing language identifier and the suggested
 669 # locale code.
 670 #
 671
 672 windows_locale = {
 673     0x0404: "zh_TW", # Chinese (Taiwan)
 674     0x0804: "zh_CN", # Chinese (PRC)
 675     0x0406: "da_DK", # Danish
 676     0x0413: "nl_NL", # Dutch (Netherlands)
 677     0x0409: "en_US", # English (United States)
 678     0x0809: "en_UK", # English (United Kingdom)
 679     0x0c09: "en_AU", # English (Australian)
 680     0x1009: "en_CA", # English (Canadian)
 681     0x1409: "en_NZ", # English (New Zealand)
 682     0x1809: "en_IE", # English (Ireland)
 683     0x1c09: "en_ZA", # English (South Africa)
 684     0x040b: "fi_FI", # Finnish
 685     0x040c: "fr_FR", # French (Standard)
 686     0x080c: "fr_BE", # French (Belgian)
 687     0x0c0c: "fr_CA", # French (Canadian)
 688     0x100c: "fr_CH", # French (Switzerland)
 689     0x0407: "de_DE", # German (Standard)
 690     0x0408: "el_GR", # Greek
 691     0x040d: "iw_IL", # Hebrew
 692     0x040f: "is_IS", # Icelandic
 693     0x0410: "it_IT", # Italian (Standard)
 694     0x0411: "ja_JA", # Japanese
 695     0x0414: "no_NO", # Norwegian (Bokmal)
 696     0x0816: "pt_PT", # Portuguese (Standard)
 697     0x0c0a: "es_ES", # Spanish (Modern Sort)
 698     0x0441: "sw_KE", # Swahili (Kenya)
 699     0x041d: "sv_SE", # Swedish
 700     0x081d: "sv_FI", # Swedish (Finland)
 701     0x041f: "tr_TR", # Turkish
 702 }
 703
 704 def _print_locale():
 705
 706     """ Test function.
 707     """
 708     categories = {}
 709     def _init_categories(categories=categories):
 710         for k,v in globals().items():
 711             if k[:3] == 'LC_':
 712                 categories[k] = v
 713     _init_categories()
 714     del categories['LC_ALL']
 715
 716     print 'Locale defaults as determined by getdefaultlocale():'
 717     print '-'*72
 718     lang, enc = getdefaultlocale()
 719     print 'Language: ', lang or '(undefined)'
 720     print 'Encoding: ', enc or '(undefined)'
 721     print
 722
 723     print 'Locale settings on startup:'
 724     print '-'*72
 725     for name,category in categories.items():
 726         print name, '...'
 727         lang, enc = getlocale(category)
 728         print '   Language: ', lang or '(undefined)'
 729         print '   Encoding: ', enc or '(undefined)'
 730         print
 731
 732     print
 733     print 'Locale settings after calling resetlocale():'
 734     print '-'*72
 735     resetlocale()
 736     for name,category in categories.items():
 737         print name, '...'
 738         lang, enc = getlocale(category)
 739         print '   Language: ', lang or '(undefined)'
 740         print '   Encoding: ', enc or '(undefined)'
 741         print
 742
 743     try:
 744         setlocale(LC_ALL, "")
 745     except:
 746         print 'NOTE:'
 747         print 'setlocale(LC_ALL, "") does not support the default locale'
 748         print 'given in the OS environment variables.'
 749     else:
 750         print
 751         print 'Locale settings after calling setlocale(LC_ALL, ""):'
 752         print '-'*72
 753         for name,category in categories.items():
 754             print name, '...'
 755             lang, enc = getlocale(category)
 756             print '   Language: ', lang or '(undefined)'
 757             print '   Encoding: ', enc or '(undefined)'
 758             print
 759
 760 ###
 761
 762 try:
 763     LC_MESSAGES
 764 except NameError:
 765     pass
 766 else:
 767     __all__.append("LC_MESSAGES")
 768
 769 if __name__=='__main__':
 770     print 'Locale aliasing:'
 771     print
 772     _print_locale()
 773     print
 774     print 'Number formatting:'
 775     print
 776     _test()