for git v1.5.2 (and below): chdir to the directory of the target file before executin...
[translate_toolkit.git] / lang / data.py
blob1b040702df7a2c6f332ead420adbfe5ec0afcfa5
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 #
4 # Copyright 2007 Zuza Software Foundation
5 #
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """This module stores information and functionality that relates to plurals."""
24 import unicodedata
# The key is the language code, which may contain country codes and modifiers.
# The value is a tuple: (Full name in English, nplurals, plural equation)
# The plural equation is a C-style expression in n, as used in the
# "Plural-Forms" header of gettext PO files.  It is stored without a trailing
# semicolon.
# NOTE(review): the ISO 3166 code for the United Kingdom is GB; the 'en_UK'
# key is kept as-is so that existing lookups keep working.

languages = {
    'af': ('Afrikaans', 2, '(n != 1)'),
    'ak': ('Akan', 2, 'n > 1'),
    'ar': ('Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n>=3 && n<=10 ? 3 : n>=11 && n<=99 ? 4 : 5'),
    'az': ('Azerbaijani', 2, '(n != 1)'),
    'be': ('Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'bg': ('Bulgarian', 2, '(n != 1)'),
    'bn': ('Bengali', 2, '(n != 1)'),
    'bo': ('Tibetan', 1, '0'),
    'bs': ('Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
    'ca': ('Catalan', 2, '(n != 1)'),
    'cs': ('Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'cy': ('Welsh', 2, '(n==2) ? 1 : 0'),
    'da': ('Danish', 2, '(n != 1)'),
    'de': ('German', 2, '(n != 1)'),
    'dz': ('Dzongkha', 1, '0'),
    'el': ('Greek', 2, '(n != 1)'),
    'en': ('English', 2, '(n != 1)'),
    'en_UK': ('English (United Kingdom)', 2, '(n != 1)'),
    'en_ZA': ('English (South Africa)', 2, '(n != 1)'),
    'eo': ('Esperanto', 2, '(n != 1)'),
    'es': ('Spanish', 2, '(n != 1)'),
    'et': ('Estonian', 2, '(n != 1)'),
    'eu': ('Basque', 2, '(n != 1)'),
    'fa': ('Persian', 1, '0'),
    'fi': ('Finnish', 2, '(n != 1)'),
    'fo': ('Faroese', 2, '(n != 1)'),
    'fr': ('French', 2, '(n > 1)'),
    'fur': ('Friulian', 2, '(n != 1)'),
    'fy': ('Frisian', 2, '(n != 1)'),
    'ga': ('Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
    'gl': ('Galician', 2, '(n != 1)'),
    'gu': ('Gujarati', 2, '(n != 1)'),
    'he': ('Hebrew', 2, '(n != 1)'),
    'hi': ('Hindi', 2, '(n != 1)'),
    'hr': ('Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'hu': ('Hungarian', 2, '(n != 1)'),
    'id': ('Indonesian', 1, '0'),
    'is': ('Icelandic', 2, '(n != 1)'),
    'it': ('Italian', 2, '(n != 1)'),
    'ja': ('Japanese', 1, '0'),
    'ka': ('Georgian', 1, '0'),
    'km': ('Khmer', 1, '0'),
    'ko': ('Korean', 1, '0'),
    'ku': ('Kurdish', 2, '(n != 1)'),
    'lb': ('Letzeburgesch', 2, '(n != 1)'),
    'ln': ('Lingala', 2, '(n > 1)'),
    'lt': ('Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'lv': ('Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
    'mg': ('Malagasy', 2, '(n > 1)'),
    'mn': ('Mongolian', 2, '(n != 1)'),
    'mr': ('Marathi', 2, '(n != 1)'),
    'ms': ('Malay', 1, '0'),
    'mt': ('Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
    'nah': ('Nahuatl', 2, '(n != 1)'),
    'nb': ('Norwegian Bokmal', 2, '(n != 1)'),
    'ne': ('Nepali', 2, '(n != 1)'),
    'nl': ('Dutch', 2, '(n != 1)'),
    'nn': ('Norwegian Nynorsk', 2, '(n != 1)'),
    'nso': ('Northern Sotho', 2, '(n > 1)'),
    'or': ('Oriya', 2, '(n != 1)'),
    'pa': ('Punjabi', 2, '(n != 1)'),
    'pap': ('Papiamento', 2, '(n != 1)'),
    'pl': ('Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    # The English names must match the iso_639 msgids ("Portuguese") for
    # tr_lang() to be able to translate them.
    'pt': ('Portuguese', 2, '(n != 1)'),
    'pt_BR': ('Portuguese (Brazil)', 2, '(n > 1)'),
    'ro': ('Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2)'),
    'ru': ('Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sk': ('Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
    'sl': ('Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
    'sq': ('Albanian', 2, '(n != 1)'),
    'sr': ('Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'sv': ('Swedish', 2, '(n != 1)'),
    'ta': ('Tamil', 2, '(n != 1)'),
    'th': ('Thai', 1, '0'),
    'tk': ('Turkmen', 2, '(n != 1)'),
    'tr': ('Turkish', 1, '0'),
    'uk': ('Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
    'vi': ('Vietnamese', 1, '0'),
    'wa': ('Walloon', 2, '(n > 1)'),
    # Chinese is difficult because the main divide is on script, not really
    # country. Simplified Chinese is used mostly in China, Singapore and Malaysia.
    # Traditional Chinese is used mostly in Hong Kong, Taiwan and Macau.
    'zh_CN': ('Chinese (China)', 1, '0'),
    'zh_HK': ('Chinese (Hong Kong)', 1, '0'),
    'zh_TW': ('Chinese (Taiwan)', 1, '0'),
}
def simplercode(code):
    """Strip the most specific trailing part from a language code.

    A script modifier introduced by "@" (as in sr@Latn or gez_ER@abegede) is
    removed first; failing that, everything from the last underscore on (the
    country code, for example) is removed.

    An empty code (or None) is handed back unchanged.  A code that contains
    neither "@" nor "_" cannot be simplified any further and yields None.
    """
    # Check http://www.rfc-editor.org/rfc/bcp/bcp47.txt for possible extra issues
    # http://www.rfc-editor.org/rfc/rfc4646.txt
    # http://www.w3.org/International/articles/language-tags/
    if not code:
        return code

    # Try the separators from most to least specific; in both cases the code
    # is cut at the last occurrence of the separator.
    for separator in ("@", "_"):
        head, found, _tail = code.rpartition(separator)
        if found:
            return head

    return None
137 import gettext
138 import re
# Caches filled by gettext_lang() and gettext_country(): they map a language
# code to the gettext function that translates language names (iso639) or
# country names (iso3166) into that language.
iso639 = {}
iso3166 = {}

# Matches a name of the form "language (country)": group 1 is the language
# (a run without whitespace or an opening bracket), group 2 is the text
# between the brackets.
dialectre = re.compile(r"([^(\s]+)\s*\(([^)]+)\)")
def tr_lang(langcode):
    """Build a translator for language names into the language langcode.

    The returned function takes an English language name, either plain or in
    the form "language (country)", and returns it translated into the
    language with iso code langcode.  For the "language (country)" form the
    two parts are translated separately and joined again in the same layout.
    """
    translate_language = gettext_lang(langcode)
    translate_country = gettext_country(langcode)

    def handlelanguage(name):
        dialect = dialectre.match(name)
        if dialect is None:
            # A plain language name without a bracketed country part.
            return translate_language(name)
        language, country = dialect.groups()
        return u"%s (%s)" % (translate_language(language), translate_country(country))

    return handlelanguage
def gettext_lang(langcode):
    """Returns a gettext function to translate language names into the given
    language.

    The function comes from the 'iso_639' gettext domain and is cached in the
    module-level iso639 dictionary, so the catalogue is only looked up once
    per language code.  Because the translation is requested with
    fallback=True, a missing catalogue does not raise an error.

    @param langcode: The code of the language to translate into
    @return: A function taking a language name and returning its translation
    """
    if langcode not in iso639:
        t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        # ugettext only exists on Python 2; on Python 3 the plain gettext
        # method already returns text strings.
        iso639[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso639[langcode]
def gettext_country(langcode):
    """Returns a gettext function to translate country names into the given
    language.

    The function comes from the 'iso_3166' gettext domain and is cached in
    the module-level iso3166 dictionary, so the catalogue is only looked up
    once per language code.  Because the translation is requested with
    fallback=True, a missing catalogue does not raise an error.

    @param langcode: The code of the language to translate into
    @return: A function taking a country name and returning its translation
    """
    if langcode not in iso3166:
        t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        # ugettext only exists on Python 2; on Python 3 the plain gettext
        # method already returns text strings.
        iso3166[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso3166[langcode]
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

    @param string: The string to be normalized
    @param normal_form: NFC (default), NFD, NFKC, NFKD
    @return: Normalized string
    """
    normalized = unicodedata.normalize(normal_form, string)
    return normalized
def forceunicode(string):
    """Helper method to ensure that the parameter becomes unicode if not yet

    Byte strings are decoded with the encoding named by their "encoding"
    attribute when they carry one, and as UTF-8 otherwise.  The resulting
    text is then normalized to NFC.  None is passed through unchanged.

    @param string: A byte string, a unicode string or None
    @return: The NFC normalized unicode string, or None
    """
    if string is None:
        return None
    # Test for bytes rather than str: on Python 2 (2.6 and later) bytes is an
    # alias of str, while on Python 3 only bytes objects can be decoded.
    if isinstance(string, bytes):
        encoding = getattr(string, "encoding", "utf-8")
        string = string.decode(encoding)
    # Same NFC normalization that normalize() applies by default.
    return unicodedata.normalize("NFC", string)