2 # -*- coding: utf-8 -*-
4 # Copyright 2007 Zuza Software Foundation
6 # This file is part of translate.
8 # translate is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # translate is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with translate; if not, write to the Free Software
20 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 """This module stores information and functionality that relates to plurals."""
26 # The key is the language code, which may contain country codes and modifiers.
27 # The value is a tuple: (Full name in English, nplurals, plural equation)
30 'af': ('Afrikaans', 2, '(n != 1)'),
31 'ak': ('Akan', 2, 'n > 1'),
32 'ar': ('Arabic', 6, 'n==0 ? 0 : n==1 ? 1 : n==2 ? 2 : n>=3 && n<=10 ? 3 : n>=11 && n<=99 ? 4 : 5'),
33 'az': ('Azerbaijani', 2, '(n != 1)'),
34 'be': ('Belarusian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
35 'bg': ('Bulgarian', 2, '(n != 1)'),
36 'bn': ('Bengali', 2, '(n != 1)'),
37 'bo': ('Tibetan', 1, '0'),
38 'bs': ('Bosnian', 3, 'n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2'),
39 'ca': ('Catalan', 2, '(n != 1)'),
40 'cs': ('Czech', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
41 'cy': ('Welsh', 2, '(n==2) ? 1 : 0'),
42 'da': ('Danish', 2, '(n != 1)'),
43 'de': ('German', 2, '(n != 1)'),
44 'dz': ('Dzongkha', 1, '0'),
45 'el': ('Greek', 2, '(n != 1)'),
46 'en': ('English', 2, '(n != 1)'),
47 'en_UK': ('English (United Kingdom)', 2, '(n != 1)'),
48 'en_ZA': ('English (South Africa)', 2, '(n != 1)'),
49 'eo': ('Esperanto', 2, '(n != 1)'),
50 'es': ('Spanish', 2, '(n != 1)'),
51 'et': ('Estonian', 2, '(n != 1)'),
52 'eu': ('Basque', 2, '(n != 1)'),
53 'fa': ('Persian', 1, '0'),
54 'fi': ('Finnish', 2, '(n != 1)'),
55 'fo': ('Faroese', 2, '(n != 1)'),
56 'fr': ('French', 2, '(n > 1)'),
57 'fur': ('Friulian', 2, '(n != 1)'),
58 'fy': ('Frisian', 2, '(n != 1)'),
59 'ga': ('Irish', 3, 'n==1 ? 0 : n==2 ? 1 : 2'),
60 'gl': ('Galician', 2, '(n != 1)'),
61 'gu': ('Gujarati', 2, '(n != 1)'),
62 'he': ('Hebrew', 2, '(n != 1)'),
63 'hi': ('Hindi', 2, '(n != 1)'),
64 'hr': ('Croatian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
65 'hu': ('Hungarian', 2, '(n != 1)'),
66 'id': ('Indonesian', 1, '0'),
67 'is': ('Icelandic', 2, '(n != 1)'),
68 'it': ('Italian', 2, '(n != 1)'),
69 'ja': ('Japanese', 1, '0'),
70 'ka': ('Georgian', 1, '0'),
71 'km': ('Khmer', 1, '0'),
72 'ko': ('Korean', 1, '0'),
73 'ku': ('Kurdish', 2, '(n != 1)'),
74 'lb': ('Letzeburgesch', 2, '(n != 1)'),
75 'ln': ('Lingala', 2, '(n > 1)'),
76 'lt': ('Lithuanian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && (n%100<10 || n%100>=20) ? 1 : 2)'),
77 'lv': ('Latvian', 3, '(n%10==1 && n%100!=11 ? 0 : n != 0 ? 1 : 2)'),
78 'mg': ('Malagasy', 2, '(n > 1)'),
79 'mn': ('Mongolian', 2, '(n != 1)'),
80 'mr': ('Marathi', 2, '(n != 1)'),
81 'ms': ('Malay', 1, '0'),
82 'mt': ('Maltese', 4, '(n==1 ? 0 : n==0 || ( n%100>1 && n%100<11) ? 1 : (n%100>10 && n%100<20 ) ? 2 : 3)'),
83 'nah': ('Nahuatl', 2, '(n != 1)'),
84 'nb': ('Norwegian Bokmal', 2, '(n != 1)'),
85 'ne': ('Nepali', 2, '(n != 1)'),
86 'nl': ('Dutch', 2, '(n != 1)'),
87 'nn': ('Norwegian Nynorsk', 2, '(n != 1)'),
88 'nso': ('Northern Sotho', 2, '(n > 1)'),
89 'or': ('Oriya', 2, '(n != 1)'),
90 'pa': ('Punjabi', 2, '(n != 1)'),
91 'pap': ('Papiamento', 2, '(n != 1)'),
92 'pl': ('Polish', 3, '(n==1 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
93 'pt': ('Portugese', 2, '(n != 1)'),
94 'pt_BR': ('Portugese (Brazil)', 2, '(n > 1)'),
95 'ro': ('Romanian', 3, '(n==1 ? 0 : (n==0 || (n%100 > 0 && n%100 < 20)) ? 1 : 2);'),
96 'ru': ('Russian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
97 'sk': ('Slovak', 3, '(n==1) ? 0 : (n>=2 && n<=4) ? 1 : 2'),
98 'sl': ('Slovenian', 4, '(n%100==1 ? 0 : n%100==2 ? 1 : n%100==3 || n%100==4 ? 2 : 3)'),
99 'sq': ('Albanian', 2, '(n != 1)'),
100 'sr': ('Serbian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
101 'sv': ('Swedish', 2, '(n != 1)'),
102 'ta': ('Tamil', 2, '(n != 1)'),
103 'th': ('Thai', 1, '0'),
104 'tk': ('Turkmen', 2, '(n != 1)'),
105 'tr': ('Turkish', 1, '0'),
106 'uk': ('Ukrainian', 3, '(n%10==1 && n%100!=11 ? 0 : n%10>=2 && n%10<=4 && (n%100<10 || n%100>=20) ? 1 : 2)'),
107 'vi': ('Vietnamese',1 , '0'),
108 'wa': ('Walloon', 2, '(n > 1)'),
109 # Chinese is difficult because the main divide is on script, not really
110 # country. Simplified Chinese is used mostly in China, Singapore and Malaysia.
111 # Traditional Chinese is used mostly in Hong Kong, Taiwan and Macau.
112 'zh_CN': ('Chinese (China)', 1, '0'),
113 'zh_HK': ('Chinese (Hong Kong)', 1, '0'),
114 'zh_TW': ('Chinese (Taiwan)', 1, '0'),
def simplercode(code):
    """Simplify a language code by dropping its most specific part.

    A trailing "@modifier" is removed first; failing that, everything from
    the last "_" onwards (a country code, for example) is removed.

    @param code: language code such as "sr@Latn" or "af_ZA"
    @return: the shortened code; code itself when it is empty or None;
        None when there is neither a modifier nor an underscore to strip
    """
    # Check http://www.rfc-editor.org/rfc/bcp/bcp47.txt for possible extra issues
    # http://www.rfc-editor.org/rfc/rfc4646.txt
    # http://www.w3.org/International/articles/language-tags/
    if not code:
        return code

    # The @ modifier is used for script variants of the same language, like
    # sr@Latn or gez_ER@abegede
    modifier = code.rfind("@")
    if modifier >= 0:
        return code[:modifier]

    # rfind() yields -1 when absent; slicing with that would silently chop
    # the last character, hence the explicit guards.
    underscore = code.rfind("_")
    if underscore >= 0:
        return code[:underscore]

    # NOTE(review): no fallback return is visible in the reviewed source, so
    # the implicit None is made explicit here -- confirm that callers only
    # test the result for falsiness.
    return None
# Recognises names written as "language (country)": group 1 is the leading
# run of characters up to the first whitespace or "(", group 2 is the text
# between the parentheses.
dialectre = re.compile(r"([^(\s]+)\s*\(([^)]+)\)")
def tr_lang(langcode):
    """Gives a function that can translate a language name, even in the form
           "language (country)"
    into the language with iso code langcode.

    @param langcode: code of the target language; handed unchanged to
        gettext_lang() and gettext_country()
    @return: a one-argument function that maps a name to its translation.
        Names of the form "language (country)" have both parts translated
        separately and are reassembled in the same form.
    """
    langfunc = gettext_lang(langcode)
    countryfunc = gettext_country(langcode)

    def handlelanguage(name):
        match = dialectre.match(name)
        if match is None:
            # No "(country)" part: translate the name as a whole.
            return langfunc(name)
        language, country = match.groups()
        return u"%s (%s)" % (langfunc(language), countryfunc(country))

    return handlelanguage
def gettext_lang(langcode):
    """Returns a gettext function to translate language names into the given
    language.

    The translation function comes from the 'iso_639' gettext domain and is
    cached in iso639 under langcode, so the catalogue is only looked up on
    the first call for each code. Because fallback=True is passed, a missing
    catalogue does not raise.

    @param langcode: code of the language to translate into
    @return: a callable taking a language name and returning its translation
    """
    if langcode not in iso639:
        t = gettext.translation('iso_639', languages=[langcode], fallback=True)
        # ugettext() only exists on Python 2 translation objects; Python 3
        # offers just gettext(), which already returns text.
        iso639[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso639[langcode]
def gettext_country(langcode):
    """Returns a gettext function to translate country names into the given
    language.

    The translation function comes from the 'iso_3166' gettext domain and is
    cached in iso3166 under langcode, so the catalogue is only looked up on
    the first call for each code. Because fallback=True is passed, a missing
    catalogue does not raise.

    @param langcode: code of the language to translate into
    @return: a callable taking a country name and returning its translation
    """
    if langcode not in iso3166:
        t = gettext.translation('iso_3166', languages=[langcode], fallback=True)
        # ugettext() only exists on Python 2 translation objects; Python 3
        # offers just gettext(), which already returns text.
        iso3166[langcode] = getattr(t, 'ugettext', t.gettext)
    return iso3166[langcode]
def normalize(string, normal_form="NFC"):
    """Return a unicode string in its normalized form

       @param string: The string to be normalized; None is passed through
       @param normal_form: NFC (default), NFD, NFKC, NFKD
       @return: Normalized string, or None if string is None
    """
    # unicodedata.normalize() rejects None with a TypeError; treat "no
    # string" as "nothing to normalize" instead.
    if string is None:
        return None
    return unicodedata.normalize(normal_form, string)
187 def forceunicode(string
):
188 """Helper method to ensure that the parameter becomes unicode if not yet"""
191 if isinstance(string
, str):
192 encoding
= getattr(string
, "encoding", "utf-8")
193 string
= string
.decode(encoding
)
194 string
= normalize(string
)