* Updates
[mediawiki.git] / maintenance / language / languages.inc
blob03fc4177f1d3ffc91298b180311a96fff635b061
1 <?php
2 /**
3  * Handle messages in the language files.
4  *
5  * @package MediaWiki
6  * @subpackage Maintenance
7  */
9 require_once( 'messageTypes.inc' );
11 class languages {
12         protected $mLanguages; # Sorted list of the known languages (keys of Language::getLanguageNames())
13         protected $mRawMessages; # Raw list of the messages in each language, indexed by language code
14         protected $mMessages; # Messages in each language (except for English), divided into groups
15         protected $mGeneralMessages; # General messages in English, divided into groups
16         protected $mIgnoredMessages; # All the messages which should exist only in the English file
17         protected $mOptionalMessages; # All the messages which may be translated or not, depending on the language
19         /**
20          * Load the list of languages: all the Messages*.php
21          * files in the languages directory.
22          *
23          * @param $exif Treat the EXIF messages as required? If false, they are added to the optional messages.
24          */
25         function __construct( $exif = true ) {
26                 global $wgIgnoredMessages, $wgOptionalMessages, $wgEXIFMessages;
27                 $this->mIgnoredMessages = $wgIgnoredMessages;
28                 if ( $exif ) {
29                         $this->mOptionalMessages = array_merge( $wgOptionalMessages );
30                 } else {
31                         $this->mOptionalMessages = array_merge( $wgOptionalMessages, $wgEXIFMessages );
32                 }
34                 $this->mLanguages = array_keys( Language::getLanguageNames( true ) );
35                 sort( $this->mLanguages );
36         }
38         /**
39          * Get the language list.
40          *
41          * @return The language list.
42          */
43         public function getLanguages() {
44                 return $this->mLanguages;
45         }
47         /**
48          * Get the ignored messages list.
49          *
50          * @return The ignored messages list.
51          */
52         public function getIgnoredMessages() {
53                 return $this->mIgnoredMessages;
54         }
56         /**
57          * Get the optional messages list.
58          *
59          * @return The optional messages list.
60          */
61         public function getOptionalMessages() {
62                 return $this->mOptionalMessages;
63         }
65         /**
66          * Load the raw messages for a specific language from the messages file.
67          *
68          * @param $code The language code.
69          */
70         protected function loadRawMessages( $code ) {
71                 if ( isset( $this->mRawMessages[$code] ) ) {
72                         return;
73                 }
74                 $filename = Language::getMessagesFileName( $code );
75                 if ( file_exists( $filename ) ) {
76                         require( $filename );
77                         if ( isset( $messages ) ) {
78                                 $this->mRawMessages[$code] = $messages;
79                         } else {
80                                 $this->mRawMessages[$code] = array();
81                         }
82                 } else {
83                         $this->mRawMessages[$code] = array();
84                 }
85         }
87         /**
88          * Load the messages for a specific language (which is not English) and divide them to groups:
89          * all - all the messages.
90          * required - messages which should be translated in order to get a complete translation.
91          * optional - messages which can be translated, the fallback translation is used if not translated.
92          * obsolete - messages which should not be translated, either because they do not exist, or because they are ignored messages.
93          * translated - messages which are either required or optional, but translated from English and needed.
94          *
95          * @param $code The language code.
96          */
97         private function loadMessages( $code ) {
98                 if ( isset( $this->mMessages[$code] ) ) {
99                         return;
100                 }
101                 $this->loadRawMessages( $code );
102                 $this->loadGeneralMessages();
103                 $this->mMessages[$code]['all'] = $this->mRawMessages[$code];
104                 $this->mMessages[$code]['required'] = array();
105                 $this->mMessages[$code]['optional'] = array();
106                 $this->mMessages[$code]['obsolete'] = array();
107                 $this->mMessages[$code]['translated'] = array();
108                 foreach ( $this->mMessages[$code]['all'] as $key => $value ) {
109                         if ( isset( $this->mGeneralMessages['required'][$key] ) ) {
110                                 $this->mMessages[$code]['required'][$key] = $value;
111                                 $this->mMessages[$code]['translated'][$key] = $value;
112                         } else if ( isset( $this->mGeneralMessages['optional'][$key] ) ) {
113                                 $this->mMessages[$code]['optional'][$key] = $value;
114                                 $this->mMessages[$code]['translated'][$key] = $value;
115                         } else {
116                                 $this->mMessages[$code]['obsolete'][$key] = $value;
117                         }
118                 }
119         }
121         /**
122          * Load the messages for English and divide them to groups:
123          * all - all the messages.
124          * required - messages which should be translated to other languages in order to get a complete translation.
125          * optional - messages which can be translated to other languages, but it's not required for a complete translation.
126          * ignored - messages which should not be translated to other languages.
127          * translatable - messages which are either required or optional, but can be translated from English.
128          */
129         private function loadGeneralMessages() {
130                 if ( isset( $this->mGeneralMessages ) ) {
131                         return;
132                 }
133                 $this->loadRawMessages( 'en' );
134                 $this->mGeneralMessages['all'] = $this->mRawMessages['en'];
135                 $this->mGeneralMessages['required'] = array();
136                 $this->mGeneralMessages['optional'] = array();
137                 $this->mGeneralMessages['ignored'] = array();
138                 $this->mGeneralMessages['translatable'] = array();
139                 foreach ( $this->mGeneralMessages['all'] as $key => $value ) {
140                         if ( in_array( $key, $this->mIgnoredMessages ) ) {
141                                 $this->mGeneralMessages['ignored'][$key] = $value;
142                         } else if ( in_array( $key, $this->mOptionalMessages ) ) {
143                                 $this->mGeneralMessages['optional'][$key] = $value;
144                                 $this->mGeneralMessages['translatable'][$key] = $value;
145                         } else {
146                                 $this->mGeneralMessages['required'][$key] = $value;
147                                 $this->mGeneralMessages['translatable'][$key] = $value;
148                         }
149                 }
150         }
152         /**
153          * Get all the messages for a specific language (not English), without the
154          * fallback language messages, divided to groups:
155          * all - all the messages.
156          * required - messages which should be translated in order to get a complete translation.
157          * optional - messages which can be translated, the fallback translation is used if not translated.
158          * obsolete - messages which should not be translated, either because they do not exist, or because they are ignored messages.
159          * translated - messages which are either required or optional, but translated from English and needed.
160          *
161          * @param $code The language code.
162          *
163          * @return The messages in this language.
164          */
165         public function getMessages( $code ) {
166                 $this->loadMessages( $code );
167                 return $this->mMessages[$code];
168         }
170         /**
171          * Get all the general English messages, divided to groups:
172          * all - all the messages.
173          * required - messages which should be translated to other languages in order to get a complete translation.
174          * optional - messages which can be translated to other languages, but it's not required for a complete translation.
175          * ignored - messages which should not be translated to other languages.
176          * translatable - messages which are either required or optional, but can be translated from English.
177          *
178          * @return The general English messages.
179          */
180         public function getGeneralMessages() {
181                 $this->loadGeneralMessages();
182                 return $this->mGeneralMessages;
183         }
185         /**
186          * Get the untranslated messages for a specific language.
187          *
188          * @param $code The language code.
189          *
190          * @return The untranslated messages for this language.
191          */
192         public function getUntranslatedMessages( $code ) {
193                 $this->loadGeneralMessages();
194                 $this->loadMessages( $code );
195                 $requiredGeneralMessages = array_keys( $this->mGeneralMessages['required'] );
196                 $requiredMessages = array_keys( $this->mMessages[$code]['required'] );
197                 $untranslatedMessages = array();
198                 foreach ( array_diff( $requiredGeneralMessages, $requiredMessages ) as $key ) {
199                         $untranslatedMessages[$key] = $this->mGeneralMessages['required'][$key];
200                 }
201                 return $untranslatedMessages;
202         }
204         /**
205          * Get the duplicate messages for a specific language.
206          *
207          * @param $code The language code.
208          *
209          * @return The duplicate messages for this language.
210          */
211         public function getDuplicateMessages( $code ) {
212                 $this->loadGeneralMessages();
213                 $this->loadMessages( $code );
214                 $duplicateMessages = array();
215                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
216                         if ( $this->mGeneralMessages['translatable'][$key] == $value ) {
217                                 $duplicateMessages[$key] = $value;
218                         }
219                 }
220                 return $duplicateMessages;
221         }
223         /**
224          * Get the messages which do not use some variables.
225          *
226          * @param $code The language code.
227          *
228          * @return The messages which do not use some variables in this language.
229          */
230         public function getMessagesWithoutVariables( $code ) {
231                 $this->loadGeneralMessages();
232                 $this->loadMessages( $code );
233                 $variables = array( '\$1', '\$2', '\$3', '\$4', '\$5', '\$6', '\$7', '\$8', '\$9' );
234                 $messagesWithoutVariables = array();
235                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
236                         $missing = false;
237                         foreach ( $variables as $var ) {
238                                 if ( preg_match( "/$var/sU", $this->mGeneralMessages['translatable'][$key] ) &&
239                                         !preg_match( "/$var/sU", $value ) ) {
240                                         $missing = true;
241                                 }
242                         }
243                         if ( $missing ) {
244                                 $messagesWithoutVariables[$key] = $value;
245                         }
246                 }
247                 return $messagesWithoutVariables;
248         }
250         /**
251          * Get the empty messages.
252          *
253          * @param $code The language code.
254          *
255          * @return The empty messages for this language.
256          */
257         public function getEmptyMessages( $code ) {
258                 $this->loadGeneralMessages();
259                 $this->loadMessages( $code );
260                 $emptyMessages = array();
261                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
262                         if ( $value === '' || $value === '-' ) {
263                                 $emptyMessages[$key] = $value;
264                         }
265                 }
266                 return $emptyMessages;
267         }
269         /**
270          * Get the messages with trailing whitespace.
271          *
272          * @param $code The language code.
273          *
274          * @return The messages with trailing whitespace in this language.
275          */
276         public function getMessagesWithWhitespace( $code ) {
277                 $this->loadGeneralMessages();
278                 $this->loadMessages( $code );
279                 $messagesWithWhitespace = array();
280                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
281                         if ( $this->mGeneralMessages['translatable'][$key] !== '' && $value !== rtrim( $value ) ) {
282                                 $messagesWithWhitespace[$key] = $value;
283                         }
284                 }
285                 return $messagesWithWhitespace;
286         }
288         /**
289          * Get the non-XHTML messages.
290          *
291          * @param $code The language code.
292          *
293          * @return The non-XHTML messages for this language.
294          */
295         public function getNonXHTMLMessages( $code ) {
296                 $this->loadGeneralMessages();
297                 $this->loadMessages( $code );
298                 $wrongPhrases = array(
299                         '<hr *\\?>',
300                         '<br *\\?>',
301                         '<hr/>',
302                         '<br/>',
303                 );
304                 $wrongPhrases = '~(' . implode( '|', $wrongPhrases ) . ')~sDu';
305                 $nonXHTMLMessages = array();
306                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
307                         if ( preg_match( $wrongPhrases, $value ) ) {
308                                 $nonXHTMLMessages[$key] = $value;
309                         }
310                 }
311                 return $nonXHTMLMessages;
312         }
314         /**
315          * Get the messages which include wrong characters.
316          *
317          * @param $code The language code.
318          *
319          * @return The messages which include wrong characters in this language.
320          */
321         public function getMessagesWithWrongChars( $code ) {
322                 $this->loadGeneralMessages();
323                 $this->loadMessages( $code );
324                 $wrongChars = array(
325                         '[LRM]' => "\xE2\x80\x8E",
326                         '[RLM]' => "\xE2\x80\x8F",
327                         '[LRE]' => "\xE2\x80\xAA",
328                         '[RLE]' => "\xE2\x80\xAB",
329                         '[POP]' => "\xE2\x80\xAC",
330                         '[LRO]' => "\xE2\x80\xAD",
331                         '[RLO]' => "\xE2\x80\xAB",
332                         '[ZWSP]'=> "\xE2\x80\x8B",
333                         '[NBSP]'=> "\xC2\xA0",
334                         '[WJ]'  => "\xE2\x81\xA0",
335                         '[BOM]' => "\xEF\xBB\xBF",
336                         '[FFFD]'=> "\xEF\xBF\xBD",
337                 );
338                 $wrongRegExp = '/(' . implode( '|', array_values( $wrongChars ) ) . ')/sDu';
339                 $wrongCharsMessages = array();
340                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
341                         if ( preg_match( $wrongRegExp, $value ) ) {
342                                 foreach ( $wrongChars as $viewableChar => $hiddenChar ) {
343                                         $value = str_replace( $hiddenChar, $viewableChar, $value );
344                                 }
345                                 $wrongCharsMessages[$key] = $value;
346                         }
347                 }
348                 return $wrongCharsMessages;
349         }
351         /**
352          * Output a messages list
353          *
354          * @param $messages The messages list
355          * @param $code The language code
356          * @param $text The text to show before the list (optional)
357          * @param $level The display level (optional)
358          * @param $links Show links (optional)
359          * @param $wikilang The language of the wiki to display the list in, for the links (optional)
360          */
361         public function outputMessagesList( $messages, $code, $text = '', $level = 2, $links = false, $wikilang = null ) {
362                 if ( count( $messages ) == 0 ) {
363                         return;
364                 }
365                 if ( $text ) {
366                         echo "$text\n";
367                 }
368                 if ( $level == 1 ) {
369                         echo "[messages are hidden]\n";
370                 } else {
371                         foreach ( $messages as $key => $value ) {
372                                 if ( $links ) {
373                                         $displayKey = ucfirst( $key );
374                                         if ( !isset( $wikilang ) ) {
375                                                 global $wgContLang;
376                                                 $wikilang = $wgContLang->getCode();
377                                         }
378                                         if ( $code == $wikilang ) {
379                                                 $displayKey = "[[MediaWiki:$displayKey|$key]]";
380                                         } else {
381                                                 $displayKey = "[[MediaWiki:$displayKey/$code|$key]]";
382                                         }
383                                 } else {
384                                         $displayKey = $key;
385                                 }
386                                 if ( $level == 2 ) {
387                                         echo "* $displayKey\n";
388                                 } else {
389                                         echo "* $displayKey:            '$value'\n";
390                                 }
391                         }
392                 }
393         }