Localization fix.
[mediawiki.git] / maintenance / language / languages.inc
blob6d16f80c0efdce10a3b798139d772869c5ea2381
1 <?php
2 /**
3  * Handle messages in the language files.
4  *
5  * @file
6  * @ingroup MaintenanceLanguage
7  */
9 /**
10  * @ingroup MaintenanceLanguage
11  */
12 class languages {
13         protected $mLanguages; # List of languages
14         protected $mRawMessages; # Raw list of the messages in each language
15         protected $mMessages; # Messages in each language (except for English), divided into groups
16         protected $mGeneralMessages; # General messages in English, divided into groups
17         protected $mIgnoredMessages; # All the messages which should exist only in the English file
18         protected $mOptionalMessages; # All the messages which may be translated or not, depending on the language
20         /**
21          * Load the list of languages: all the Messages*.php
22          * files in the languages directory.
23          *
24          * @param $exif Treat the EXIF messages?
25          */
function __construct( $exif = true ) {
        // The variables used below ($wgIgnoredMessages, $wgOptionalMessages,
        // $wgEXIFMessages) are presumably defined by messageTypes.inc in this
        // function's local scope; nothing else here assigns them.
        require( dirname(__FILE__) . '/messageTypes.inc' );
        $this->mIgnoredMessages = $wgIgnoredMessages;

        // When EXIF messages are not treated, they are added to the optional list.
        $this->mOptionalMessages = $exif
                ? array_merge( $wgOptionalMessages )
                : array_merge( $wgOptionalMessages, $wgEXIFMessages );

        // Keep the keys of the language-name list, in sorted order.
        $names = array_keys( Language::getLanguageNames( true ) );
        sort( $names );
        $this->mLanguages = $names;
}
39         /**
40          * Get the language list.
41          *
42          * @return The language list.
43          */
public function getLanguages() {
        // Plain accessor for the list stored by the constructor.
        $languageList = $this->mLanguages;
        return $languageList;
}
48         /**
49          * Get the ignored messages list.
50          *
51          * @return The ignored messages list.
52          */
public function getIgnoredMessages() {
        // Plain accessor; the list is assigned in the constructor.
        $ignoredList = $this->mIgnoredMessages;
        return $ignoredList;
}
57         /**
58          * Get the optional messages list.
59          *
60          * @return The optional messages list.
61          */
public function getOptionalMessages() {
        // Plain accessor; the list is assigned in the constructor.
        $optionalList = $this->mOptionalMessages;
        return $optionalList;
}
66         /**
67          * Load the raw messages for a specific language from the messages file.
68          *
69          * @param $code The language code.
70          */
protected function loadRawMessages( $code ) {
        // Already cached (possibly as an empty array): nothing to do.
        if ( isset( $this->mRawMessages[$code] ) ) {
                return;
        }

        $filename = Language::getMessagesFileName( $code );
        if ( file_exists( $filename ) ) {
                // The included file runs in this local scope and may define $messages.
                require( $filename );
        }

        // A missing file, or a file that leaves $messages unset, yields an empty list.
        $this->mRawMessages[$code] = isset( $messages ) ? $messages : array();
}
88         /**
89          * Load the messages for a specific language (which is not English) and divide them to groups:
90          * all - all the messages.
91          * required - messages which should be translated in order to get a complete translation.
92          * optional - messages which can be translated, the fallback translation is used if not translated.
93          * obsolete - messages which should not be translated, either because they do not exist in English or because they are ignored messages.
94          * translated - messages which are either required or optional, but translated from English and needed.
95          *
96          * @param $code The language code.
97          */
private function loadMessages( $code ) {
        // Each language is classified only once.
        if ( isset( $this->mMessages[$code] ) ) {
                return;
        }
        $this->loadRawMessages( $code );
        $this->loadGeneralMessages();

        $groups = array(
                'all' => $this->mRawMessages[$code],
                'required' => array(),
                'optional' => array(),
                'obsolete' => array(),
                'translated' => array(),
        );
        foreach ( $groups['all'] as $key => $value ) {
                if ( isset( $this->mGeneralMessages['required'][$key] ) ) {
                        $group = 'required';
                } elseif ( isset( $this->mGeneralMessages['optional'][$key] ) ) {
                        $group = 'optional';
                } else {
                        // Not a required or optional English message.
                        $groups['obsolete'][$key] = $value;
                        continue;
                }
                // Required and optional messages both count as translated.
                $groups[$group][$key] = $value;
                $groups['translated'][$key] = $value;
        }
        $this->mMessages[$code] = $groups;
}
122         /**
123          * Load the messages for English and divide them to groups:
124          * all - all the messages.
125          * required - messages which should be translated to other languages in order to get a complete translation.
126          * optional - messages which can be translated to other languages, but it's not required for a complete translation.
127          * ignored - messages which should not be translated to other languages.
128          * translatable - messages which are either required or optional, but can be translated from English.
129          */
private function loadGeneralMessages() {
        // The English messages are classified only once.
        if ( isset( $this->mGeneralMessages ) ) {
                return;
        }
        $this->loadRawMessages( 'en' );

        // Flip the lists once so each membership test below is a single keyed
        // lookup instead of a loose-comparison scan of the whole list for
        // every message. Assumes both lists contain only string/int keys.
        $ignoredLookup = array_flip( $this->mIgnoredMessages );
        $optionalLookup = array_flip( $this->mOptionalMessages );

        $this->mGeneralMessages['all'] = $this->mRawMessages['en'];
        $this->mGeneralMessages['required'] = array();
        $this->mGeneralMessages['optional'] = array();
        $this->mGeneralMessages['ignored'] = array();
        $this->mGeneralMessages['translatable'] = array();
        foreach ( $this->mGeneralMessages['all'] as $key => $value ) {
                if ( isset( $ignoredLookup[$key] ) ) {
                        // Ignored messages are never translatable.
                        $this->mGeneralMessages['ignored'][$key] = $value;
                } elseif ( isset( $optionalLookup[$key] ) ) {
                        $this->mGeneralMessages['optional'][$key] = $value;
                        $this->mGeneralMessages['translatable'][$key] = $value;
                } else {
                        // Anything neither ignored nor optional is required.
                        $this->mGeneralMessages['required'][$key] = $value;
                        $this->mGeneralMessages['translatable'][$key] = $value;
                }
        }
}
153         /**
154          * Get all the messages for a specific language (not English), without the
155          * fallback language messages, divided to groups:
156          * all - all the messages.
157          * required - messages which should be translated in order to get a complete translation.
158          * optional - messages which can be translated, the fallback translation is used if not translated.
159          * obsolete - messages which should not be translated, either because they do not exist in English or because they are ignored messages.
160          * translated - messages which are either required or optional, but translated from English and needed.
161          *
162          * @param $code The language code.
163          *
164          * @return The messages in this language.
165          */
public function getMessages( $code ) {
        // Classify the language's messages on first use, then hand back all groups.
        $this->loadMessages( $code );
        $grouped = $this->mMessages[$code];
        return $grouped;
}
171         /**
172          * Get all the general English messages, divided to groups:
173          * all - all the messages.
174          * required - messages which should be translated to other languages in order to get a complete translation.
175          * optional - messages which can be translated to other languages, but it's not required for a complete translation.
176          * ignored - messages which should not be translated to other languages.
177          * translatable - messages which are either required or optional, but can be translated from English.
178          *
179          * @return The general English messages.
180          */
public function getGeneralMessages() {
        // Classify the English messages on first use, then hand back all groups.
        $this->loadGeneralMessages();
        $grouped = $this->mGeneralMessages;
        return $grouped;
}
186         /**
187          * Get the untranslated messages for a specific language.
188          *
189          * @param $code The language code.
190          *
191          * @return The untranslated messages for this language.
192          */
public function getUntranslatedMessages( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );

        $englishRequired = $this->mGeneralMessages['required'];
        // Required English keys that have no counterpart in this language.
        $missingKeys = array_diff(
                array_keys( $englishRequired ),
                array_keys( $this->mMessages[$code]['required'] )
        );

        // Report each missing key together with its English text.
        $result = array();
        foreach ( $missingKeys as $missingKey ) {
                $result[$missingKey] = $englishRequired[$missingKey];
        }
        return $result;
}
205         /**
206          * Get the duplicate messages for a specific language.
207          *
208          * @param $code The language code.
209          *
210          * @return The duplicate messages for this language.
211          */
public function getDuplicateMessages( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        $english = $this->mGeneralMessages['translatable'];
        $duplicateMessages = array();
        foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
                // Strict comparison: with ==, PHP compares numeric-looking strings
                // by value, so e.g. "10" and "1e1" would be reported as duplicates.
                if ( $english[$key] === $value ) {
                        $duplicateMessages[$key] = $value;
                }
        }
        return $duplicateMessages;
}
/**
 * Get the obsolete messages for a specific language, i.e. the messages
 * that are neither required nor optional English messages.
 *
 * @param $code The language code.
 *
 * @return The obsolete messages for this language.
 */
public function getObsoleteMessages( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        $obsolete = $this->mMessages[$code]['obsolete'];
        return $obsolete;
}
230         /**
231          * Get the messages which do not use some variables.
232          *
233          * @param $code The language code.
234          *
235          * @return The messages which do not use some variables in this language.
236          */
public function getMessagesWithoutVariables( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        $variables = array( '\$1', '\$2', '\$3', '\$4', '\$5', '\$6', '\$7', '\$8', '\$9' );
        $english = $this->mGeneralMessages['translatable'];
        $result = array();
        foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
                foreach ( $variables as $var ) {
                        $pattern = "/$var/sU";
                        // Only variables used by the English text matter.
                        if ( !preg_match( $pattern, $english[$key] ) ) {
                                continue;
                        }
                        if ( preg_match( $pattern, $value ) ) {
                                continue;
                        }
                        // One missing variable is enough to report the message.
                        $result[$key] = $value;
                        break;
                }
        }
        return $result;
}
257         /**
258          * Get the messages which do not use plural.
259          *
260          * @param $code The language code.
261          *
262          * @return The messages which do not use plural in this language.
263          */
public function getMessagesWithoutPlural( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        $english = $this->mGeneralMessages['translatable'];
        $result = array();
        foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
                // Skip messages whose English text has no plural construct.
                if ( stripos( $english[$key], '{{plural:' ) === false ) {
                        continue;
                }
                // The English text uses plural; report translations that do not.
                if ( stripos( $value, '{{plural:' ) === false ) {
                        $result[$key] = $value;
                }
        }
        return $result;
}
276         /**
277          * Get the empty messages.
278          *
279          * @param $code The language code.
280          *
281          * @return The empty messages for this language.
282          */
public function getEmptyMessages( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        $result = array();
        foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
                // Only the empty string and a lone dash count as "empty".
                if ( $value !== '' && $value !== '-' ) {
                        continue;
                }
                $result[$key] = $value;
        }
        return $result;
}
295         /**
296          * Get the messages with trailing whitespace.
297          *
298          * @param $code The language code.
299          *
300          * @return The messages with trailing whitespace in this language.
301          */
public function getMessagesWithWhitespace( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        $english = $this->mGeneralMessages['translatable'];
        $result = array();
        foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
                // Messages whose English text is empty are never reported.
                if ( $english[$key] === '' ) {
                        continue;
                }
                // rtrim() changes the string only if it ends in whitespace.
                if ( rtrim( $value ) !== $value ) {
                        $result[$key] = $value;
                }
        }
        return $result;
}
314         /**
315          * Get the non-XHTML messages.
316          *
317          * @param $code The language code.
318          *
319          * @return The non-XHTML messages for this language.
320          */
public function getNonXHTMLMessages( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        // NOTE(review): in these single-quoted strings "\\?" becomes the regex
        // "\?" (a literal question mark) - confirm that is what is intended.
        $alternatives = implode( '|', array(
                '<hr *\\?>',
                '<br *\\?>',
                '<hr/>',
                '<br/>',
                '<hr>',
                '<br>',
        ) );
        $pattern = '~(' . $alternatives . ')~sDu';
        $result = array();
        foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
                if ( !preg_match( $pattern, $value ) ) {
                        continue;
                }
                $result[$key] = $value;
        }
        return $result;
}
342         /**
343          * Get the messages which include wrong characters.
344          *
345          * @param $code The language code.
346          *
347          * @return The messages which include wrong characters in this language.
348          */
public function getMessagesWithWrongChars( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        // Visible placeholder => UTF-8 byte sequence of the unwanted character.
        $wrongChars = array(
                '[LRM]' => "\xE2\x80\x8E",
                '[RLM]' => "\xE2\x80\x8F",
                '[LRE]' => "\xE2\x80\xAA",
                '[RLE]' => "\xE2\x80\xAB",
                '[POP]' => "\xE2\x80\xAC",
                '[LRO]' => "\xE2\x80\xAD",
                // U+202E; this entry previously repeated the RLE bytes (U+202B),
                // so right-to-left overrides were never detected.
                '[RLO]' => "\xE2\x80\xAE",
                '[ZWSP]'=> "\xE2\x80\x8B",
                '[NBSP]'=> "\xC2\xA0",
                '[WJ]'  => "\xE2\x81\xA0",
                '[BOM]' => "\xEF\xBB\xBF",
                '[FFFD]'=> "\xEF\xBF\xBD",
        );
        $hiddenChars = array_values( $wrongChars );
        $viewableChars = array_keys( $wrongChars );
        $wrongRegExp = '/(' . implode( '|', $hiddenChars ) . ')/sDu';
        $wrongCharsMessages = array();
        foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
                if ( preg_match( $wrongRegExp, $value ) ) {
                        // Swap each hidden character for its visible placeholder
                        // so the offending spot shows up in the report.
                        $wrongCharsMessages[$key] = str_replace( $hiddenChars, $viewableChars, $value );
                }
        }
        return $wrongCharsMessages;
}
/**
 * Get the messages containing wiki links whose target contains the
 * word "project" (case-insensitive).
 *
 * @param $code The language code.
 *
 * @return The matching links of each such message, joined with ", ".
 */
public function getMessagesWithDubiousLinks( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        // Characters accepted in a link target: Title::legalChars() plus '#%{}'.
        $tc = Title::legalChars() . '#%{}';
        $messages = array();
        foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
                $matches = array();
                // Returns 0 when nothing matched and false on failure; in both
                // cases there is nothing to report for this message.
                if ( !preg_match_all( "/\[\[([{$tc}]+)(?:\\|(.+?))?]]/sDu", $value, $matches ) ) {
                        continue;
                }

                $dubiousLinks = array();
                foreach ( $matches[0] as $i => $link ) {
                        // $matches[1][$i] is the link target captured for this link.
                        if ( preg_match( "/.*project.*/isDu", $matches[1][$i] ) ) {
                                $dubiousLinks[] = $link;
                        }
                }

                if ( $dubiousLinks ) {
                        // Separator first: the reversed argument order used before
                        // is no longer accepted by PHP 8.
                        $messages[$key] = implode( ", ", $dubiousLinks );
                }
        }
        return $messages;
}
/**
 * Get the messages with unbalanced square or curly brackets, i.e. where
 * the number of '[' differs from the number of ']' or the number of '{'
 * differs from the number of '}'. Only totals are compared, not nesting.
 *
 * @param $code The language code.
 *
 * @return For each such message, the four counts as a "[, ], {, }" string.
 */
public function getMessagesWithUnbalanced( $code ) {
        $this->loadGeneralMessages();
        $this->loadMessages( $code );
        $messages = array();
        foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
                // substr_count() replaces splitting the message into single
                // characters and counting them in a switch.
                $a = substr_count( $value, '[' );
                $b = substr_count( $value, ']' );
                $c = substr_count( $value, '{' );
                $d = substr_count( $value, '}' );

                if ( $a !== $b || $c !== $d ) {
                        $messages[$key] = "$a, $b, $c, $d";
                }
        }
        return $messages;
}
427 class extensionLanguages extends languages {
428         private $mMessageGroup; # The message group
430         /**
431          * Load the messages group.
432          * @param $group The messages group.
433          */
function __construct( MessageGroup $group ) {
        // The parent constructor is not invoked here, so the language list
        // is left unset for extension message groups.
        $bools = $group->getBools();
        $this->mMessageGroup = $group;
        $this->mIgnoredMessages = $bools['ignored'];
        $this->mOptionalMessages = $bools['optional'];
}
442         /**
443          * Get the extension name.
444          *
445          * @return The extension name.
446          */
public function name() {
        // The name is whatever label the message group reports.
        $label = $this->mMessageGroup->getLabel();
        return $label;
}
451         /**
452          * Load the raw messages for a specific language.
453          *
454          * @param $code The language code.
455          */
protected function loadRawMessages( $code ) {
        // Already cached (possibly as an empty array): nothing to do.
        if ( isset( $this->mRawMessages[$code] ) ) {
                return;
        }
        $loaded = $this->mMessageGroup->load( $code );
        // Any empty() result from the group is normalised to an empty array.
        $this->mRawMessages[$code] = empty( $loaded ) ? array() : $loaded;
}