* (bug 13490) Show upload/file size limit on upload form
[mediawiki.git] / maintenance / language / languages.inc
blob9472e254eb86d0bc232b609f7411bf6cfe1e4278
1 <?php
2 /**
3  * Handle messages in the language files.
4  *
5  * @addtogroup Maintenance
6  */
8 class languages {
9         protected $mLanguages; # List of languages
10         protected $mRawMessages; # Raw list of the messages in each language
11         protected $mMessages; # Messages in each language (except for English), divided to groups
12         protected $mGeneralMessages; # General messages in English, divided to groups
13         protected $mIgnoredMessages; # All the messages which should be exist only in the English file
14         protected $mOptionalMessages; # All the messages which may be translated or not, depending on the language
/**
 * Set up the message-type lists and the sorted list of language codes.
 *
 * The ignored, optional and EXIF message lists are taken from the
 * variables in scope after including messageTypes.inc from this
 * directory (presumably defined by that file).
 *
 * @param $exif When true, only the regular optional messages are stored
 *              as optional; when false, the EXIF messages are appended
 *              to the optional list as well.
 */
function __construct( $exif = true ) {
	require( dirname(__FILE__) . '/messageTypes.inc' );

	$this->mIgnoredMessages = $wgIgnoredMessages;
	$this->mOptionalMessages = $exif
		? array_merge( $wgOptionalMessages )
		: array_merge( $wgOptionalMessages, $wgEXIFMessages );

	// The language codes are the keys of the language-name list.
	$languageCodes = array_keys( Language::getLanguageNames( true ) );
	sort( $languageCodes );
	$this->mLanguages = $languageCodes;
}
/**
 * Return the list of language codes held by this object.
 *
 * @return The language list.
 */
public function getLanguages() {
	$languages = $this->mLanguages;
	return $languages;
}
/**
 * Return the list of ignored message keys held by this object.
 *
 * @return The ignored messages list.
 */
public function getIgnoredMessages() {
	$ignored = $this->mIgnoredMessages;
	return $ignored;
}
/**
 * Return the list of optional message keys held by this object.
 *
 * @return The optional messages list.
 */
public function getOptionalMessages() {
	$optional = $this->mOptionalMessages;
	return $optional;
}
/**
 * Fill the raw message cache for one language from its messages file.
 *
 * Nothing is done when the language is already cached. A missing file,
 * or a file which leaves $messages unset, is cached as an empty array.
 *
 * @param $code The language code.
 */
protected function loadRawMessages( $code ) {
	if ( isset( $this->mRawMessages[$code] ) ) {
		return;
	}

	$filename = Language::getMessagesFileName( $code );
	if ( !file_exists( $filename ) ) {
		$this->mRawMessages[$code] = array();
		return;
	}

	// The file is included in this method's scope, so a $messages
	// variable it defines becomes a local variable here.
	require( $filename );
	$this->mRawMessages[$code] = isset( $messages ) ? $messages : array();
}
/**
 * Build the grouped message lists for one language and cache them.
 * Nothing is done when the language is already cached. The groups are:
 * all - every message found in the language's raw messages.
 * required - messages whose key is a required English message.
 * optional - messages whose key is an optional English message.
 * obsolete - messages whose key is neither required nor optional in English.
 * translated - the required and the optional messages together.
 *
 * @param $code The language code.
 */
private function loadMessages( $code ) {
	if ( isset( $this->mMessages[$code] ) ) {
		return;
	}
	$this->loadRawMessages( $code );
	$this->loadGeneralMessages();

	$groups = array(
		'all' => $this->mRawMessages[$code],
		'required' => array(),
		'optional' => array(),
		'obsolete' => array(),
		'translated' => array(),
	);
	foreach ( $groups['all'] as $name => $text ) {
		if ( isset( $this->mGeneralMessages['required'][$name] ) ) {
			$group = 'required';
		} elseif ( isset( $this->mGeneralMessages['optional'][$name] ) ) {
			$group = 'optional';
		} else {
			$groups['obsolete'][$name] = $text;
			continue;
		}
		// Required and optional messages both count as translated.
		$groups[$group][$name] = $text;
		$groups['translated'][$name] = $text;
	}
	$this->mMessages[$code] = $groups;
}
/**
 * Build the grouped English message lists and cache them.
 * Nothing is done when they are already cached. The groups are:
 * all - every message found in the raw English messages.
 * ignored - messages whose key is in the ignored messages list.
 * optional - messages whose key is in the optional messages list (and not ignored).
 * required - every other message.
 * translatable - the required and the optional messages together.
 */
private function loadGeneralMessages() {
	if ( isset( $this->mGeneralMessages ) ) {
		return;
	}
	$this->loadRawMessages( 'en' );

	$groups = array(
		'all' => $this->mRawMessages['en'],
		'required' => array(),
		'optional' => array(),
		'ignored' => array(),
		'translatable' => array(),
	);
	foreach ( $groups['all'] as $name => $text ) {
		// The ignored list takes precedence over the optional list.
		if ( in_array( $name, $this->mIgnoredMessages ) ) {
			$groups['ignored'][$name] = $text;
			continue;
		}
		$group = in_array( $name, $this->mOptionalMessages ) ? 'optional' : 'required';
		$groups[$group][$name] = $text;
		$groups['translatable'][$name] = $text;
	}
	$this->mGeneralMessages = $groups;
}
/**
 * Return the grouped messages of one language, loading them on first use.
 * Only the language's own messages are included. The groups are:
 * all - every message of the language.
 * required - messages whose key is a required English message.
 * optional - messages whose key is an optional English message.
 * obsolete - messages whose key is neither required nor optional in English.
 * translated - the required and the optional messages together.
 *
 * @param $code The language code.
 *
 * @return The messages in this language.
 */
public function getMessages( $code ) {
	$this->loadMessages( $code );
	$groups = $this->mMessages[$code];
	return $groups;
}
/**
 * Return the grouped English messages, loading them on first use.
 * The groups are:
 * all - every English message.
 * required - messages which are neither ignored nor optional.
 * optional - messages whose key is in the optional messages list.
 * ignored - messages whose key is in the ignored messages list.
 * translatable - the required and the optional messages together.
 *
 * @return The general English messages.
 */
public function getGeneralMessages() {
	$this->loadGeneralMessages();
	$groups = $this->mGeneralMessages;
	return $groups;
}
/**
 * Collect the required English messages which have no counterpart among
 * the required messages of the given language.
 *
 * @param $code The language code.
 *
 * @return The untranslated messages for this language, as
 *         message key => English text.
 */
public function getUntranslatedMessages( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );

	$required = $this->mGeneralMessages['required'];
	$missingKeys = array_diff(
		array_keys( $required ),
		array_keys( $this->mMessages[$code]['required'] )
	);

	$result = array();
	foreach ( $missingKeys as $name ) {
		$result[$name] = $required[$name];
	}
	return $result;
}
/**
 * Get the duplicate messages for a specific language: translated
 * messages whose text is exactly the same as the English text.
 *
 * @param $code The language code.
 *
 * @return The duplicate messages for this language, as
 *         message key => text.
 */
public function getDuplicateMessages( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$duplicateMessages = array();
	foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
		// Compare strictly: a loose comparison treats numeric-looking
		// strings such as '1e3' and '1000' as equal.
		if ( $this->mGeneralMessages['translatable'][$key] === $value ) {
			$duplicateMessages[$key] = $value;
		}
	}
	return $duplicateMessages;
}
/**
 * Return the "obsolete" group of a language's messages, loading the
 * English and the language's messages first when needed.
 *
 * @param $code The language code.
 *
 * @return The obsolete messages for this language.
 */
public function getObsoleteMessages( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$obsolete = $this->mMessages[$code]['obsolete'];
	return $obsolete;
}
/**
 * Find the translated messages which lack at least one of the
 * placeholders $1 ... $9 that the English text contains.
 *
 * @param $code The language code.
 *
 * @return The messages which do not use some variables in this language,
 *         as message key => translated text.
 */
public function getMessagesWithoutVariables( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );

	$result = array();
	foreach ( $this->mMessages[$code]['translated'] as $name => $translation ) {
		$english = $this->mGeneralMessages['translatable'][$name];
		for ( $n = 1; $n <= 9; $n++ ) {
			$placeholder = '$' . $n;
			if ( strpos( $english, $placeholder ) !== false
				&& strpos( $translation, $placeholder ) === false ) {
				// One missing placeholder is enough to report the message.
				$result[$name] = $translation;
				break;
			}
		}
	}
	return $result;
}
/**
 * Find the translated messages which contain no "{{plural:" although the
 * English text does. The search is case-insensitive.
 *
 * @param $code The language code.
 *
 * @return The messages which do not use plural in this language,
 *         as message key => translated text.
 */
public function getMessagesWithoutPlural( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );

	$result = array();
	foreach ( $this->mMessages[$code]['translated'] as $name => $translation ) {
		// Only messages whose English text uses plural are of interest.
		if ( stripos( $this->mGeneralMessages['translatable'][$name], '{{plural:' ) === false ) {
			continue;
		}
		if ( stripos( $translation, '{{plural:' ) === false ) {
			$result[$name] = $translation;
		}
	}
	return $result;
}
/**
 * Find the translated messages whose text is the empty string or a
 * single "-".
 *
 * @param $code The language code.
 *
 * @return The empty messages for this language, as message key => text.
 */
public function getEmptyMessages( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );

	$emptyTexts = array( '', '-' );
	$result = array();
	foreach ( $this->mMessages[$code]['translated'] as $name => $translation ) {
		if ( in_array( $translation, $emptyTexts, true ) ) {
			$result[$name] = $translation;
		}
	}
	return $result;
}
/**
 * Find the translated messages which end in characters that rtrim()
 * strips by default. Messages whose English text is the empty string
 * are not reported.
 *
 * @param $code The language code.
 *
 * @return The messages with trailing whitespace in this language,
 *         as message key => translated text.
 */
public function getMessagesWithWhitespace( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );

	$result = array();
	foreach ( $this->mMessages[$code]['translated'] as $name => $translation ) {
		if ( $this->mGeneralMessages['translatable'][$name] === '' ) {
			continue;
		}
		if ( rtrim( $translation ) !== $translation ) {
			$result[$name] = $translation;
		}
	}
	return $result;
}
/**
 * Get the non-XHTML messages: translated messages which contain an
 * <hr> or <br> tag written in one of the forms listed below.
 *
 * @param $code The language code.
 *
 * @return The non-XHTML messages for this language, as
 *         message key => translated text.
 */
public function getNonXHTMLMessages( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$wrongPhrases = array(
		// As regular expressions these two read "<hr *\?>" and "<br *\?>":
		// the tag name, optional spaces, a literal "?" and ">". They do
		// not match a plain unclosed tag; kept so that everything
		// reported before is still reported.
		'<hr *\\?>',
		'<br *\\?>',
		'<hr/>',
		'<br/>',
		// Plain unclosed tags.
		'<hr>',
		'<br>',
	);
	$wrongPhrases = '~(' . implode( '|', $wrongPhrases ) . ')~sDu';
	$nonXHTMLMessages = array();
	foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
		if ( preg_match( $wrongPhrases, $value ) ) {
			$nonXHTMLMessages[$key] = $value;
		}
	}
	return $nonXHTMLMessages;
}
/**
 * Get the messages which include wrong characters: translated messages
 * containing one of the invisible or problematic characters listed below.
 * In the returned text each such character is replaced by its visible
 * label, e.g. "[LRM]".
 *
 * @param $code The language code.
 *
 * @return The messages which include wrong characters in this language,
 *         as message key => text with the characters made visible.
 */
public function getMessagesWithWrongChars( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	// Visible label => UTF-8 byte sequence of the character.
	$wrongChars = array(
		'[LRM]' => "\xE2\x80\x8E",
		'[RLM]' => "\xE2\x80\x8F",
		'[LRE]' => "\xE2\x80\xAA",
		'[RLE]' => "\xE2\x80\xAB",
		'[POP]' => "\xE2\x80\xAC",
		'[LRO]' => "\xE2\x80\xAD",
		// U+202E; this entry used to repeat the [RLE] bytes, so the
		// right-to-left override was never detected.
		'[RLO]' => "\xE2\x80\xAE",
		'[ZWSP]'=> "\xE2\x80\x8B",
		'[NBSP]'=> "\xC2\xA0",
		'[WJ]'  => "\xE2\x81\xA0",
		'[BOM]' => "\xEF\xBB\xBF",
		'[FFFD]'=> "\xEF\xBF\xBD",
	);
	$hiddenChars = array_values( $wrongChars );
	$viewableChars = array_keys( $wrongChars );
	$wrongRegExp = '/(' . implode( '|', $hiddenChars ) . ')/sDu';
	$wrongCharsMessages = array();
	foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
		if ( preg_match( $wrongRegExp, $value ) ) {
			$wrongCharsMessages[$key] = str_replace( $hiddenChars, $viewableChars, $value );
		}
	}
	return $wrongCharsMessages;
}
/**
 * Get the messages containing internal links ([[target]] or
 * [[target|text]]) whose target contains "project", case-insensitively.
 *
 * @param $code The language code.
 *
 * @return The messages with such links in this language, as
 *         message key => the matching links joined by ", ".
 */
public function getMessagesWithDubiousLinks( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	// Characters allowed in a link target for the purpose of this check.
	$tc = Title::legalChars() . '#%{}';
	$messages = array();
	foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
		$matches = array();
		preg_match_all( "/\[\[([{$tc}]+)(?:\\|(.+?))?]]/sDu", $value, $matches );
		$dubiousLinks = array();
		for ( $i = 0; $i < count( $matches[0] ); $i++ ) {
			if ( preg_match( "/.*project.*/isDu", $matches[1][$i] ) ) {
				$dubiousLinks[] = $matches[0][$i];
			}
		}
		if ( $dubiousLinks ) {
			// Separator first: the reversed argument order is rejected
			// by PHP 8.
			$messages[$key] = implode( ', ', $dubiousLinks );
		}
	}
	return $messages;
}
/**
 * Find the translated messages in which the number of "[" differs from
 * the number of "]", or the number of "{" differs from the number of "}".
 *
 * @param $code The language code.
 *
 * @return The messages with unbalanced brackets in this language, as
 *         message key => the four counts ("[", "]", "{", "}") joined
 *         by ", ".
 */
public function getMessagesWithUnbalanced( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );

	$result = array();
	foreach ( $this->mMessages[$code]['translated'] as $name => $translation ) {
		$squareOpen = substr_count( $translation, '[' );
		$squareClose = substr_count( $translation, ']' );
		$curlyOpen = substr_count( $translation, '{' );
		$curlyClose = substr_count( $translation, '}' );

		if ( $squareOpen === $squareClose && $curlyOpen === $curlyClose ) {
			continue;
		}
		$result[$name] = "$squareOpen, $squareClose, $curlyOpen, $curlyClose";
	}
	return $result;
}