*Re-add some things lost in merge
[mediawiki.git] / maintenance / language / languages.inc
bloba10cae9e0d67f1f7975bfd3f031e704728d98803
1 <?php
2 /**
3  * Handle messages in the language files.
4  *
5  * @addtogroup Maintenance
6  */
8 require_once( 'messageTypes.inc' );
10 class languages {
11         protected $mLanguages; # List of languages
12         protected $mRawMessages; # Raw list of the messages in each language
13         protected $mMessages; # Messages in each language (except for English), divided to groups
14         protected $mGeneralMessages; # General messages in English, divided to groups
15         protected $mIgnoredMessages; # All the messages which should be exist only in the English file
16         protected $mOptionalMessages; # All the messages which may be translated or not, depending on the language
18         /**
19          * Load the list of languages: all the Messages*.php
20          * files in the languages directory.
21          *
22          * @param $exif Treat the EXIF messages?
23          */
24         function __construct( $exif = true ) {
25                 global $wgIgnoredMessages, $wgOptionalMessages, $wgEXIFMessages;
26                 $this->mIgnoredMessages = $wgIgnoredMessages;
27                 if ( $exif ) {
28                         $this->mOptionalMessages = array_merge( $wgOptionalMessages );
29                 } else {
30                         $this->mOptionalMessages = array_merge( $wgOptionalMessages, $wgEXIFMessages );
31                 }
33                 $this->mLanguages = array_keys( Language::getLanguageNames( true ) );
34                 sort( $this->mLanguages );
35         }
37         /**
38          * Get the language list.
39          *
40          * @return The language list.
41          */
42         public function getLanguages() {
43                 return $this->mLanguages;
44         }
46         /**
47          * Get the ignored messages list.
48          *
49          * @return The ignored messages list.
50          */
51         public function getIgnoredMessages() {
52                 return $this->mIgnoredMessages;
53         }
55         /**
56          * Get the optional messages list.
57          *
58          * @return The optional messages list.
59          */
60         public function getOptionalMessages() {
61                 return $this->mOptionalMessages;
62         }
64         /**
65          * Load the raw messages for a specific langauge from the messages file.
66          *
67          * @param $code The langauge code.
68          */
69         protected function loadRawMessages( $code ) {
70                 if ( isset( $this->mRawMessages[$code] ) ) {
71                         return;
72                 }
73                 $filename = Language::getMessagesFileName( $code );
74                 if ( file_exists( $filename ) ) {
75                         require( $filename );
76                         if ( isset( $messages ) ) {
77                                 $this->mRawMessages[$code] = $messages;
78                         } else {
79                                 $this->mRawMessages[$code] = array();
80                         }
81                 } else {
82                         $this->mRawMessages[$code] = array();
83                 }
84         }
86         /**
87          * Load the messages for a specific language (which is not English) and divide them to groups:
88          * all - all the messages.
89          * required - messages which should be translated in order to get a complete translation.
90          * optional - messages which can be translated, the fallback translation is used if not translated.
91          * obsolete - messages which should not be translated, either because they are not exist, or they are ignored messages.
92          * translated - messages which are either required or optional, but translated from English and needed.
93          *
94          * @param $code The language code.
95          */
96         private function loadMessages( $code ) {
97                 if ( isset( $this->mMessages[$code] ) ) {
98                         return;
99                 }
100                 $this->loadRawMessages( $code );
101                 $this->loadGeneralMessages();
102                 $this->mMessages[$code]['all'] = $this->mRawMessages[$code];
103                 $this->mMessages[$code]['required'] = array();
104                 $this->mMessages[$code]['optional'] = array();
105                 $this->mMessages[$code]['obsolete'] = array();
106                 $this->mMessages[$code]['translated'] = array();
107                 foreach ( $this->mMessages[$code]['all'] as $key => $value ) {
108                         if ( isset( $this->mGeneralMessages['required'][$key] ) ) {
109                                 $this->mMessages[$code]['required'][$key] = $value;
110                                 $this->mMessages[$code]['translated'][$key] = $value;
111                         } else if ( isset( $this->mGeneralMessages['optional'][$key] ) ) {
112                                 $this->mMessages[$code]['optional'][$key] = $value;
113                                 $this->mMessages[$code]['translated'][$key] = $value;
114                         } else {
115                                 $this->mMessages[$code]['obsolete'][$key] = $value;
116                         }
117                 }
118         }
120         /**
121          * Load the messages for English and divide them to groups:
122          * all - all the messages.
123          * required - messages which should be translated to other languages in order to get a complete translation.
124          * optional - messages which can be translated to other languages, but it's not required for a complete translation.
125          * ignored - messages which should not be translated to other languages.
126          * translatable - messages which are either required or optional, but can be translated from English.
127          */
128         private function loadGeneralMessages() {
129                 if ( isset( $this->mGeneralMessages ) ) {
130                         return;
131                 }
132                 $this->loadRawMessages( 'en' );
133                 $this->mGeneralMessages['all'] = $this->mRawMessages['en'];
134                 $this->mGeneralMessages['required'] = array();
135                 $this->mGeneralMessages['optional'] = array();
136                 $this->mGeneralMessages['ignored'] = array();
137                 $this->mGeneralMessages['translatable'] = array();
138                 foreach ( $this->mGeneralMessages['all'] as $key => $value ) {
139                         if ( in_array( $key, $this->mIgnoredMessages ) ) {
140                                 $this->mGeneralMessages['ignored'][$key] = $value;
141                         } else if ( in_array( $key, $this->mOptionalMessages ) ) {
142                                 $this->mGeneralMessages['optional'][$key] = $value;
143                                 $this->mGeneralMessages['translatable'][$key] = $value;
144                         } else {
145                                 $this->mGeneralMessages['required'][$key] = $value;
146                                 $this->mGeneralMessages['translatable'][$key] = $value;
147                         }
148                 }
149         }
151         /**
152          * Get all the messages for a specific langauge (not English), without the
153          * fallback language messages, divided to groups:
154          * all - all the messages.
155          * required - messages which should be translated in order to get a complete translation.
156          * optional - messages which can be translated, the fallback translation is used if not translated.
157          * obsolete - messages which should not be translated, either because they are not exist, or they are ignored messages.
158          * translated - messages which are either required or optional, but translated from English and needed.
159          *
160          * @param $code The langauge code.
161          *
162          * @return The messages in this language.
163          */
164         public function getMessages( $code ) {
165                 $this->loadMessages( $code );
166                 return $this->mMessages[$code];
167         }
169         /**
170          * Get all the general English messages, divided to groups:
171          * all - all the messages.
172          * required - messages which should be translated to other languages in order to get a complete translation.
173          * optional - messages which can be translated to other languages, but it's not required for a complete translation.
174          * ignored - messages which should not be translated to other languages.
175          * translatable - messages which are either required or optional, but can be translated from English.
176          *
177          * @return The general English messages.
178          */
179         public function getGeneralMessages() {
180                 $this->loadGeneralMessages();
181                 return $this->mGeneralMessages;
182         }
184         /**
185          * Get the untranslated messages for a specific language.
186          *
187          * @param $code The langauge code.
188          *
189          * @return The untranslated messages for this language.
190          */
191         public function getUntranslatedMessages( $code ) {
192                 $this->loadGeneralMessages();
193                 $this->loadMessages( $code );
194                 $requiredGeneralMessages = array_keys( $this->mGeneralMessages['required'] );
195                 $requiredMessages = array_keys( $this->mMessages[$code]['required'] );
196                 $untranslatedMessages = array();
197                 foreach ( array_diff( $requiredGeneralMessages, $requiredMessages ) as $key ) {
198                         $untranslatedMessages[$key] = $this->mGeneralMessages['required'][$key];
199                 }
200                 return $untranslatedMessages;
201         }
203         /**
204          * Get the duplicate messages for a specific language.
205          *
206          * @param $code The langauge code.
207          *
208          * @return The duplicate messages for this language.
209          */
210         public function getDuplicateMessages( $code ) {
211                 $this->loadGeneralMessages();
212                 $this->loadMessages( $code );
213                 $duplicateMessages = array();
214                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
215                         if ( $this->mGeneralMessages['translatable'][$key] == $value ) {
216                                 $duplicateMessages[$key] = $value;
217                         }
218                 }
219                 return $duplicateMessages;
220         }
222         /**
223          * Get the messages which do not use some variables.
224          *
225          * @param $code The langauge code.
226          *
227          * @return The messages which do not use some variables in this language.
228          */
229         public function getMessagesWithoutVariables( $code ) {
230                 $this->loadGeneralMessages();
231                 $this->loadMessages( $code );
232                 $variables = array( '\$1', '\$2', '\$3', '\$4', '\$5', '\$6', '\$7', '\$8', '\$9' );
233                 $messagesWithoutVariables = array();
234                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
235                         $missing = false;
236                         foreach ( $variables as $var ) {
237                                 if ( preg_match( "/$var/sU", $this->mGeneralMessages['translatable'][$key] ) &&
238                                         !preg_match( "/$var/sU", $value ) ) {
239                                         $missing = true;
240                                 }
241                         }
242                         if ( $missing ) {
243                                 $messagesWithoutVariables[$key] = $value;
244                         }
245                 }
246                 return $messagesWithoutVariables;
247         }
249         /**
250          * Get the messages which do not use plural.
251          *
252          * @param $code The langauge code.
253          *
254          * @return The messages which do not use plural in this language.
255          */
256         public function getMessagesWithoutPlural( $code ) {
257                 $this->loadGeneralMessages();
258                 $this->loadMessages( $code );
259                 $messagesWithoutPlural = array();
260                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
261                         if ( stripos( $this->mGeneralMessages['translatable'][$key], '{{plural:' ) !== false && stripos( $value, '{{plural:' ) === false ) {
262                                 $messagesWithoutPlural[$key] = $value;
263                         }
264                 }
265                 return $messagesWithoutPlural;
266         }
268         /**
269          * Get the empty messages.
270          *
271          * @param $code The langauge code.
272          *
273          * @return The empty messages for this language.
274          */
275         public function getEmptyMessages( $code ) {
276                 $this->loadGeneralMessages();
277                 $this->loadMessages( $code );
278                 $emptyMessages = array();
279                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
280                         if ( $value === '' || $value === '-' ) {
281                                 $emptyMessages[$key] = $value;
282                         }
283                 }
284                 return $emptyMessages;
285         }
287         /**
288          * Get the messages with trailing whitespace.
289          *
290          * @param $code The langauge code.
291          *
292          * @return The messages with trailing whitespace in this language.
293          */
294         public function getMessagesWithWhitespace( $code ) {
295                 $this->loadGeneralMessages();
296                 $this->loadMessages( $code );
297                 $messagesWithWhitespace = array();
298                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
299                         if ( $this->mGeneralMessages['translatable'][$key] !== '' && $value !== rtrim( $value ) ) {
300                                 $messagesWithWhitespace[$key] = $value;
301                         }
302                 }
303                 return $messagesWithWhitespace;
304         }
306         /**
307          * Get the non-XHTML messages.
308          *
309          * @param $code The langauge code.
310          *
311          * @return The non-XHTML messages for this language.
312          */
313         public function getNonXHTMLMessages( $code ) {
314                 $this->loadGeneralMessages();
315                 $this->loadMessages( $code );
316                 $wrongPhrases = array(
317                         '<hr *\\?>',
318                         '<br *\\?>',
319                         '<hr/>',
320                         '<br/>',
321                 );
322                 $wrongPhrases = '~(' . implode( '|', $wrongPhrases ) . ')~sDu';
323                 $nonXHTMLMessages = array();
324                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
325                         if ( preg_match( $wrongPhrases, $value ) ) {
326                                 $nonXHTMLMessages[$key] = $value;
327                         }
328                 }
329                 return $nonXHTMLMessages;
330         }
332         /**
333          * Get the messages which include wrong characters.
334          *
335          * @param $code The langauge code.
336          *
337          * @return The messages which include wrong characters in this language.
338          */
339         public function getMessagesWithWrongChars( $code ) {
340                 $this->loadGeneralMessages();
341                 $this->loadMessages( $code );
342                 $wrongChars = array(
343                         '[LRM]' => "\xE2\x80\x8E",
344                         '[RLM]' => "\xE2\x80\x8F",
345                         '[LRE]' => "\xE2\x80\xAA",
346                         '[RLE]' => "\xE2\x80\xAB",
347                         '[POP]' => "\xE2\x80\xAC",
348                         '[LRO]' => "\xE2\x80\xAD",
349                         '[RLO]' => "\xE2\x80\xAB",
350                         '[ZWSP]'=> "\xE2\x80\x8B",
351                         '[NBSP]'=> "\xC2\xA0",
352                         '[WJ]'  => "\xE2\x81\xA0",
353                         '[BOM]' => "\xEF\xBB\xBF",
354                         '[FFFD]'=> "\xEF\xBF\xBD",
355                 );
356                 $wrongRegExp = '/(' . implode( '|', array_values( $wrongChars ) ) . ')/sDu';
357                 $wrongCharsMessages = array();
358                 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
359                         if ( preg_match( $wrongRegExp, $value ) ) {
360                                 foreach ( $wrongChars as $viewableChar => $hiddenChar ) {
361                                         $value = str_replace( $hiddenChar, $viewableChar, $value );
362                                 }
363                                 $wrongCharsMessages[$key] = $value;
364                         }
365                 }
366                 return $wrongCharsMessages;
367         }
369         /**
370          * Output a messages list
371          *
372          * @param $messages The messages list
373          * @param $code The language code
374          * @param $text The text to show before the list (optional)
375          * @param $level The display level (optional)
376          * @param $links Show links (optional)
377          * @param $wikilang The langauge of the wiki to display the list in, for the links (optional)
378          */
379         public function outputMessagesList( $messages, $code, $text = '', $level = 2, $links = false, $wikilang = null ) {
380                 if ( count( $messages ) == 0 ) {
381                         return;
382                 }
383                 if ( $text ) {
384                         echo "$text\n";
385                 }
386                 if ( $level == 1 ) {
387                         echo "[messages are hidden]\n";
388                 } else {
389                         foreach ( $messages as $key => $value ) {
390                                 if ( $links ) {
391                                         $displayKey = ucfirst( $key );
392                                         if ( !isset( $wikilang ) ) {
393                                                 global $wgContLang;
394                                                 $wikilang = $wgContLang->getCode();
395                                         }
396                                         if ( $code == $wikilang ) {
397                                                 $displayKey = "[[MediaWiki:$displayKey|$key]]";
398                                         } else {
399                                                 $displayKey = "[[MediaWiki:$displayKey/$code|$key]]";
400                                         }
401                                 } else {
402                                         $displayKey = $key;
403                                 }
404                                 if ( $level == 2 ) {
405                                         echo "* $displayKey\n";
406                                 } else {
407                                         echo "* $displayKey:            '$value'\n";
408                                 }
409                         }
410                 }
411         }