3 * Handle messages in the language files.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @ingroup MaintenanceLanguage
25 * @ingroup MaintenanceLanguage
/** @var array List of languages */
protected $mLanguages;

/** @var array Raw list of the messages in each language */
protected $mRawMessages;

/** @var array Messages in each language (except for English), divided to groups */
protected $mMessages;

/** @var array Fallback language in each language */
protected $mFallback;

/** @var array General messages in English, divided to groups */
protected $mGeneralMessages;

/** @var array All the messages which should exist only in the English file */
protected $mIgnoredMessages;

/** @var array All the messages which may be translated or not, depending on the language */
protected $mOptionalMessages;

/** @var array Namespace names */
protected $mNamespaceNames;

/** @var array Namespace aliases */
protected $mNamespaceAliases;

/** @var array Magic words */
protected $mMagicWords;

/** @var array Special page aliases */
protected $mSpecialPageAliases;
/**
 * Load the list of languages: all the Messages*.php
 * files in the languages directory.
 *
 * Handlers of the 'LocalisationIgnoredOptionalMessages' hook receive the
 * ignored and optional message lists by reference and may fill them in.
 */
public function __construct() {
	// Start from empty arrays: the lists are later searched with in_array(),
	// which needs an array even when no hook handler populated them.
	$this->mIgnoredMessages = [];
	$this->mOptionalMessages = [];
	Hooks::run( 'LocalisationIgnoredOptionalMessages',
		[ &$this->mIgnoredMessages, &$this->mOptionalMessages ] );

	$this->mLanguages = array_keys( Language::fetchLanguageNames( null, 'mwfile' ) );
	sort( $this->mLanguages );
}
/**
 * Return the stored language list.
 *
 * @return array The language list.
 */
public function getLanguages() {
	$languages = $this->mLanguages;

	return $languages;
}
/**
 * Return the stored list of ignored messages.
 *
 * @return array The ignored messages list.
 */
public function getIgnoredMessages() {
	$ignored = $this->mIgnoredMessages;

	return $ignored;
}
/**
 * Return the stored list of optional messages.
 *
 * @return array The optional messages list.
 */
public function getOptionalMessages() {
	$optional = $this->mOptionalMessages;

	return $optional;
}
101 * Load the language file.
103 * @param string $code The language code.
// Fill the per-language caches for $code: raw messages, fallback language,
// namespace names, namespace aliases, magic words and special page aliases.
105 protected function loadFile( $code ) {
// The condition below holds only when all six caches already have an entry for $code.
106 if ( isset( $this->mRawMessages[$code] ) &&
107 isset( $this->mFallback[$code] ) &&
108 isset( $this->mNamespaceNames[$code] ) &&
109 isset( $this->mNamespaceAliases[$code] ) &&
110 isset( $this->mMagicWords[$code] ) &&
111 isset( $this->mSpecialPageAliases[$code] )
// Defaults, so every cache has an entry for $code even if neither file exists.
115 $this->mRawMessages[$code] = [];
116 $this->mFallback[$code] = '';
117 $this->mNamespaceNames[$code] = [];
118 $this->mNamespaceAliases[$code] = [];
119 $this->mMagicWords[$code] = [];
120 $this->mSpecialPageAliases[$code] = [];
// Messages come from the JSON file, read through the localisation cache; only its 'messages' entry is kept.
122 $jsonfilename = Language::getJsonMessagesFileName( $code );
123 if ( file_exists( $jsonfilename ) ) {
124 $json = Language::getLocalisationCache()->readJSONFile( $jsonfilename );
125 $this->mRawMessages[$code] = $json['messages'];
// The remaining data is taken from the language's messages file, when it exists.
128 $filename = Language::getMessagesFileName( $code );
129 if ( file_exists( $filename ) ) {
// NOTE(review): the statement that brings $fallback, $namespaceNames, $namespaceAliases,
// $magicWords and $specialPageAliases into scope is not visible in this excerpt;
// presumably the messages file is included at this point - confirm against the full file.
// Each variable overrides the default above only if it is set.
131 if ( isset( $fallback ) ) {
132 $this->mFallback[$code] = $fallback;
134 if ( isset( $namespaceNames ) ) {
135 $this->mNamespaceNames[$code] = $namespaceNames;
137 if ( isset( $namespaceAliases ) ) {
138 $this->mNamespaceAliases[$code] = $namespaceAliases;
140 if ( isset( $magicWords ) ) {
141 $this->mMagicWords[$code] = $magicWords;
143 if ( isset( $specialPageAliases ) ) {
144 $this->mSpecialPageAliases[$code] = $specialPageAliases;
/**
 * Build the message groups of a specific (non-English) language:
 * all - every raw message of the language.
 * required - messages whose key is a required English message.
 * optional - messages whose key is an optional English message.
 * obsolete - messages whose key is neither required nor optional in English.
 * translated - the required and the optional messages together.
 *
 * Nothing is done when the groups for this language already exist.
 *
 * @param string $code The language code.
 */
private function loadMessages( $code ) {
	if ( isset( $this->mMessages[$code] ) ) {
		return;
	}

	$this->loadFile( $code );
	$this->loadGeneralMessages();

	$all = $this->mRawMessages[$code];
	$required = [];
	$optional = [];
	$obsolete = [];
	$translated = [];

	foreach ( $all as $name => $text ) {
		if ( isset( $this->mGeneralMessages['required'][$name] ) ) {
			$required[$name] = $text;
		} elseif ( isset( $this->mGeneralMessages['optional'][$name] ) ) {
			$optional[$name] = $text;
		} else {
			// Unknown to the English groups: obsolete, and not counted as translated.
			$obsolete[$name] = $text;
			continue;
		}
		$translated[$name] = $text;
	}

	$this->mMessages[$code] = [
		'all' => $all,
		'required' => $required,
		'optional' => $optional,
		'obsolete' => $obsolete,
		'translated' => $translated,
	];
}
/**
 * Build the English message groups:
 * all - every raw English message.
 * required - messages that are neither ignored nor optional.
 * optional - messages listed in the optional messages list.
 * ignored - messages listed in the ignored messages list.
 * translatable - the required and the optional messages together.
 *
 * Nothing is done when the groups already exist.
 */
private function loadGeneralMessages() {
	if ( isset( $this->mGeneralMessages ) ) {
		return;
	}

	$this->loadFile( 'en' );

	$all = $this->mRawMessages['en'];
	$required = [];
	$optional = [];
	$ignored = [];
	$translatable = [];

	foreach ( $all as $name => $text ) {
		// The ignored list takes precedence over the optional list.
		if ( in_array( $name, $this->mIgnoredMessages ) ) {
			$ignored[$name] = $text;
			continue;
		}

		if ( in_array( $name, $this->mOptionalMessages ) ) {
			$optional[$name] = $text;
		} else {
			$required[$name] = $text;
		}
		$translatable[$name] = $text;
	}

	$this->mGeneralMessages = [
		'all' => $all,
		'required' => $required,
		'optional' => $optional,
		'ignored' => $ignored,
		'translatable' => $translatable,
	];
}
/**
 * Get the grouped messages of a specific language (not English), without
 * the fallback language messages. The groups are: all, required, optional,
 * obsolete and translated.
 *
 * @param string $code The language code.
 *
 * @return array The grouped messages in this language.
 */
public function getMessages( $code ) {
	$this->loadMessages( $code );
	$groups = $this->mMessages[$code];

	return $groups;
}
/**
 * Get the grouped English messages. The groups are: all, required,
 * optional, ignored and translatable.
 *
 * @return array The general English messages.
 */
public function getGeneralMessages() {
	$this->loadGeneralMessages();
	$groups = $this->mGeneralMessages;

	return $groups;
}
/**
 * Get the fallback language code recorded for a specific language.
 *
 * @param string $code The language code.
 *
 * @return string Fallback code.
 */
public function getFallback( $code ) {
	$this->loadFile( $code );
	$fallback = $this->mFallback[$code];

	return $fallback;
}
/**
 * Get the namespace names recorded for a specific language.
 *
 * @param string $code The language code.
 *
 * @return array Namespace names.
 */
public function getNamespaceNames( $code ) {
	$this->loadFile( $code );
	$names = $this->mNamespaceNames[$code];

	return $names;
}
/**
 * Get the namespace aliases recorded for a specific language.
 *
 * @param string $code The language code.
 *
 * @return array Namespace aliases.
 */
public function getNamespaceAliases( $code ) {
	$this->loadFile( $code );
	$aliases = $this->mNamespaceAliases[$code];

	return $aliases;
}
/**
 * Get the magic words recorded for a specific language.
 *
 * @param string $code The language code.
 *
 * @return array Magic words.
 */
public function getMagicWords( $code ) {
	$this->loadFile( $code );
	$words = $this->mMagicWords[$code];

	return $words;
}
/**
 * Get the special page aliases recorded for a specific language.
 *
 * @param string $code The language code.
 *
 * @return array Special page aliases.
 */
public function getSpecialPageAliases( $code ) {
	$this->loadFile( $code );
	$aliases = $this->mSpecialPageAliases[$code];

	return $aliases;
}
/**
 * Get the untranslated messages for a specific language: the required
 * English messages whose key has no required message in this language.
 *
 * @param string $code The language code.
 *
 * @return array The untranslated messages for this language.
 */
public function getUntranslatedMessages( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );

	$needed = $this->mGeneralMessages['required'];
	$present = $this->mMessages[$code]['required'];

	return array_diff_key( $needed, $present );
}
/**
 * Get the duplicate messages for a specific language: translated messages
 * whose text is identical to the English text of the same key.
 *
 * @param string $code The language code.
 *
 * @return array The duplicate messages for this language.
 */
public function getDuplicateMessages( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$duplicateMessages = [];
	foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
		// Strict comparison: with == two different numeric strings
		// (e.g. "1" and "1.0") compare equal and would be reported as duplicates.
		if ( $this->mGeneralMessages['translatable'][$key] === $value ) {
			$duplicateMessages[$key] = $value;
		}
	}

	return $duplicateMessages;
}
/**
 * Get the messages of a specific language that were put in its
 * 'obsolete' group.
 *
 * @param string $code The language code.
 *
 * @return array The obsolete messages for this language.
 */
public function getObsoleteMessages( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$obsolete = $this->mMessages[$code]['obsolete'];

	return $obsolete;
}
/**
 * Get the messages whose variables do not match the original ones.
 *
 * For each of $1 to $9, a translated message is reported when the variable
 * occurs in exactly one of the English text and the translated text.
 *
 * @param string $code The language code.
 *
 * @return array The messages whose variables do not match the original ones.
 */
public function getMessagesWithMismatchVariables( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$variables = [ '\$1', '\$2', '\$3', '\$4', '\$5', '\$6', '\$7', '\$8', '\$9' ];
	$mismatchMessages = [];
	foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
		$original = $this->mGeneralMessages['translatable'][$key];
		foreach ( $variables as $var ) {
			$inOriginal = (bool)preg_match( "/$var/sU", $original );
			$inTranslation = (bool)preg_match( "/$var/sU", $value );
			if ( $inOriginal !== $inTranslation ) {
				// One differing variable is enough to report the message.
				$mismatchMessages[$key] = $value;
				break;
			}
		}
	}

	return $mismatchMessages;
}
/**
 * Get the messages which do not use plural: the English text contains
 * "{{plural:" (case-insensitively) but the translated text does not.
 *
 * @param string $code The language code.
 *
 * @return array The messages which do not use plural in this language.
 */
public function getMessagesWithoutPlural( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$found = [];
	foreach ( $this->mMessages[$code]['translated'] as $name => $text ) {
		if ( stripos( $this->mGeneralMessages['translatable'][$name], '{{plural:' ) === false ) {
			// The English text has no plural, so nothing is expected here.
			continue;
		}
		if ( stripos( $text, '{{plural:' ) === false ) {
			$found[$name] = $text;
		}
	}

	return $found;
}
/**
 * Get the empty messages: translated messages whose text is exactly
 * the empty string or a single "-".
 *
 * @param string $code The language code.
 *
 * @return array The empty messages for this language.
 */
public function getEmptyMessages( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$found = [];
	foreach ( $this->mMessages[$code]['translated'] as $name => $text ) {
		if ( in_array( $text, [ '', '-' ], true ) ) {
			$found[$name] = $text;
		}
	}

	return $found;
}
/**
 * Get the messages with trailing whitespace. Messages whose English
 * text is the empty string are never reported.
 *
 * @param string $code The language code.
 *
 * @return array The messages with trailing whitespace in this language.
 */
public function getMessagesWithWhitespace( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$found = [];
	foreach ( $this->mMessages[$code]['translated'] as $name => $text ) {
		if ( $this->mGeneralMessages['translatable'][$name] === '' ) {
			continue;
		}
		if ( rtrim( $text ) !== $text ) {
			$found[$name] = $text;
		}
	}

	return $found;
}
472 * Get the non-XHTML messages.
474 * @param string $code The language code.
476 * @return array The non-XHTML messages for this language.
// Return the translated messages of $code that match any of the "wrong phrase" patterns.
478 public function getNonXHTMLMessages( $code ) {
479 $this->loadGeneralMessages();
480 $this->loadMessages( $code );
// NOTE(review): the list assigned to $wrongPhrases before this point is not visible in
// this excerpt. Its entries are joined with "|" into one alternation, so they are used
// as regular expression fragments delimited by "~".
489 $wrongPhrases = '~(' . implode( '|', $wrongPhrases ) . ')~sDu';
490 $nonXHTMLMessages = [];
// Keep every translated message whose text matches the combined pattern.
491 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
492 if ( preg_match( $wrongPhrases, $value ) ) {
493 $nonXHTMLMessages[$key] = $value;
497 return $nonXHTMLMessages;
/**
 * Get the messages which include wrong characters.
 *
 * A translated message is reported when it contains any of the characters
 * listed below. In the returned text every such character is replaced by
 * its visible placeholder (for example "[LRM]").
 *
 * @param string $code The language code.
 *
 * @return array The messages which include wrong characters in this language.
 */
public function getMessagesWithWrongChars( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	// Visible placeholder => UTF-8 bytes of the character it stands for.
	$wrongChars = [
		'[LRM]' => "\xE2\x80\x8E",
		'[RLM]' => "\xE2\x80\x8F",
		'[LRE]' => "\xE2\x80\xAA",
		'[RLE]' => "\xE2\x80\xAB",
		'[POP]' => "\xE2\x80\xAC",
		'[LRO]' => "\xE2\x80\xAD",
		// U+202E. This entry used to repeat the RLE bytes (U+202B), so
		// right-to-left overrides were never detected or labelled.
		'[RLO]' => "\xE2\x80\xAE",
		'[ZWSP]' => "\xE2\x80\x8B",
		'[NBSP]' => "\xC2\xA0",
		'[WJ]' => "\xE2\x81\xA0",
		'[BOM]' => "\xEF\xBB\xBF",
		'[FFFD]' => "\xEF\xBF\xBD",
	];
	$wrongRegExp = '/(' . implode( '|', array_values( $wrongChars ) ) . ')/sDu';
	$wrongCharsMessages = [];
	foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
		if ( preg_match( $wrongRegExp, $value ) ) {
			// Make the offending characters visible in the reported text.
			foreach ( $wrongChars as $viewableChar => $hiddenChar ) {
				$value = str_replace( $hiddenChar, $viewableChar, $value );
			}
			$wrongCharsMessages[$key] = $value;
		}
	}

	return $wrongCharsMessages;
}
/**
 * Get the messages which include dubious links.
 *
 * A wiki link ("[[target]]" or "[[target|text]]") is reported when its
 * target contains "project" (case-insensitively). The value stored for a
 * message is the comma-separated list of its reported links.
 *
 * @param string $code The language code.
 *
 * @return array The messages which include dubious links in this language.
 */
public function getMessagesWithDubiousLinks( $code ) {
	$this->loadGeneralMessages();
	$this->loadMessages( $code );
	$tc = Title::legalChars() . '#%{}';
	$messages = [];
	foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
		$matches = [];
		preg_match_all( "/\[\[([{$tc}]+)(?:\\|(.+?))?]]/sDu", $value, $matches );
		$numMatches = count( $matches[0] );
		for ( $i = 0; $i < $numMatches; $i++ ) {
			if ( preg_match( "/.*project.*/isDu", $matches[1][$i] ) ) {
				$messages[$key][] = $matches[0][$i];
			}
		}

		if ( isset( $messages[$key] ) ) {
			// Separator first: the legacy (array, separator) argument order
			// is deprecated since PHP 7.4 and no longer accepted by PHP 8.0.
			$messages[$key] = implode( ', ', $messages[$key] );
		}
	}

	return $messages;
}
569 * Get the messages which include unbalanced brackets.
571 * @param string $code The language code.
573 * @return array The messages which include unbalanced brackets in this language.
// Report the translated messages of $code whose counters do not pair up.
575 public function getMessagesWithUnbalanced( $code ) {
576 $this->loadGeneralMessages();
577 $this->loadMessages( $code );
579 foreach ( $this->mMessages[$code]['translated'] as $key => $value ) {
// Four counters, reset for every message.
580 $a = $b = $c = $d = 0;
// An empty pattern makes preg_split() cut the text between every character.
// NOTE(review): the loop body that updates $a..$d is not visible in this excerpt;
// presumably each counter counts one kind of bracket - confirm against the full file.
581 foreach ( preg_split( '//', $value ) as $char ) {
// A message is reported when $a differs from $b or $c differs from $d; the stored
// value is the four counts as text.
598 if ( $a !== $b || $c !== $d ) {
599 $messages[$key] = "$a, $b, $c, $d";
/**
 * Get the untranslated namespace names: the English namespace names
 * whose key is missing from this language. NS_MAIN is never reported.
 *
 * @param string $code The language code.
 *
 * @return array The untranslated namespace names in this language.
 */
public function getUntranslatedNamespaces( $code ) {
	$this->loadFile( 'en' );
	$this->loadFile( $code );

	$missing = array_diff_key( $this->mNamespaceNames['en'], $this->mNamespaceNames[$code] );
	// unset() is a no-op when the key is absent, so no isset() guard is needed.
	unset( $missing[NS_MAIN] );

	return $missing;
}
/**
 * Get the project talk namespace names with no $1.
 *
 * The result is keyed by the problematic name; the value is 'default'
 * for the namespace name itself and '' for an alias.
 *
 * @param string $code The language code.
 *
 * @return array The problematic project talk namespaces in this language.
 */
public function getProblematicProjectTalks( $code ) {
	$this->loadFile( $code );
	$problems = [];

	# Check default namespace name
	$names = $this->mNamespaceNames[$code];
	if ( isset( $names[NS_PROJECT_TALK] ) && strpos( $names[NS_PROJECT_TALK], '$1' ) === false ) {
		$problems[$names[NS_PROJECT_TALK]] = 'default';
	}

	# Check namespace aliases
	foreach ( $this->mNamespaceAliases[$code] as $alias => $namespace ) {
		if ( $namespace != NS_PROJECT_TALK ) {
			continue;
		}
		if ( strpos( $alias, '$1' ) === false ) {
			$problems[$alias] = '';
		}
	}

	return $problems;
}
/**
 * Get the untranslated magic words: English magic words that are not set
 * for this language. Each is reported with the element at index 1 of its
 * English definition.
 *
 * @param string $code The language code.
 *
 * @return array The untranslated magic words in this language.
 */
public function getUntranslatedMagicWords( $code ) {
	$this->loadFile( 'en' );
	$this->loadFile( $code );
	$missing = [];
	foreach ( $this->mMagicWords['en'] as $name => $definition ) {
		if ( isset( $this->mMagicWords[$code][$name] ) ) {
			continue;
		}
		$missing[$name] = $definition[1];
	}

	return $missing;
}
/**
 * Get the obsolete magic words: magic words of this language that are not
 * set in English. Each is reported with the element at index 1 of its
 * local definition.
 *
 * @param string $code The language code.
 *
 * @return array The obsolete magic words in this language.
 */
public function getObsoleteMagicWords( $code ) {
	$this->loadFile( 'en' );
	$this->loadFile( $code );
	$obsolete = [];
	foreach ( $this->mMagicWords[$code] as $name => $definition ) {
		if ( isset( $this->mMagicWords['en'][$name] ) ) {
			continue;
		}
		$obsolete[$name] = $definition[1];
	}

	return $obsolete;
}
694 * Get the magic words that override the original English magic word.
696 * @param string $code The language code.
698 * @return array The overriding magic words in this language.
// Report magic words of $code whose local definition lacks a word of the English definition.
700 public function getOverridingMagicWords( $code ) {
701 $this->loadFile( 'en' );
702 $this->loadFile( $code );
704 foreach ( $this->mMagicWords[$code] as $key => $local ) {
// Magic words that do not exist in English are handled separately from the comparison below.
// NOTE(review): the statement inside this branch is not visible in this excerpt.
705 if ( !isset( $this->mMagicWords['en'][$key] ) ) {
706 # Unrecognized magic word
709 $en = $this->mMagicWords['en'][$key];
// Drop the first element of the local definition so that only the rest is searched below.
// NOTE(review): presumably that element is the case-sensitivity flag and $en gets the
// same treatment on a line not visible here - confirm against the full file.
710 array_shift( $local );
// The first English word missing from the local definition is stored for this key.
712 foreach ( $en as $word ) {
713 if ( !in_array( $word, $local ) ) {
714 $magicWords[$key] = $word;
/**
 * Get the magic words which do not match the case-sensitivity of the
 * original words: the element at index 0 of the local definition differs
 * from the one of the English definition. Magic words unknown to English
 * are skipped.
 *
 * @param string $code The language code.
 *
 * @return array The magic words whose case does not match in this language.
 */
public function getCaseMismatchMagicWords( $code ) {
	$this->loadFile( 'en' );
	$this->loadFile( $code );
	$mismatched = [];
	foreach ( $this->mMagicWords[$code] as $name => $definition ) {
		if ( isset( $this->mMagicWords['en'][$name] )
			&& $definition[0] != $this->mMagicWords['en'][$name][0]
		) {
			$mismatched[$name] = $definition[0];
		}
	}

	return $mismatched;
}
/**
 * Get the untranslated special page names: English special page aliases
 * that are not set for this language. Each is reported with the first
 * element of its English definition.
 *
 * @param string $code The language code.
 *
 * @return array The untranslated special page names in this language.
 */
public function getUntraslatedSpecialPages( $code ) {
	$this->loadFile( 'en' );
	$this->loadFile( $code );
	$missing = [];
	foreach ( $this->mSpecialPageAliases['en'] as $page => $aliases ) {
		if ( isset( $this->mSpecialPageAliases[$code][$page] ) ) {
			continue;
		}
		$missing[$page] = $aliases[0];
	}

	return $missing;
}
/**
 * Get the obsolete special page names: special page aliases of this
 * language that are not set in English. Each is reported with the first
 * element of its local definition.
 *
 * @param string $code The language code.
 *
 * @return array The obsolete special page names in this language.
 */
public function getObsoleteSpecialPages( $code ) {
	$this->loadFile( 'en' );
	$this->loadFile( $code );
	$obsolete = [];
	foreach ( $this->mSpecialPageAliases[$code] as $page => $aliases ) {
		if ( isset( $this->mSpecialPageAliases['en'][$page] ) ) {
			continue;
		}
		$obsolete[$page] = $aliases[0];
	}

	return $obsolete;
}
class ExtensionLanguages extends Languages {
	/** @var MessageGroup The message group the messages are loaded from */
	private $mMessageGroup;

	/**
	 * Load the messages group and take the ignored and optional
	 * message lists from it.
	 *
	 * @param MessageGroup $group The messages group.
	 */
	function __construct( MessageGroup $group ) {
		$this->mMessageGroup = $group;

		$this->mIgnoredMessages = $group->getIgnored();
		$this->mOptionalMessages = $group->getOptional();
	}

	/**
	 * Get the extension name, i.e. the label of the message group.
	 *
	 * @return string The extension name.
	 */
	public function name() {
		$label = $this->mMessageGroup->getLabel();

		return $label;
	}

	/**
	 * Load the raw messages of a language from the message group, once
	 * per language code. An empty result is stored as an empty array.
	 *
	 * @param string $code The language code.
	 */
	protected function loadFile( $code ) {
		if ( isset( $this->mRawMessages[$code] ) ) {
			return;
		}

		$loaded = $this->mMessageGroup->load( $code );
		$this->mRawMessages[$code] = empty( $loaded ) ? [] : $loaded;
	}
}