3 * Helper class for checkLanguage.php script.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @ingroup MaintenanceLanguage
/**
 * Command-line driver for the core localisation checks: it parses the script
 * options, runs the selected checks through a `languages` helper object and
 * prints the results as plain text or wiki text.
 *
 * @ingroup MaintenanceLanguage
 */
class CheckLanguageCLI {
	/** @var string|null Code of the language to check (ignored with --all). */
	protected $code = null;
	/** @var int Display level: 0 = syntax check only ... 3 = keys and values. */
	protected $level = 2;
	/** @var bool Whether message keys are printed as wiki links. */
	protected $doLinks = false;
	/** @var string Prefix prepended to the link targets. */
	protected $linksPrefix = '';
	/** @var string Content language of the wiki the links point to. */
	protected $wikiCode = 'en';
	/** @var bool Whether all languages are checked instead of just $code. */
	protected $checkAll = false;
	/** @var string Output type, 'plain' or 'wiki'. */
	protected $output = 'plain';
	/** @var array Names of the checks to run. */
	protected $checks = array();
	/** @var object|null Helper object the check callbacks are invoked on. */
	protected $L = null;

	/** @var array Check results, keyed by language code and then by check name. */
	protected $results = array();

	/** @var bool Passed to the `languages` helper; false when --noexif is given. */
	private $includeExif = false;

	/**
	 * Constructor.
	 *
	 * Prints the help and exits when the 'help' option is present.
	 *
	 * @param $options array Options for script.
	 */
	public function __construct( array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			// Command-line values are strings; normalise so that the strict
			// "level 0" comparison in checkLanguage() can actually match.
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['prefix'] ) ) {
			$this->linksPrefix = $options['prefix'];
		}

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		// A whitelist wins over everything; a blacklist is subtracted from
		// either the easy or the default set.
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Get the default checks.
	 * @return array A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'magic', 'magic-old', 'magic-over', 'magic-case',
			'special', 'special-old',
		);
	}

	/**
	 * Get the checks which check other things than messages.
	 * @return array A list of the non-message checks.
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'magic', 'magic-old', 'magic-over',
			'magic-case', 'special', 'special-old',
		);
	}

	/**
	 * Get the checks that can easily be treated by non-speakers of the language.
	 * @return array A list of the easy checks.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars', 'magic-old',
			'magic-over', 'magic-case', 'special-old',
		);
	}

	/**
	 * Get all checks.
	 * @return array An array of all check names mapped to their function names.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithMismatchVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
			'namespace'    => 'getUntranslatedNamespaces',
			'projecttalk'  => 'getProblematicProjectTalks',
			'magic'        => 'getUntranslatedMagicWords',
			'magic-old'    => 'getObsoleteMagicWords',
			'magic-over'   => 'getOverridingMagicWords',
			'magic-case'   => 'getCaseMismatchMagicWords',
			// NOTE(review): "Untraslated" is kept as-is; the name must match the
			// method on the helper object, which is defined outside this file.
			'special'      => 'getUntraslatedSpecialPages',
			'special-old'  => 'getObsoleteSpecialPages',
		);
	}

	/**
	 * Get total count for each check non-messages check.
	 * @return array An array of all check names mapped to a two-element array:
	 * function name to get the total count and language code or null
	 * for checked code. A check mapped to null has no total at all.
	 */
	protected function getTotalCount() {
		return array(
			'namespace'   => array( 'getNamespaceNames', 'en' ),
			'projecttalk' => null,
			'magic'       => array( 'getMagicWords', 'en' ),
			'magic-old'   => array( 'getMagicWords', null ),
			'magic-over'  => array( 'getMagicWords', null ),
			'magic-case'  => array( 'getMagicWords', null ),
			'special'     => array( 'getSpecialPageAliases', 'en' ),
			'special-old' => array( 'getSpecialPageAliases', null ),
		);
	}

	/**
	 * Get all check descriptions.
	 *
	 * In each description $1 is the number of problems, $2 the total and $3
	 * the language code (see outputText()).
	 *
	 * @return array An array of all check names mapped to their descriptions.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables'    => '$1 message(s) of $2 in $3 don\'t match the variables used in en:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace'    => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk'  => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'magic'        => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-old'    => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
			'magic-over'   => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case'   => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special'      => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
			'special-old'  => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
		);
	}

	/**
	 * Get help.
	 * @return string The help string.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
	--help: Show this help.
	--lang: Language code (default: the installation default language).
	--all: Check all customized languages.
	--level: Show the following display level (default: 2):
		* 0: Skip the checks (useful for checking syntax).
		* 1: Show only the stub headers and number of wrong messages, without list of messages.
		* 2: Show only the headers and the message keys, without the message values.
		* 3: Show both the headers and the complete messages, with both keys and values.
	--links: Link the message values (default off).
	--prefix: prefix to add to links.
	--wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	--noexif: Do not check for Exif messages (a bit hard and boring to translate), if you know
		that they are currently not translated and want to focus on other problems (default off).
	--whitelist: Do only the following checks (form: code,code).
	--blacklist: Do not do the following checks (form: code,code).
	--easy: Do only the easy checks, which can be treated by non-speakers of the language.

Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable or do not exist, but are translated.
	* variables: Messages without variables which should be used, or with variables which should not be used.
	* empty: Empty messages and messages that contain only -.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
	* namespace: Namespace names that were not translated.
	* projecttalk: Namespace names and aliases where the project talk does not contain $1.
	* magic: Magic words that were not translated.
	* magic-old: Magic words which do not exist.
	* magic-over: Magic words that override the original English word.
	* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
	* special: Special page names that were not translated.
	* special-old: Special page names which do not exist.

ENDS;
	}

	/**
	 * Execute the script.
	 *
	 * Runs the checks and, unless the display level is 0, prints the results
	 * in the configured output type.
	 *
	 * @throws MWException When the output type is neither 'plain' nor 'wiki'.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			switch ( $this->output ) {
				case 'plain':
					$this->outputText();
					break;
				case 'wiki':
					$this->outputWiki();
					break;
				default:
					throw new MWException( "Invalid output type $this->output" );
			}
		}
	}

	/**
	 * Execute the checks.
	 *
	 * Fills $this->results for every language (--all) or for the selected one.
	 *
	 * @throws MWException When the selected code is one of the ignored codes.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			} else {
				$this->results[$this->code] = $this->checkLanguage( $this->code );
			}
		}
	}

	/**
	 * Get the check blacklist.
	 *
	 * Reads the global $checkBlacklist; when it is not an array (for example
	 * not defined in the current scope) an empty list is returned instead.
	 *
	 * @return array The list of checks which should not be executed.
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;

		return is_array( $checkBlacklist ) ? $checkBlacklist : array();
	}

	/**
	 * Check a language.
	 * @param $code string The language code.
	 * @throws MWException When a requested check is unknown.
	 * @return array The results.
	 */
	protected function checkLanguage( $code ) {
		# Syntax check only
		$results = array();
		// The cast keeps this working when a subclass stored the raw
		// command-line string in $this->level.
		if ( (int)$this->level === 0 ) {
			$this->L->getMessages( $code );
			return $results;
		}

		# Check the language
		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		foreach ( $this->checks as $check ) {
			if ( isset( $checkBlacklist[$code] ) &&
				in_array( $check, $checkBlacklist[$code] )
			) {
				// Blacklisted for this language: report as "no problems".
				$results[$check] = array();
				continue;
			}

			// Validate the name before indexing so that a typo in
			// --whitelist gives a clean error instead of a notice.
			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key.
	 * @param $key string The message key.
	 * @param $code string The language code.
	 * @return string The formatted message key.
	 */
	protected function formatKey( $key, $code ) {
		if ( $this->doLinks ) {
			$displayKey = ucfirst( $key );
			if ( $code == $this->wikiCode ) {
				return "[[{$this->linksPrefix}MediaWiki:$displayKey|$key]]";
			} else {
				return "[[{$this->linksPrefix}MediaWiki:$displayKey/$code|$key]]";
			}
		} else {
			return $key;
		}
	}

	/**
	 * Output the checks results as plain text.
	 *
	 * Only checks that found at least one problem are printed.
	 */
	protected function outputText() {
		// These lookups do not change while printing, so fetch them once.
		$descriptions = $this->getDescriptions();
		$nonMessageChecks = $this->nonMessageChecks();
		$totalCounts = $this->getTotalCount();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				$isNonMessageCheck = in_array( $check, $nonMessageChecks );
				if ( $check == 'untranslated' ) {
					$translatable = $this->L->getGeneralMessages();
					$total = count( $translatable['translatable'] );
				} elseif ( $isNonMessageCheck ) {
					$totalCount = isset( $totalCounts[$check] ) ? $totalCounts[$check] : null;
					if ( $totalCount === null ) {
						// No total is defined for this check (e.g. 'projecttalk',
						// whose description does not use $2).
						$total = 0;
					} else {
						$callback = array( $this->L, $totalCount[0] );
						// A null code means "count for the checked language".
						$callCode = $totalCount[1] ? $totalCount[1] : $code;
						$total = count( call_user_func( $callback, $callCode ) );
					}
				} else {
					$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
				} else {
					foreach ( $messages as $key => $value ) {
						if ( !$isNonMessageCheck ) {
							$key = $this->formatKey( $key, $code );
						}
						if ( $this->level == 2 || empty( $value ) ) {
							echo "* $key\n";
						} else {
							echo "* $key: '$value'\n";
						}
					}
				}
			}
		}
	}

	/**
	 * Output the checks results as wiki text.
	 *
	 * Prints a summary table with one row per language that has problems,
	 * followed by one section per language listing the affected message keys.
	 * Non-message checks are left out.
	 */
	public function outputWiki() {
		$nonMessageChecks = $this->nonMessageChecks();
		$detailText = '';
		$rows = array();
		$rows[] = '! Language !! Code !! Total !! ' . implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) );
		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks ) ) {
					continue;
				}
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( $messages as $key => $details ) {
						$displayKey = $this->formatKey( $key, $code );
						$messageDetails[] = $displayKey;
					}
					$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = Language::fetchLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( 'nodb' );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Check if there are any results for the checks, in any language.
	 * @return bool True if no check reported anything, false if at least one did.
	 */
	protected function isEmpty() {
		foreach ( $this->results as $results ) {
			foreach ( $results as $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}

		return true;
	}
}
/**
 * Command-line driver for the localisation checks of extension messages.
 * It reuses the option handling and output code of CheckLanguageCLI but runs
 * the checks once per selected extension message group.
 *
 * @ingroup MaintenanceLanguage
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** @var array The extensionLanguages helpers, one per selected message group. */
	private $extensions = array();

	/**
	 * Constructor.
	 *
	 * Prints the help and exits when the 'help' option is present.
	 *
	 * @param $options array Options for script.
	 * @param $extension string The extension name (or names).
	 */
	public function __construct( array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			// Command-line values are strings, while the inherited
			// checkLanguage() compares the level strictly against int 0.
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		// A whitelist wins over everything; a blacklist is subtracted from
		// either the easy or the default set.
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		// NOTE(review): presumably addAll() registers the extension message
		// groups that are looked up through MessageGroups below - confirm.
		$extensions = new PremadeMediawikiExtensionGroups();
		$extensions->addAll();
		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $extension ) {
				$group = MessageGroups::getGroup( $extension );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} elseif ( $extension == 'flaggedrevs' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-flaggedrevs-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} else {
			// One or more explicit names, separated by commas.
			$extensions = explode( ',', $extension );
			foreach ( $extensions as $extension ) {
				$group = MessageGroups::getGroup( 'ext-' . $extension );
				if ( $group ) {
					$extension = new extensionLanguages( $group );
					$this->extensions[] = $extension;
				} else {
					print "No such extension $extension.\n";
				}
			}
		}
	}

	/**
	 * Get the default checks.
	 * @return array A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Get the checks which check other things than messages.
	 * @return array A list of the non-message checks (none for extensions).
	 */
	protected function nonMessageChecks() {
		return array();
	}

	/**
	 * Get the checks that can easily be treated by non-speakers of the language.
	 * @return array A list of the easy checks.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars',
		);
	}

	/**
	 * Get help.
	 * @return string The help string.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
	* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions, "wikimedia" for extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs extension messages.
	* lang: Language code (default: the installation default language).
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Do not perform the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable, but translated.
	* variables: Messages without variables which should be used, or with variables which should not be used.
	* empty: Empty messages.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Execute the script.
	 *
	 * Output is produced per extension inside checkLanguage(), so running
	 * the checks is all that is needed here.
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check a language and show the results.
	 *
	 * Unlike the parent implementation this prints the results for each
	 * extension immediately and does not return them.
	 *
	 * @param $code string The language code.
	 * @throws MWException When the output type is neither 'plain' nor 'wiki'.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			// The inherited check and output code works on $this->L and
			// $this->results, so point both at the current extension.
			$this->L = $extension;
			$this->results = array();
			$this->results[$code] = parent::checkLanguage( $code );

			if ( !$this->isEmpty() ) {
				echo $extension->name() . ":\n";

				if ( $this->level > 0 ) {
					switch ( $this->output ) {
						case 'plain':
							$this->outputText();
							break;
						case 'wiki':
							$this->outputWiki();
							break;
						default:
							throw new MWException( "Invalid output type $this->output" );
					}
				}

				echo "\n";
			}
		}
	}
}
# Blacklist some checks for some languages.
# The result maps a language code to the list of check names that are skipped
# for that language: 'code' => array( 'check1', 'check2' ... )
# Every language listed here skips the 'plural' check.
$checkBlacklist = array_fill_keys(
	array(
		'az', 'bo', 'cdo', 'dz', 'id', 'fa',
		'gan', 'gan-hans', 'gan-hant', 'gn', 'hak', 'hu',
		'ja', // Does not use plural
		'jv', 'ka',
		'kk-arab', 'kk-cyrl', 'kk-latn',
		'km', 'kn', 'ko', 'lzh', 'mn', 'ms',
		'my', // Gets an extra check below
		'sah', 'sq', 'tet', 'th', 'to', 'tr', 'vi', 'wuu', 'xmf', 'yo', 'yue',
		'zh', 'zh-classical', 'zh-cn', 'zh-hans', 'zh-hant',
		'zh-hk', 'zh-sg', 'zh-tw', 'zh-yue',
	),
	array( 'plural' )
);
// Uses a lot zwnj
$checkBlacklist['my'][] = 'chars';