/**
 * Command-line front end that runs the language checks for the core message
 * files and prints the results as plain text or wiki text.
 *
 * @ingroup MaintenanceLanguage
 */
class CheckLanguageCLI {
	/** Language code to check; resolved by the constructor. */
	protected $code = null;
	/** Display level 0-3 (see help()); 2 is the default the help text advertises. */
	protected $level = 2;
	/** Whether message keys are rendered as wiki links. */
	protected $doLinks = false;
	/** Content language of the wiki the links are meant for. */
	protected $wikiCode = 'en';
	/** Whether every language is checked instead of only $code. */
	protected $checkAll = false;
	/** Output format name, dispatched on in execute(). */
	protected $output = 'plain';
	/** Names of the checks to run, in output order. */
	protected $checks = array();
	/** Object the check callbacks are invoked on (a `languages` instance here). */
	protected $L = null;

	/** Results of the last run: language code => ( check name => findings ). */
	protected $results = array();

	/** Whether EXIF messages take part in the checks. */
	private $includeExif = false;

	/**
	 * Read the command line options and prepare the checker.
	 *
	 * @param $options Options for script.
	 */
	public function __construct( array $options ) {
		if ( isset( $options['help'] ) ) {
			// NOTE(review): the body of this branch is not visible in the
			// reviewed listing; printing the help and stopping is inferred.
			// Confirm the exit status.
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			// Option values may arrive as strings; the strict comparison in
			// checkLanguage() needs a real integer to recognise level 0.
			$this->level = intval( $options['level'] );
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		// A whitelist wins over a blacklist; "easy" only picks the base list.
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Get the default checks.
	 * @return A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'skin', 'magic', 'magic-old', 'magic-over', 'magic-case',
			'special', 'special-old',
		);
	}

	/**
	 * Get the checks which check other things than messages.
	 * @return A list of the non-message checks.
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'skin', 'magic', 'magic-old', 'magic-over',
			'magic-case', 'special', 'special-old',
		);
	}

	/**
	 * Get the checks that can easily be treated by non-speakers of the language.
	 * @return A list of the easy checks.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars', 'magic-old',
			'magic-over', 'magic-case', 'special-old',
		);
	}

	/**
	 * Get all checks.
	 * @return An array of all check names mapped to their function names.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithoutVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
			'namespace'    => 'getUntranslatedNamespaces',
			'projecttalk'  => 'getProblematicProjectTalks',
			'skin'         => 'getUntranslatedSkins',
			'magic'        => 'getUntranslatedMagicWords',
			'magic-old'    => 'getObsoleteMagicWords',
			'magic-over'   => 'getOverridingMagicWords',
			'magic-case'   => 'getCaseMismatchMagicWords',
			// NOTE(review): "Untraslated" looks like a typo, but the name must
			// match the method on the checker object - verify before renaming.
			'special'      => 'getUntraslatedSpecialPages',
			'special-old'  => 'getObsoleteSpecialPages',
		);
	}

	/**
	 * Get total count for each check non-messages check.
	 * @return An array of all check names mapped to a two-element array:
	 * function name to get the total count and language code or null
	 * for the checked language. A check mapped to null has no total.
	 */
	protected function getTotalCount() {
		return array(
			'namespace'   => array( 'getNamespaceNames', 'en' ),
			'projecttalk' => null,
			'skin'        => array( 'getSkinNames', 'en' ),
			'magic'       => array( 'getMagicWords', 'en' ),
			'magic-old'   => array( 'getMagicWords', null ),
			'magic-over'  => array( 'getMagicWords', null ),
			'magic-case'  => array( 'getMagicWords', null ),
			'special'     => array( 'getSpecialPageAliases', 'en' ),
			'special-old' => array( 'getSpecialPageAliases', null ),
		);
	}

	/**
	 * Get all check descriptions.
	 * @return An array of all check names mapped to their descriptions.
	 * $1 is the number of findings, $2 the total and $3 the language code.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables'    => '$1 message(s) of $2 in $3 don\'t use some variables that en uses:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace'    => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk'  => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'skin'         => '$1 skin name(s) of $2 are not translated to $3, but exist in en:',
			'magic'        => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-old'    => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
			'magic-over'   => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case'   => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special'      => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
			'special-old'  => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
		);
	}

	/**
	 * Get help.
	 * @return The help string.
	 */
	protected function help() {
		// NOTE(review): leading whitespace inside the help text was lost in the
		// reviewed listing; one tab per bullet is assumed.
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
	* lang: Language code (default: the installation default language).
	* all: Check all customized languages.
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Don't do the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
	* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable or do not exist, but are translated.
	* variables: Messages without variables which should be used.
	* empty: Empty messages and messages that contain only -.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
	* namespace: Namespace names that were not translated.
	* projecttalk: Namespace names and aliases where the project talk does not contain $1.
	* skin: Skin names that were not translated.
	* magic: Magic words that were not translated.
	* magic-old: Magic words which do not exist.
	* magic-over: Magic words that override the original English word.
	* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
	* special: Special page names that were not translated.
	* special-old: Special page names which do not exist.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Execute the script.
	 *
	 * Runs the checks and, for any display level above 0, prints the results
	 * in the selected output format.
	 * Throws MWException when the output format is not recognised.
	 */
	public function execute() {
		// NOTE(review): the doChecks() call and the case labels are not visible
		// in the reviewed listing. They are inferred from the default output
		// value 'plain' and the two output methods; confirm the 'wiki' label.
		$this->doChecks();
		if ( $this->level > 0 ) {
			switch ( $this->output ) {
				case 'plain':
					$this->outputText();
					break;
				case 'wiki':
					$this->outputWiki();
					break;
				default:
					throw new MWException( "Invalid output type $this->output" );
			}
		}
	}

	/**
	 * Execute the checks.
	 *
	 * Fills $this->results for every language (with --all) or for the selected
	 * one. The source language codes cannot be checked against themselves and
	 * are skipped, or rejected with an MWException when selected explicitly.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			} else {
				$this->results[$this->code] = $this->checkLanguage( $this->code );
			}
		}
	}

	/**
	 * Get the check blacklist.
	 * @return The list of checks which should not be executed.
	 * This is the global $checkBlacklist, keyed by language code.
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return $checkBlacklist;
	}

	/**
	 * Check a language.
	 *
	 * Throws MWException when a requested check is unknown or not callable on
	 * the checker object.
	 *
	 * @param $code The language code.
	 * @return The results: check name => findings. Checks blacklisted for the
	 * language are present with an empty list, so every language reports the
	 * same set of checks.
	 */
	protected function checkLanguage( $code ) {
		$results = array();

		# Syntax check only: loading the messages is the whole job.
		# The cast keeps this working when the level arrives as the string '0'.
		if ( (int)$this->level === 0 ) {
			$this->L->getMessages( $code );
			return $results;
		}

		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		$skipped = isset( $checkBlacklist[$code] ) ? $checkBlacklist[$code] : array();

		foreach ( $this->checks as $check ) {
			if ( in_array( $check, $skipped, true ) ) {
				// Was written to an undefined $result, which silently dropped
				// the entry and misaligned the wiki table columns.
				$results[$check] = array();
				continue;
			}

			// Look the name up defensively so an unknown check reaches the
			// exception below instead of an undefined-index notice.
			$callback = isset( $checkFunctions[$check] )
				? array( $this->L, $checkFunctions[$check] )
				: null;
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unkown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key.
	 * @param $key The message key.
	 * @param $code The language code.
	 * @return The formatted message key: a link to the MediaWiki: page (with a
	 * /code subpage unless $code is the wiki content language) when links are
	 * enabled, the plain key otherwise.
	 */
	protected function formatKey( $key, $code ) {
		if ( !$this->doLinks ) {
			return $key;
		}

		$displayKey = ucfirst( $key );
		if ( $code == $this->wikiCode ) {
			return "[[MediaWiki:$displayKey|$key]]";
		}
		return "[[MediaWiki:$displayKey/$code|$key]]";
	}

	/**
	 * Output the checks results as plain text.
	 *
	 * Prints one header per check, then, depending on the display level, a
	 * placeholder (1), the keys (2) or the keys with their values (3).
	 */
	protected function outputText() {
		// These lookups do not change while printing; fetch them once.
		$nonMessageChecks = $this->nonMessageChecks();
		$totalCounts = $this->getTotalCount();
		$descriptions = $this->getDescriptions();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				$isNonMessage = in_array( $check, $nonMessageChecks, true );

				if ( $check === 'untranslated' ) {
					$translatable = $this->L->getGeneralMessages();
					$total = count( $translatable['translatable'] );
				} elseif ( $isNonMessage ) {
					$spec = isset( $totalCounts[$check] ) ? $totalCounts[$check] : null;
					if ( is_array( $spec ) ) {
						$callCode = $spec[1] ? $spec[1] : $code;
						$total = count( call_user_func( array( $this->L, $spec[0] ), $callCode ) );
					} else {
						// No total is defined (e.g. 'projecttalk'); its
						// description does not use $2, so skip the callback.
						$total = 0;
					}
				} else {
					$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";

				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				foreach ( $messages as $key => $value ) {
					if ( !$isNonMessage ) {
						$key = $this->formatKey( $key, $code );
					}
					if ( $this->level == 2 || empty( $value ) ) {
						// NOTE(review): this statement is not visible in the
						// reviewed listing; key-only output is inferred from
						// the level 2 description in help().
						echo "* $key\n";
					} else {
						echo "* $key: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Output the checks results as wiki text.
	 *
	 * Prints a sortable summary table (one row per language with problems)
	 * followed by one detail section per language and check. Non-message
	 * checks are left out of this report.
	 */
	public function outputWiki() {
		global $wgContLang, $IP;

		// NOTE(review): several statements of this method (initialisation of
		// $detailText/$numbers/$problems, the zero-count branch, the tail of
		// the heredoc) are not visible in the reviewed listing and were
		// inferred from how the variables are used - confirm before merging.
		$nonMessageChecks = $this->nonMessageChecks();
		$detailText = '';
		$rows = array( '! Language !! Code !! Total !! ' . implode( ' !! ', $this->checks ) );

		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();

			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks, true ) ) {
					continue;
				}

				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( $messages as $key => $details ) {
						$displayKey = $this->formatKey( $key, $code );
						$messageDetails[] = $displayKey;
					}
					$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( $IP );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Check if the checks produced no findings at all, in any language.
	 * @return True if every check of every language came back empty, false as
	 * soon as one finding exists.
	 */
	protected function isEmpty() {
		foreach ( $this->results as $code => $results ) {
			foreach ( $results as $check => $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}
		return true;
	}
}
/**
 * Command-line front end that runs the message checks against extension
 * message groups instead of the core language files.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** Checker objects (extensionLanguages), one per selected extension. */
	protected $extensions = array();

	/**
	 * Read the command line options and collect the extensions to check.
	 *
	 * @param $options Options for script.
	 * @param $extension The extension name (or names).
	 */
	public function __construct( array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			// NOTE(review): the body of this branch is not visible in the
			// reviewed listing; printing the help and stopping is inferred.
			// Confirm the exit status.
			echo $this->help();
			exit( 1 );
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			// Option values may arrive as strings; normalise so that strict
			// comparisons against the level behave.
			$this->level = intval( $options['level'] );
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		// A whitelist wins over a blacklist; "easy" only picks the base list.
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		# Only added when missing, so the check is not listed twice.
		if ( isset( $options['duplicate'] ) && !in_array( 'duplicate', $this->checks, true ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		// The object is not used afterwards; presumably addAll() registers the
		// extension message groups looked up below - verify.
		$premade = new PremadeMediawikiExtensionGroups();
		$premade->addAll();

		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				// Extension groups are the non-meta groups whose id starts with "ext-".
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $groupId ) {
				$group = MessageGroups::getGroup( $groupId );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} else {
			foreach ( explode( ',', $extension ) as $name ) {
				$group = MessageGroups::getGroup( 'ext-' . $name );
				// NOTE(review): the condition is not visible in the reviewed
				// listing; a truthiness test on the looked-up group is inferred
				// from the "No such extension" branch.
				if ( $group ) {
					$this->extensions[] = new extensionLanguages( $group );
				} else {
					print "No such extension $name.\n";
				}
			}
		}
	}

	/**
	 * Get the default checks.
	 * @return A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Get the checks which check other things than messages.
	 * @return A list of the non-message checks.
	 */
	protected function nonMessageChecks() {
		// NOTE(review): the method body is not visible in the reviewed listing.
		// An empty list is inferred because none of the extension checks above
		// is a non-message check.
		return array();
	}

	/**
	 * Get the checks that can easily be treated by non-speakers of the language.
	 * @return A list of the easy checks.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars',
		);
	}

	/**
	 * Get help.
	 * @return The help string.
	 */
	protected function help() {
		// NOTE(review): leading whitespace inside the help text was lost in the
		// reviewed listing; one tab per bullet is assumed.
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
	* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions or "wikimedia" for extensions used by Wikimedia.
	* lang: Language code (default: the installation default language).
	* help: Show this help.
	* level: Show the following display level (default: 2).
	* links: Link the message values (default off).
	* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
	* whitelist: Do only the following checks (form: code,code).
	* blacklist: Do not perform the following checks (form: code,code).
	* easy: Do only the easy checks, which can be treated by non-speakers of the language.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
	* untranslated: Messages which are required to translate, but are not translated.
	* duplicate: Messages which translation equal to fallback
	* obsolete: Messages which are untranslatable, but translated.
	* variables: Messages without variables which should be used.
	* empty: Empty messages.
	* whitespace: Messages which have trailing whitespace.
	* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
	* chars: Messages with hidden characters.
	* links: Messages which contains broken links to pages (does not find all).
	* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
	* 0: Skip the checks (useful for checking syntax).
	* 1: Show only the stub headers and number of wrong messages, without list of messages.
	* 2: Show only the headers and the message keys, without the message values.
	* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Execute the script.
	 *
	 * Output happens per extension inside checkLanguage(), so only the checks
	 * need to be started here.
	 */
	public function execute() {
		// NOTE(review): the method body is not visible in the reviewed listing;
		// delegating to doChecks() is inferred from checkLanguage() doing its
		// own printing.
		$this->doChecks();
	}

	/**
	 * Check a language and show the results.
	 *
	 * Runs the inherited checks once per selected extension and prints each
	 * extension that has findings under its own name. Nothing is returned;
	 * $this->results holds the findings of the extension being processed.
	 * Throws MWException when the output format is not recognised.
	 *
	 * @param $code The language code.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			// The inherited check and output code works on $this->L.
			$this->L = $extension;
			$this->results = array();
			$this->results[$code] = parent::checkLanguage( $code );

			if ( $this->isEmpty() ) {
				continue;
			}

			echo $extension->name() . ":\n";

			if ( $this->level > 0 ) {
				// NOTE(review): the case labels are not visible in the reviewed
				// listing; 'plain' and 'wiki' are inferred from the default
				// output value and the two inherited output methods.
				switch ( $this->output ) {
					case 'plain':
						$this->outputText();
						break;
					case 'wiki':
						$this->outputWiki();
						break;
					default:
						throw new MWException( "Invalid output type $this->output" );
				}
			}
		}
	}
}
# Blacklist some checks for some languages
# Resulting shape: 'code' => array( 'check1', 'check2' ... )
# The three pieces are joined with the array union operator, which keeps the
# keys in the order they are listed here.
$checkBlacklist =
	array_fill_keys(
		array(
			'gan', 'gn', 'hak', 'hu',
			'ja', // Does not use plural
			'ka', 'kk-arab', 'kk-cyrl', 'kk-latn', 'ko', 'mn', 'ms',
		),
		array( 'plural' )
	)
	+ array(
		'my' => array( 'chars' ), // Uses a lot zwnj
	)
	+ array_fill_keys(
		array(
			'sah', 'sq', 'tet', 'th', 'wuu', 'xmf', 'yue',
			'zh', 'zh-classical', 'zh-cn', 'zh-hans', 'zh-hant',
			'zh-hk', 'zh-sg', 'zh-tw', 'zh-yue',
		),
		array( 'plural' )
	);