Update per r43658 and r43661
[lhc/web/wiklou.git] / maintenance / language / checkLanguage.inc
1 <?php
/**
 * Command-line front end for the language checks.
 *
 * Reads the script options, runs the selected checks through a checker
 * object (stored in $this->L) for one language or for all of them, and
 * prints the findings either as plain text or as wiki text.
 *
 * @ingroup MaintenanceLanguage
 */
class CheckLanguageCLI {
	# Language code to check; taken from the 'lang' option or $wgLanguageCode.
	protected $code = null;
	# Display level, 0-3; see the "Display levels" section of help().
	protected $level = 2;
	# Whether message keys are rendered as [[MediaWiki:...]] links.
	protected $doLinks = false;
	# Language code compared against in formatKey() to decide on a /code suffix.
	protected $wikiCode = 'en';
	# Whether every language returned by the checker is checked.
	protected $checkAll = false;
	# Output format: 'plain' or 'wiki'.
	protected $output = 'plain';
	# Names of the checks to run.
	protected $checks = array();
	# The checker object the check methods are called on.
	protected $L = null;

	# Results of the last run: language code => ( check name => findings ).
	protected $results = array();

	# Passed to the checker's constructor; false when 'noexif' is given.
	private $includeExif = false;

	/**
	 * Constructor.
	 *
	 * Prints the help text and exits when the 'help' option is set.
	 *
	 * @param $options Options for script.
	 */
	public function __construct( array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# The option value may be a string (e.g. '0'); store an integer so
			# that strict comparisons against 0 behave as intended.
			$this->level = intval( $options['level'] );
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# Precedence: whitelist, then blacklist (applied to the easy or the
		# default set), then easy, then the defaults.
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Get the default checks.
	 * @return A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'skin', 'magic', 'magic-old', 'magic-over', 'magic-case',
			'special', 'special-old',
		);
	}

	/**
	 * Get the checks which check other things than messages.
	 * @return A list of the non-message checks.
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'skin', 'magic', 'magic-old', 'magic-over',
			'magic-case', 'special', 'special-old',
		);
	}

	/**
	 * Get the checks that can easily be treated by non-speakers of the language.
	 * @return A list of the easy checks.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars', 'magic-old',
			'magic-over', 'magic-case', 'special-old',
		);
	}

	/**
	 * Get all checks.
	 * @return An array of all check names mapped to their function names.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithoutVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
			'namespace'    => 'getUntranslatedNamespaces',
			'projecttalk'  => 'getProblematicProjectTalks',
			'skin'         => 'getUntranslatedSkins',
			'magic'        => 'getUntranslatedMagicWords',
			'magic-old'    => 'getObsoleteMagicWords',
			'magic-over'   => 'getOverridingMagicWords',
			'magic-case'   => 'getCaseMismatchMagicWords',
			'special'      => 'getUntraslatedSpecialPages',
			'special-old'  => 'getObsoleteSpecialPages',
		);
	}

	/**
	 * Get total count for each check non-messages check.
	 * @return An array of all check names mapped to a two-element array:
	 * function name to get the total count and language code or null
	 * for checked code. A check mapped to null has no total count function.
	 */
	protected function getTotalCount() {
		return array(
			'namespace'   => array( 'getNamespaceNames', 'en' ),
			'projecttalk' => null,
			'skin'        => array( 'getSkinNames', 'en' ),
			'magic'       => array( 'getMagicWords', 'en' ),
			'magic-old'   => array( 'getMagicWords', null ),
			'magic-over'  => array( 'getMagicWords', null ),
			'magic-case'  => array( 'getMagicWords', null ),
			'special'     => array( 'getSpecialPageAliases', 'en' ),
			'special-old' => array( 'getSpecialPageAliases', null ),
		);
	}

	/**
	 * Get all check descriptions.
	 *
	 * In each description $1 is replaced by the number of findings, $2 by
	 * the total count and $3 by the language code (see outputText()).
	 *
	 * @return An array of all check names mapped to their descriptions.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables'    => '$1 message(s) of $2 in $3 don\'t use some variables that en uses:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace'    => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk'  => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'skin'         => '$1 skin name(s) of $2 are not translated to $3, but exist in en:',
			'magic'        => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-old'    => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
			'magic-over'   => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case'   => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special'      => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
			'special-old'  => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
		);
	}

	/**
	 * Get help.
	 * @return The help string.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
* lang: Language code (default: the installation default language).
* all: Check all customized languages.
* help: Show this help.
* level: Show the following display level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* easy: Do only the easy checks, which can be treated by non-speakers of the language.
* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable or do not exist, but are translated.
* variables: Messages without variables which should be used.
* empty: Empty messages and messages that contain only -.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
* namespace: Namespace names that were not translated.
* projecttalk: Namespace names and aliases where the project talk does not contain $1.
* skin: Skin names that were not translated.
* magic: Magic words that were not translated.
* magic-old: Magic words which do not exist.
* magic-over: Magic words that override the original English word.
* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
* special: Special page names that were not translated.
* special-old: Special page names which do not exist.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Execute the script: run the checks, then print the results in the
	 * configured output format unless the display level is 0.
	 *
	 * Throws MWException for an output type other than 'plain' or 'wiki'.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			switch ( $this->output ) {
				case 'plain':
					$this->outputText();
					break;
				case 'wiki':
					$this->outputWiki();
					break;
				default:
					throw new MWException( "Invalid output type $this->output" );
			}
		}
	}

	/**
	 * Execute the checks and store the findings in $this->results.
	 *
	 * The codes 'en' and 'enRTL' are never checked: they are skipped when
	 * checking all languages and rejected with an MWException when
	 * requested explicitly.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			} else {
				$this->results[$this->code] = $this->checkLanguage( $this->code );
			}
		}
	}

	/**
	 * Get the check blacklist.
	 * @return The list of checks which should not be executed.
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return $checkBlacklist;
	}

	/**
	 * Check a language.
	 *
	 * Throws MWException when a requested check is unknown or the checker
	 * object has no callable method for it.
	 *
	 * @param $code The language code.
	 * @return The results: check name => findings. Checks blacklisted for
	 * the language are reported with an empty list. At display level 0 no
	 * check is run and an empty array is returned.
	 */
	protected function checkLanguage( $code ) {
		# Syntax check only. intval() keeps this working when the level was
		# stored as a string such as '0'.
		if ( intval( $this->level ) === 0 ) {
			$this->L->getMessages( $code );
			return array();
		}

		$results = array();
		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		foreach ( $this->checks as $check ) {
			if ( isset( $checkBlacklist[$code] ) &&
				in_array( $check, $checkBlacklist[$code] ) ) {
				# Keep the check in the results so every language reports
				# the same set of checks.
				$results[$check] = array();
				continue;
			}

			# A check name that is not in getChecks() (e.g. a mistyped
			# whitelist entry) has no function to call.
			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unkown check $check." );
			}
			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unkown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key.
	 *
	 * With links enabled the key becomes a [[MediaWiki:Key|key]] link; a
	 * "/code" suffix is added to the target when the language differs from
	 * the wiki language.
	 *
	 * @param $key The message key.
	 * @param $code The language code.
	 * @return The formatted message key.
	 */
	protected function formatKey( $key, $code ) {
		if ( $this->doLinks ) {
			$displayKey = ucfirst( $key );
			if ( $code == $this->wikiCode ) {
				return "[[MediaWiki:$displayKey|$key]]";
			} else {
				return "[[MediaWiki:$displayKey/$code|$key]]";
			}
		} else {
			return $key;
		}
	}

	/**
	 * Output the checks results as plain text.
	 *
	 * Only checks with at least one finding are printed. The text is
	 * echoed; nothing is returned.
	 */
	protected function outputText() {
		$descriptions = $this->getDescriptions();
		$nonMessageChecks = $this->nonMessageChecks();
		$totalCounts = $this->getTotalCount();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				$isNonMessageCheck = in_array( $check, $nonMessageChecks );
				if ( $check == 'untranslated' ) {
					$translatable = $this->L->getGeneralMessages();
					$total = count( $translatable['translatable'] );
				} elseif ( $isNonMessageCheck ) {
					$totalCount = isset( $totalCounts[$check] ) ? $totalCounts[$check] : null;
					if ( is_array( $totalCount ) ) {
						$callback = array( $this->L, $totalCount[0] );
						$callCode = $totalCount[1] ? $totalCount[1] : $code;
						$total = count( call_user_func( $callback, $callCode ) );
					} else {
						# No total count function is defined for this check
						# (e.g. 'projecttalk'); fall back to the number of
						# findings instead of calling an invalid callback.
						$total = $count;
					}
				} else {
					$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				foreach ( $messages as $key => $value ) {
					if ( !$isNonMessageCheck ) {
						$key = $this->formatKey( $key, $code );
					}
					if ( $this->level == 2 || empty( $value ) ) {
						echo "* $key\n";
					} else {
						echo "* $key: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Output the checks results as wiki text.
	 *
	 * Prints a summary table with one row per language that has problems,
	 * followed by per-language sections listing the affected keys. Only
	 * message checks are reported; non-message checks are left out of both
	 * the header and the rows. The text is echoed; nothing is returned.
	 */
	public function outputWiki() {
		global $wgContLang, $IP;
		$nonMessageChecks = $this->nonMessageChecks();
		$detailText = '';
		$rows = array();
		# The rows below skip non-message checks, so the header must skip
		# them too or the columns would not line up.
		$rows[] = '! Language !! Code !! Total !! ' .
			implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) );
		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks ) ) {
					continue;
				}
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( array_keys( $messages ) as $key ) {
						$messageDetails[] = $this->formatKey( $key, $code );
					}
					$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( $IP );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Check if there are any results for the checks, in any language.
	 * @return True if there are no results at all, false if any check of
	 * any language has findings.
	 */
	protected function isEmpty() {
		foreach ( $this->results as $results ) {
			foreach ( $results as $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}
		return true;
	}
}
454
/**
 * Command-line front end for the language checks of extensions.
 *
 * Works like CheckLanguageCLI but runs the message checks once per
 * selected extension and prints the report of each extension as soon as
 * that extension has been checked.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	# One extensionLanguages checker per selected extension.
	private $extensions;

	/**
	 * Constructor.
	 *
	 * Prints the help text and exits when the 'help' option is set.
	 *
	 * @param $options Options for script.
	 * @param $extension The extension name (or names, separated by commas),
	 * 'all' for every extension group, or 'wikimedia' for the groups listed
	 * by the 'ext-0-wikimedia' group.
	 */
	public function __construct( array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# The option value may be a string (e.g. '0'); store an integer so
			# that the strict "=== 0" test in the parent's checkLanguage()
			# can match.
			$this->level = intval( $options['level'] );
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# Precedence: whitelist, then blacklist (applied to the easy or the
		# default set), then easy, then the defaults.
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default. Only add the check
		# when it is not selected yet, otherwise it would run twice and be
		# listed twice in the wiki table header.
		if ( isset( $options['duplicate'] ) && !in_array( 'duplicate', $this->checks ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		$premadeGroups = new PremadeMediawikiExtensionGroups();
		$premadeGroups->addAll();
		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				# Only real extension groups: id starts with 'ext-', not meta.
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $groupId ) {
				$group = MessageGroups::getGroup( $groupId );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} else {
			foreach ( explode( ',', $extension ) as $name ) {
				$group = MessageGroups::getGroup( 'ext-' . $name );
				if ( $group ) {
					$this->extensions[] = new extensionLanguages( $group );
				} else {
					print "No such extension $name.\n";
				}
			}
		}
	}

	/**
	 * Get the default checks.
	 * @return A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Get the checks which check other things than messages.
	 * @return A list of the non-message checks (none for extensions).
	 */
	protected function nonMessageChecks() {
		return array();
	}

	/**
	 * Get the checks that can easily be treated by non-speakers of the language.
	 * @return A list of the easy checks.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars',
		);
	}

	/**
	 * Get help.
	 * @return The help string.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions or "wikimedia" for extensions used by Wikimedia.
* lang: Language code (default: the installation default language).
* help: Show this help.
* level: Show the following display level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Do not perform the following checks (form: code,code).
* easy: Do only the easy checks, which can be treated by non-speakers of the language.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Execute the script. Output happens inside checkLanguage(), once per
	 * extension, so only the checks need to be started here.
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check a language and show the results.
	 *
	 * Each selected extension is checked in turn; an extension is only
	 * printed when at least one of its checks has findings. Throws
	 * MWException for an output type other than 'plain' or 'wiki'.
	 *
	 * @param $code The language code.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			$this->L = $extension;
			$checked = parent::checkLanguage( $code );
			# The parent may return a non-array when it only does a syntax
			# check; isEmpty() and the output methods iterate over the
			# results, so always store an array.
			$this->results = array( $code => is_array( $checked ) ? $checked : array() );

			if ( $this->isEmpty() ) {
				continue;
			}

			echo $extension->name() . ":\n";

			if ( $this->level > 0 ) {
				switch ( $this->output ) {
					case 'plain':
						$this->outputText();
						break;
					case 'wiki':
						$this->outputWiki();
						break;
					default:
						throw new MWException( "Invalid output type $this->output" );
				}
			}

			echo "\n";
		}
	}
}
642
# Checks that are skipped for particular languages.
# Shape: 'code' => array( 'check1', 'check2' ... )
# Every language below skips the 'plural' check ('ja' is noted as not using
# plural), except 'my', which skips 'chars' because it uses a lot of zwnj.
# The three parts are joined with array union so the key order stays
# gan ... ms, my, sah ... zh-yue.
$checkBlacklist =
	array_fill_keys(
		array(
			'gan', 'gn', 'hak', 'hu', 'ja', 'ka',
			'kk-arab', 'kk-cyrl', 'kk-latn', 'ko', 'mn', 'ms',
		),
		array( 'plural' )
	)
	+ array( 'my' => array( 'chars' ) )
	+ array_fill_keys(
		array(
			'sah', 'sq', 'tet', 'th', 'wuu', 'xmf', 'yue',
			'zh', 'zh-classical', 'zh-cn', 'zh-hans', 'zh-hant',
			'zh-hk', 'zh-sg', 'zh-tw', 'zh-yue',
		),
		array( 'plural' )
	);