Fix for r55810: only set $optionsWithArgs if it's not defined, as in the old version...
[lhc/web/wiklou.git] / maintenance / language / checkLanguage.inc
1 <?php
2 /**
3 * @ingroup MaintenanceLanguage
4 */
5
class CheckLanguageCLI {
	# Command-line driver for the language checks: it parses the script
	# options, runs the selected checks through a "languages" helper object
	# and prints the findings as plain text or wiki text.

	/** Language code to check when not checking all languages. */
	protected $code = null;
	/** Display level (0-3); see help() for the meaning of each level. */
	protected $level = 2;
	/** Whether message keys are printed as wiki links. */
	protected $doLinks = false;
	/** Language code used to decide the link target in formatKey(). */
	protected $wikiCode = 'en';
	/** Whether every language returned by the helper is checked. */
	protected $checkAll = false;
	/** Output format: 'plain' or 'wiki'. */
	protected $output = 'plain';
	/** Names of the checks to run. */
	protected $checks = array();
	/** Helper object whose methods implement the checks. */
	protected $L = null;

	/** Check results, indexed by language code and then by check name. */
	protected $results = array();

	/** Passed to the "languages" helper constructor. */
	private $includeExif = false;

	/**
	 * Constructor.
	 * Prints the help text and exits when the "help" option is present.
	 * @param $options Options for script.
	 */
	public function __construct( Array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit(1);
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# Option values may arrive as strings (e.g. "0"); store an integer
			# so that the level comparisons behave the same for 0 and "0".
			$this->level = intval( $options['level'] );
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Get the default checks.
	 * @return A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'magic', 'magic-old', 'magic-over', 'magic-case',
			'special', 'special-old',
		);
	}

	/**
	 * Get the checks which check other things than messages.
	 * @return A list of the non-message checks.
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'magic', 'magic-old', 'magic-over',
			'magic-case', 'special', 'special-old',
		);
	}

	/**
	 * Get the checks that can easily be treated by non-speakers of the language.
	 * @return A list of the easy checks.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars', 'magic-old',
			'magic-over', 'magic-case', 'special-old',
		);
	}

	/**
	 * Get all checks.
	 * @return An array of all check names mapped to their function names.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithMismatchVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
			'namespace'    => 'getUntranslatedNamespaces',
			'projecttalk'  => 'getProblematicProjectTalks',
			'magic'        => 'getUntranslatedMagicWords',
			'magic-old'    => 'getObsoleteMagicWords',
			'magic-over'   => 'getOverridingMagicWords',
			'magic-case'   => 'getCaseMismatchMagicWords',
			'special'      => 'getUntraslatedSpecialPages',
			'special-old'  => 'getObsoleteSpecialPages',
		);
	}

	/**
	 * Get total count for each check non-messages check.
	 * @return An array of all check names mapped to a two-element array:
	 * function name to get the total count and language code or null
	 * for checked code. A check mapped to null has no total count.
	 */
	protected function getTotalCount() {
		return array(
			'namespace'   => array( 'getNamespaceNames', 'en' ),
			'projecttalk' => null,
			'magic'       => array( 'getMagicWords', 'en' ),
			'magic-old'   => array( 'getMagicWords', null ),
			'magic-over'  => array( 'getMagicWords', null ),
			'magic-case'  => array( 'getMagicWords', null ),
			'special'     => array( 'getSpecialPageAliases', 'en' ),
			'special-old' => array( 'getSpecialPageAliases', null ),
		);
	}

	/**
	 * Get all check descriptions.
	 * In each description $1 is the number of findings, $2 the total count
	 * and $3 the language code (substituted in outputText()).
	 * @return An array of all check names mapped to their descriptions.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables'    => '$1 message(s) of $2 in $3 don\'t match the variables used in en:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace'    => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk'  => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'magic'        => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-old'    => '$1 magic word(s) of $2 do not exist in en, but exist in $3:',
			'magic-over'   => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case'   => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special'      => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
			'special-old'  => '$1 special page alias(es) of $2 do not exist in en, but exist in $3:',
		);
	}

	/**
	 * Get help.
	 * @return The help string.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
* lang: Language code (default: the installation default language).
* all: Check all customized languages.
* help: Show this help.
* level: Show the following display level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* easy: Do only the easy checks, which can be treated by non-speakers of the language.
* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable or do not exist, but are translated.
* variables: Messages without variables which should be used, or with variables which shouldn't be used.
* empty: Empty messages and messages that contain only -.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
* namespace: Namespace names that were not translated.
* projecttalk: Namespace names and aliases where the project talk does not contain $1.
* magic: Magic words that were not translated.
* magic-old: Magic words which do not exist.
* magic-over: Magic words that override the original English word.
* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
* special: Special page names that were not translated.
* special-old: Special page names which do not exist.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Execute the script: run the checks, then print the results in the
	 * selected output format unless the display level is 0.
	 * Throws MWException for an unsupported output type.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			switch ( $this->output ) {
				case 'plain':
					$this->outputText();
					break;
				case 'wiki':
					$this->outputWiki();
					break;
				default:
					throw new MWException( "Invalid output type $this->output" );
			}
		}
	}

	/**
	 * Execute the checks and store the results in $this->results.
	 * The codes 'en' and 'enRTL' are never checked; asking for one of them
	 * explicitly throws MWException.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			} else {
				$this->results[$this->code] = $this->checkLanguage( $this->code );
			}
		}
	}

	/**
	 * Get the check blacklist from the global $checkBlacklist variable.
	 * @return The list of checks which should not be executed, indexed by
	 * language code; an empty array when the global is not an array.
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return is_array( $checkBlacklist ) ? $checkBlacklist : array();
	}

	/**
	 * Check a language.
	 * Throws MWException when a requested check is unknown or not callable
	 * on the helper object.
	 * @param $code The language code.
	 * @return The results: an array of check name => findings. Blacklisted
	 * checks are reported with an empty list; at display level 0 the
	 * messages are only loaded and an empty array is returned.
	 */
	protected function checkLanguage( $code ) {
		# Syntax check only. The level is normalised here as well, because a
		# subclass constructor may have stored it as a string.
		if ( intval( $this->level ) === 0 ) {
			$this->L->getMessages( $code );
			return array();
		}

		$results = array();
		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		$skippedChecks = isset( $checkBlacklist[$code] ) ? $checkBlacklist[$code] : array();
		foreach ( $this->checks as $check ) {
			if ( in_array( $check, $skippedChecks ) ) {
				# Keep an (empty) entry so that every requested check has a
				# slot in the results, e.g. a column in the wiki table.
				$results[$check] = array();
				continue;
			}

			# Guard the lookup so an unknown name fails with the exception
			# below rather than with an undefined-index notice first.
			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unkown check $check." );
			}
			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unkown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key.
	 * @param $key The message key.
	 * @param $code The language code.
	 * @return The formatted message key: the plain key, or a wiki link to
	 * the MediaWiki: page (with a /code subpage for other languages than
	 * the wiki language) when links are enabled.
	 */
	protected function formatKey( $key, $code ) {
		if ( !$this->doLinks ) {
			return $key;
		}
		$displayKey = ucfirst( $key );
		if ( $code == $this->wikiCode ) {
			return "[[MediaWiki:$displayKey|$key]]";
		}
		return "[[MediaWiki:$displayKey/$code|$key]]";
	}

	/**
	 * Output the checks results as plain text.
	 * Nothing is returned; the text is echoed. Checks without findings are
	 * not printed.
	 */
	protected function outputText() {
		# These lookups do not change while printing, so do them once.
		$nonMessageChecks = $this->nonMessageChecks();
		$descriptions = $this->getDescriptions();
		$totalCounts = $this->getTotalCount();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				$isNonMessageCheck = in_array( $check, $nonMessageChecks );
				if ( $check === 'untranslated' ) {
					$translatable = $this->L->getGeneralMessages();
					$total = count( $translatable['translatable'] );
				} elseif ( $isNonMessageCheck ) {
					if ( isset( $totalCounts[$check] ) ) {
						list( $totalFunction, $totalCode ) = $totalCounts[$check];
						$callback = array( $this->L, $totalFunction );
						$total = count( call_user_func( $callback, $totalCode ? $totalCode : $code ) );
					} else {
						# No total is defined for this check (projecttalk);
						# its description does not use $2 either.
						$total = 0;
					}
				} else {
					$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}
				foreach ( $messages as $key => $value ) {
					if ( !$isNonMessageCheck ) {
						$key = $this->formatKey( $key, $code );
					}
					if ( $this->level == 2 || empty( $value ) ) {
						echo "* $key\n";
					} else {
						echo "* $key: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Output the checks results as wiki text.
	 * Nothing is returned; the text is echoed: a sortable table with one
	 * row per language that has problems, followed by per-language detail
	 * sections. Non-message checks are left out of both.
	 */
	public function outputWiki() {
		global $wgContLang;
		$nonMessageChecks = $this->nonMessageChecks();
		$detailText = '';
		# The rows below skip the non-message checks, so the header must skip
		# them too or the columns do not line up.
		$rows = array();
		$rows[] = '! Language !! Code !! Total !! ' .
			implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) );
		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks ) ) {
					continue;
				}
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( $messages as $key => $details ) {
						$messageDetails[] = $this->formatKey( $key, $code );
					}
					$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( 'nodb' );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Check if there are any results for the checks, in any language.
	 * @return True if no check reported anything, false otherwise.
	 */
	protected function isEmpty() {
		foreach ( $this->results as $results ) {
			foreach ( $results as $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}
		return true;
	}
}
450
/**
 * Command-line driver that runs the language checks against extension
 * message groups: every selected extension is checked in turn and its
 * findings are printed under the extension name.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** List of extensionLanguages objects, one per extension to check. */
	private $extensions;

	/**
	 * Constructor.
	 * Prints the help text and exits when the "help" option is present.
	 * @param $options Options for script.
	 * @param $extension The extension name (or names).
	 */
	public function __construct( Array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit(1);
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# Option values may arrive as strings (e.g. "0"); store an integer
			# so that the level comparisons behave the same for 0 and "0".
			$this->level = intval( $options['level'] );
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				isset( $options['easy'] ) ? $this->easyChecks() : $this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} elseif ( isset( $options['easy'] ) ) {
			$this->checks = $this->easyChecks();
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default.
		# Only append when missing, so the check is not run (and listed) twice.
		if ( isset( $options['duplicate'] ) && !in_array( 'duplicate', $this->checks ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		# The object is only used for this call; presumably it registers the
		# extension message groups looked up below - confirm against its class.
		$premadeGroups = new PremadeMediawikiExtensionGroups();
		$premadeGroups->addAll();
		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $groupId ) {
				$group = MessageGroups::getGroup( $groupId );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} elseif ( $extension == 'flaggedrevs' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-flaggedrevs-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} else {
			# A single extension name or a comma-separated list of names
			foreach ( explode( ',', $extension ) as $extensionName ) {
				$group = MessageGroups::getGroup( 'ext-' . $extensionName );
				if ( $group ) {
					$this->extensions[] = new extensionLanguages( $group );
				} else {
					print "No such extension $extensionName.\n";
				}
			}
		}
	}

	/**
	 * Get the default checks.
	 * @return A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Get the checks which check other things than messages.
	 * @return A list of the non-message checks (none for extensions).
	 */
	protected function nonMessageChecks() {
		return array();
	}

	/**
	 * Get the checks that can easily be treated by non-speakers of the language.
	 * @return A list of the easy checks.
	 */
	protected function easyChecks() {
		return array(
			'duplicate', 'obsolete', 'empty', 'whitespace', 'xhtml', 'chars',
		);
	}

	/**
	 * Get help.
	 * @return The help string.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions, "wikimedia" for extensions used by Wikimedia or "flaggedrevs" for all FLaggedRevs extension messages.
* lang: Language code (default: the installation default language).
* help: Show this help.
* level: Show the following display level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Do not perform the following checks (form: code,code).
* easy: Do only the easy checks, which can be treated by non-speakers of the language.
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used, or with variables which should not be used.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Execute the script.
	 * Only the checks are started here; the output is produced per
	 * extension inside checkLanguage().
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check a language and show the results.
	 * Each extension is checked separately; extensions without findings
	 * print nothing. Throws MWException for an unsupported output type.
	 * @param $code The language code.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			# The inherited check and output code works on $this->L and
			# $this->results, so point them at the current extension.
			$this->L = $extension;
			$extensionResults = parent::checkLanguage( $code );
			# Never store a non-array: isEmpty() iterates over the results.
			$this->results = array(
				$code => is_array( $extensionResults ) ? $extensionResults : array(),
			);

			if ( $this->isEmpty() ) {
				continue;
			}

			echo $extension->name() . ":\n";

			if ( $this->level > 0 ) {
				switch ( $this->output ) {
					case 'plain':
						$this->outputText();
						break;
					case 'wiki':
						$this->outputWiki();
						break;
					default:
						throw new MWException( "Invalid output type $this->output" );
				}
			}

			echo "\n";
		}
	}
}
644
645 # Blacklist some checks for some languages
# Format of the resulting array: 'code' => array( 'check1', 'check2' ... )
# The languages that skip the "plural" check are listed in two runs joined
# with the array union operator, so that the 'my' entry keeps its position
# between 'ms' and 'sah'.
$checkBlacklist =
	array_fill_keys(
		array(
			'gan', 'gan-hans', 'gan-hant', 'gn', 'hak', 'hu',
			'ja', // Does not use plural
			'ka', 'kk-arab', 'kk-cyrl', 'kk-latn', 'ko', 'lzh', 'mn', 'ms',
		),
		array( 'plural' )
	)
	+ array( 'my' => array( 'chars' ) ) // Uses a lot zwnj
	+ array_fill_keys(
		array(
			'sah', 'sq', 'tet', 'th', 'wuu', 'xmf', 'yue', 'zh', 'zh-classical',
			'zh-cn', 'zh-hans', 'zh-hant', 'zh-hk', 'zh-sg', 'zh-tw', 'zh-yue',
		),
		array( 'plural' )
	);