* New features for checking languages: Checking namespace names, skin names, magic...
[lhc/web/wiklou.git] / maintenance / language / checkLanguage.inc
1 <?php
2 /**
3 * @ingroup MaintenanceLanguage
4 */
5
/**
 * Command-line front end for checking the core localisation of one language
 * (or of every language known to the languages helper) and printing the
 * problems found, either as plain text or as wiki text.
 */
class CheckLanguageCLI {
	/** Language code to check when not checking all languages. */
	protected $code = null;
	/** Display level, 0-3 (see help()). */
	protected $level = 2;
	/** Whether message keys are formatted as wiki links. */
	protected $doLinks = false;
	/** Content language of the wiki the links are meant for. */
	protected $wikiCode = 'en';
	/** Whether to check every language instead of only $code. */
	protected $checkAll = false;
	/** Output format: 'plain' or 'wiki'. */
	protected $output = 'plain';
	/** Names of the checks to run. */
	protected $checks = array();
	/** Helper object the check callbacks are invoked on. */
	protected $L = null;

	/** Results of the last run: language code => ( check name => findings ). */
	protected $results = array();

	/** Whether EXIF messages are included (passed to the languages helper). */
	private $includeExif = false;

	/**
	 * Constructor.
	 * Prints the help and exits when the 'help' option is set; otherwise
	 * reads the options and creates the languages helper.
	 * @param $options Options for script.
	 */
	public function __construct( Array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# The option may be a string such as '0'; normalise it so that the
			# level comparisons behave the same for '0' and 0.
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# A whitelist takes precedence over a blacklist
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				$this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Get the default checks.
	 * @return A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced', 'namespace',
			'projecttalk', 'skin', 'magic', 'magic-over', 'magic-case', 'special',
		);
	}

	/**
	 * Get the non-message checks, i.e. the checks whose findings are not
	 * message keys (namespaces, skins, magic words, special pages).
	 * @return A list of the non-message checks.
	 */
	protected function nonMessageChecks() {
		return array(
			'namespace', 'projecttalk', 'skin', 'magic', 'magic-over', 'magic-case',
			'special',
		);
	}

	/**
	 * Get all checks.
	 * The function names are looked up on the helper object in $this->L, so
	 * they must match that object's method names exactly.
	 * @return An array of all check names mapped to their function names.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithoutVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
			'namespace'    => 'getUntranslatedNamespaces',
			'projecttalk'  => 'getProblematicProjectTalks',
			'skin'         => 'getUntranslatedSkins',
			'magic'        => 'getUntranslatedMagicWords',
			'magic-over'   => 'getOverridingMagicWords',
			'magic-case'   => 'getCaseMismatchMagicWords',
			# NOTE(review): spelling kept as is; it has to match the method name
			# on the helper class, which is not visible from this file.
			'special'      => 'getUntraslatedSpecialPages',
		);
	}

	/**
	 * Get all check descriptions.
	 * In each description $1 is the number of findings, $2 the total the
	 * findings are counted against and $3 the language code.
	 * @return An array of all check names mapped to their descriptions.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but exist in $3:',
			'variables'    => '$1 message(s) of $2 in $3 don\'t use some variables that en uses:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
			'namespace'    => '$1 namespace name(s) of $2 are not translated to $3, but exist in en:',
			'projecttalk'  => '$1 namespace name(s) and alias(es) in $3 are project talk namespaces without the parameter:',
			'skin'         => '$1 skin name(s) of $2 are not translated to $3, but exist in en:',
			'magic'        => '$1 magic word(s) of $2 are not translated to $3, but exist in en:',
			'magic-over'   => '$1 magic word(s) of $2 in $3 do not contain the original en word(s):',
			'magic-case'   => '$1 magic word(s) of $2 in $3 change the case-sensitivity of the original en word:',
			'special'      => '$1 special page alias(es) of $2 are not translated to $3, but exist in en:',
		);
	}

	/**
	 * Get help.
	 * @return The help string.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
* lang: Language code (default: the installation default language).
* all: Check all customized languages.
* help: Show this help.
* level: Show the following display level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* empty: Empty messages and messages that contain only -.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
* namespace: Namespace names that were not translated.
* projecttalk: Namespace names and aliases where the project talk does not contain $1.
* skin: Skin names that were not translated.
* magic: Magic words that were not translated.
* magic-over: Magic words that override the original English word.
* magic-case: Magic words whose translation changes the case-sensitivity of the original English word.
* special: Special page names that were not translated.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Execute the script: run the checks and, unless the level is 0, print
	 * the results in the selected output format.
	 * @throws MWException when the output type is neither 'plain' nor 'wiki'.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			switch ( $this->output ) {
				case 'plain':
					$this->outputText();
					break;
				case 'wiki':
					$this->outputWiki();
					break;
				default:
					throw new MWException( "Invalid output type $this->output" );
			}
		}
	}

	/**
	 * Execute the checks and store the results per language code.
	 * @throws MWException when a single language is requested and it is one
	 *   of the codes that cannot be checked.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			} else {
				$this->results[$this->code] = $this->checkLanguage( $this->code );
			}
		}
	}

	/**
	 * Get the check blacklist.
	 * @return The list of checks which should not be executed, taken from the
	 *   global $checkBlacklist (language code => list of check names).
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return $checkBlacklist;
	}

	/**
	 * Check a language.
	 * @param $code The language code.
	 * @return The results: an array of check name => findings. It is empty in
	 *   syntax-only mode (level 0). Checks blacklisted for the language are
	 *   present with an empty list; unknown checks are left out.
	 */
	protected function checkLanguage( $code ) {
		$results = array();

		# Syntax check only: load the messages, run no checks. Always hand back
		# an array so that callers can iterate over the result.
		if ( (int)$this->level === 0 ) {
			$this->L->getMessages( $code );
			return $results;
		}

		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		$blacklisted = isset( $checkBlacklist[$code] ) ? $checkBlacklist[$code] : array();

		foreach ( $this->checks as $check ) {
			if ( in_array( $check, $blacklisted ) ) {
				# Keep the check in the results (with no findings) so that every
				# language reports the same set of checks.
				$results[$check] = array();
				continue;
			}

			if ( !isset( $checkFunctions[$check] ) ) {
				# Unknown check name, e.g. a typo in the whitelist
				continue;
			}

			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				# The helper object does not provide this check
				continue;
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key.
	 * @param $key The message key.
	 * @param $code The language code.
	 * @return The formatted message key: a wiki link to the MediaWiki-namespace
	 *   page when linking is enabled, otherwise the key itself.
	 */
	protected function formatKey( $key, $code ) {
		if ( $this->doLinks ) {
			$displayKey = ucfirst( $key );
			if ( $code == $this->wikiCode ) {
				return "[[MediaWiki:$displayKey|$key]]";
			} else {
				return "[[MediaWiki:$displayKey/$code|$key]]";
			}
		} else {
			return $key;
		}
	}

	/**
	 * Output the checks results as plain text (echoed, nothing is returned).
	 */
	protected function outputText() {
		# Both lists are constant, so build them once instead of per finding
		$descriptions = $this->getDescriptions();
		$nonMessageChecks = $this->nonMessageChecks();
		$search = array( '$1', '$2', '$3' );

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				# Work out the total the findings are reported against
				switch ( $check ) {
					case 'untranslated':
						$translatable = $this->L->getGeneralMessages();
						$total = count( $translatable['translatable'] );
						break;
					case 'namespace':
						$total = count( $this->L->getNamespaceNames( 'en' ) );
						break;
					case 'projecttalk':
						# The description of this check has no total
						$total = null;
						break;
					case 'skin':
						$total = count( $this->L->getSkinNames( 'en' ) );
						break;
					case 'magic':
						$total = count( $this->L->getMagicWords( 'en' ) );
						break;
					case 'magic-over':
					case 'magic-case':
						$total = count( $this->L->getMagicWords( $code ) );
						break;
					case 'special':
						$total = count( $this->L->getSpecialPageAliases( 'en' ) );
						break;
					default:
						$total = $translated;
				}

				$replace = array( $count, $total, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				$isMessageCheck = !in_array( $check, $nonMessageChecks );
				foreach ( $messages as $key => $value ) {
					if ( $isMessageCheck ) {
						# Only real message keys can be turned into links
						$key = $this->formatKey( $key, $code );
					}
					if ( $this->level == 2 || empty( $value ) ) {
						echo "* $key\n";
					} else {
						echo "* $key: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Output the checks results as wiki text (echoed, nothing is returned):
	 * a summary table with one row per language that has problems, followed
	 * by the problematic message keys per language and check.
	 * Non-message checks are not part of the wiki output.
	 */
	public function outputWiki() {
		global $wgContLang, $IP;
		$nonMessageChecks = $this->nonMessageChecks();
		$detailText = '';
		$rows = array();
		# The rows below skip the non-message checks, so the header must too;
		# otherwise the columns do not line up.
		$rows[] = '! Language !! Code !! Total !! ' .
			implode( ' !! ', array_diff( $this->checks, $nonMessageChecks ) );
		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			foreach ( $results as $check => $messages ) {
				if ( in_array( $check, $nonMessageChecks ) ) {
					continue;
				}
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( $messages as $key => $details ) {
						$messageDetails[] = $this->formatKey( $key, $code );
					}
					$detailTextForLangChecks[] = "=== $code-$check ===\n* " . implode( ', ', $messageDetails );
					# Link the number to the matching detail section
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) {
				# Don't list languages without problems
				continue;
			}
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( $IP );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear: both;"
$tableRows
|}

$detailText

EOL;
	}

	/**
	 * Check whether the stored results are empty, in every language.
	 * @return True if no check reported anything, false if at least one
	 *   check has findings.
	 */
	protected function isEmpty() {
		foreach ( $this->results as $code => $results ) {
			foreach ( $results as $check => $messages ) {
				if ( !empty( $messages ) ) {
					return false;
				}
			}
		}
		return true;
	}
}
430
/**
 * Command-line front end for checking the localisation of one or more
 * extensions in a language. Each extension is checked and reported on its
 * own, using the checks and output routines of CheckLanguageCLI.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** List of extensionLanguages objects, one per extension to check. */
	private $extensions;

	/**
	 * Constructor.
	 * Prints the help and exits when the 'help' option is set. The parent
	 * constructor is deliberately not called; the options are read here.
	 * @param $options Options for script.
	 * @param $extension The extension name (or names, separated by commas),
	 *   'all' for all extension groups or 'wikimedia' for the Wikimedia set.
	 */
	public function __construct( Array $options, $extension ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# The option may be a string such as '0'; normalise it so that the
			# level comparisons behave the same for '0' and 0.
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# A whitelist takes precedence over a blacklist
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				$this->defaultChecks(),
				explode( ',', $options['blacklist'] )
			);
		} else {
			$this->checks = $this->defaultChecks();
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default. Do not add the check
		# a second time when it is already in the list.
		if ( isset( $options['duplicate'] ) && !in_array( 'duplicate', $this->checks ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();
		# NOTE(review): presumably this registers the extension message groups
		# that are looked up below - confirm against the Translate extension.
		$premadeGroups = new PremadeMediawikiExtensionGroups();
		$premadeGroups->addAll();

		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $group ) {
				if ( strpos( $group->getId(), 'ext-' ) === 0 && !$group->isMeta() ) {
					$this->extensions[] = new extensionLanguages( $group );
				}
			}
		} elseif ( $extension == 'wikimedia' ) {
			$wikimedia = MessageGroups::getGroup( 'ext-0-wikimedia' );
			foreach ( $wikimedia->wmfextensions() as $groupId ) {
				$group = MessageGroups::getGroup( $groupId );
				$this->extensions[] = new extensionLanguages( $group );
			}
		} else {
			foreach ( explode( ',', $extension ) as $name ) {
				$group = MessageGroups::getGroup( 'ext-' . $name );
				if ( $group ) {
					$this->extensions[] = new extensionLanguages( $group );
				} else {
					print "No such extension $name.\n";
				}
			}
		}
	}

	/**
	 * Get the default checks (the message checks only).
	 * @return A list of the default checks.
	 */
	protected function defaultChecks() {
		return array(
			'untranslated', 'duplicate', 'obsolete', 'variables', 'empty', 'plural',
			'whitespace', 'xhtml', 'chars', 'links', 'unbalanced',
		);
	}

	/**
	 * Get help.
	 * @return The help string.
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
* First parameter (mandatory): Extension name, multiple extension names (separated by commas), "all" for all the extensions or "wikimedia" for extensions used by Wikimedia.
* lang: Language code (default: the installation default language).
* help: Show this help.
* level: Show the following display level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Do not perform the following checks (form: code,code).
* duplicate: Additionally check for messages which are translated the same to English (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except language-specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Execute the script.
	 * Only the checks are started here; the output is produced per extension
	 * by checkLanguage().
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Check a language in every selected extension and show the results.
	 * Extensions without findings print nothing. Nothing is returned.
	 * @param $code The language code.
	 * @throws MWException when the output type is neither 'plain' nor 'wiki'.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $extension ) {
			# The inherited check and output routines work on $this->L and
			# $this->results, so point them at the current extension.
			$this->L = $extension;
			$found = parent::checkLanguage( $code );
			# Syntax-only mode may yield no result list; isEmpty() and the
			# output routines need something iterable.
			$this->results = array( $code => is_array( $found ) ? $found : array() );

			if ( $this->isEmpty() ) {
				continue;
			}

			echo $extension->name() . ":\n";

			if ( $this->level > 0 ) {
				switch ( $this->output ) {
					case 'plain':
						$this->outputText();
						break;
					case 'wiki':
						$this->outputWiki();
						break;
					default:
						throw new MWException( "Invalid output type $this->output" );
				}
			}

			echo "\n";
		}
	}
}
598
# Per-language check blacklist: checks listed here are not executed for the
# given language code.
# Entry format: 'language code' => array( 'check name', ... )
$checkBlacklist = array(
	'gan'          => array( 'plural' ),
	'gn'           => array( 'plural' ),
	'hak'          => array( 'plural' ),
	'hu'           => array( 'plural' ),
	'ja'           => array( 'plural' ), # the language does not use plural
	'ka'           => array( 'plural' ),
	'kk-arab'      => array( 'plural' ),
	'kk-cyrl'      => array( 'plural' ),
	'kk-latn'      => array( 'plural' ),
	'ko'           => array( 'plural' ),
	'mn'           => array( 'plural' ),
	'ms'           => array( 'plural' ),
	'my'           => array( 'chars' ), # zwnj is used a lot in this language
	'sah'          => array( 'plural' ),
	'sq'           => array( 'plural' ),
	'tet'          => array( 'plural' ),
	'th'           => array( 'plural' ),
	'wuu'          => array( 'plural' ),
	'xmf'          => array( 'plural' ),
	'yue'          => array( 'plural' ),
	'zh'           => array( 'plural' ),
	'zh-classical' => array( 'plural' ),
	'zh-cn'        => array( 'plural' ),
	'zh-hans'      => array( 'plural' ),
	'zh-hant'      => array( 'plural' ),
	'zh-hk'        => array( 'plural' ),
	'zh-sg'        => array( 'plural' ),
	'zh-tw'        => array( 'plural' ),
	'zh-yue'       => array( 'plural' ),
);