Partial revert of r12145. Bug 12145 Kazakh message updates did not mention/motivate...
[lhc/web/wiklou.git] / maintenance / language / checkLanguage.inc
1 <?php
2
/**
 * Command line driver for the localisation checks.
 *
 * Reads the parsed command line options, runs the selected checks for one
 * language (or for every language reported by the checker object) and prints
 * the results either as plain text or as wikitext.
 */
class CheckLanguageCLI {
	/** Code of the language to check when not checking all languages. */
	protected $code = null;
	/** Display level, see help(); 0 means "load the messages only". */
	protected $level = 2;
	/** Whether message keys are printed as [[MediaWiki:...]] links. */
	protected $doLinks = false;
	/** Language code for which links are generated without a "/code" suffix. */
	protected $wikiCode = 'en';
	/** Whether to check every language returned by the checker object. */
	protected $checkAll = false;
	/** Output format: 'plain' or 'wiki'. */
	protected $output = 'plain';
	/** Names of the checks that will be run. */
	protected $checks = array();
	/** Object on which the check methods listed in getChecks() are called. */
	protected $L = null;

	/** Checks that are run when neither a whitelist nor a blacklist is given. */
	protected $defaultChecks = array(
		'untranslated', 'obsolete', 'variables', 'empty', 'plural',
		'whitespace', 'xhtml', 'chars', 'links', 'unbalanced'
	);

	/** Check results, indexed by language code and then by check name. */
	protected $results = array();

	/** Passed to the languages constructor; false when 'noexif' is given. */
	private $includeExif = false;

	/**
	 * Read the options and create the checker object.
	 *
	 * Prints the help text and exits when the 'help' option is present.
	 *
	 * GLOBALS: $wgLanguageCode;
	 *
	 * @param $options Array of command line options, indexed by option name.
	 */
	public function __construct( Array $options ) {

		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# The value may be a string such as '0'; store an integer so that
			# the level comparisons behave the same for '0' and 0.
			$this->level = intval( $options['level'] );
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# A whitelist takes precedence over a blacklist
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				$this->defaultChecks,
				explode( ',', $options['blacklist'] )
			);
		} else {
			$this->checks = $this->defaultChecks;
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Get the known checks.
	 *
	 * @return Array mapping check name to the name of the method of the
	 *  checker object that performs it.
	 */
	protected function getChecks() {
		$checks = array();
		$checks['untranslated'] = 'getUntranslatedMessages';
		$checks['duplicate'] = 'getDuplicateMessages';
		$checks['obsolete'] = 'getObsoleteMessages';
		$checks['variables'] = 'getMessagesWithoutVariables';
		$checks['plural'] = 'getMessagesWithoutPlural';
		$checks['empty'] = 'getEmptyMessages';
		$checks['whitespace'] = 'getMessagesWithWhitespace';
		$checks['xhtml'] = 'getNonXHTMLMessages';
		$checks['chars'] = 'getMessagesWithWrongChars';
		$checks['links'] = 'getMessagesWithDubiousLinks';
		$checks['unbalanced'] = 'getMessagesWithUnbalanced';
		return $checks;
	}

	/**
	 * Get the header line printed for each check in plain text output.
	 *
	 * In outputText(), $1 is replaced by the number of reported messages,
	 * $2 by a message total and $3 by the language code.
	 *
	 * @return Array mapping check name to its description.
	 */
	protected function getDescriptions() {
		$descriptions = array();
		$descriptions['untranslated'] = '$1 message(s) of $2 are not translated to $3, but exist in en:';
		$descriptions['duplicate'] = '$1 message(s) of $2 are translated the same in en and $3:';
		$descriptions['obsolete'] = '$1 message(s) of $2 do not exist in en or are in the ignore list, but are in $3';
		$descriptions['variables'] = '$1 message(s) of $2 in $3 don\'t use some variables that en uses:';
		$descriptions['plural'] = '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:';
		$descriptions['empty'] = '$1 message(s) of $2 in $3 are empty or -:';
		$descriptions['whitespace'] = '$1 message(s) of $2 in $3 have trailing whitespace:';
		$descriptions['xhtml'] = '$1 message(s) of $2 in $3 contain illegal XHTML:';
		$descriptions['chars'] = '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:';
		$descriptions['links'] = '$1 message(s) of $2 in $3 have problematic link(s):';
		$descriptions['unbalanced'] = '$1 message(s) of $2 in $3 have unbalanced {[]}:';
		return $descriptions;
	}

	/**
	 * Get the help text printed for the 'help' option.
	 *
	 * @return String
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
* lang: Language code (default: the installation default language).
* all: Check all customized languages.
* help: Show this help.
* level: Show the following level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* duplicate: Additionally check for messages which are translated the same to English (default off).
* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except duplicate and language specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks and, unless the level is 0, print the results in the
	 * configured output format.
	 *
	 * @throws MWException when the output format is neither 'plain' nor 'wiki'.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			switch ( $this->output ) {
				case 'plain':
					$this->outputText();
					break;
				case 'wiki':
					$this->outputWiki();
					break;
				default:
					throw new MWException( "Invalid output type $this->output" );
			}
		}
	}

	/**
	 * Fill $this->results, either for all languages or for $this->code.
	 *
	 * The codes 'en' and 'enRTL' are never checked: they are skipped when
	 * checking all languages and rejected when requested explicitly.
	 *
	 * @throws MWException when $this->code is one of the ignored codes.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			} else {
				$this->results[$this->code] = $this->checkLanguage( $this->code );
			}
		}
	}

	/**
	 * Get the per-language list of checks that must not be run.
	 *
	 * @return The value of the global $checkBlacklist (language code => list
	 *  of check names).
	 */
	protected function getCheckBlacklist() {
		global $checkBlacklist;
		return $checkBlacklist;
	}

	/**
	 * Run the selected checks for one language.
	 *
	 * @param $code The language code.
	 * @return Array of results indexed by check name, in the order of
	 *  $this->checks. Checks blacklisted for the language get an empty
	 *  array. At level 0 the messages are only loaded and an empty array
	 *  is returned.
	 * @throws MWException when a selected check is not known or the checker
	 *  object cannot perform it.
	 */
	protected function checkLanguage( $code ) {
		# Syntax check only. The level is normalised here as well, because
		# subclasses may have stored the raw option value.
		if ( intval( $this->level ) === 0 ) {
			$this->L->getMessages( $code );
			return array();
		}

		$results = array();
		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		foreach ( $this->checks as $check ) {
			if ( isset( $checkBlacklist[$code] ) &&
				in_array( $check, $checkBlacklist[$code] ) ) {
				# Keep an (empty) entry for the check, so that every language
				# reports the same set of checks in the same order.
				$results[$check] = array();
				continue;
			}

			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unknown check $check." );
			}

			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key for output.
	 *
	 * @param $key The message key.
	 * @param $code The language code the message belongs to.
	 * @return The key itself, or a wiki link to the message page when links
	 *  are enabled; the link gets a "/$code" suffix unless $code is the
	 *  wiki language.
	 */
	protected function formatKey( $key, $code ) {
		if ( $this->doLinks ) {
			$displayKey = ucfirst( $key );
			if ( $code == $this->wikiCode ) {
				return "[[MediaWiki:$displayKey|$key]]";
			} else {
				return "[[MediaWiki:$displayKey/$code|$key]]";
			}
		} else {
			return $key;
		}
	}

	/**
	 * Print the results as plain text.
	 *
	 * Only checks that reported at least one message are printed. At level 1
	 * the message list is replaced by a placeholder, at level 2 only the keys
	 * are listed, otherwise keys and values.
	 */
	protected function outputText() {
		$descriptions = $this->getDescriptions();
		$search = array( '$1', '$2', '$3' );
		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			$translatable = $this->L->getGeneralMessages();
			$translatable = count( $translatable['translatable'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( $count ) {
					# The total shown is the number of translatable messages for
					# the untranslated check, the number of translated messages
					# for every other check.
					$replace = array( $count, $check == 'untranslated' ? $translatable : $translated, $code );
					echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
					if ( $this->level == 1 ) {
						echo "[messages are hidden]\n";
					} else {
						foreach ( $messages as $key => $value ) {
							$displayKey = $this->formatKey( $key, $code );
							if ( $this->level == 2 ) {
								echo "* $displayKey\n";
							} else {
								echo "* $displayKey: '$value'\n";
							}
						}
					}
				}
			}
		}
	}

	/**
	 * Print the results as wikitext: a table with one row per language that
	 * has problems and one column per check, followed by a section per
	 * language listing the reported message keys.
	 *
	 * Globals: $wgContLang, $IP
	 */
	public function outputWiki() {
		global $wgContLang, $IP;
		$detailText = '';
		$rows = array( '! Language !! Code !! Total !! ' . implode( ' !! ', $this->checks ) );
		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( $messages as $key => $details ) {
						$displayKey = $this->formatKey( $key, $code );
						$messageDetails[] = $displayKey;
					}
					$detailTextForLangChecks[] = "===$code-$check===\n* " . implode( ', ', $messageDetails );
					# Link the count to the detail section of this check
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}

			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) { continue; } // Don't list languages without problems
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( $IP );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear:both;"
$tableRows
|}

$detailText

EOL;
	}
}
306
/**
 * Command line driver that runs the localisation checks against extension
 * message groups instead of the core messages.
 *
 * The checks are run and printed once per selected extension.
 */
class CheckExtensionsCLI extends CheckLanguageCLI {
	/** List of extensionLanguages objects to run the checks on. */
	private $extensions;

	/**
	 * Read the options and collect the extensions to check.
	 *
	 * Prints the help text and exits when the 'help' option is present.
	 *
	 * GLOBALS: $wgLanguageCode;
	 *
	 * @param $options Array of command line options, indexed by option name.
	 * @param $extension Extension name, or "all" for every message group whose
	 *  id starts with "ext-" and which is not a meta group.
	 */
	public function __construct( Array $options, $extension ) {
		global $wgLanguageCode;

		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		$this->code = isset( $options['lang'] ) ? $options['lang'] : $wgLanguageCode;

		if ( isset( $options['level'] ) ) {
			$this->level = $options['level'];
		}

		$this->doLinks = isset( $options['links'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		# Start from the defaults; a whitelist takes precedence over a blacklist
		$this->checks = $this->defaultChecks;
		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$excluded = explode( ',', $options['blacklist'] );
			$this->checks = array_diff( $this->defaultChecks, $excluded );
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->extensions = array();

		if ( $extension == 'all' ) {
			foreach ( MessageGroups::singleton()->getGroups() as $candidate ) {
				# Only real (non-meta) extension groups are checked
				if ( strpos( $candidate->getId(), 'ext-' ) !== 0 || $candidate->isMeta() ) {
					continue;
				}
				$this->extensions[] = new extensionLanguages( $candidate );
			}
			return;
		}

		$group = MessageGroups::getGroup( 'ext-' . $extension );
		if ( $group ) {
			$this->extensions[] = new extensionLanguages( $group );
		} else {
			echo "No such extension $extension.\n";
		}
	}

	/**
	 * Get the help text printed for the 'help' option.
	 *
	 * @return String
	 */
	protected function help() {
		return <<<ENDS
Run this script to check the status of a specific language in extensions, or all of them.
Command line settings are in form --parameter[=value], except for the first one.
Parameters:
* First parameter (mandatory): Extension name, or "all" for all the extensions.
* lang: Language code (default: the installation default language).
* help: Show this help.
* level: Show the following level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* duplicate: Additionally check for messages which are translated the same to English (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except duplicate and language specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks. The output is printed from checkLanguage(), once per
	 * extension, so nothing is printed here.
	 */
	public function execute() {
		$this->doChecks();
	}

	/**
	 * Run the selected checks for one language on every collected extension
	 * and print the results of each extension right away.
	 *
	 * @param $code The language code.
	 * @throws MWException when the output format is neither 'plain' nor 'wiki'.
	 */
	protected function checkLanguage( $code ) {
		foreach ( $this->extensions as $current ) {
			echo $current->name() . ":\n";

			# The parent implementation runs the checks on $this->L
			$this->L = $current;
			$this->results = array();
			$this->results[$code] = parent::checkLanguage( $code );

			if ( $this->level > 0 ) {
				$format = $this->output;
				if ( $format == 'plain' ) {
					$this->outputText();
				} elseif ( $format == 'wiki' ) {
					$this->outputWiki();
				} else {
					throw new MWException( "Invalid output type $this->output" );
				}
			}

			echo "\n";
		}
	}
}
435
# Blacklist some checks for some languages.
# Format: 'code' => array( 'check1', 'check2' ... )
# The plural check is disabled for the languages in the two lists below
# (for example ja, which does not use plural); the chars check is disabled
# for my, which uses a lot of zwnj.
$checkBlacklist = array_merge(
	array_fill_keys(
		array(
			'gan', 'hak', 'ja', 'ka', 'kk-arab', 'kk-cyrl', 'kk-latn',
			'ko', 'mn', 'ms'
		),
		array( 'plural' )
	),
	array( 'my' => array( 'chars' ) ),
	array_fill_keys(
		array(
			'sq', 'tet', 'th', 'wuu', 'xmf', 'yue', 'zh', 'zh-classical',
			'zh-cn', 'zh-hans', 'zh-hant', 'zh-hk', 'zh-sg', 'zh-tw', 'zh-yue'
		),
		array( 'plural' )
	)
);