Make --links work for output=wiki too
[lhc/web/wiklou.git] / maintenance / language / checkLanguage.php
1 <?php
2 /**
3 * Check a language file.
4 *
5 * @addtogroup Maintenance
6 */
7
# Anchor both includes to this script's own directory so they resolve the same
# way regardless of the configured include_path or the current working directory.
# NOTE(review): assumes languages.inc sits next to this script — confirm.
require_once( dirname( __FILE__ ) . '/../commandLine.inc' );
require_once( dirname( __FILE__ ) . '/languages.inc' );

# $options is not defined in this file; presumably commandLine.inc builds it
# from the command line arguments.
$cli = new CheckLanguageCLI( $options );
$cli->execute();

13
/**
 * Command line front end for the language file checks.
 *
 * Reads the options array, runs the selected checks through a `languages`
 * object for one language or for all of them, and prints the findings either
 * as plain text or as wikitext.
 */
class CheckLanguageCLI {
	/** Language code to check when 'all' is not requested. */
	private $code = null;
	/** Display level, always stored as an integer (see help()). */
	private $level = 2;
	/** Whether message keys are printed as wiki links. */
	private $doLinks = false;
	/** Language code for which links get no "/code" suffix. */
	private $wikiCode = 'en';
	/** Passed to the languages constructor; false when 'noexif' is given. */
	private $includeExif = false;
	/** Whether every language is checked instead of just $code. */
	private $checkAll = false;
	/** Output format: 'plain' or 'wiki'. */
	private $output = 'plain';
	/** Names of the checks to run. */
	private $checks = array();

	/** Checks that run when neither a whitelist nor a blacklist is given. */
	private $defaultChecks = array(
		'untranslated', 'obsolete', 'variables', 'empty', 'plural',
		'whitespace', 'xhtml', 'chars', 'links', 'unbalanced'
	);

	/** The languages object that performs the checks. */
	private $L = null;

	/** Check results, keyed by language code and then by check name. */
	private $results = array();

	/**
	 * Read the options and prepare the checker.
	 *
	 * Prints the help text and exits when the 'help' option is present.
	 *
	 * GLOBALS: $wgLanguageCode;
	 *
	 * @param $options Array: options keyed by name (help, lang, level, links,
	 *   noexif, all, wikilang, whitelist, blacklist, output, duplicate).
	 */
	public function __construct( array $options ) {
		if ( isset( $options['help'] ) ) {
			echo $this->help();
			exit();
		}

		if ( isset( $options['lang'] ) ) {
			$this->code = $options['lang'];
		} else {
			global $wgLanguageCode;
			$this->code = $wgLanguageCode;
		}

		if ( isset( $options['level'] ) ) {
			# Normalise to an integer: a string such as "0" would otherwise
			# never satisfy the strict comparison in checkLanguage().
			$this->level = (int)$options['level'];
		}

		$this->doLinks = isset( $options['links'] );
		$this->includeExif = !isset( $options['noexif'] );
		$this->checkAll = isset( $options['all'] );

		if ( isset( $options['wikilang'] ) ) {
			$this->wikiCode = $options['wikilang'];
		}

		if ( isset( $options['whitelist'] ) ) {
			$this->checks = explode( ',', $options['whitelist'] );
		} elseif ( isset( $options['blacklist'] ) ) {
			$this->checks = array_diff(
				$this->defaultChecks,
				explode( ',', $options['blacklist'] )
			);
		} else {
			$this->checks = $this->defaultChecks;
		}

		if ( isset( $options['output'] ) ) {
			$this->output = $options['output'];
		}

		# Some additional checks not enabled by default
		if ( isset( $options['duplicate'] ) ) {
			$this->checks[] = 'duplicate';
		}

		$this->L = new languages( $this->includeExif );
	}

	/**
	 * Map every known check name to the method of the languages object
	 * that implements it.
	 *
	 * @return Array: check name => method name.
	 */
	protected function getChecks() {
		return array(
			'untranslated' => 'getUntranslatedMessages',
			'duplicate'    => 'getDuplicateMessages',
			'obsolete'     => 'getObsoleteMessages',
			'variables'    => 'getMessagesWithoutVariables',
			'plural'       => 'getMessagesWithoutPlural',
			'empty'        => 'getEmptyMessages',
			'whitespace'   => 'getMessagesWithWhitespace',
			'xhtml'        => 'getNonXHTMLMessages',
			'chars'        => 'getMessagesWithWrongChars',
			'links'        => 'getMessagesWithDubiousLinks',
			'unbalanced'   => 'getMessagesWithUnbalanced',
		);
	}

	/**
	 * Get the header line templates used by the plain text output.
	 *
	 * In each template $1 is the number of affected messages, $2 the number
	 * of translated messages and $3 the language code.
	 *
	 * @return Array: check name => template.
	 */
	protected function getDescriptions() {
		return array(
			'untranslated' => '$1 message(s) of $2 are not translated to $3, but exist in en:',
			'duplicate'    => '$1 message(s) of $2 are translated the same in en and $3:',
			'obsolete'     => '$1 message(s) of $2 do not exist in en or are in the ignore list, but are in $3',
			'variables'    => '$1 message(s) of $2 in $3 don\'t use some variables that en uses:',
			'plural'       => '$1 message(s) of $2 in $3 don\'t use {{plural}} while en uses:',
			'empty'        => '$1 message(s) of $2 in $3 are empty or -:',
			'whitespace'   => '$1 message(s) of $2 in $3 have trailing whitespace:',
			'xhtml'        => '$1 message(s) of $2 in $3 contain illegal XHTML:',
			'chars'        => '$1 message(s) of $2 in $3 include hidden chars which should not be used in the messages:',
			'links'        => '$1 message(s) of $2 in $3 have problematic link(s):',
			'unbalanced'   => '$1 message(s) of $2 in $3 have unbalanced {[]}:',
		);
	}

	/**
	 * Get the usage text shown for the 'help' option.
	 *
	 * @return String
	 */
	protected function help() {
		return <<<ENDS
Run this script to check a specific language file, or all of them.
Command line settings are in form --parameter[=value].
Parameters:
* lang: Language code (default: the installation default language).
* all: Check all customized languages
* help: Show this help.
* level: Show the following level (default: 2).
* output: Output format, either plain or wiki (default: plain).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Do only the following checks (form: code,code).
* blacklist: Don't do the following checks (form: code,code).
* duplicate: Additionally check for messages which are translated the same to English (default off).
* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks are executed by default (except duplicate and language specific check blacklists in checkLanguage.inc):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which translation equal to fallback
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* plural: Messages which do not use {{plural}} while en does.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML (checks only few common errors).
* chars: Messages with hidden characters.
* links: Messages which contains broken links to pages (does not find all).
* unbalanced: Messages which contains unequal numbers of opening {[ and closing ]}.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

ENDS;
	}

	/**
	 * Run the checks and, unless the display level is 0, print the results
	 * in the selected output format.
	 *
	 * @throws MWException when the output format is neither 'plain' nor 'wiki'.
	 */
	public function execute() {
		$this->doChecks();
		if ( $this->level > 0 ) {
			switch ( $this->output ) {
				case 'plain':
					$this->outputText();
					break;
				case 'wiki':
					$this->outputWiki();
					break;
				default:
					throw new MWException( "Invalid output type $this->output" );
			}
		}
	}

	/**
	 * Fill $this->results for the selected language, or for every language
	 * reported by the languages object when 'all' was requested.
	 *
	 * @throws MWException when a single language was requested and it is one
	 *   of the ignored codes.
	 */
	protected function doChecks() {
		$ignoredCodes = array( 'en', 'enRTL' );

		$this->results = array();
		# Check the language
		if ( $this->checkAll ) {
			foreach ( $this->L->getLanguages() as $language ) {
				if ( !in_array( $language, $ignoredCodes ) ) {
					$this->results[$language] = $this->checkLanguage( $language );
				}
			}
		} else {
			if ( in_array( $this->code, $ignoredCodes ) ) {
				throw new MWException( "Cannot check code $this->code." );
			}
			$this->results[$this->code] = $this->checkLanguage( $this->code );
		}
	}

	/**
	 * Load the per-language check blacklist, once per request.
	 *
	 * The included checkLanguage.inc is expected to fill $checkBlacklist; its
	 * contents are not visible from this file.
	 *
	 * @return Array: language code => list of check names to skip.
	 */
	protected function getCheckBlacklist() {
		static $checkBlacklist = null;
		if ( $checkBlacklist === null ) {
			$checkBlacklist = array();
			require( dirname( __FILE__ ) . '/checkLanguage.inc' );
		}
		return $checkBlacklist;
	}

	/**
	 * Run the selected checks against one language.
	 *
	 * @param $code String: language code.
	 * @return Array: check name => messages found by that check. Checks that
	 *   are blacklisted for the language map to an empty array. At display
	 *   level 0 the messages are only loaded and an empty array is returned.
	 * @throws MWException when a selected check is not known.
	 */
	protected function checkLanguage( $code ) {
		# Syntax check only
		if ( $this->level === 0 ) {
			$this->L->getMessages( $code );
			return array();
		}

		$results = array();
		$checkFunctions = $this->getChecks();
		$checkBlacklist = $this->getCheckBlacklist();
		$skipped = isset( $checkBlacklist[$code] ) ? $checkBlacklist[$code] : array();

		foreach ( $this->checks as $check ) {
			if ( in_array( $check, $skipped ) ) {
				# Keep an (empty) entry so every selected check has a result
				$results[$check] = array();
				continue;
			}

			if ( !isset( $checkFunctions[$check] ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$callback = array( $this->L, $checkFunctions[$check] );
			if ( !is_callable( $callback ) ) {
				throw new MWException( "Unknown check $check." );
			}
			$results[$check] = call_user_func( $callback, $code );
		}

		return $results;
	}

	/**
	 * Format a message key for display.
	 *
	 * @param $key String: message key.
	 * @param $code String: language code the key was checked for.
	 * @return String: the plain key, or a wiki link to its MediaWiki: page when
	 *   links are enabled. The link gets a "/code" suffix unless $code equals
	 *   the wiki language.
	 */
	protected function formatKey( $key, $code ) {
		if ( !$this->doLinks ) {
			return $key;
		}

		$displayKey = ucfirst( $key );
		if ( $code == $this->wikiCode ) {
			return "[[MediaWiki:$displayKey|$key]]";
		}
		return "[[MediaWiki:$displayKey/$code|$key]]";
	}

	/**
	 * Print the results as plain text: one header per check that found
	 * something, followed by the affected messages according to the level.
	 */
	protected function outputText() {
		$search = array( '$1', '$2', '$3' );
		$descriptions = $this->getDescriptions();

		foreach ( $this->results as $code => $results ) {
			$translated = $this->L->getMessages( $code );
			$translated = count( $translated['translated'] );
			foreach ( $results as $check => $messages ) {
				$count = count( $messages );
				if ( !$count ) {
					continue;
				}

				$replace = array( $count, $translated, $code );
				echo "\n" . str_replace( $search, $replace, $descriptions[$check] ) . "\n";
				if ( $this->level == 1 ) {
					echo "[messages are hidden]\n";
					continue;
				}

				foreach ( $messages as $key => $value ) {
					$displayKey = $this->formatKey( $key, $code );
					if ( $this->level == 2 ) {
						echo "* $displayKey\n";
					} else {
						echo "* $displayKey: '$value'\n";
					}
				}
			}
		}
	}

	/**
	 * Print the results as wikitext: a summary table with one row per
	 * language that has problems, followed by a section per language listing
	 * the affected message keys of each check.
	 *
	 * Globals: $wgContLang, $IP
	 */
	public function outputWiki() {
		global $wgContLang, $IP;
		$detailText = '';
		$rows = array();
		$rows[] = '! Language !! Code !! Total !! ' . implode( ' !! ', $this->checks );
		foreach ( $this->results as $code => $results ) {
			$detailTextForLang = "==$code==\n";
			$numbers = array();
			$problems = 0;
			$detailTextForLangChecks = array();
			# Walk the selected checks rather than the result keys, so that the
			# cells always line up with the header row built above.
			foreach ( $this->checks as $check ) {
				$messages = isset( $results[$check] ) ? $results[$check] : array();
				$count = count( $messages );
				if ( $count ) {
					$problems += $count;
					$messageDetails = array();
					foreach ( $messages as $key => $details ) {
						$messageDetails[] = $this->formatKey( $key, $code );
					}
					$detailTextForLangChecks[] = "===$code-$check===\n* " . implode( ', ', $messageDetails );
					$numbers[] = "'''[[#$code-$check|$count]]'''";
				} else {
					$numbers[] = $count;
				}
			}

			if ( count( $detailTextForLangChecks ) ) {
				$detailText .= $detailTextForLang . implode( "\n", $detailTextForLangChecks ) . "\n";
			}

			if ( !$problems ) { continue; } // Don't list languages without problems
			$language = $wgContLang->getLanguageName( $code );
			$rows[] = "| $language || $code || $problems || " . implode( ' || ', $numbers );
		}

		$tableRows = implode( "\n|-\n", $rows );

		$version = SpecialVersion::getVersion( $IP );
		echo <<<EOL
'''Check results are for:''' <code>$version</code>


{| class="sortable wikitable" border="2" cellpadding="4" cellspacing="0" style="background-color: #F9F9F9; border: 1px #AAAAAA solid; border-collapse: collapse; clear:both;"
$tableRows
|}

$detailText

EOL;
	}

}
318 }