Moved language-related command-line scripts to their own directory.
[lhc/web/wiklou.git] / maintenance / language / checkLanguage.php
1 <?php
2 /**
3 * Check a language file.
4 *
5 * @package MediaWiki
6 * @subpackage Maintenance
7 */
8
9 require_once( dirname(__FILE__).'/../commandLine.inc' );
10 require_once( 'languages.inc' );
11
/**
 * Run the enabled checks on one language and print a report for each.
 *
 * The language's messages are loaded first, and only then is the display
 * level tested: at level 0 the function returns without running any check,
 * which the script's help describes as useful for checking syntax.
 *
 * Reads the globals $wgLanguages (message provider and printer),
 * $wgRequiredMessagesNumber, $wgDisplayLevel, $wgLinks, $wgWikiLanguage and
 * $wgChecks (list of check codes to run).
 *
 * @param $code The language code.
 */
function checkLanguage( $code ) {
	global $wgLanguages, $wgRequiredMessagesNumber, $wgDisplayLevel, $wgLinks, $wgWikiLanguage, $wgChecks;

	# Get messages (done before the level test on purpose, see above)
	$messages = $wgLanguages->getMessages( $code );
	$messagesNumber = count( $messages['translated'] );

	# Skip the checks if specified
	if ( $wgDisplayLevel == 0 ) {
		return;
	}

	# One entry per check, in report order:
	#   check code => array( getter method on $wgLanguages, or null when the
	#   list comes from the already loaded messages; total shown in the
	#   header; rest of the header text )
	$checks = array(
		'untranslated' => array( 'getUntranslatedMessages', $wgRequiredMessagesNumber,
			" are not translated to $code, but exist in en:" ),
		'duplicate' => array( 'getDuplicateMessages', $messagesNumber,
			" are translated the same in en and $code:" ),
		'obsolete' => array( null, $messagesNumber,
			" do not exist in en (or are in the ignored list), but still exist in $code:" ),
		'variables' => array( 'getMessagesWithoutVariables', $messagesNumber,
			" in $code don't use some variables while en uses them:" ),
		'empty' => array( 'getEmptyMessages', $messagesNumber,
			" in $code are empty or -:" ),
		'whitespace' => array( 'getMessagesWithWhitespace', $messagesNumber,
			" in $code have a trailing whitespace:" ),
		'xhtml' => array( 'getNonXHTMLMessages', $messagesNumber,
			" in $code are not well-formed XHTML:" ),
		'chars' => array( 'getMessagesWithWrongChars', $messagesNumber,
			" in $code include hidden chars which should not be used in the messages:" ),
	);

	foreach ( $checks as $check => $spec ) {
		# $wgChecks holds check codes as strings, so a strict match is enough
		if ( !in_array( $check, $wgChecks, true ) ) {
			continue;
		}

		list( $getter, $total, $tail ) = $spec;

		# Obsolete messages are part of the loaded message set; every other
		# list is requested from the languages object
		if ( $getter === null ) {
			$found = $messages['obsolete'];
		} else {
			$found = $wgLanguages->$getter( $code );
		}

		$header = "\n" . count( $found ) . " messages of {$total}{$tail}";
		$wgLanguages->outputMessagesList( $found, $code, $header, $wgDisplayLevel, $wgLinks, $wgWikiLanguage );
	}
}
85
# Show help and stop when the "help" option is given.
# The check-code list includes "duplicate": it is a valid whitelist/blacklist
# code, but it is the one check that is not part of the default set.
if ( isset( $options['help'] ) ) {
	echo <<<END
Run this script to check a specific language file, or all of them.
Parameters:
* lang: Language code (default: the installation default language). You can also specify "all" to check all the languages.
* help: Show this help.
* level: Show the following level (default: 2).
* links: Link the message values (default off).
* wikilang: For the links, what is the content language of the wiki to display the output in (default en).
* whitelist: Make only the following checks (form: code,code).
* blacklist: Don't make the following checks (form: code,code).
* duplicate: Additionally check for messages which are translated the same to English (default off).
* noexif: Don't check for EXIF messages (a bit hard and boring to translate), if you know that they are currently not translated and want to focus on other problems (default off).
Check codes (ideally, all of them should result 0; all the checks except duplicate are executed by default):
* untranslated: Messages which are required to translate, but are not translated.
* duplicate: Messages which are translated the same as in English.
* obsolete: Messages which are untranslatable, but translated.
* variables: Messages without variables which should be used.
* empty: Empty messages.
* whitespace: Messages which have trailing whitespace.
* xhtml: Messages which are not well-formed XHTML.
* chars: Messages with hidden characters.
Display levels (default: 2):
* 0: Skip the checks (useful for checking syntax).
* 1: Show only the stub headers and number of wrong messages, without list of messages.
* 2: Show only the headers and the message keys, without the message values.
* 3: Show both the headers and the complete messages, with both keys and values.

END;
	exit();
}
117
# Language to check: the "lang" option, or the code reported by $wgContLang
$wgCode = isset( $options['lang'] ) ? $options['lang'] : $wgContLang->getCode();

# Display level: the "level" option, or 2 when it is not given
$wgDisplayLevel = isset( $options['level'] ) ? $options['level'] : 2;

# Link options: whether to link, and which wiki language to use (default en)
$wgLinks = isset( $options['links'] );
if ( isset( $options['wikilang'] ) ) {
	$wgWikiLanguage = $options['wikilang'];
} else {
	$wgWikiLanguage = 'en';
}

# Checks to run: a whitelist replaces the default set entirely; otherwise
# the default set is used, minus any blacklisted codes
if ( isset( $options['whitelist'] ) ) {
	$wgChecks = explode( ',', $options['whitelist'] );
} else {
	$wgChecks = array( 'untranslated', 'obsolete', 'variables', 'empty', 'whitespace', 'xhtml', 'chars' );
	if ( isset( $options['blacklist'] ) ) {
		$wgChecks = array_diff( $wgChecks, explode( ',', $options['blacklist'] ) );
	}
}

# The duplicate check is appended only on request, whatever the lists say
if ( isset( $options['duplicate'] ) ) {
	$wgChecks[] = 'duplicate';
}

# EXIF messages are checked unless "noexif" is given
$wgCheckEXIF = !isset( $options['noexif'] );
151
# Create the languages object, passing the EXIF flag to its constructor
$wgLanguages = new languages( $wgCheckEXIF );

# Fetch the general messages and count the required ones
$wgGeneralMessages = $wgLanguages->getGeneralMessages();
$wgRequiredMessagesNumber = count( $wgGeneralMessages['required'] );

# Dispatch on the selected code: "all" checks every language except the two
# English variants, which cannot be checked; any other code is checked alone
switch ( $wgCode ) {
	case 'all':
		foreach ( $wgLanguages->getLanguages() as $language ) {
			if ( !( $language == 'en' || $language == 'enRTL' ) ) {
				checkLanguage( $language );
			}
		}
		break;

	case 'en':
		echo "Current selected language is English, which cannot be checked.\n";
		break;

	case 'enRTL':
		echo "Current selected language is RTL English, which cannot be checked.\n";
		break;

	default:
		checkLanguage( $wgCode );
}
176
177 ?>