Merge "Clean up X-Content-Dimensions"
[lhc/web/wiklou.git] / maintenance / findHooks.php
1 <?php
2 /**
3 * Simple script that tries to find the documented hooks and the hooks actually
4 * called in the code, and shows what is missing.
5 *
6 * This script assumes that:
7 * - hook names in hooks.txt are at the beginning of a line and single-quoted.
8 * - hook names in code are the first parameter of wfRunHooks() or Hooks::run().
9 *
10 * If the --online option is passed, the script will compare the hooks in the
11 * code with the ones at https://www.mediawiki.org/wiki/Manual:Hooks
12 *
13 * Any call to wfRunHooks() or Hooks::run() that doesn't meet these assumptions will be noted.
14 *
15 * Copyright © Antoine Musso
16 *
17 * This program is free software; you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation; either version 2 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License along
28 * with this program; if not, write to the Free Software Foundation, Inc.,
29 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
30 * http://www.gnu.org/copyleft/gpl.html
31 *
32 * @file
33 * @ingroup Maintenance
34 * @author Antoine Musso <hashar at free dot fr>
35 */
36
37 require_once __DIR__ . '/Maintenance.php';
38
/**
 * Maintenance script that compares the documented hooks with the hooks
 * actually called in the code and reports any mismatches.
 *
 * @ingroup Maintenance
 */
class FindHooks extends Maintenance {
	const FIND_NON_RECURSIVE = 0;
	const FIND_RECURSIVE = 1;

	/**
	 * Hook names that are excluded from every report.
	 * @var string[]
	 */
	protected static $ignore = [ 'Test' ];

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Find hooks that are undocumented, missing, or just plain wrong' );
		$this->addOption( 'online', 'Check against MediaWiki.org hook documentation' );
	}

	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Collect the documented hooks and the hooks called in the code, then
	 * print every discrepancy: undocumented hooks, documented hooks that are
	 * not called, unclear hook calls, and differences in parameter count or
	 * in by-reference markers.
	 */
	public function execute() {
		global $IP;

		$documentedHooks = $this->getHooksFromDoc( $IP . '/docs/hooks.txt' );
		$potentialHooks = [];
		$badHooks = [];

		$recurseDirs = [
			"$IP/includes/",
			"$IP/mw-config/",
			"$IP/languages/",
			"$IP/maintenance/",
			// Omit $IP/tests/phpunit as it contains hook tests that shouldn't be documented
			"$IP/tests/parser",
			"$IP/tests/phpunit/suites",
		];
		$nonRecurseDirs = [
			"$IP/",
		];
		$extraFiles = [
			"$IP/tests/phpunit/MediaWikiTestCase.php",
		];

		foreach ( $recurseDirs as $dir ) {
			$ret = $this->getHooksFromDir( $dir, self::FIND_RECURSIVE );
			$potentialHooks = array_merge( $potentialHooks, $ret['good'] );
			$badHooks = array_merge( $badHooks, $ret['bad'] );
		}
		foreach ( $nonRecurseDirs as $dir ) {
			$ret = $this->getHooksFromDir( $dir );
			$potentialHooks = array_merge( $potentialHooks, $ret['good'] );
			$badHooks = array_merge( $badHooks, $ret['bad'] );
		}
		foreach ( $extraFiles as $file ) {
			$potentialHooks = array_merge( $potentialHooks, $this->getHooksFromFile( $file ) );
			$badHooks = array_merge( $badHooks, $this->getBadHooksFromFile( $file ) );
		}

		$documented = array_keys( $documentedHooks );
		$potential = array_keys( $potentialHooks );
		$potential = array_unique( $potential );
		$badHooks = array_diff( array_unique( $badHooks ), self::$ignore );
		$todo = array_diff( $potential, $documented, self::$ignore );
		$deprecated = array_diff( $documented, $potential, self::$ignore );

		// Check parameter count and references
		$badParameterCount = $badParameterReference = [];
		foreach ( $potentialHooks as $hook => $args ) {
			if ( !isset( $documentedHooks[$hook] ) ) {
				// Not documented, but that will also be in $todo
				continue;
			}
			$argsDoc = $documentedHooks[$hook];
			if ( $args === 'unknown' || $argsDoc === 'unknown' ) {
				// Could not get parameter information
				continue;
			}
			if ( count( $argsDoc ) !== count( $args ) ) {
				$badParameterCount[] = $hook . ': Doc: ' . count( $argsDoc ) . ' vs. Code: ' . count( $args );
			} else {
				// Both lists are 0-based, so the same index addresses the same position.
				// Check if & is equal
				foreach ( $argsDoc as $index => $argDoc ) {
					$arg = $args[$index];
					if ( ( $arg[0] === '&' ) !== ( $argDoc[0] === '&' ) ) {
						$badParameterReference[] = $hook . ': References different: Doc: ' . $argDoc .
							' vs. Code: ' . $arg;
					}
				}
			}
		}

		// Print the results
		$this->printArray( 'Undocumented', $todo );
		$this->printArray( 'Documented and not found', $deprecated );
		$this->printArray( 'Unclear hook calls', $badHooks );
		$this->printArray( 'Different parameter count', $badParameterCount );
		$this->printArray( 'Different parameter reference', $badParameterReference );

		if ( !$todo && !$deprecated && !$badHooks
			&& !$badParameterCount && !$badParameterReference
		) {
			$this->output( "Looks good!\n" );
		} else {
			$this->error( 'The script finished with errors.', 1 );
		}
	}

	/**
	 * Get the hook documentation, either locally or from MediaWiki.org
	 * @param string $doc Local documentation file, used unless --online is given
	 * @return array Array: key => hook name; value => array of arguments or string 'unknown'
	 */
	private function getHooksFromDoc( $doc ) {
		if ( $this->hasOption( 'online' ) ) {
			return $this->getHooksFromOnlineDoc();
		} else {
			return $this->getHooksFromLocalDoc( $doc );
		}
	}

	/**
	 * Get hooks from a local file (for example docs/hooks.txt)
	 * @param string $doc Filename to look in
	 * @return array Array: key => hook name; value => array of documented argument names
	 */
	private function getHooksFromLocalDoc( $doc ) {
		$m = [];
		$content = file_get_contents( $doc );
		if ( $content === false ) {
			// Without the documentation every hook would be reported as undocumented
			$this->error( "Could not read hook documentation from $doc", 1 );
			return [];
		}
		// Group 1 is the quoted hook name at the start of a line, group 2 the
		// run of non-empty lines that follows it.
		preg_match_all(
			"/\n'(.*?)':.*((?:\n.+)*)/",
			$content,
			$m,
			PREG_SET_ORDER
		);

		// Extract the documented parameter
		$hooks = [];
		foreach ( $m as $match ) {
			$args = [];
			if ( isset( $match[2] ) ) {
				$n = [];
				// Parameter lines look like "$name: ..." or "&$name: ..."
				if ( preg_match_all( "/\n(&?\\$\w+):.+/", $match[2], $n ) ) {
					$args = $n[1];
				}
			}
			$hooks[$match[1]] = $args;
		}
		return $hooks;
	}

	/**
	 * Get hooks from www.mediawiki.org using the API
	 *
	 * Hooks listed in the 'Removed_hooks' category are left out.
	 *
	 * @return array Array: key => hook name; value => string 'unknown'
	 */
	private function getHooksFromOnlineDoc() {
		$allhooks = $this->getHooksFromOnlineDocCategory( 'MediaWiki_hooks' );
		$removed = $this->getHooksFromOnlineDocCategory( 'Removed_hooks' );
		return array_diff_key( $allhooks, $removed );
	}

	/**
	 * Get the hooks whose manual page is a member of a category on www.mediawiki.org
	 * @param string $title Category name, without the "Category:" prefix
	 * @return array Array: key => hook name; value => string 'unknown'
	 */
	private function getHooksFromOnlineDocCategory( $title ) {
		$params = [
			'action' => 'query',
			'list' => 'categorymembers',
			'cmtitle' => "Category:$title",
			'cmlimit' => 500,
			'format' => 'json',
			'continue' => '',
		];

		$retval = [];
		while ( true ) {
			$json = Http::get(
				wfAppendQuery( 'https://www.mediawiki.org/w/api.php', $params ),
				[],
				__METHOD__
			);
			$data = is_string( $json ) ? FormatJson::decode( $json, true ) : null;
			if ( !isset( $data['query']['categorymembers'] ) ) {
				// Request failed or the response has an unexpected shape
				$this->error( "Could not fetch the members of Category:$title from MediaWiki.org", 1 );
				return $retval;
			}
			foreach ( $data['query']['categorymembers'] as $page ) {
				if ( preg_match( '/Manual\:Hooks\/([a-zA-Z0-9- :]+)/', $page['title'], $m ) ) {
					// parameters are unknown, because that needs parsing of wikitext
					$retval[str_replace( ' ', '_', $m[1] )] = 'unknown';
				}
			}
			if ( !isset( $data['continue'] ) ) {
				return $retval;
			}
			// Feed the continuation values back into the next request
			$params = array_replace( $params, $data['continue'] );
		}
	}

	/**
	 * Get hooks from a PHP file
	 * @param string $filePath Full file path to the PHP file.
	 * @return array Array: key => hook name; value => array of arguments or string 'unknown'
	 */
	private function getHooksFromFile( $filePath ) {
		$content = file_get_contents( $filePath );
		if ( $content === false ) {
			// Unreadable file: nothing to extract
			return [];
		}
		$m = [];
		preg_match_all(
			// All functions which runs hooks
			'/(?:wfRunHooks|Hooks\:\:run)\s*\(\s*' .
				// First argument is the hook name as string
				'([\'"])(.*?)\1' .
				// Comma for second argument
				'(?:\s*(,))?' .
				// Second argument must start with array to be processed
				'(?:\s*(?:array\s*\(|\[)' .
				// Matching inside array - allows one deep of brackets
				'((?:[^\(\)\[\]]|\((?-1)\)|\[(?-1)\])*)' .
				// End
				'[\)\]])?/',
			$content,
			$m,
			PREG_SET_ORDER
		);

		// Extract parameter
		$hooks = [];
		foreach ( $m as $match ) {
			$args = [];
			if ( isset( $match[4] ) ) {
				$n = [];
				if ( preg_match_all( '/((?:[^,\(\)]|\([^\(\)]*\))+)/', $match[4], $n ) ) {
					$args = array_map( 'trim', $n[1] );
					// Remove entries that are empty after trimming (e.g. after a
					// trailing comma). Only empty strings are dropped, so a literal
					// 0 argument is kept, and the list is reindexed so that it can
					// be compared position by position.
					$args = array_values( array_filter( $args, function ( $arg ) {
						return $arg !== '';
					} ) );
				}
			} elseif ( isset( $match[3] ) ) {
				// Found a parameter for Hooks::run,
				// but could not extract the hooks argument,
				// because there are given by a variable
				$args = 'unknown';
			}
			$hooks[$match[2]] = $args;
		}

		return $hooks;
	}

	/**
	 * Get bad hooks (where the hook name could not be determined) from a PHP file
	 * @param string $filePath Full filename to the PHP file.
	 * @return array Array of bad wfRunHooks() lines
	 */
	private function getBadHooksFromFile( $filePath ) {
		$content = file_get_contents( $filePath );
		if ( $content === false ) {
			// Unreadable file: nothing to report
			return [];
		}
		$m = [];
		// We want to skip the "function wfRunHooks()" one. :)
		preg_match_all( '/(?<!function )wfRunHooks\(\s*[^\s\'"].*/', $content, $m );
		$list = [];
		foreach ( $m[0] as $match ) {
			$list[] = $match . "(" . $filePath . ")";
		}

		return $list;
	}

	/**
	 * Get hooks from a directory of PHP files.
	 * @param string $dir Directory path to start at
	 * @param int $recurse Pass self::FIND_RECURSIVE to descend into subdirectories
	 * @return array Array with two keys: 'good' => hooks as returned by getHooksFromFile(),
	 *   'bad' => unclear calls as returned by getBadHooksFromFile()
	 */
	private function getHooksFromDir( $dir, $recurse = self::FIND_NON_RECURSIVE ) {
		$good = [];
		$bad = [];

		if ( $recurse === self::FIND_RECURSIVE ) {
			$iterator = new RecursiveIteratorIterator(
				new RecursiveDirectoryIterator( $dir, RecursiveDirectoryIterator::SKIP_DOTS ),
				RecursiveIteratorIterator::SELF_FIRST
			);
		} else {
			$iterator = new DirectoryIterator( $dir );
		}

		foreach ( $iterator as $info ) {
			// Ignore directories, work only on php files,
			if ( $info->isFile() && in_array( $info->getExtension(), [ 'php', 'inc' ], true )
				// Skip this file as it contains text that looks like a bad wfRunHooks() call
				&& $info->getRealPath() !== __FILE__
			) {
				$good = array_merge( $good, $this->getHooksFromFile( $info->getRealPath() ) );
				$bad = array_merge( $bad, $this->getBadHooksFromFile( $info->getRealPath() ) );
			}
		}

		return [ 'good' => $good, 'bad' => $bad ];
	}

	/**
	 * Nicely sort and print an array
	 * @param string $msg A message to show before the value
	 * @param array $arr
	 */
	private function printArray( $msg, $arr ) {
		asort( $arr );

		foreach ( $arr as $v ) {
			$this->output( "$msg: $v\n" );
		}
	}
}
351
// Name the maintenance class defined in this file, then include the file that
// RUN_MAINTENANCE_IF_MAIN points to. NOTE(review): that constant is defined
// outside this file; presumably the included file instantiates and runs
// $maintClass when this script is the entry point — confirm.
352 $maintClass = 'FindHooks';
353 require_once RUN_MAINTENANCE_IF_MAIN;