Merge "Provide command to adjust phpunit.xml for code coverage"
[lhc/web/wiklou.git] / includes / PathRouter.php
1 <?php
2 /**
3 * Parser to extract query parameters out of REQUEST_URI paths.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 /**
24 * PathRouter class.
25 * This class can take patterns such as /wiki/$1 and use them to
26 * parse query parameters out of REQUEST_URI paths.
27 *
28 * $router->add( "/wiki/$1" );
29 * - Matches /wiki/Foo style urls and extracts the title
30 * $router->add( [ 'edit' => "/edit/$key" ], [ 'action' => '$key' ] );
31 * - Matches /edit/Foo style urls and sets action=edit
32 * $router->add( '/$2/$1',
33 * [ 'variant' => '$2' ],
34 * [ '$2' => [ 'zh-hant', 'zh-hans' ] ]
35 * );
36 * - Matches /zh-hant/Foo or /zh-hans/Foo
37 * $router->addStrict( "/foo/Bar", [ 'title' => 'Baz' ] );
38 * - Matches /foo/Bar explicitly and uses "Baz" as the title
39 * $router->add( '/help/$1', [ 'title' => 'Help:$1' ] );
40 * - Matches /help/Foo with "Help:Foo" as the title
41 * $router->add( '/$1', [ 'foo' => [ 'value' => 'bar$2' ] ] );
42 * - Matches /Foo and sets 'foo' to 'bar$2' without $2 being replaced
43 * $router->add( '/$1', [ 'data:foo' => 'bar' ], [ 'callback' => 'functionname' ] );
44 * - Matches /Foo, adds the key 'foo' with the value 'bar' to the data array
45 * and calls functionname( &$matches, $data );
46 *
47 * Path patterns:
48 * - Paths may contain $# patterns such as $1, $2, etc...
49 * - $1 will match 0 or more while the rest will match 1 or more
50 * - Unless you use addStrict "/wiki" and "/wiki/" will be expanded to "/wiki/$1"
51 *
52 * Params:
53 * - In a pattern $1, $2, etc... will be replaced with the relevant contents
54 * - If you used a keyed array as a path pattern, $key will be replaced with
55 * the relevant contents
56 * - The default behavior is equivalent to `[ 'title' => '$1' ]`,
57 * if you don't want the title parameter you can explicitly use `[ 'title' => false ]`
58 * - You can specify a value that won't have replacements in it
59 * using `'foo' => [ 'value' => 'bar' ];`
60 *
61 * Options:
62 * - The option keys $1, $2, etc... can be specified to restrict the possible values
63 * of that variable. A string can be used for a single value, or an array for multiple.
64 * - When the option key 'strict' is set (Using addStrict is simpler than doing this directly)
65 * the path won't have $1 implicitly added to it.
66 * - The option key 'callback' can specify a callback that will be run when a path is matched.
67 * The callback will have the arguments ( &$matches, $data ) and the matches array can
68 * be modified.
69 *
70 * @since 1.19
71 * @author Daniel Friesen
72 */
class PathRouter {

	/**
	 * Registered patterns. Each entry is a stdClass built by doAdd() with the
	 * fields `path`, `params`, `options`, `key` and `weight`.
	 *
	 * @var object[]
	 */
	private $patterns = [];

	/**
	 * Tell whether a path pattern contains the $1 placeholder itself, as opposed
	 * to a longer placeholder number such as $10 that merely starts with "$1".
	 *
	 * @param string $path
	 * @return bool
	 */
	private static function hasTitlePlaceholder( $path ) {
		return (bool)preg_match( '/\$1(?!\d)/', $path );
	}

	/**
	 * Turn a $# restriction into a list of values that can be compared strictly
	 * against the (string) text captured from a path.
	 *
	 * Scalars and null are cast to string so that e.g. an integer 10 still
	 * matches the path segment "10". Anything else is kept untouched and can
	 * therefore never equal a captured string.
	 *
	 * @param mixed $allowed A single allowed value or an array of them
	 * @return array
	 */
	private static function normalizeRestriction( $allowed ) {
		$normalized = [];
		foreach ( is_array( $allowed ) ? $allowed : [ $allowed ] as $candidate ) {
			$normalized[] = ( $candidate === null || is_scalar( $candidate ) )
				? (string)$candidate
				: $candidate;
		}
		return $normalized;
	}

	/**
	 * Protected helper to do the actual bulk work of adding a single pattern.
	 * This is in a separate method so that add() can handle the difference between
	 * a single string $path and an array $path that contains multiple path
	 * patterns each with an associated $key to pass on.
	 *
	 * @param string $path
	 * @param array $params
	 * @param array $options
	 * @param null|string $key
	 */
	protected function doAdd( $path, $params, $options, $key = null ) {
		// Make sure all paths start with a /. The emptiness check keeps an empty
		// pattern from reading a string offset that does not exist.
		if ( $path === '' || $path[0] !== '/' ) {
			$path = '/' . $path;
		}

		// Unless this is a strict path make sure that the path has a $1
		if ( empty( $options['strict'] ) && !self::hasTitlePlaceholder( $path ) ) {
			if ( substr( $path, -1 ) !== '/' ) {
				$path .= '/';
			}
			$path .= '$1';
		}

		// If 'title' is not specified and our path pattern contains a $1
		// Add a default 'title' => '$1' rule to the parameters.
		if ( !isset( $params['title'] ) && self::hasTitlePlaceholder( $path ) ) {
			$params['title'] = '$1';
		}
		// If the user explicitly marked 'title' as false then omit it from the matches
		if ( isset( $params['title'] ) && $params['title'] === false ) {
			unset( $params['title'] );
		}

		// Loop over our parameters and convert basic key => string
		// patterns into fully descriptive array form
		foreach ( $params as $paramName => $paramData ) {
			if ( !is_string( $paramData ) ) {
				continue;
			}
			// If there's no replacement use a value instead
			// of a pattern for a little more efficiency
			$paramArrKey = preg_match( '/\$(\d+|key)/u', $paramData ) ? 'pattern' : 'value';
			$params[$paramName] = [ $paramArrKey => $paramData ];
		}

		// Loop over our options and convert any single value $# restrictions
		// into an array so we only have to do in_array tests.
		foreach ( $options as $optionName => $optionData ) {
			if ( preg_match( '/^\$\d+$/u', $optionName ) && !is_array( $optionData ) ) {
				$options[$optionName] = [ $optionData ];
			}
		}

		$pattern = (object)[
			'path' => $path,
			'params' => $params,
			'options' => $options,
			'key' => $key,
		];
		$pattern->weight = self::makeWeight( $pattern );
		$this->patterns[] = $pattern;
	}

	/**
	 * Add a new path pattern to the path router
	 *
	 * @param string|array $path The path pattern to add. When an array is given,
	 *  every entry is added as its own pattern and its array key is made
	 *  available to the params as $key.
	 * @param array $params The params for this path pattern
	 * @param array $options The options for this path pattern
	 */
	public function add( $path, $params = [], $options = [] ) {
		if ( is_array( $path ) ) {
			foreach ( $path as $key => $onePath ) {
				$this->doAdd( $onePath, $params, $options, $key );
			}
		} else {
			$this->doAdd( $path, $params, $options );
		}
	}

	/**
	 * Add a new path pattern to the path router with the strict option on
	 *
	 * @see self::add
	 * @param string|array $path
	 * @param array $params
	 * @param array $options
	 */
	public function addStrict( $path, $params = [], $options = [] ) {
		$options['strict'] = true;
		$this->add( $path, $params, $options );
	}

	/**
	 * Protected helper to re-sort our patterns so that the most specific
	 * (most heavily weighted) patterns are at the start of the array.
	 */
	protected function sortByWeight() {
		$weights = [];
		foreach ( $this->patterns as $key => $pattern ) {
			$weights[$key] = $pattern->weight;
		}
		array_multisort( $weights, SORT_DESC, SORT_NUMERIC, $this->patterns );
	}

	/**
	 * Compute how specific a pattern is; heavier patterns are tried first.
	 *
	 * @param object $pattern Pattern object with at least `path` and `options`
	 * @return float|int
	 */
	protected static function makeWeight( $pattern ) {
		$weight = 0;

		// Score each level of the path separately
		foreach ( explode( '/', $pattern->path ) as $piece ) {
			if ( preg_match( '/^\$(\d+|key)$/u', $piece ) ) {
				// A piece that is only a variable is worth 1 point
				$weight += 1;
			} elseif ( preg_match( '/\$(\d+|key)/u', $piece ) ) {
				// A piece that merely contains a variable is worth 2 points
				$weight += 2;
			} else {
				// A solid piece is worth the full 3 points
				$weight += 3;
			}
		}

		foreach ( $pattern->options as $key => $option ) {
			if ( preg_match( '/^\$\d+$/u', $key ) ) {
				// Add 0.5 for restrictions to values.
				// This way given two separate "/$2/$1" patterns the
				// one with a limited set of $2 values will dominate
				// the one that'll match more loosely
				$weight += 0.5;
			}
		}

		return $weight;
	}

	/**
	 * Parse a path and return the query matches for the path
	 *
	 * @param string $path The path to parse
	 * @return array The array of matches for the path, [] when nothing matched
	 */
	public function parse( $path ) {
		// Make sure our patterns are sorted by weight so the most specific
		// matches are tested first
		$this->sortByWeight();

		$matches = $this->internalParse( $path );
		if ( $matches === null ) {
			// Try with the normalized path (T100782)
			$path = wfRemoveDotSegments( $path );
			$path = preg_replace( '#/+#', '/', $path );
			$matches = $this->internalParse( $path );
		}

		// We know the difference between null (no matches) and
		// [] (a match with no data) but our WebRequest caller
		// expects [] even when we have no matches so return
		// a [] when we have null
		return $matches ?? [];
	}

	/**
	 * Match a path against each defined pattern, in their current order,
	 * and return the result of the first pattern that matches.
	 *
	 * @param string $path
	 * @return array|null Null when no pattern matched
	 */
	protected function internalParse( $path ) {
		foreach ( $this->patterns as $pattern ) {
			$matches = self::extractTitle( $path, $pattern );
			if ( $matches !== null ) {
				return $matches;
			}
		}
		return null;
	}

	/**
	 * Try a single pattern against a path.
	 *
	 * @param string $path
	 * @param object $pattern Pattern object as built by doAdd()
	 * @return array|null The query matches, or null when the pattern does not apply
	 */
	protected static function extractTitle( $path, $pattern ) {
		// Convert the path pattern into a regexp we can match with
		$regexp = preg_quote( $pattern->path, '#' );
		// .* for the $1; the lookahead keeps $10, $11, ... from being
		// mistaken for a $1 followed by a literal digit
		$regexp = preg_replace( '#\\\\\$1(?!\d)#u', '(?P<par1>.*)', $regexp );
		// .+ for the rest of the parameter numbers
		$regexp = preg_replace( '#\\\\\$(\d+)#u', '(?P<par$1>.+?)', $regexp );
		$regexp = "#^{$regexp}$#";

		// Try to match the path we were asked to parse with our regexp
		if ( !preg_match( $regexp, $path, $m ) ) {
			// Our regexp didn't match, return null to signify no match.
			return null;
		}

		// Ensure that any $# restriction we have set in our {$option}s
		// matches properly here.
		foreach ( $pattern->options as $key => $option ) {
			if ( !preg_match( '/^\$\d+$/u', $key ) ) {
				continue;
			}
			$n = intval( substr( $key, 1 ) );
			// A restricted placeholder that is absent from the path is
			// treated as an empty capture rather than an undefined index
			$value = rawurldecode( $m["par{$n}"] ?? '' );
			// Compare as strings: a loose comparison would let numeric-looking
			// text such as "1e1" satisfy a restriction of "10"
			if ( !in_array( $value, self::normalizeRestriction( $option ), true ) ) {
				// If any restriction does not match return null
				// to signify that this rule did not match.
				return null;
			}
		}

		$matches = [];
		$data = [];

		// Give our $data array a copy of every $# that was matched
		foreach ( $m as $matchKey => $matchValue ) {
			if ( preg_match( '/^par\d+$/u', $matchKey ) ) {
				$n = intval( substr( $matchKey, 3 ) );
				$data['$' . $n] = rawurldecode( $matchValue );
			}
		}
		// If present give our $data array a $key as well
		if ( isset( $pattern->key ) ) {
			$data['$key'] = $pattern->key;
		}

		// Go through our parameters for this match and add data to our matches and data arrays
		foreach ( $pattern->params as $paramName => $paramData ) {
			$value = null;
			// Differentiate data: from normal parameters and keep the correct
			// array key around (ie: foo for data:foo)
			if ( preg_match( '/^data:/u', $paramName ) ) {
				$isData = true;
				$key = substr( $paramName, 5 );
			} else {
				$isData = false;
				$key = $paramName;
			}

			if ( isset( $paramData['value'] ) ) {
				// For basic values just set the raw data as the value
				$value = $paramData['value'];
			} elseif ( isset( $paramData['pattern'] ) ) {
				// For patterns we have to make value replacements on the string
				$value = self::expandParamValue( $m, $pattern->key ?? null,
					$paramData['pattern'] );
				if ( $value === false ) {
					// Pattern required data that wasn't available, abort
					return null;
				}
			}

			// Send things that start with data: to $data, the rest to $matches
			if ( $isData ) {
				$data[$key] = $value;
			} else {
				$matches[$key] = $value;
			}
		}

		// If this match includes a callback, execute it. $matches is handed
		// over by reference so the callback can modify the result.
		if ( isset( $pattern->options['callback'] ) ) {
			call_user_func_array( $pattern->options['callback'], [ &$matches, $data ] );
		}

		// Everything went ok, return our matches array
		return $matches;
	}

	/**
	 * Replace $key etc. in param values with the matched strings from the path.
	 *
	 * @param array $pathMatches The match results from the path
	 * @param string|null $key The key of the matching pattern
	 * @param string $value The param value to be expanded
	 * @return string|false False when the value refers to a $# or $key that
	 *  is not available for this match
	 */
	protected static function expandParamValue( $pathMatches, $key, $value ) {
		$error = false;

		$replacer = static function ( $m ) use ( $pathMatches, $key, &$error ) {
			if ( $m[1] === 'key' ) {
				if ( $key === null ) {
					$error = true;

					return '';
				}

				return $key;
			}

			$group = 'par' . $m[1];
			if ( !isset( $pathMatches[$group] ) ) {
				$error = true;

				return '';
			}

			return rawurldecode( $pathMatches[$group] );
		};

		$value = preg_replace_callback( '/\$(\d+|key)/u', $replacer, $value );
		if ( $error ) {
			return false;
		}

		return $value;
	}

	/**
	 * Fill in the 'view' action path when action paths are in use.
	 *
	 * @internal For use by Title and WebRequest only.
	 * @param array $actionPaths
	 * @param string $articlePath Used for 'view' when $actionPaths does not set it
	 * @return string[]|false False when $actionPaths is empty
	 */
	public static function getActionPaths( array $actionPaths, $articlePath ) {
		if ( !$actionPaths ) {
			return false;
		}
		// Processing of urls for this feature requires that 'view' is set.
		// By default, set it to the pretty article path.
		if ( !isset( $actionPaths['view'] ) ) {
			$actionPaths['view'] = $articlePath;
		}
		return $actionPaths;
	}
}