Merge "Selenium: replace UserLoginPage with BlankPage where possible"
[lhc/web/wiklou.git] / includes / Rest / PathTemplateMatcher / PathMatcher.php
1 <?php
2
3 namespace MediaWiki\Rest\PathTemplateMatcher;
4
5 /**
6 * A tree-based path routing algorithm.
7 *
8 * This container builds defined routing templates into a tree, allowing
9 * paths to be efficiently matched against all templates. The match time is
10 * independent of the number of registered path templates.
11 *
12 * Efficient matching comes at the cost of a potentially significant setup time.
13 * We measured ~10ms for 1000 templates. Using getCacheData() and
14 * newFromCache(), this setup time may be amortized over multiple requests.
15 */
16 class PathMatcher {
17 /**
18 * An array of trees indexed by the number of path components in the input.
19 *
20 * A tree node consists of an associative array in which the key is a match
21 * specifier string, and the value is another node. A leaf node, which is
22 * identifiable by its fixed depth in the tree, consists of an associative
23 * array with the following keys:
24 * - template: The path template string
25 * - paramNames: A list of parameter names extracted from the template
26 * - userData: The user data supplied to add()
27 *
28 * A match specifier string may be either "*", which matches any path
29 * component, or a literal string prefixed with "=", which matches the
30 * specified deprefixed string literal.
31 *
32 * @var array
33 */
34 private $treesByLength = [];
35
36 /**
37 * Create a PathMatcher from cache data
38 *
39 * @param array $data The data array previously returned by getCacheData()
40 * @return PathMatcher
41 */
42 public static function newFromCache( $data ) {
43 $matcher = new self;
44 $matcher->treesByLength = $data;
45 return $matcher;
46 }
47
48 /**
49 * Get a data array for later use by newFromCache().
50 *
51 * The internal format is private to PathMatcher, but note that it includes
52 * any data passed as $userData to add(). The array returned will be
53 * serializable as long as all $userData values are serializable.
54 *
55 * @return array
56 */
57 public function getCacheData() {
58 return $this->treesByLength;
59 }
60
61 /**
62 * Determine whether a path template component is a parameter
63 *
64 * @param string $part
65 * @return bool
66 */
67 private function isParam( $part ) {
68 $partLength = strlen( $part );
69 return $partLength > 2 && $part[0] === '{' && $part[$partLength - 1] === '}';
70 }
71
72 /**
73 * If a path template component is a parameter, return the parameter name.
74 * Otherwise, return false.
75 *
76 * @param string $part
77 * @return string|false
78 */
79 private function getParamName( $part ) {
80 if ( $this->isParam( $part ) ) {
81 return substr( $part, 1, -1 );
82 } else {
83 return false;
84 }
85 }
86
87 /**
88 * Recursively search the match tree, checking whether the proposed path
89 * template, passed as an array of component parts, can be added to the
90 * matcher without ambiguity.
91 *
92 * Ambiguity means that a path exists which matches multiple templates.
93 *
94 * The function calls itself recursively, incrementing $index so as to
95 * ignore a prefix of the input, in order to check deeper parts of the
96 * match tree.
97 *
98 * If a conflict is discovered, the conflicting leaf node is returned.
99 * Otherwise, false is returned.
100 *
101 * @param array $node The tree node to check against
102 * @param string[] $parts The array of path template parts
103 * @param int $index The current index into $parts
104 * @return array|false
105 */
106 private function findConflict( $node, $parts, $index = 0 ) {
107 if ( $index >= count( $parts ) ) {
108 // If we reached the leaf node then a conflict is detected
109 return $node;
110 }
111 $part = $parts[$index];
112 $result = false;
113 if ( $this->isParam( $part ) ) {
114 foreach ( $node as $key => $childNode ) {
115 $result = $this->findConflict( $childNode, $parts, $index + 1 );
116 if ( $result !== false ) {
117 break;
118 }
119 }
120 } else {
121 if ( isset( $node["=$part"] ) ) {
122 $result = $this->findConflict( $node["=$part"], $parts, $index + 1 );
123 }
124 if ( $result === false && isset( $node['*'] ) ) {
125 $result = $this->findConflict( $node['*'], $parts, $index + 1 );
126 }
127 }
128 return $result;
129 }
130
131 /**
132 * Add a template to the matcher.
133 *
134 * The path template consists of components separated by "/". Each component
135 * may be either a parameter of the form {paramName}, or a literal string.
136 * A parameter matches any input path component, whereas a literal string
137 * matches itself.
138 *
139 * Path templates must not conflict with each other, that is, any input
140 * path must match at most one path template. If a path template conflicts
141 * with another already registered, this function throws a PathConflict
142 * exception.
143 *
144 * @param string $template The path template
145 * @param mixed $userData User data used to identify the matched route to
146 * the caller of match()
147 * @throws PathConflict
148 */
149 public function add( $template, $userData ) {
150 $parts = explode( '/', $template );
151 $length = count( $parts );
152 if ( !isset( $this->treesByLength[$length] ) ) {
153 $this->treesByLength[$length] = [];
154 }
155 $tree =& $this->treesByLength[$length];
156 $conflict = $this->findConflict( $tree, $parts );
157 if ( $conflict !== false ) {
158 throw new PathConflict( $template, $userData, $conflict );
159 }
160
161 $params = [];
162 foreach ( $parts as $index => $part ) {
163 $paramName = $this->getParamName( $part );
164 if ( $paramName !== false ) {
165 $params[] = $paramName;
166 $key = '*';
167 } else {
168 $key = "=$part";
169 }
170 if ( $index === $length - 1 ) {
171 $tree[$key] = [
172 'template' => $template,
173 'paramNames' => $params,
174 'userData' => $userData
175 ];
176 } elseif ( !isset( $tree[$key] ) ) {
177 $tree[$key] = [];
178 }
179 $tree =& $tree[$key];
180 }
181 }
182
183 /**
184 * Match a path against the current match trees.
185 *
186 * If the path matches a previously added path template, an array will be
187 * returned with the following keys:
188 * - params: An array mapping parameter names to their detected values
189 * - userData: The user data passed to add(), which identifies the route
190 *
191 * If the path does not match any template, false is returned.
192 *
193 * @param string $path
194 * @return array|false
195 */
196 public function match( $path ) {
197 $parts = explode( '/', $path );
198 $length = count( $parts );
199 if ( !isset( $this->treesByLength[$length] ) ) {
200 return false;
201 }
202 $node = $this->treesByLength[$length];
203
204 $paramValues = [];
205 foreach ( $parts as $part ) {
206 if ( isset( $node["=$part"] ) ) {
207 $node = $node["=$part"];
208 } elseif ( isset( $node['*'] ) ) {
209 $node = $node['*'];
210 $paramValues[] = $part;
211 } else {
212 return false;
213 }
214 }
215
216 return [
217 'params' => array_combine( $node['paramNames'], $paramValues ),
218 'userData' => $node['userData']
219 ];
220 }
221 }