Merge "ChangesList: HTML escape the timestamp"
[lhc/web/wiklou.git] / includes / MagicWordArray.php
1 <?php
2
3 /**
4 * See docs/magicword.txt.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Parser
23 */
24
25 use MediaWiki\Logger\LoggerFactory;
26 use MediaWiki\MediaWikiServices;
27
28 /**
29 * Class for handling an array of magic words
30 * @ingroup Parser
31 */
class MagicWordArray {
	/** @var array List of magic word names held by this array */
	public $names = [];

	/** @var MagicWordFactory Factory used to turn a name into a magic word object */
	private $factory;

	/** @var array|null Cache for getHash(); null until built and after the name list changes */
	private $hash;

	/** @var array|null Cache for getBaseRegex(); null until built and after the name list changes */
	private $baseRegex;

	/** @var array|null Cache for getRegex(); null until built and after the name list changes */
	private $regex;

	/**
	 * @param array $names Initial list of magic word names
	 * @param MagicWordFactory|null $factory Factory to use; when omitted, the one
	 *   provided by MediaWikiServices is used
	 */
	public function __construct( $names = [], MagicWordFactory $factory = null ) {
		$this->names = $names;
		$this->factory = $factory;
		if ( !$factory ) {
			$this->factory = MediaWikiServices::getInstance()->getMagicWordFactory();
		}
	}

	/**
	 * Add a magic word by name
	 *
	 * Invalidates the cached hash and regexes.
	 *
	 * @param string $name
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Add a number of magic words by name
	 *
	 * Keys of $names are ignored; only the values are appended.
	 * Invalidates the cached hash and regexes.
	 *
	 * @param array $names
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->hash = $this->baseRegex = $this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 maps lower-cased synonyms of case-insensitive magic words to
	 * their name; index 1 maps the synonyms of case-sensitive magic words,
	 * unchanged, to their name. The result is cached.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( $this->hash === null ) {
			global $wgContLang;
			$this->hash = [ 0 => [], 1 => [] ];
			foreach ( $this->names as $name ) {
				$magic = $this->factory->get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$this->hash[$case][$syn] = $name;
				}
			}
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * Builds, for each case mode (0 = case-insensitive, 1 = case-sensitive),
	 * an alternation of named groups, one per synonym. The group name is
	 * "<synonym index as letters>_<magic word name>", which is what
	 * parseMatch() later splits to recover the magic word name. The result
	 * is cached; delimiters and modifiers are added by the callers.
	 *
	 * @throws MWException If two synonyms would produce the same group name
	 * @return array Two strings, indexed 0 and 1; '' when there is nothing to match
	 */
	public function getBaseRegex() {
		if ( $this->baseRegex === null ) {
			$this->baseRegex = [ 0 => '', 1 => '' ];
			$allGroups = [];
			foreach ( $this->names as $name ) {
				$magic = $this->factory->get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group name must start with a non-digit in PCRE 8.34+
					$it = strtr( $i, '0123456789', 'abcdefghij' );
					$groupName = $it . '_' . $name;
					$group = '(?P<' . $groupName . '>' . preg_quote( $syn, '/' ) . ')';
					// look for same group names to avoid same named subpatterns in the regex
					if ( isset( $allGroups[$groupName] ) ) {
						throw new MWException(
							__METHOD__ . ': duplicate internal name in magic word array: ' . $name
						);
					}
					$allGroups[$groupName] = true;
					if ( $this->baseRegex[$case] === '' ) {
						$this->baseRegex[$case] = $group;
					} else {
						$this->baseRegex[$case] .= '|' . $group;
					}
				}
			}
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * The result is cached.
	 *
	 * @return array Two regex strings, indexed 0 (case-insensitive) and
	 *   1 (case-sensitive); '' when there is nothing to match in that mode
	 */
	public function getRegex() {
		if ( $this->regex === null ) {
			$base = $this->getBaseRegex();
			$this->regex = [ '', '' ];
			if ( $base[0] !== '' ) {
				$this->regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$this->regex[1] = "/{$base[1]}/S";
			}
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * Same as getRegex(), with every quoted "$1" placeholder replaced by a
	 * lazy capture group.
	 *
	 * @return array Two regex strings, indexed like the result of getRegex()
	 */
	public function getVariableRegex() {
		// str_replace() on an array subject returns an array
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array Two regex strings, indexed 0 (case-insensitive) and
	 *   1 (case-sensitive); '' when there is nothing to match in that mode
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = [ '', '' ];
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * The regex is anchored at both ends and every quoted "$1" placeholder
	 * is replaced by a lazy capture group.
	 *
	 * @return array Two regex strings, indexed 0 (case-insensitive) and
	 *   1 (case-sensitive); '' when there is nothing to match in that mode
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = [ '', '' ];
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @since 1.20
	 * @return array The list of magic word names
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 * If there is no parameter value, that element will be false.
	 *
	 * @param array $m
	 *
	 * @throws MWException
	 * @return array
	 */
	public function parseMatch( $m ) {
		$keys = array_keys( $m );
		$values = array_values( $m );
		foreach ( $keys as $pos => $key ) {
			// Skip the whole-pattern match and every group that did not participate
			if ( $key === 0 || $values[$pos] === '' ) {
				continue;
			}
			$parts = explode( '_', $key, 2 );
			if ( count( $parts ) !== 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			$magicName = $parts[1];
			// A named group is followed by its numeric duplicate; the element
			// after that one, if present, is taken as the parameter value.
			$paramValue = isset( $values[$pos + 2] ) ? $values[$pos + 2] : false;
			return [ $magicName, $paramValue ];
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param string $text
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		foreach ( $this->getVariableStartToEndRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = [];
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return [ false, false ];
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * The case-sensitive table is tried first with the text as given, then
	 * the case-insensitive table with the lower-cased text.
	 *
	 * @param string $text
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		global $wgContLang;
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * If a PCRE call fails, a warning is logged to the 'parser' channel. When
	 * the removal itself fails, $text is left as it was before that step
	 * instead of being overwritten with null.
	 *
	 * @param string &$text
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = [];
		foreach ( $this->getRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = [];
			$res = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			if ( $res === false ) {
				LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all returned false', [
					'code' => preg_last_error(),
					'regex' => $regex,
					'text' => $text,
				] );
			} elseif ( $res ) {
				foreach ( $matches as $m ) {
					list( $name, $param ) = $this->parseMatch( $m );
					$found[$name] = $param;
				}
			}
			$res = preg_replace( $regex, '', $text );
			if ( $res === null ) {
				LoggerFactory::getInstance( 'parser' )->warning( 'preg_replace returned null', [
					'code' => preg_last_error(),
					'regex' => $regex,
					'text' => $text,
				] );
				// Keep the caller's text intact rather than replacing it with null
				continue;
			}
			$text = $res;
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param string &$text
	 *
	 * @return string|bool Magic word name, or false on failure
	 */
	public function matchStartAndRemove( &$text ) {
		foreach ( $this->getRegexStart() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = [];
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				$prefixLength = strlen( $m[0] );
				// Handle the whole-string case explicitly so that $text always
				// ends up a string, whatever substr() returns at the boundary.
				if ( $prefixLength >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, $prefixLength );
				}
				return $id;
			}
		}
		return false;
	}
}