widget: Fix changes of copyright year
[lhc/web/wiklou.git] / includes / MagicWordArray.php
1 <?php
2
3 /**
4 * See docs/magicword.txt.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Parser
23 */
24
25 use MediaWiki\Logger\LoggerFactory;
26
27 /**
28 * Class for handling an array of magic words
29 * @ingroup Parser
30 */
class MagicWordArray {
	/** @var string[] Magic word names held by this array, in insertion order */
	public $names = [];

	/**
	 * @var array|null Cached result of getHash(); null until built and after
	 *  every call to add()/addArray().
	 */
	private $hash;

	/**
	 * @var string[]|null Cached result of getBaseRegex(); null until built and
	 *  after every call to add()/addArray().
	 */
	private $baseRegex;

	/**
	 * @var string[]|null Cached result of getRegex(); null until built and
	 *  after every call to add()/addArray().
	 */
	private $regex;

	/**
	 * @param array $names Initial list of magic word names
	 */
	public function __construct( $names = [] ) {
		$this->names = $names;
	}

	/**
	 * Add a magic word by name
	 *
	 * @param string $name
	 */
	public function add( $name ) {
		$this->names[] = $name;
		$this->resetCaches();
	}

	/**
	 * Add a number of magic words by name
	 *
	 * @param array $names Names to append; the keys of this array are ignored
	 */
	public function addArray( $names ) {
		$this->names = array_merge( $this->names, array_values( $names ) );
		$this->resetCaches();
	}

	/**
	 * Drop every lazily built structure so that it is rebuilt from the
	 * current list of names on next use.
	 */
	private function resetCaches() {
		$this->hash = null;
		$this->baseRegex = null;
		$this->regex = null;
	}

	/**
	 * Get a 2-d hashtable for this array
	 *
	 * Index 0 maps the lower-cased synonyms of the case-insensitive magic
	 * words to their name, index 1 maps the unmodified synonyms of the
	 * case-sensitive magic words to their name.
	 *
	 * @return array
	 */
	public function getHash() {
		if ( $this->hash === null ) {
			global $wgContLang;
			// Build into a local and cache only the finished table, so that an
			// exception raised half-way never leaves a partial table cached.
			$hash = [ 0 => [], 1 => [] ];
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $syn ) {
					if ( !$case ) {
						$syn = $wgContLang->lc( $syn );
					}
					$hash[$case][$syn] = $name;
				}
			}
			$this->hash = $hash;
		}
		return $this->hash;
	}

	/**
	 * Get the base regex
	 *
	 * The result holds two undelimited alternations of named groups, one
	 * group per synonym: index 0 for the case-insensitive magic words and
	 * index 1 for the case-sensitive ones. An entry is '' when there is no
	 * magic word of that kind.
	 *
	 * @throws MWException If two synonyms would produce the same group name
	 * @return array
	 */
	public function getBaseRegex() {
		if ( $this->baseRegex === null ) {
			// Collect the alternatives locally and cache only on success;
			// otherwise the exception below would leave a truncated regex
			// behind that later calls would return without complaint.
			$alternatives = [ 0 => [], 1 => [] ];
			$allGroups = [];
			foreach ( $this->names as $name ) {
				$magic = MagicWord::get( $name );
				$case = intval( $magic->isCaseSensitive() );
				foreach ( $magic->getSynonyms() as $i => $syn ) {
					// Group name must start with a non-digit in PCRE 8.34+
					$it = strtr( (string)$i, '0123456789', 'abcdefghij' );
					$groupName = $it . '_' . $name;
					// look for same group names to avoid same named subpatterns in the regex
					if ( isset( $allGroups[$groupName] ) ) {
						throw new MWException(
							__METHOD__ . ': duplicate internal name in magic word array: ' . $name
						);
					}
					$allGroups[$groupName] = true;
					$alternatives[$case][] =
						'(?P<' . $groupName . '>' . preg_quote( $syn, '/' ) . ')';
				}
			}
			$this->baseRegex = [
				0 => implode( '|', $alternatives[0] ),
				1 => implode( '|', $alternatives[1] ),
			];
		}
		return $this->baseRegex;
	}

	/**
	 * Get an unanchored regex that does not match parameters
	 *
	 * @return array Index 0: case-insensitive regex, index 1: case-sensitive
	 *  regex. An entry is '' when there is nothing to match.
	 */
	public function getRegex() {
		if ( $this->regex === null ) {
			$base = $this->getBaseRegex();
			$regex = [ '', '' ];
			if ( $base[0] !== '' ) {
				$regex[0] = "/{$base[0]}/iuS";
			}
			if ( $base[1] !== '' ) {
				$regex[1] = "/{$base[1]}/S";
			}
			$this->regex = $regex;
		}
		return $this->regex;
	}

	/**
	 * Get a regex for matching variables with parameters
	 *
	 * This is getRegex() with every quoted "$1" placeholder replaced by a
	 * lazy capture group.
	 *
	 * @return array Same layout as getRegex()
	 */
	public function getVariableRegex() {
		return str_replace( "\\$1", "(.*?)", $this->getRegex() );
	}

	/**
	 * Get a regex anchored to the start of the string that does not match parameters
	 *
	 * @return array Same layout as getRegex()
	 */
	public function getRegexStart() {
		$base = $this->getBaseRegex();
		$newRegex = [ '', '' ];
		if ( $base[0] !== '' ) {
			$newRegex[0] = "/^(?:{$base[0]})/iuS";
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = "/^(?:{$base[1]})/S";
		}
		return $newRegex;
	}

	/**
	 * Get an anchored regex for matching variables with parameters
	 *
	 * @return array Same layout as getRegex()
	 */
	public function getVariableStartToEndRegex() {
		$base = $this->getBaseRegex();
		$newRegex = [ '', '' ];
		if ( $base[0] !== '' ) {
			$newRegex[0] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[0]})$/iuS" );
		}
		if ( $base[1] !== '' ) {
			$newRegex[1] = str_replace( "\\$1", "(.*?)", "/^(?:{$base[1]})$/S" );
		}
		return $newRegex;
	}

	/**
	 * @since 1.20
	 * @return array
	 */
	public function getNames() {
		return $this->names;
	}

	/**
	 * Parse a match array from preg_match
	 * Returns array(magic word ID, parameter value)
	 *
	 * The magic word is identified by the first element of $m that is neither
	 * the whole match (key 0) nor an empty string. The parameter value is the
	 * element two positions further on, i.e. the one following the numeric
	 * duplicate of the named group; it is false when $m ends before that.
	 *
	 * @param array $m
	 *
	 * @throws MWException
	 * @return array
	 */
	public function parseMatch( $m ) {
		reset( $m );
		while ( ( $key = key( $m ) ) !== null ) {
			$value = current( $m );
			// Advance now: on a hit the pointer already sits on the numeric
			// duplicate of the named group.
			next( $m );
			if ( $key === 0 || $value === '' ) {
				continue;
			}
			// Group names look like "<synonym index>_<magic word name>"
			$parts = explode( '_', (string)$key, 2 );
			if ( count( $parts ) != 2 ) {
				// This shouldn't happen
				throw new MWException( __METHOD__ . ': bad parameter name' );
			}
			$magicName = $parts[1];
			// Step over the numeric duplicate onto the parameter capture
			$paramValue = next( $m );
			return [ $magicName, $paramValue ];
		}
		// This shouldn't happen either
		throw new MWException( __METHOD__ . ': parameter not found' );
	}

	/**
	 * Match some text, with parameter capture
	 * Returns an array with the magic word name in the first element and the
	 * parameter in the second element.
	 * Both elements are false if there was no match.
	 *
	 * @param string $text
	 *
	 * @return array
	 */
	public function matchVariableStartToEnd( $text ) {
		foreach ( $this->getVariableStartToEndRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = [];
			if ( preg_match( $regex, $text, $m ) ) {
				return $this->parseMatch( $m );
			}
		}
		return [ false, false ];
	}

	/**
	 * Match some text, without parameter capture
	 * Returns the magic word name, or false if there was no capture
	 *
	 * Case-sensitive synonyms are tried first, then the lower-cased text is
	 * looked up among the case-insensitive ones.
	 *
	 * @param string $text
	 *
	 * @return string|bool False on failure
	 */
	public function matchStartToEnd( $text ) {
		global $wgContLang;
		$hash = $this->getHash();
		if ( isset( $hash[1][$text] ) ) {
			return $hash[1][$text];
		}
		$lc = $wgContLang->lc( $text );
		if ( isset( $hash[0][$lc] ) ) {
			return $hash[0][$lc];
		}
		return false;
	}

	/**
	 * Returns an associative array, ID => param value, for all items that match
	 * Removes the matched items from the input string (passed by reference)
	 *
	 * PCRE failures are logged to the 'parser' channel. When the removal
	 * itself fails, $text is left as it was instead of being set to null.
	 *
	 * @param string &$text
	 *
	 * @return array
	 */
	public function matchAndRemove( &$text ) {
		$found = [];
		foreach ( $this->getRegex() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$matches = [];
			$res = preg_match_all( $regex, $text, $matches, PREG_SET_ORDER );
			if ( $res === false ) {
				LoggerFactory::getInstance( 'parser' )->warning( 'preg_match_all returned false', [
					'code' => preg_last_error(),
					'regex' => $regex,
					'text' => $text,
				] );
			} elseif ( $res ) {
				foreach ( $matches as $m ) {
					list( $name, $param ) = $this->parseMatch( $m );
					$found[$name] = $param;
				}
			}
			$res = preg_replace( $regex, '', $text );
			if ( $res === null ) {
				LoggerFactory::getInstance( 'parser' )->warning( 'preg_replace returned null', [
					'code' => preg_last_error(),
					'regex' => $regex,
					'text' => $text,
				] );
				// Do not hand the null back to the caller through the
				// reference; keep the text we were given.
				continue;
			}
			$text = $res;
		}
		return $found;
	}

	/**
	 * Return the ID of the magic word at the start of $text, and remove
	 * the prefix from $text.
	 * Return false if no match found and $text is not modified.
	 * Does not match parameters.
	 *
	 * @param string &$text
	 *
	 * @return string|bool Magic word name, false on failure
	 */
	public function matchStartAndRemove( &$text ) {
		foreach ( $this->getRegexStart() as $regex ) {
			if ( $regex === '' ) {
				continue;
			}
			$m = [];
			if ( preg_match( $regex, $text, $m ) ) {
				list( $id, ) = $this->parseMatch( $m );
				$prefixLength = strlen( $m[0] );
				// substr() does not return '' on every PHP version when the
				// offset equals the string length, so handle that explicitly.
				if ( $prefixLength >= strlen( $text ) ) {
					$text = '';
				} else {
					$text = substr( $text, $prefixLength );
				}
				return $id;
			}
		}
		return false;
	}
}