Merge "Revert "Don't check namespace in SpecialWantedtemplates""
[lhc/web/wiklou.git] / includes / parser / Preprocessor.php
1 <?php
2 /**
3 * Interfaces for preprocessors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 use MediaWiki\Logger\LoggerFactory;
25
/**
 * Abstract base class for preprocessors.
 *
 * Declares the factory/entry-point methods a concrete preprocessor has to
 * implement and provides helpers for storing and loading document trees in
 * the object cache.
 *
 * @ingroup Parser
 */
abstract class Preprocessor {

	/**
	 * Version number prepended (zero-padded to 8 digits) to every cached tree.
	 * cacheGetTree() treats entries carrying any other version as a miss.
	 * It is read through static::, so a subclass may override it.
	 */
	const CACHE_VERSION = 1;

	/**
	 * @var array Brace matching rules, keyed by the opening character. Each
	 *   rule holds the closing character ('end'), a map from a count to a node
	 *   name ('names') and the 'min' / 'max' counts.
	 *   NOTE(review): the counts presumably are the number of consecutive
	 *   opening characters - confirm against the subclasses.
	 */
	protected $rules = array(
		'{' => array(
			'end' => '}',
			'names' => array(
				2 => 'template',
				3 => 'tplarg',
			),
			'min' => 2,
			'max' => 3,
		),
		'[' => array(
			'end' => ']',
			'names' => array( 2 => null ),
			'min' => 2,
			'max' => 2,
		)
	);

	/**
	 * Store a document tree in the cache.
	 *
	 * Nothing is stored when caching is disabled or the text length is
	 * outside the cacheable range (see isTreeCacheable()).
	 *
	 * @param string $text Wikitext the tree was built from; its MD5 is part of the key
	 * @param int $flags Preprocessor flags; part of the key
	 * @param string $tree Tree to store; it is concatenated to the version
	 *   prefix, so it must be a string (or convertible to one)
	 * @return bool|null False when the text is not eligible for caching,
	 *   null (no explicit return value) once the tree has been written
	 */
	protected function cacheSetTree( $text, $flags, $tree ) {
		$config = RequestContext::getMain()->getConfig();

		if ( !$this->isTreeCacheable( $text, $config ) ) {
			return false;
		}

		$key = $this->makeTreeCacheKey( $text, $flags );
		// Fixed-width version prefix; cacheGetTree() strips these 8 characters again.
		$value = sprintf( "%08d", static::CACHE_VERSION ) . $tree;

		$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
		// 86400: presumably the expiry in seconds (one day) - confirm against the cache API.
		$cache->set( $key, $value, 86400 );

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Cached preprocessor output (key: $key)" );
	}

	/**
	 * Attempt to load a precomputed document tree for some given wikitext
	 * from the cache.
	 *
	 * @param string $text Wikitext to look up
	 * @param int $flags Preprocessor flags the tree was built with
	 * @return string|bool The cached value with its 8-character version
	 *   prefix removed, or false when the text is not eligible for caching,
	 *   nothing usable is cached, or the entry has a different CACHE_VERSION
	 */
	protected function cacheGetTree( $text, $flags ) {
		$config = RequestContext::getMain()->getConfig();

		if ( !$this->isTreeCacheable( $text, $config ) ) {
			return false;
		}

		$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
		$key = $this->makeTreeCacheKey( $text, $flags );

		$value = $cache->get( $key );
		// Only strings are ever written by cacheSetTree(); anything else
		// (a miss, or a foreign value under the same key) counts as a miss
		// instead of being handed to substr().
		if ( !$value || !is_string( $value ) ) {
			return false;
		}

		$version = intval( substr( $value, 0, 8 ) );
		if ( $version !== static::CACHE_VERSION ) {
			return false;
		}

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Loaded preprocessor output from cache (key: $key)" );

		return substr( $value, 8 );
	}

	/**
	 * Check whether the tree for the given wikitext may be cached at all.
	 *
	 * Caching is skipped when the 'PreprocessorCacheThreshold' setting is
	 * exactly false, when the text is shorter than that threshold, or when
	 * it is longer than 1e6 bytes.
	 *
	 * @param string $text
	 * @param object $config Object returned by RequestContext::getMain()->getConfig()
	 * @return bool
	 */
	private function isTreeCacheable( $text, $config ) {
		$length = strlen( $text );
		$threshold = $config->get( 'PreprocessorCacheThreshold' );
		if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
			return false;
		}

		return true;
	}

	/**
	 * Build the cache key for the tree of the given wikitext and flags.
	 *
	 * The key is made of a per-class prefix (the CACHE_PREFIX constant of the
	 * called class when it defines one, otherwise the class name), the MD5 of
	 * the text and the flags.
	 *
	 * @param string $text
	 * @param int $flags
	 * @return string Whatever wfMemcKey() produces for these components
	 */
	private function makeTreeCacheKey( $text, $flags ) {
		// TODO: Once we require PHP 5.5, use static::class instead of
		// get_called_class() or get_class( $this ).
		$prefix = defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : get_called_class();

		return wfMemcKey( $prefix, md5( $text ), $flags );
	}

	/**
	 * Create a new top-level frame for expansion of a page
	 *
	 * @return PPFrame
	 */
	abstract public function newFrame();

	/**
	 * Create a new custom frame for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $args
	 *
	 * @return PPFrame
	 */
	abstract public function newCustomFrame( $args );

	/**
	 * Create a new custom node for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $values
	 *
	 * @return mixed The new node. NOTE(review): presumably a PPNode
	 *   implementation chosen by the subclass - confirm.
	 */
	abstract public function newPartNodeArray( $values );

	/**
	 * Preprocess text to a PPNode
	 *
	 * @param string $text
	 * @param int $flags
	 *
	 * @return PPNode
	 */
	abstract public function preprocessToObj( $text, $flags = 0 );
}
159
/**
 * Contract implemented by preprocessor frames.
 *
 * @ingroup Parser
 */
interface PPFrame {
	// Bit flags, each a distinct power of two.
	// NOTE(review): presumably the values accepted as $flags by expand() - confirm.
	const NO_ARGS = 1;
	const NO_TEMPLATES = 2;
	const STRIP_COMMENTS = 4;
	const NO_IGNORE = 8;
	const RECOVER_COMMENTS = 16;
	const NO_TAGS = 32;

	// Combination of NO_ARGS, NO_TEMPLATES, NO_IGNORE, RECOVER_COMMENTS and NO_TAGS.
	const RECOVER_ORIG = 59; // = 1|2|8|16|32 no constant expression support in PHP yet

	/** Marker constant: it is defined when newChild() supports $indexOffset */
	const SUPPORTS_INDEX_OFFSET = 1;

	/**
	 * Build a frame that is a child of this one.
	 *
	 * @param array|bool $args
	 * @param bool|Title $title
	 * @param int $indexOffset Amount subtracted from the index attributes of the arguments
	 *
	 * @return PPFrame
	 */
	public function newChild( $args = false, $title = false, $indexOffset = 0 );

	/**
	 * Expand a node of the document tree and cache the outcome on the
	 * node's parent under the supplied key.
	 *
	 * @param string|int $key
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function cachedExpand( $key, $root, $flags = 0 );

	/**
	 * Expand a node of the document tree.
	 *
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function expand( $root, $flags = 0 );

	/**
	 * Implode, passing the given flags on to expand().
	 *
	 * @param string $sep
	 * @param int $flags
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implodeWithFlags( $sep, $flags /*, ... */ );

	/**
	 * Implode without specifying any flags.
	 *
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implode( $sep /*, ... */ );

	/**
	 * Build an object whose expand()ed form equals what implode() would
	 * have produced.
	 *
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualImplode( $sep /*, ... */ );

	/**
	 * Same as a virtual implode, with brackets around the result.
	 *
	 * @param string $start
	 * @param string $sep
	 * @param string $end
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );

	/**
	 * Tell whether this frame has no arguments at all.
	 *
	 * @return bool True if there are no arguments in this frame
	 */
	public function isEmpty();

	/**
	 * Fetch every argument of this frame.
	 *
	 * @return array
	 */
	public function getArguments();

	/**
	 * Fetch every numbered argument of this frame.
	 *
	 * @return array
	 */
	public function getNumberedArguments();

	/**
	 * Fetch every named argument of this frame.
	 *
	 * @return array
	 */
	public function getNamedArguments();

	/**
	 * Look up one argument of this frame by its name.
	 *
	 * @param string $name
	 * @return bool NOTE(review): implementations presumably return the
	 *   argument's value and false only when it is missing - confirm.
	 */
	public function getArgument( $name );

	/**
	 * Run the infinite loop check for a title.
	 *
	 * @param Title $title
	 * @return bool True if the check is OK, false if a loop is detected
	 */
	public function loopCheck( $title );

	/**
	 * Tell whether this frame is a template frame.
	 *
	 * @return bool
	 */
	public function isTemplate();

	/**
	 * Set the "volatile" flag.
	 *
	 * This is somewhat of a "hack" whose purpose is to let extensions with
	 * side effects (such as Cite) work with the PHP parser. New extensions
	 * should be written so that they do not need this function: other
	 * parsers (such as Parsoid) are not guaranteed to respect it, and it
	 * may be removed in the future.
	 *
	 * @param bool $flag
	 */
	public function setVolatile( $flag = true );

	/**
	 * Get the "volatile" flag.
	 *
	 * When the flag is set, callers should avoid caching the result of an
	 * expansion.
	 *
	 * @see self::setVolatile()
	 * @return bool
	 */
	public function isVolatile();

	/**
	 * Get the TTL of the frame's output.
	 *
	 * The TTL is the maximum number of seconds this frame's output should
	 * be cached for; null means that no maximum has been specified.
	 *
	 * The TTL only matters when frames are cached as parts of pages. It is
	 * not relevant to caching the entire rendered output of a page.
	 *
	 * @return int|null
	 */
	public function getTTL();

	/**
	 * Set the TTL of the output of this frame and all of its ancestors.
	 *
	 * A new TTL greater than the one already set has no effect. Changing
	 * the cache expiry of the page as a whole, if that is wanted, is left
	 * to the caller.
	 *
	 * @see self::getTTL()
	 * @param int $ttl
	 */
	public function setTTL( $ttl );

	/**
	 * Get the title of this frame.
	 *
	 * @return Title
	 */
	public function getTitle();
}
342
/**
 * Nodes come in three kinds:
 *     * Tree nodes: they have a name and contain other nodes as children
 *     * Array nodes: they contain other nodes too, but aren't considered part of a tree
 *     * Leaf nodes: they contain the actual data
 *
 * Through this interface the tree structure and the contents of array nodes
 * are accessible; the internal structure of leaf nodes is not. Leaf data can
 * be reached in two ways:
 *     * PPFrame::expand(), which provides expanded text
 *     * The PPNode::split*() functions, which provide metadata about certain
 *       types of tree node
 * @ingroup Parser
 */
interface PPNode {
	/**
	 * Get the children of this node as an array-type node.
	 *
	 * @return bool|PPNode False if this is not a tree node
	 */
	public function getChildren();

	/**
	 * Get the first child of a tree node.
	 *
	 * @return bool|PPNode False if there isn't one
	 */
	public function getFirstChild();

	/**
	 * Get the next sibling of any node.
	 *
	 * @return bool|PPNode False if there isn't one
	 */
	public function getNextSibling();

	/**
	 * Collect the children of this tree node that have the given name.
	 *
	 * @param string $type
	 * @return bool|PPNode An array-type node, or false if this is not a tree node
	 */
	public function getChildrenOfType( $type );

	/**
	 * Get the length of the array.
	 *
	 * @return int|bool The length, or false if this is not an array-type node
	 */
	public function getLength();

	/**
	 * Get one item of an array-type node.
	 *
	 * @param int $i
	 * @return bool|PPNode
	 */
	public function item( $i );

	/**
	 * Get the name of this node. These names are defined here:
	 *
	 *    h             A heading node.
	 *    template      A double-brace node.
	 *    tplarg        A triple-brace node.
	 *    title         The first argument to a template or tplarg node.
	 *    part          Subsequent arguments to a template or tplarg node.
	 *    #nodelist     An array-type node
	 *
	 * A subclass may define further names for tree and leaf nodes.
	 * @return string
	 */
	public function getName();

	/**
	 * Split a "<part>" node. The resulting associative array contains:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 * @return array
	 */
	public function splitArg();

	/**
	 * Split an "<ext>" node. The resulting associative array contains name,
	 * attr, inner and close; every value in it is a PPNode. Inner and close
	 * are optional.
	 * @return array
	 */
	public function splitExt();

	/**
	 * Split an "<h>" node.
	 * @return array
	 */
	public function splitHeading();
}