Merge "Move up devunt's name to Developers"
[lhc/web/wiklou.git] / includes / parser / Preprocessor.php
1 <?php
2 /**
3 * Interfaces for preprocessors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 use MediaWiki\Logger\LoggerFactory;
25
/**
 * Base class for wikitext preprocessors.
 *
 * Declares the factory and entry-point methods that every preprocessor must
 * implement, and provides shared helpers for storing document trees in, and
 * loading them from, the object cache.
 *
 * @ingroup Parser
 */
abstract class Preprocessor {

	/**
	 * Version stamp written in front of every cached tree. cacheGetTree()
	 * rejects cache entries whose stamp differs from this value.
	 */
	const CACHE_VERSION = 1;

	/**
	 * @var array Brace matching rules, keyed by the opening character.
	 *
	 * Each rule holds:
	 *  - 'end':   the matching closing character
	 *  - 'names': node name per number of repeated opening characters
	 *             (2 => 'template', 3 => 'tplarg'; null where no name applies)
	 *  - 'min' / 'max': presumably the shortest and longest run of the opening
	 *             character that is treated as a match
	 */
	protected $rules = [
		'{' => [
			'end' => '}',
			'names' => [
				2 => 'template',
				3 => 'tplarg',
			],
			'min' => 2,
			'max' => 3,
		],
		'[' => [
			'end' => ']',
			'names' => [ 2 => null ],
			'min' => 2,
			'max' => 2,
		]
	];

	/**
	 * Store a document tree in the cache.
	 *
	 * Nothing is stored when tree caching is disabled or when the length of
	 * $text lies outside the cacheable range (see getTreeCacheConfig()).
	 *
	 * @param string $text Wikitext the tree was generated from
	 * @param int $flags Preprocessor flags; they are part of the cache key
	 * @param string $tree Tree in string form; it is appended to the version stamp
	 * @return bool|null False if caching was skipped; no value (null) once the
	 *  tree has been handed to the cache
	 */
	protected function cacheSetTree( $text, $flags, $tree ) {
		$config = $this->getTreeCacheConfig( $text );
		if ( $config === null ) {
			return false;
		}

		$key = $this->makeTreeCacheKey( $text, $flags );
		// Eight zero-padded digits, so cacheGetTree() can split stamp and tree
		// at a fixed offset.
		$value = sprintf( "%08d", static::CACHE_VERSION ) . $tree;

		$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
		// 86400: one day, assuming the expiry argument is in seconds.
		$cache->set( $key, $value, 86400 );

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Cached preprocessor output (key: $key)" );
	}

	/**
	 * Attempt to load a precomputed document tree for some given wikitext
	 * from the cache.
	 *
	 * @param string $text Wikitext to look up
	 * @param int $flags Preprocessor flags; they are part of the cache key
	 * @return string|bool The tree string exactly as it was passed to
	 *  cacheSetTree(), or false if caching is not applicable to $text, nothing
	 *  usable is cached, or the entry carries a different CACHE_VERSION
	 */
	protected function cacheGetTree( $text, $flags ) {
		$config = $this->getTreeCacheConfig( $text );
		if ( $config === null ) {
			return false;
		}

		$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
		$key = $this->makeTreeCacheKey( $text, $flags );

		$value = $cache->get( $key );
		// A miss is reported as a falsy value. Anything that is not a string
		// cannot be one of our entries and must not reach substr().
		if ( !is_string( $value ) || !$value ) {
			return false;
		}

		$version = intval( substr( $value, 0, 8 ) );
		if ( $version !== static::CACHE_VERSION ) {
			return false;
		}

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Loaded preprocessor output from cache (key: $key)" );

		return substr( $value, 8 );
	}

	/**
	 * Fetch the main configuration if, and only if, $text qualifies for tree caching.
	 *
	 * Caching is skipped when 'PreprocessorCacheThreshold' is false, when the
	 * text is shorter than that threshold, or when it is longer than 1e6 bytes.
	 *
	 * @param string $text
	 * @return object|null The object returned by RequestContext::getMain()->getConfig(),
	 *  or null if $text must not be cached
	 */
	private function getTreeCacheConfig( $text ) {
		$config = RequestContext::getMain()->getConfig();

		$length = strlen( $text );
		$threshold = $config->get( 'PreprocessorCacheThreshold' );
		if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
			return null;
		}

		return $config;
	}

	/**
	 * Build the cache key shared by cacheSetTree() and cacheGetTree().
	 *
	 * The key is made of the subclass' CACHE_PREFIX constant (or its class
	 * name when the constant is not defined), the MD5 hash of the text, and
	 * the flags.
	 *
	 * @param string $text
	 * @param int $flags
	 * @return string
	 */
	private function makeTreeCacheKey( $text, $flags ) {
		return wfMemcKey(
			defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class,
			md5( $text ), $flags );
	}

	/**
	 * Create a new top-level frame for expansion of a page
	 *
	 * @return PPFrame
	 */
	abstract public function newFrame();

	/**
	 * Create a new custom frame for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $args
	 *
	 * @return PPFrame
	 */
	abstract public function newCustomFrame( $args );

	/**
	 * Create a new custom node for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * NOTE(review): the return type is not declared here; presumably an
	 * array-type PPNode - confirm against the implementations.
	 *
	 * @param array $values
	 */
	abstract public function newPartNodeArray( $values );

	/**
	 * Preprocess text to a PPNode
	 *
	 * @param string $text
	 * @param int $flags
	 *
	 * @return PPNode
	 */
	abstract public function preprocessToObj( $text, $flags = 0 );
}
155
/**
 * A frame in which document tree nodes are expanded: it carries the
 * arguments, title, volatility flag and TTL that apply to the expansion.
 *
 * @ingroup Parser
 */
interface PPFrame {
	/*
	 * Bit flags. Each value is a distinct power of two, so several flags can
	 * be combined with bitwise OR.
	 */
	const NO_ARGS = 1;
	const NO_TEMPLATES = 2;
	const STRIP_COMMENTS = 4;
	const NO_IGNORE = 8;
	const RECOVER_COMMENTS = 16;
	const NO_TAGS = 32;

	/**
	 * Equals NO_ARGS | NO_TEMPLATES | NO_IGNORE | RECOVER_COMMENTS | NO_TAGS
	 * (1|2|8|16|32). Written as a literal because constant expressions were
	 * not supported by PHP when this was written.
	 */
	const RECOVER_ORIG = 59;

	/** This constant exists when $indexOffset is supported in newChild() */
	const SUPPORTS_INDEX_OFFSET = 1;

	/**
	 * Create a child frame
	 *
	 * @param array|bool $args
	 * @param bool|Title $title
	 * @param int $indexOffset A number subtracted from the index attributes of the arguments
	 *
	 * @return PPFrame
	 */
	public function newChild( $args = false, $title = false, $indexOffset = 0 );

	/**
	 * Expand a document tree node, caching the result on its parent with the given key
	 * @param string|int $key
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function cachedExpand( $key, $root, $flags = 0 );

	/**
	 * Expand a document tree node
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function expand( $root, $flags = 0 );

	/**
	 * Implode with flags for expand()
	 * @param string $sep
	 * @param int $flags
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implodeWithFlags( $sep, $flags /*, ... */ );

	/**
	 * Implode with no flags specified
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implode( $sep /*, ... */ );

	/**
	 * Makes an object that, when expand()ed, will be the same as one obtained
	 * with implode()
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualImplode( $sep /*, ... */ );

	/**
	 * Virtual implode with brackets
	 * @param string $start
	 * @param string $sep
	 * @param string $end
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );

	/**
	 * Returns true if there are no arguments in this frame
	 *
	 * @return bool
	 */
	public function isEmpty();

	/**
	 * Returns all arguments of this frame
	 * @return array
	 */
	public function getArguments();

	/**
	 * Returns all numbered arguments of this frame
	 * @return array
	 */
	public function getNumberedArguments();

	/**
	 * Returns all named arguments of this frame
	 * @return array
	 */
	public function getNamedArguments();

	/**
	 * Get an argument to this frame by name
	 * @param int|string $name
	 * @return string|bool
	 */
	public function getArgument( $name );

	/**
	 * Returns true if the infinite loop check is OK, false if a loop is detected
	 *
	 * @param Title $title
	 * @return bool
	 */
	public function loopCheck( $title );

	/**
	 * Return true if the frame is a template frame
	 * @return bool
	 */
	public function isTemplate();

	/**
	 * Set the "volatile" flag.
	 *
	 * Note that this is somewhat of a "hack" in order to make extensions
	 * with side effects (such as Cite) work with the PHP parser. New
	 * extensions should be written in a way that they do not need this
	 * function, because other parsers (such as Parsoid) are not guaranteed
	 * to respect it, and it may be removed in the future.
	 *
	 * @param bool $flag
	 */
	public function setVolatile( $flag = true );

	/**
	 * Get the "volatile" flag.
	 *
	 * Callers should avoid caching the result of an expansion if it has the
	 * volatile flag set.
	 *
	 * @see self::setVolatile()
	 * @return bool
	 */
	public function isVolatile();

	/**
	 * Get the TTL of the frame's output.
	 *
	 * This is the maximum amount of time, in seconds, that this frame's
	 * output should be cached for. A value of null indicates that no
	 * maximum has been specified.
	 *
	 * Note that this TTL only applies to caching frames as parts of pages.
	 * It is not relevant to caching the entire rendered output of a page.
	 *
	 * @return int|null
	 */
	public function getTTL();

	/**
	 * Set the TTL of the output of this frame and all of its ancestors.
	 * Has no effect if the new TTL is greater than the one already set.
	 * Note that it is the caller's responsibility to change the cache
	 * expiry of the page as a whole, if such behavior is desired.
	 *
	 * @see self::getTTL()
	 * @param int $ttl
	 */
	public function setTTL( $ttl );

	/**
	 * Get the title of this frame
	 *
	 * @return Title
	 */
	public function getTitle();
}
338
/**
 * There are three types of nodes:
 *     * Tree nodes, which have a name and contain other nodes as children
 *     * Array nodes, which also contain other nodes but aren't considered part of a tree
 *     * Leaf nodes, which contain the actual data
 *
 * This interface provides access to the tree structure and to the contents of array nodes,
 * but it does not provide access to the internal structure of leaf nodes. Access to leaf
 * data is provided via two means:
 *     * PPFrame::expand(), which provides expanded text
 *     * The PPNode::split*() functions, which provide metadata about certain types of tree node
 * @ingroup Parser
 */
interface PPNode {
	/**
	 * Get an array-type node containing the children of this node.
	 * Returns false if this is not a tree node.
	 * @return bool|PPNode
	 */
	public function getChildren();

	/**
	 * Get the first child of a tree node. False if there isn't one.
	 *
	 * @return bool|PPNode
	 */
	public function getFirstChild();

	/**
	 * Get the next sibling of any node. False if there isn't one
	 * @return bool|PPNode
	 */
	public function getNextSibling();

	/**
	 * Get all children of this tree node which have a given name.
	 * Returns an array-type node, or false if this is not a tree node.
	 * @param string $type
	 * @return bool|PPNode
	 */
	public function getChildrenOfType( $type );

	/**
	 * Returns the length of the array, or false if this is not an array-type node
	 * @return bool|int
	 */
	public function getLength();

	/**
	 * Returns an item of an array-type node
	 * @param int $i
	 * @return bool|PPNode
	 */
	public function item( $i );

	/**
	 * Get the name of this node. The following names are defined here:
	 *
	 *    h             A heading node.
	 *    template      A double-brace node.
	 *    tplarg        A triple-brace node.
	 *    title         The first argument to a template or tplarg node.
	 *    part          Subsequent arguments to a template or tplarg node.
	 *    #nodelist     An array-type node
	 *
	 * The subclass may define various other names for tree and leaf nodes.
	 * @return string
	 */
	public function getName();

	/**
	 * Split a "<part>" node into an associative array containing:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 * @return array
	 */
	public function splitArg();

	/**
	 * Split an "<ext>" node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 * @return array
	 */
	public function splitExt();

	/**
	 * Split an "<h>" node
	 * @return array
	 */
	public function splitHeading();
}