Merge "Fix 'Tags' padding to keep it farther from the edge and document the source...
[lhc/web/wiklou.git] / includes / parser / Preprocessor.php
1 <?php
/**
 * Interfaces for preprocessors
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Parser
 */
23
24 use MediaWiki\Logger\LoggerFactory;
25
/**
 * Abstract base for preprocessors.
 *
 * Provides the brace matching rule table and two protected helpers that
 * store and fetch a serialized document tree in the local cluster object
 * cache. Subclasses supply the frame/node factories and preprocessToObj().
 *
 * @ingroup Parser
 */
abstract class Preprocessor {

	/**
	 * Version number written (zero-padded to 8 digits) in front of every
	 * cached tree. Entries carrying a different number are ignored on read.
	 */
	const CACHE_VERSION = 1;

	/**
	 * @var array Brace matching rules.
	 *
	 * Keyed by the opening delimiter. Each rule gives the closing delimiter
	 * ('end'), a 'names' map and 'min'/'max' bounds.
	 * NOTE(review): the numeric keys of 'names' and the 'min'/'max' values
	 * look like delimiter repetition counts (e.g. 2 => '{{', 3 => '{{{') --
	 * confirm against the concrete preprocessors.
	 */
	protected $rules = [
		'{' => [
			'end' => '}',
			'names' => [
				2 => 'template',
				3 => 'tplarg',
			],
			'min' => 2,
			'max' => 3,
		],
		'[' => [
			'end' => ']',
			'names' => [ 2 => null ],
			'min' => 2,
			'max' => 2,
		],
		'-{' => [
			'end' => '}-',
			'names' => [ 2 => null ],
			'min' => 2,
			'max' => 2,
		],
	];

	/**
	 * Store a document tree in the cache.
	 *
	 * Nothing is stored when caching is disabled or the text length is
	 * outside the cacheable range (see isTreeCacheable()).
	 *
	 * @param string $text Wikitext the tree was built from
	 * @param int $flags Preprocessor flags; part of the cache key
	 * @param string $tree Serialized tree; appended verbatim after the version header
	 */
	protected function cacheSetTree( $text, $flags, $tree ) {
		if ( !$this->isTreeCacheable( $text ) ) {
			return;
		}

		$cache = ObjectCache::getLocalClusterInstance();
		$key = $this->makeTreeCacheKey( $cache, $text, $flags );
		// Fixed-width version header so the reader can split it off again
		$value = sprintf( "%08d", static::CACHE_VERSION ) . $tree;

		// 86400 seconds = one day
		$cache->set( $key, $value, 86400 );

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Cached preprocessor output (key: $key)" );
	}

	/**
	 * Attempt to load a precomputed document tree for some given wikitext
	 * from the cache.
	 *
	 * @param string $text
	 * @param int $flags
	 * @return string|bool The serialized tree exactly as it was passed to
	 *  cacheSetTree(), or false if the text is not cacheable, there is no
	 *  entry, the entry is malformed, or it was written with a different
	 *  CACHE_VERSION
	 */
	protected function cacheGetTree( $text, $flags ) {
		if ( !$this->isTreeCacheable( $text ) ) {
			return false;
		}

		$cache = ObjectCache::getLocalClusterInstance();
		$key = $this->makeTreeCacheKey( $cache, $text, $flags );

		$value = $cache->get( $key );
		// A usable entry is a string holding at least the 8-character
		// version header; anything else (miss, truncated or foreign data)
		// is treated as a miss rather than being sliced blindly.
		if ( !is_string( $value ) || strlen( $value ) < 8 ) {
			return false;
		}

		$version = intval( substr( $value, 0, 8 ) );
		if ( $version !== static::CACHE_VERSION ) {
			return false;
		}

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Loaded preprocessor output from cache (key: $key)" );

		return substr( $value, 8 );
	}

	/**
	 * Decide whether a tree for the given text may be cached.
	 *
	 * Caching is off when the 'PreprocessorCacheThreshold' setting is false.
	 * Otherwise the text must be at least that many bytes long and no longer
	 * than 1e6 bytes.
	 *
	 * @param string $text
	 * @return bool
	 */
	private function isTreeCacheable( $text ) {
		$threshold = RequestContext::getMain()->getConfig()
			->get( 'PreprocessorCacheThreshold' );
		if ( $threshold === false ) {
			return false;
		}

		$length = strlen( $text );

		return $length >= $threshold && $length <= 1e6;
	}

	/**
	 * Build the cache key shared by cacheSetTree() and cacheGetTree().
	 *
	 * The first key component is the CACHE_PREFIX constant of the concrete
	 * class when it defines one, otherwise the concrete class name.
	 *
	 * @param object $cache Cache instance returned by ObjectCache; must provide makeKey()
	 * @param string $text
	 * @param int $flags
	 * @return string
	 */
	private function makeTreeCacheKey( $cache, $text, $flags ) {
		return $cache->makeKey(
			defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class,
			md5( $text ),
			$flags
		);
	}

	/**
	 * Create a new top-level frame for expansion of a page
	 *
	 * @return PPFrame
	 */
	abstract public function newFrame();

	/**
	 * Create a new custom frame for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $args
	 *
	 * @return PPFrame
	 */
	abstract public function newCustomFrame( $args );

	/**
	 * Create a new custom node for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * NOTE(review): the return type is not documented here; presumably a
	 * PPNode -- confirm against the implementations.
	 *
	 * @param array $values
	 */
	abstract public function newPartNodeArray( $values );

	/**
	 * Preprocess text to a PPNode
	 *
	 * @param string $text
	 * @param int $flags
	 *
	 * @return PPNode
	 */
	abstract public function preprocessToObj( $text, $flags = 0 );
}
162
/**
 * A frame: the set of arguments (and optional title) against which
 * document tree nodes are expanded.
 *
 * @ingroup Parser
 */
interface PPFrame {
	/*
	 * Bit flags. Each has a distinct power-of-two value so they can be
	 * OR-ed together, as RECOVER_ORIG does below.
	 * NOTE(review): presumably these are passed as the $flags argument of
	 * expand() and the implode helpers -- confirm against implementations.
	 */
	const NO_ARGS = 1;
	const NO_TEMPLATES = 2;
	const STRIP_COMMENTS = 4;
	const NO_IGNORE = 8;
	const RECOVER_COMMENTS = 16;
	const NO_TAGS = 32;

	/** Combination of every flag above except STRIP_COMMENTS */
	const RECOVER_ORIG = self::NO_ARGS | self::NO_TEMPLATES | self::NO_IGNORE |
		self::RECOVER_COMMENTS | self::NO_TAGS;

	/** This constant exists when $indexOffset is supported in newChild() */
	const SUPPORTS_INDEX_OFFSET = 1;

	/**
	 * Create a child frame
	 *
	 * @param array|bool $args
	 * @param bool|Title $title
	 * @param int $indexOffset A number subtracted from the index attributes of the arguments
	 *
	 * @return PPFrame
	 */
	public function newChild( $args = false, $title = false, $indexOffset = 0 );

	/**
	 * Expand a document tree node, caching the result on its parent with the given key
	 * @param string|int $key
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function cachedExpand( $key, $root, $flags = 0 );

	/**
	 * Expand a document tree node
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function expand( $root, $flags = 0 );

	/**
	 * Implode with flags for expand()
	 *
	 * Further arguments beyond the declared ones are accepted (see the
	 * placeholder in the signature).
	 *
	 * @param string $sep
	 * @param int $flags
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implodeWithFlags( $sep, $flags /*, ... */ );

	/**
	 * Implode with no flags specified
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implode( $sep /*, ... */ );

	/**
	 * Makes an object that, when expand()ed, will be the same as one obtained
	 * with implode()
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualImplode( $sep /*, ... */ );

	/**
	 * Virtual implode with brackets
	 * @param string $start
	 * @param string $sep
	 * @param string $end
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );

	/**
	 * Returns true if there are no arguments in this frame
	 *
	 * @return bool
	 */
	public function isEmpty();

	/**
	 * Returns all arguments of this frame
	 * @return array
	 */
	public function getArguments();

	/**
	 * Returns all numbered arguments of this frame
	 * @return array
	 */
	public function getNumberedArguments();

	/**
	 * Returns all named arguments of this frame
	 * @return array
	 */
	public function getNamedArguments();

	/**
	 * Get an argument to this frame by name
	 * @param int|string $name
	 * @return string|bool
	 */
	public function getArgument( $name );

	/**
	 * Returns true if the infinite loop check is OK, false if a loop is detected
	 *
	 * @param Title $title
	 * @return bool
	 */
	public function loopCheck( $title );

	/**
	 * Return true if the frame is a template frame
	 * @return bool
	 */
	public function isTemplate();

	/**
	 * Set the "volatile" flag.
	 *
	 * Note that this is somewhat of a "hack" in order to make extensions
	 * with side effects (such as Cite) work with the PHP parser. New
	 * extensions should be written in a way that they do not need this
	 * function, because other parsers (such as Parsoid) are not guaranteed
	 * to respect it, and it may be removed in the future.
	 *
	 * @param bool $flag
	 */
	public function setVolatile( $flag = true );

	/**
	 * Get the "volatile" flag.
	 *
	 * Callers should avoid caching the result of an expansion if it has the
	 * volatile flag set.
	 *
	 * @see self::setVolatile()
	 * @return bool
	 */
	public function isVolatile();

	/**
	 * Get the TTL of the frame's output.
	 *
	 * This is the maximum amount of time, in seconds, that this frame's
	 * output should be cached for. A value of null indicates that no
	 * maximum has been specified.
	 *
	 * Note that this TTL only applies to caching frames as parts of pages.
	 * It is not relevant to caching the entire rendered output of a page.
	 *
	 * @return int|null
	 */
	public function getTTL();

	/**
	 * Set the TTL of the output of this frame and all of its ancestors.
	 * Has no effect if the new TTL is greater than the one already set.
	 * Note that it is the caller's responsibility to change the cache
	 * expiry of the page as a whole, if such behavior is desired.
	 *
	 * @see self::getTTL()
	 * @param int $ttl
	 */
	public function setTTL( $ttl );

	/**
	 * Get the title of the frame
	 *
	 * @return Title
	 */
	public function getTitle();
}
346
/**
 * There are three types of nodes:
 *     * Tree nodes, which have a name and contain other nodes as children
 *     * Array nodes, which also contain other nodes but aren't considered part of a tree
 *     * Leaf nodes, which contain the actual data
 *
 * This interface provides access to the tree structure and to the contents of array nodes,
 * but it does not provide access to the internal structure of leaf nodes. Access to leaf
 * data is provided via two means:
 *     * PPFrame::expand(), which provides expanded text
 *     * The PPNode::split*() functions, which provide metadata about certain types of tree node
 * @ingroup Parser
 */
interface PPNode {
	/**
	 * Get an array-type node containing the children of this node.
	 * Returns false if this is not a tree node.
	 * @return PPNode|bool
	 */
	public function getChildren();

	/**
	 * Get the first child of a tree node. False if there isn't one.
	 *
	 * @return PPNode|bool
	 */
	public function getFirstChild();

	/**
	 * Get the next sibling of any node. False if there isn't one
	 * @return PPNode|bool
	 */
	public function getNextSibling();

	/**
	 * Get all children of this tree node which have a given name.
	 * Returns an array-type node, or false if this is not a tree node.
	 * @param string $type
	 * @return bool|PPNode
	 */
	public function getChildrenOfType( $type );

	/**
	 * Returns the length of the array, or false if this is not an array-type node
	 * @return int|bool
	 */
	public function getLength();

	/**
	 * Returns an item of an array-type node
	 * @param int $i
	 * @return bool|PPNode
	 */
	public function item( $i );

	/**
	 * Get the name of this node. The following names are defined here:
	 *
	 *    h             A heading node.
	 *    template      A double-brace node.
	 *    tplarg        A triple-brace node.
	 *    title         The first argument to a template or tplarg node.
	 *    part          Subsequent arguments to a template or tplarg node.
	 *    #nodelist     An array-type node
	 *
	 * The subclass may define various other names for tree and leaf nodes.
	 * @return string
	 */
	public function getName();

	/**
	 * Split a "<part>" node into an associative array containing:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 * @return array
	 */
	public function splitArg();

	/**
	 * Split an "<ext>" node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 * @return array
	 */
	public function splitExt();

	/**
	 * Split an "<h>" node
	 * @return array
	 */
	public function splitHeading();
}
436 public function splitHeading();
437 }