Merge "JsonContent: Remove never-used caption styles"
[lhc/web/wiklou.git] / includes / parser / Preprocessor.php
1 <?php
2 /**
3 * Interfaces for preprocessors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 use MediaWiki\Logger\LoggerFactory;
25 use MediaWiki\MediaWikiServices;
26
/**
 * Abstract base class for wikitext preprocessors.
 *
 * Concrete preprocessors implement the frame/node factory methods and
 * preprocessToObj(). This class additionally provides helpers for storing
 * serialized document trees in, and loading them from, the main WAN object
 * cache.
 *
 * @ingroup Parser
 */
abstract class Preprocessor {

	/**
	 * Format version written (zero-padded to 8 digits) in front of every
	 * cached tree. cacheGetTree() rejects entries whose version differs.
	 */
	const CACHE_VERSION = 1;

	/**
	 * @var array Brace matching rules, keyed by opening delimiter. Each rule has:
	 *  - 'end':   the matching closing delimiter
	 *  - 'names': node names indexed by a count in the range min..max
	 *             (presumably the number of repeated delimiters; null where
	 *             no name is assigned)
	 *  - 'min':   lowest count listed for the rule
	 *  - 'max':   highest count listed for the rule
	 */
	protected $rules = [
		'{' => [
			'end' => '}',
			'names' => [
				2 => 'template',
				3 => 'tplarg',
			],
			'min' => 2,
			'max' => 3,
		],
		'[' => [
			'end' => ']',
			'names' => [ 2 => null ],
			'min' => 2,
			'max' => 2,
		],
		'-{' => [
			'end' => '}-',
			'names' => [ 2 => null ],
			'min' => 2,
			'max' => 2,
		],
	];

	/**
	 * Decide whether a tree for the given text may be cached at all.
	 *
	 * Caching is skipped when the 'PreprocessorCacheThreshold' setting is
	 * false, when the text is shorter than that threshold, or when it is
	 * longer than 1e6 bytes. Shared by cacheSetTree() and cacheGetTree() so
	 * both sides always apply the same policy.
	 *
	 * @param string $text
	 * @return bool
	 */
	private function isTreeCacheable( $text ) {
		$threshold = RequestContext::getMain()->getConfig()
			->get( 'PreprocessorCacheThreshold' );
		if ( $threshold === false ) {
			return false;
		}

		// strlen() counts bytes, not characters.
		$length = strlen( $text );
		if ( $length < $threshold || $length > 1e6 ) {
			return false;
		}

		return true;
	}

	/**
	 * Build the cache key under which the tree for $text/$flags is stored.
	 *
	 * The first key component is the subclass's CACHE_PREFIX constant when
	 * it defines one, otherwise the name of the concrete class (late static
	 * binding), so different preprocessor implementations do not share
	 * entries.
	 *
	 * @param object $cache Cache object providing makeKey()
	 * @param string $text
	 * @param int $flags
	 * @return string
	 */
	private function makeTreeCacheKey( $cache, $text, $flags ) {
		return $cache->makeKey(
			defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class,
			md5( $text ),
			$flags
		);
	}

	/**
	 * Store a document tree in the cache.
	 *
	 * Does nothing when the text is not eligible for caching (see
	 * isTreeCacheable()). The stored value is the 8-digit CACHE_VERSION
	 * followed by the tree, kept for 86400 seconds (one day).
	 *
	 * @param string $text
	 * @param int $flags
	 * @param string $tree
	 */
	protected function cacheSetTree( $text, $flags, $tree ) {
		if ( !$this->isTreeCacheable( $text ) ) {
			return;
		}

		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		$key = $this->makeTreeCacheKey( $cache, $text, $flags );
		$value = sprintf( "%08d", static::CACHE_VERSION ) . $tree;

		$cache->set( $key, $value, 86400 );

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Cached preprocessor output (key: $key)" );
	}

	/**
	 * Attempt to load a precomputed document tree for some given wikitext
	 * from the cache.
	 *
	 * @param string $text
	 * @param int $flags
	 * @return string|bool The tree exactly as it was passed to cacheSetTree()
	 *  (version prefix removed), or false when the text is not eligible for
	 *  caching, nothing is cached, or the entry has a different CACHE_VERSION.
	 */
	protected function cacheGetTree( $text, $flags ) {
		if ( !$this->isTreeCacheable( $text ) ) {
			return false;
		}

		$cache = MediaWikiServices::getInstance()->getMainWANObjectCache();
		$key = $this->makeTreeCacheKey( $cache, $text, $flags );

		$value = $cache->get( $key );
		if ( !$value ) {
			return false;
		}

		// The first 8 characters are the zero-padded version written by cacheSetTree().
		$version = intval( substr( $value, 0, 8 ) );
		if ( $version !== static::CACHE_VERSION ) {
			return false;
		}

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Loaded preprocessor output from cache (key: $key)" );

		return substr( $value, 8 );
	}

	/**
	 * Create a new top-level frame for expansion of a page
	 *
	 * @return PPFrame
	 */
	abstract public function newFrame();

	/**
	 * Create a new custom frame for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $args
	 *
	 * @return PPFrame
	 */
	abstract public function newCustomFrame( $args );

	/**
	 * Create a new custom node for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $values
	 *
	 * @return PPNode Presumably an array-type node, going by the method
	 *  name; confirm against the implementations.
	 */
	abstract public function newPartNodeArray( $values );

	/**
	 * Preprocess text to a PPNode
	 *
	 * @param string $text
	 * @param int $flags
	 *
	 * @return PPNode
	 */
	abstract public function preprocessToObj( $text, $flags = 0 );
}
167
/**
 * Contract for a preprocessor frame: the object that expands document tree
 * nodes to text and exposes the arguments it was created with.
 *
 * @ingroup Parser
 */
interface PPFrame {
	/*
	 * Bit flags; each occupies its own bit so they can be OR-ed together.
	 * Presumably these are the values accepted by the $flags parameters
	 * below; their exact effect is defined by the implementing classes.
	 */
	const NO_ARGS = 1 << 0;
	const NO_TEMPLATES = 1 << 1;
	const STRIP_COMMENTS = 1 << 2;
	const NO_IGNORE = 1 << 3;
	const RECOVER_COMMENTS = 1 << 4;
	const NO_TAGS = 1 << 5;

	/** Every flag above except STRIP_COMMENTS, combined. */
	const RECOVER_ORIG =
		self::NO_ARGS
		| self::NO_TEMPLATES
		| self::NO_IGNORE
		| self::RECOVER_COMMENTS
		| self::NO_TAGS;

	/** This constant exists when $indexOffset is supported in newChild() */
	const SUPPORTS_INDEX_OFFSET = 1;

	/**
	 * Build a frame that is a child of this one.
	 *
	 * @param array|bool $args
	 * @param bool|Title $title
	 * @param int $indexOffset A number subtracted from the index attributes of the arguments
	 *
	 * @return PPFrame
	 */
	public function newChild( $args = false, $title = false, $indexOffset = 0 );

	/**
	 * Expand a node of the document tree and cache the result on the node's
	 * parent under the supplied key.
	 *
	 * @param string|int $key
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function cachedExpand( $key, $root, $flags = 0 );

	/**
	 * Expand a node of the document tree.
	 *
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function expand( $root, $flags = 0 );

	/**
	 * Implode the trailing arguments, using the given flags for expand().
	 *
	 * @param string $sep
	 * @param int $flags
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implodeWithFlags( $sep, $flags /*, ... */ );

	/**
	 * Implode the trailing arguments without specifying any flags.
	 *
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implode( $sep /*, ... */ );

	/**
	 * Build an object whose expand()ed form equals what implode() would
	 * have produced for the same arguments.
	 *
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualImplode( $sep /*, ... */ );

	/**
	 * Virtual implode, with brackets.
	 *
	 * @param string $start
	 * @param string $sep
	 * @param string $end
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );

	/**
	 * Tell whether this frame has no arguments.
	 *
	 * @return bool True if there are no arguments in this frame
	 */
	public function isEmpty();

	/**
	 * Fetch every argument of this frame.
	 *
	 * @return array
	 */
	public function getArguments();

	/**
	 * Fetch every numbered argument of this frame.
	 *
	 * @return array
	 */
	public function getNumberedArguments();

	/**
	 * Fetch every named argument of this frame.
	 *
	 * @return array
	 */
	public function getNamedArguments();

	/**
	 * Look up a single argument of this frame by its name.
	 *
	 * @param int|string $name
	 * @return string|bool
	 */
	public function getArgument( $name );

	/**
	 * Run the infinite loop check for a title.
	 *
	 * @param Title $title
	 * @return bool True if the check is OK, false if a loop is detected
	 */
	public function loopCheck( $title );

	/**
	 * Tell whether this frame is a template frame.
	 *
	 * @return bool
	 */
	public function isTemplate();

	/**
	 * Set the "volatile" flag.
	 *
	 * This is somewhat of a "hack" that lets extensions with side effects
	 * (such as Cite) work with the PHP parser. New extensions should be
	 * written so that they do not need this function: other parsers (such
	 * as Parsoid) are not guaranteed to respect it, and it may be removed
	 * in the future.
	 *
	 * @param bool $flag
	 */
	public function setVolatile( $flag = true );

	/**
	 * Get the "volatile" flag.
	 *
	 * When the flag is set, callers should avoid caching the result of an
	 * expansion.
	 *
	 * @see self::setVolatile()
	 * @return bool
	 */
	public function isVolatile();

	/**
	 * Get the TTL of the frame's output.
	 *
	 * The TTL is the maximum amount of time, in seconds, for which this
	 * frame's output should be cached. Null means no maximum has been
	 * specified.
	 *
	 * This TTL only applies to caching frames as parts of pages; it is not
	 * relevant to caching the entire rendered output of a page.
	 *
	 * @return int|null
	 */
	public function getTTL();

	/**
	 * Set the TTL of the output of this frame and all of its ancestors.
	 *
	 * A new TTL greater than the one already set has no effect. Changing
	 * the cache expiry of the page as a whole, if such behavior is desired,
	 * is the caller's responsibility.
	 *
	 * @see self::getTTL()
	 * @param int $ttl
	 */
	public function setTTL( $ttl );

	/**
	 * Get the title of this frame.
	 *
	 * @return Title
	 */
	public function getTitle();
}
351
/**
 * There are three types of nodes:
 *   * Tree nodes, which have a name and contain other nodes as children
 *   * Array nodes, which also contain other nodes but aren't considered part of a tree
 *   * Leaf nodes, which contain the actual data
 *
 * This interface provides access to the tree structure and to the contents of array nodes,
 * but it does not provide access to the internal structure of leaf nodes. Access to leaf
 * data is provided via two means:
 *   * PPFrame::expand(), which provides expanded text
 *   * The PPNode::split*() functions, which provide metadata about certain types of tree node
 *
 * Several accessors return false rather than a node when the request does
 * not apply to the node they are called on, so callers must check for false
 * before chaining calls.
 *
 * @ingroup Parser
 */
interface PPNode {
	/**
	 * Get an array-type node containing the children of this node.
	 *
	 * @return PPNode|false False if this is not a tree node
	 */
	public function getChildren();

	/**
	 * Get the first child of a tree node.
	 *
	 * @return PPNode|false False if there isn't one
	 */
	public function getFirstChild();

	/**
	 * Get the next sibling of any node.
	 *
	 * @return PPNode|false False if there isn't one
	 */
	public function getNextSibling();

	/**
	 * Get all children of this tree node which have a given name.
	 *
	 * @param string $type
	 * @return PPNode|false An array-type node, or false if this is not a tree node
	 */
	public function getChildrenOfType( $type );

	/**
	 * Get the number of items held by an array-type node.
	 *
	 * @return int|false The length of the array, or false if this is not an
	 *  array-type node
	 */
	public function getLength();

	/**
	 * Returns an item of an array-type node
	 *
	 * @param int $i
	 * @return bool|PPNode
	 */
	public function item( $i );

	/**
	 * Get the name of this node. The following names are defined here:
	 *
	 *    h             A heading node.
	 *    template      A double-brace node.
	 *    tplarg        A triple-brace node.
	 *    title         The first argument to a template or tplarg node.
	 *    part          Subsequent arguments to a template or tplarg node.
	 *    #nodelist     An array-type node
	 *
	 * The subclass may define various other names for tree and leaf nodes.
	 *
	 * @return string
	 */
	public function getName();

	/**
	 * Split a "<part>" node into an associative array containing:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 *
	 * @return array
	 */
	public function splitArg();

	/**
	 * Split an "<ext>" node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 *
	 * @return array
	 */
	public function splitExt();

	/**
	 * Split an "<h>" node
	 *
	 * @return array
	 */
	public function splitHeading();
}