Merge "Database::__construct(): Don't set $mTrxAtomicLevels to an SplStack"
[lhc/web/wiklou.git] / includes / parser / Preprocessor.php
1 <?php
2 /**
3 * Interfaces for preprocessors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 use MediaWiki\Logger\LoggerFactory;
25
/**
 * Abstract base class for preprocessors.
 *
 * Provides helpers for storing and loading serialized document trees in the
 * object cache, and declares the factory/preprocessing methods concrete
 * preprocessors must implement.
 *
 * @ingroup Parser
 */
abstract class Preprocessor {

	const CACHE_VERSION = 1;

	/**
	 * Store a document tree in the cache.
	 *
	 * The stored value is an 8-digit, zero-padded CACHE_VERSION header
	 * followed by the tree, and is kept for 86400 seconds (one day).
	 *
	 * @param string $text Wikitext the tree was generated from
	 * @param int $flags Preprocessor flags; they are part of the cache key
	 * @param string $tree Serialized tree; it is appended to the version header as a string
	 * @return bool|null False if the text is not eligible for caching; no value otherwise
	 */
	protected function cacheSetTree( $text, $flags, $tree ) {
		$config = RequestContext::getMain()->getConfig();

		$threshold = $config->get( 'PreprocessorCacheThreshold' );
		if ( !$this->isCacheableLength( strlen( $text ), $threshold ) ) {
			return false;
		}

		$key = $this->makeCacheKey( $text, $flags );
		$value = sprintf( "%08d", self::CACHE_VERSION ) . $tree;

		$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
		$cache->set( $key, $value, 86400 );

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Cached preprocessor output (key: $key)" );
	}

	/**
	 * Attempt to load a precomputed document tree for some given wikitext
	 * from the cache.
	 *
	 * @param string $text Wikitext to look up
	 * @param int $flags Preprocessor flags; they are part of the cache key
	 * @return string|bool The cached tree with its version header stripped, or
	 *  false if the text is not eligible for caching, nothing usable is cached,
	 *  or the entry was written with a different CACHE_VERSION
	 */
	protected function cacheGetTree( $text, $flags ) {
		$config = RequestContext::getMain()->getConfig();

		$threshold = $config->get( 'PreprocessorCacheThreshold' );
		if ( !$this->isCacheableLength( strlen( $text ), $threshold ) ) {
			return false;
		}

		$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );

		$key = $this->makeCacheKey( $text, $flags );

		$value = $cache->get( $key );
		if ( !$value ) {
			return false;
		}

		// The first 8 characters are the version header written by cacheSetTree()
		$version = intval( substr( $value, 0, 8 ) );
		if ( $version !== self::CACHE_VERSION ) {
			return false;
		}

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Loaded preprocessor output from cache (key: $key)" );

		return substr( $value, 8 );
	}

	/**
	 * Decide whether text of the given length may be cached.
	 *
	 * @param int $length Length of the wikitext in bytes
	 * @param int|bool $threshold Minimum length worth caching; false disables caching
	 * @return bool
	 */
	private function isCacheableLength( $length, $threshold ) {
		return $threshold !== false && $length >= $threshold && $length <= 1e6;
	}

	/**
	 * Get the cache key prefix for the concrete preprocessor class.
	 *
	 * This must be resolved against the runtime class: `self::` and __CLASS__
	 * always refer to Preprocessor itself, which would ignore a subclass's
	 * CACHE_PREFIX and make all subclasses share the same cache keys.
	 *
	 * @return string The subclass's CACHE_PREFIX if it defines one, else its class name
	 */
	private function getCacheKeyPrefix() {
		$class = get_class( $this );
		$constName = $class . '::CACHE_PREFIX';

		return defined( $constName ) ? constant( $constName ) : $class;
	}

	/**
	 * Build the cache key for a piece of wikitext and a set of flags.
	 *
	 * @param string $text
	 * @param int $flags
	 * @return string
	 */
	private function makeCacheKey( $text, $flags ) {
		return wfMemcKey( $this->getCacheKeyPrefix(), md5( $text ), $flags );
	}

	/**
	 * Create a new top-level frame for expansion of a page
	 *
	 * @return PPFrame
	 */
	abstract public function newFrame();

	/**
	 * Create a new custom frame for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $args
	 *
	 * @return PPFrame
	 */
	abstract public function newCustomFrame( $args );

	/**
	 * Create a new custom node for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $values
	 */
	abstract public function newPartNodeArray( $values );

	/**
	 * Preprocess text to a PPNode
	 *
	 * @param string $text
	 * @param int $flags
	 *
	 * @return PPNode
	 */
	abstract public function preprocessToObj( $text, $flags = 0 );
}
134
/**
 * Contract for a preprocessor frame: the context in which document tree
 * nodes are expanded, together with the arguments available to them.
 *
 * @ingroup Parser
 */
interface PPFrame {
	// Bit flags accepted by the expansion methods
	const NO_ARGS = 1;
	const NO_TEMPLATES = 2;
	const STRIP_COMMENTS = 4;
	const NO_IGNORE = 8;
	const RECOVER_COMMENTS = 16;
	const NO_TAGS = 32;

	// Equals NO_ARGS | NO_TEMPLATES | NO_IGNORE | RECOVER_COMMENTS | NO_TAGS
	// (1|2|8|16|32); written as a literal because constant expressions are
	// not yet supported by the PHP versions targeted here.
	const RECOVER_ORIG = 59;

	/** The presence of this constant signals that newChild() supports $indexOffset */
	const SUPPORTS_INDEX_OFFSET = 1;

	/**
	 * Create a child frame.
	 *
	 * @param array|bool $args
	 * @param bool|Title $title
	 * @param int $indexOffset A number subtracted from the index attributes of the arguments
	 *
	 * @return PPFrame
	 */
	public function newChild( $args = false, $title = false, $indexOffset = 0 );

	/**
	 * Expand a document tree node and cache the result on its parent under
	 * the given key.
	 *
	 * @param string|int $key
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function cachedExpand( $key, $root, $flags = 0 );

	/**
	 * Expand a document tree node.
	 *
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function expand( $root, $flags = 0 );

	/**
	 * Implode, passing the given flags to expand().
	 *
	 * @param string $sep
	 * @param int $flags
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implodeWithFlags( $sep, $flags /*, ... */ );

	/**
	 * Implode without specifying any flags.
	 *
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implode( $sep /*, ... */ );

	/**
	 * Build an object which, once expand()ed, yields the same result as
	 * implode() would.
	 *
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualImplode( $sep /*, ... */ );

	/**
	 * Virtual implode with brackets.
	 *
	 * @param string $start
	 * @param string $sep
	 * @param string $end
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );

	/**
	 * Whether this frame has no arguments.
	 *
	 * @return bool True if there are no arguments in this frame
	 */
	public function isEmpty();

	/**
	 * Get every argument of this frame.
	 *
	 * @return array
	 */
	public function getArguments();

	/**
	 * Get every numbered argument of this frame.
	 *
	 * @return array
	 */
	public function getNumberedArguments();

	/**
	 * Get every named argument of this frame.
	 *
	 * @return array
	 */
	public function getNamedArguments();

	/**
	 * Look up an argument of this frame by name.
	 *
	 * NOTE(review): the return type is documented as bool only, which looks
	 * incomplete for a getter - confirm against the implementations.
	 *
	 * @param string $name
	 * @return bool
	 */
	public function getArgument( $name );

	/**
	 * Run the infinite loop check.
	 *
	 * @param Title $title
	 * @return bool True if the check is OK, false if a loop is detected
	 */
	public function loopCheck( $title );

	/**
	 * Whether the frame is a template frame.
	 *
	 * @return bool
	 */
	public function isTemplate();

	/**
	 * Set the "volatile" flag.
	 *
	 * This is somewhat of a "hack" that lets extensions with side effects
	 * (such as Cite) work with the PHP parser. New extensions should be
	 * written so that they do not need this function: other parsers (such
	 * as Parsoid) are not guaranteed to respect it, and it may be removed
	 * in the future.
	 *
	 * @param bool $flag
	 */
	public function setVolatile( $flag = true );

	/**
	 * Get the "volatile" flag.
	 *
	 * Callers should avoid caching the result of an expansion when the
	 * volatile flag is set.
	 *
	 * @see self::setVolatile()
	 * @return bool
	 */
	public function isVolatile();

	/**
	 * Get the TTL of the frame's output.
	 *
	 * The TTL is the maximum amount of time, in seconds, for which this
	 * frame's output should be cached. Null means that no maximum has been
	 * specified.
	 *
	 * This TTL only applies to caching frames as parts of pages; it is not
	 * relevant to caching the entire rendered output of a page.
	 *
	 * @return int|null
	 */
	public function getTTL();

	/**
	 * Set the TTL of the output of this frame and all of its ancestors.
	 *
	 * Has no effect if the new TTL is greater than the one already set.
	 * Changing the cache expiry of the page as a whole, if such behavior is
	 * desired, is the caller's responsibility.
	 *
	 * @see self::getTTL()
	 * @param int $ttl
	 */
	public function setTTL( $ttl );

	/**
	 * Get the title of the frame.
	 *
	 * @return Title
	 */
	public function getTitle();
}
317
/**
 * Nodes come in three types:
 *     * Tree nodes: named nodes that contain other nodes as children
 *     * Array nodes: they also contain other nodes, but aren't considered part of a tree
 *     * Leaf nodes: they hold the actual data
 *
 * The interface exposes the tree structure and the contents of array nodes. It
 * does not expose the internal structure of leaf nodes; leaf data is reachable
 * in two ways:
 *     * PPFrame::expand(), which provides expanded text
 *     * The PPNode::split*() functions, which provide metadata about certain types of tree node
 *
 * @ingroup Parser
 */
interface PPNode {
	/**
	 * Get an array-type node holding the children of this node.
	 *
	 * @return PPNode|bool False if this is not a tree node
	 */
	public function getChildren();

	/**
	 * Get the first child of a tree node.
	 *
	 * @return PPNode|bool False if there isn't one
	 */
	public function getFirstChild();

	/**
	 * Get the next sibling of any node.
	 *
	 * @return PPNode|bool False if there isn't one
	 */
	public function getNextSibling();

	/**
	 * Get all children of this tree node which have a given name.
	 *
	 * @param string $type
	 * @return bool|PPNode An array-type node, or false if this is not a tree node
	 */
	public function getChildrenOfType( $type );

	/**
	 * Get the length of the array.
	 *
	 * @return int|bool False if this is not an array-type node
	 */
	public function getLength();

	/**
	 * Get an item of an array-type node.
	 *
	 * @param int $i
	 * @return bool|PPNode
	 */
	public function item( $i );

	/**
	 * Get the name of this node. These names are defined here:
	 *
	 *    h             A heading node.
	 *    template      A double-brace node.
	 *    tplarg        A triple-brace node.
	 *    title         The first argument to a template or tplarg node.
	 *    part          Subsequent arguments to a template or tplarg node.
	 *    #nodelist     An array-type node
	 *
	 * Subclasses may define various other names for tree and leaf nodes.
	 *
	 * @return string
	 */
	public function getName();

	/**
	 * Split a "<part>" node into an associative array with these keys:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 *
	 * @return array
	 */
	public function splitArg();

	/**
	 * Split an "<ext>" node into an associative array with the keys name,
	 * attr, inner and close. Every value in the resulting array is a PPNode;
	 * inner and close are optional.
	 *
	 * @return array
	 */
	public function splitExt();

	/**
	 * Split an "<h>" node.
	 *
	 * @return array
	 */
	public function splitHeading();
}