Merge "Added a separate error message for mkdir failures"
[lhc/web/wiklou.git] / includes / parser / Preprocessor.php
1 <?php
2 /**
3 * Interfaces for preprocessors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 use MediaWiki\Logger\LoggerFactory;
25
/**
 * Abstract base for preprocessors.
 *
 * Besides declaring the factory/preprocessing methods that concrete
 * preprocessors must implement, this class provides a pair of helpers for
 * storing and retrieving serialized document trees in the local cluster
 * object cache.
 *
 * @ingroup Parser
 */
abstract class Preprocessor {

	/**
	 * Version number prepended (zero-padded to 8 digits) to every cached tree.
	 * cacheGetTree() rejects entries whose prefix does not match.
	 */
	const CACHE_VERSION = 1;

	/**
	 * @var array Brace matching rules, keyed by opening delimiter.
	 *
	 * Each rule has the closing delimiter ('end'), a 'names' map and a
	 * 'min'/'max' pair. NOTE(review): 'min'/'max' are presumably the smallest
	 * and largest delimiter run that forms a match, and 'names' the node name
	 * for a run of that length -- confirm against the subclasses that read
	 * this table.
	 */
	protected $rules = [
		'{' => [
			'end' => '}',
			'names' => [
				2 => 'template',
				3 => 'tplarg',
			],
			'min' => 2,
			'max' => 3,
		],
		'[' => [
			'end' => ']',
			'names' => [ 2 => null ],
			'min' => 2,
			'max' => 2,
		],
		'-{' => [
			'end' => '}-',
			'names' => [ 2 => null ],
			'min' => 2,
			'max' => 2,
		],
	];

	/**
	 * Store a document tree in the cache.
	 *
	 * Nothing is stored when the text is not eligible for caching (see
	 * isTreeCacheable()).
	 *
	 * @param string $text Wikitext the tree was built from; part of the cache key
	 * @param int $flags Preprocessor flags; part of the cache key
	 * @param string $tree Serialized tree; it is concatenated onto the version prefix
	 * @return bool|null False if the text is not eligible for caching,
	 *  otherwise no value
	 */
	protected function cacheSetTree( $text, $flags, $tree ) {
		if ( !$this->isTreeCacheable( $text ) ) {
			return false;
		}

		$cache = ObjectCache::getLocalClusterInstance();
		$key = $this->makeTreeCacheKey( $cache, $text, $flags );

		// Prefix the payload with the cache version so that stale formats
		// can be detected and discarded on read.
		$value = sprintf( "%08d", static::CACHE_VERSION ) . $tree;

		// 86400 seconds = one day
		$cache->set( $key, $value, 86400 );

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Cached preprocessor output (key: $key)" );
	}

	/**
	 * Attempt to load a precomputed document tree for some given wikitext
	 * from the cache.
	 *
	 * @param string $text Wikitext to look up
	 * @param int $flags Preprocessor flags; part of the cache key
	 * @return string|bool The cached tree with the version prefix removed, or
	 *  false if the text is not eligible for caching, nothing is cached, or
	 *  the cached entry was written with a different CACHE_VERSION
	 */
	protected function cacheGetTree( $text, $flags ) {
		if ( !$this->isTreeCacheable( $text ) ) {
			return false;
		}

		$cache = ObjectCache::getLocalClusterInstance();
		$key = $this->makeTreeCacheKey( $cache, $text, $flags );

		$value = $cache->get( $key );
		if ( !$value ) {
			return false;
		}

		// The first 8 characters are the zero-padded version written by
		// cacheSetTree(); everything after them is the tree itself.
		$version = intval( substr( $value, 0, 8 ) );
		if ( $version !== static::CACHE_VERSION ) {
			return false;
		}

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Loaded preprocessor output from cache (key: $key)" );

		return substr( $value, 8 );
	}

	/**
	 * Decide whether the tree for a given text may be cached.
	 *
	 * Caching is skipped when the 'PreprocessorCacheThreshold' setting is
	 * false, when the text is shorter (in bytes) than that threshold, or when
	 * it is longer than 1e6 bytes.
	 *
	 * @param string $text
	 * @return bool
	 */
	private function isTreeCacheable( $text ) {
		$config = RequestContext::getMain()->getConfig();

		$length = strlen( $text );
		$threshold = $config->get( 'PreprocessorCacheThreshold' );
		if ( $threshold === false || $length < $threshold || $length > 1e6 ) {
			return false;
		}

		return true;
	}

	/**
	 * Build the cache key for a text/flags pair.
	 *
	 * The key is made from the subclass's CACHE_PREFIX constant when it
	 * defines one (the concrete class name otherwise), the MD5 of the text,
	 * and the flags.
	 *
	 * @param object $cache Cache instance providing makeKey()
	 * @param string $text
	 * @param int $flags
	 * @return string
	 */
	private function makeTreeCacheKey( $cache, $text, $flags ) {
		return $cache->makeKey(
			defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : static::class,
			md5( $text ), $flags );
	}

	/**
	 * Create a new top-level frame for expansion of a page
	 *
	 * @return PPFrame
	 */
	abstract public function newFrame();

	/**
	 * Create a new custom frame for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $args
	 *
	 * @return PPFrame
	 */
	abstract public function newCustomFrame( $args );

	/**
	 * Create a new custom node for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $values
	 */
	abstract public function newPartNodeArray( $values );

	/**
	 * Preprocess text to a PPNode
	 *
	 * @param string $text
	 * @param int $flags
	 *
	 * @return PPNode
	 */
	abstract public function preprocessToObj( $text, $flags = 0 );
}
161
/**
 * Interface for preprocessor frames: objects that hold a set of arguments
 * and expand document tree nodes to text.
 *
 * @ingroup Parser
 */
interface PPFrame {
	// Bit flags; each occupies its own bit so they can be OR-ed together.
	const NO_ARGS = 1;
	const NO_TEMPLATES = 2;
	const STRIP_COMMENTS = 4;
	const NO_IGNORE = 8;
	const RECOVER_COMMENTS = 16;
	const NO_TAGS = 32;

	/** Combination of every flag above except STRIP_COMMENTS (numerically 59) */
	const RECOVER_ORIG = self::NO_ARGS | self::NO_TEMPLATES | self::NO_IGNORE |
		self::RECOVER_COMMENTS | self::NO_TAGS;

	/** This constant exists when $indexOffset is supported in newChild() */
	const SUPPORTS_INDEX_OFFSET = 1;

	/**
	 * Create a child frame
	 *
	 * @param array|bool $args
	 * @param bool|Title $title
	 * @param int $indexOffset A number subtracted from the index attributes of the arguments
	 *
	 * @return PPFrame
	 */
	public function newChild( $args = false, $title = false, $indexOffset = 0 );

	/**
	 * Expand a document tree node, caching the result on its parent with the given key
	 * @param string|int $key
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function cachedExpand( $key, $root, $flags = 0 );

	/**
	 * Expand a document tree node
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function expand( $root, $flags = 0 );

	/**
	 * Implode with flags for expand()
	 * @param string $sep
	 * @param int $flags
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implodeWithFlags( $sep, $flags /*, ... */ );

	/**
	 * Implode with no flags specified
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implode( $sep /*, ... */ );

	/**
	 * Makes an object that, when expand()ed, will be the same as one obtained
	 * with implode()
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualImplode( $sep /*, ... */ );

	/**
	 * Virtual implode with brackets
	 * @param string $start
	 * @param string $sep
	 * @param string $end
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );

	/**
	 * Returns true if there are no arguments in this frame
	 *
	 * @return bool
	 */
	public function isEmpty();

	/**
	 * Returns all arguments of this frame
	 * @return array
	 */
	public function getArguments();

	/**
	 * Returns all numbered arguments of this frame
	 * @return array
	 */
	public function getNumberedArguments();

	/**
	 * Returns all named arguments of this frame
	 * @return array
	 */
	public function getNamedArguments();

	/**
	 * Get an argument to this frame by name
	 * @param int|string $name
	 * @return string|bool
	 */
	public function getArgument( $name );

	/**
	 * Returns true if the infinite loop check is OK, false if a loop is detected
	 *
	 * @param Title $title
	 * @return bool
	 */
	public function loopCheck( $title );

	/**
	 * Return true if the frame is a template frame
	 * @return bool
	 */
	public function isTemplate();

	/**
	 * Set the "volatile" flag.
	 *
	 * Note that this is somewhat of a "hack" in order to make extensions
	 * with side effects (such as Cite) work with the PHP parser. New
	 * extensions should be written in a way that they do not need this
	 * function, because other parsers (such as Parsoid) are not guaranteed
	 * to respect it, and it may be removed in the future.
	 *
	 * @param bool $flag
	 */
	public function setVolatile( $flag = true );

	/**
	 * Get the "volatile" flag.
	 *
	 * Callers should avoid caching the result of an expansion if it has the
	 * volatile flag set.
	 *
	 * @see self::setVolatile()
	 * @return bool
	 */
	public function isVolatile();

	/**
	 * Get the TTL of the frame's output.
	 *
	 * This is the maximum amount of time, in seconds, that this frame's
	 * output should be cached for. A value of null indicates that no
	 * maximum has been specified.
	 *
	 * Note that this TTL only applies to caching frames as parts of pages.
	 * It is not relevant to caching the entire rendered output of a page.
	 *
	 * @return int|null
	 */
	public function getTTL();

	/**
	 * Set the TTL of the output of this frame and all of its ancestors.
	 * Has no effect if the new TTL is greater than the one already set.
	 * Note that it is the caller's responsibility to change the cache
	 * expiry of the page as a whole, if such behavior is desired.
	 *
	 * @see self::getTTL()
	 * @param int $ttl
	 */
	public function setTTL( $ttl );

	/**
	 * Get the title of this frame
	 *
	 * @return Title
	 */
	public function getTitle();
}
344
/**
 * There are three types of nodes:
 *     * Tree nodes, which have a name and contain other nodes as children
 *     * Array nodes, which also contain other nodes but aren't considered part of a tree
 *     * Leaf nodes, which contain the actual data
 *
 * This interface provides access to the tree structure and to the contents of array nodes,
 * but it does not provide access to the internal structure of leaf nodes. Access to leaf
 * data is provided via two means:
 *     * PPFrame::expand(), which provides expanded text
 *     * The PPNode::split*() functions, which provide metadata about certain types of tree node
 * @ingroup Parser
 */
interface PPNode {
	/**
	 * Get an array-type node containing the children of this node.
	 * Returns false if this is not a tree node.
	 * @return PPNode|bool
	 */
	public function getChildren();

	/**
	 * Get the first child of a tree node. False if there isn't one.
	 *
	 * @return PPNode|bool
	 */
	public function getFirstChild();

	/**
	 * Get the next sibling of any node. False if there isn't one
	 * @return PPNode|bool
	 */
	public function getNextSibling();

	/**
	 * Get all children of this tree node which have a given name.
	 * Returns an array-type node, or false if this is not a tree node.
	 * @param string $type
	 * @return bool|PPNode
	 */
	public function getChildrenOfType( $type );

	/**
	 * Returns the length of the array, or false if this is not an array-type node
	 * @return int|bool
	 */
	public function getLength();

	/**
	 * Returns an item of an array-type node
	 * @param int $i
	 * @return bool|PPNode
	 */
	public function item( $i );

	/**
	 * Get the name of this node. The following names are defined here:
	 *
	 *    h             A heading node.
	 *    template      A double-brace node.
	 *    tplarg        A triple-brace node.
	 *    title         The first argument to a template or tplarg node.
	 *    part          Subsequent arguments to a template or tplarg node.
	 *    #nodelist     An array-type node
	 *
	 * The subclass may define various other names for tree and leaf nodes.
	 * @return string
	 */
	public function getName();

	/**
	 * Split a "<part>" node into an associative array containing:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 * @return array
	 */
	public function splitArg();

	/**
	 * Split an "<ext>" node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 * @return array
	 */
	public function splitExt();

	/**
	 * Split an "<h>" node
	 * @return array
	 */
	public function splitHeading();
}