Merge "In UploadStash, prioritize core metadata over file handler metadata"
[lhc/web/wiklou.git] / includes / parser / Preprocessor.php
1 <?php
2 /**
3 * Interfaces for preprocessors
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Parser
22 */
23
24 use MediaWiki\Logger\LoggerFactory;
25
/**
 * Base class for preprocessors.
 *
 * Declares the factory and entry-point methods every concrete preprocessor
 * has to implement, and provides protected helpers for storing preprocessor
 * output in, and loading it from, the configured main object cache.
 *
 * @ingroup Parser
 */
abstract class Preprocessor {

	const CACHE_VERSION = 1;

	/**
	 * Decide whether output for the given text may be cached at all.
	 *
	 * Caching is skipped when the 'PreprocessorCacheThreshold' setting is
	 * exactly false, when the text is shorter (in bytes, per strlen()) than
	 * that threshold, or when the text is longer than 1e6 bytes.
	 *
	 * @param object $config Configuration object exposing get( $name )
	 * @param string $text Wikitext whose length is tested
	 * @return bool True if the cache should be used for this text
	 */
	private function isTreeCacheable( $config, $text ) {
		$length = strlen( $text );
		$threshold = $config->get( 'PreprocessorCacheThreshold' );

		return !( $threshold === false || $length < $threshold || $length > 1e6 );
	}

	/**
	 * Build the cache key for a given text/flags combination.
	 *
	 * The key is made of a per-class prefix (the CACHE_PREFIX constant of
	 * the called class if it defines one, otherwise the called class name),
	 * the MD5 hash of the text, and the flags.
	 *
	 * @param string $text
	 * @param int $flags
	 * @return string Whatever wfMemcKey() produces for these components
	 */
	private function getTreeCacheKey( $text, $flags ) {
		return wfMemcKey(
			// TODO: Once we require PHP 5.5, use static::class instead of
			// get_called_class() or get_class( $this ).
			defined( 'static::CACHE_PREFIX' ) ? static::CACHE_PREFIX : get_called_class(),
			md5( $text ), $flags );
	}

	/**
	 * Store a document tree in the cache.
	 *
	 * The stored value is the tree prefixed with CACHE_VERSION, zero-padded
	 * to eight digits, so that cacheGetTree() can reject entries written by
	 * an incompatible version.
	 *
	 * @param string $text Wikitext the tree was generated from
	 * @param int $flags Preprocessor flags the tree was generated with
	 * @param string $tree Value to cache; it is concatenated onto the version
	 *   prefix, so it is expected to be a string (e.g. a serialized tree)
	 * @return bool|null False if the text is not eligible for caching;
	 *   nothing (null) once the value has been handed to the cache
	 */
	protected function cacheSetTree( $text, $flags, $tree ) {
		$config = RequestContext::getMain()->getConfig();
		if ( !$this->isTreeCacheable( $config, $text ) ) {
			return false;
		}

		$key = $this->getTreeCacheKey( $text, $flags );
		$value = sprintf( "%08d", static::CACHE_VERSION ) . $tree;

		$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
		// 86400 is passed as set()'s third argument -- presumably an expiry
		// of one day in seconds; confirm against the cache backend's API.
		$cache->set( $key, $value, 86400 );

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Cached preprocessor output (key: $key)" );
	}

	/**
	 * Attempt to load a precomputed document tree for some given wikitext
	 * from the cache.
	 *
	 * @param string $text
	 * @param int $flags
	 * @return string|bool The cached value with its eight-character version
	 *   prefix removed, or false if the text is not eligible for caching,
	 *   nothing usable is cached, or the entry was written with a different
	 *   CACHE_VERSION
	 */
	protected function cacheGetTree( $text, $flags ) {
		$config = RequestContext::getMain()->getConfig();
		if ( !$this->isTreeCacheable( $config, $text ) ) {
			return false;
		}

		$cache = ObjectCache::getInstance( $config->get( 'MainCacheType' ) );
		$key = $this->getTreeCacheKey( $text, $flags );

		// Any falsy result (cache miss or empty value) is treated as a miss.
		$value = $cache->get( $key );
		if ( !$value ) {
			return false;
		}

		// The first eight characters hold the version written by cacheSetTree().
		$version = intval( substr( $value, 0, 8 ) );
		if ( $version !== static::CACHE_VERSION ) {
			return false;
		}

		LoggerFactory::getInstance( 'Preprocessor' )
			->info( "Loaded preprocessor output from cache (key: $key)" );

		return substr( $value, 8 );
	}

	/**
	 * Create a new top-level frame for expansion of a page
	 *
	 * @return PPFrame
	 */
	abstract public function newFrame();

	/**
	 * Create a new custom frame for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $args
	 *
	 * @return PPFrame
	 */
	abstract public function newCustomFrame( $args );

	/**
	 * Create a new custom node for programmatic use of parameter replacement
	 * as used in some extensions.
	 *
	 * @param array $values
	 *
	 * @return PPNode
	 */
	abstract public function newPartNodeArray( $values );

	/**
	 * Preprocess text to a PPNode
	 *
	 * @param string $text
	 * @param int $flags
	 *
	 * @return PPNode
	 */
	abstract public function preprocessToObj( $text, $flags = 0 );
}
138
/**
 * Interface for preprocessor frames.
 *
 * A frame expands document tree nodes to text, gives access to the arguments
 * it was created with, and carries caching hints (the "volatile" flag and a
 * TTL) for its output.
 *
 * @ingroup Parser
 */
interface PPFrame {
	// Flags for the $flags parameter of the expansion methods. Each value is
	// a distinct power of two.
	const NO_ARGS = 1;
	const NO_TEMPLATES = 2;
	const STRIP_COMMENTS = 4;
	const NO_IGNORE = 8;
	const RECOVER_COMMENTS = 16;
	const NO_TAGS = 32;

	// = 1|2|8|16|32, i.e. NO_ARGS | NO_TEMPLATES | NO_IGNORE | RECOVER_COMMENTS | NO_TAGS.
	// Written as a literal: no constant expression support in PHP yet.
	const RECOVER_ORIG = 59;

	/** This constant exists when $indexOffset is supported in newChild() */
	const SUPPORTS_INDEX_OFFSET = 1;

	/**
	 * Create a child frame
	 *
	 * @param array|bool $args
	 * @param bool|Title $title
	 * @param int $indexOffset A number subtracted from the index attributes of the arguments
	 *
	 * @return PPFrame
	 */
	public function newChild( $args = false, $title = false, $indexOffset = 0 );

	/**
	 * Expand a document tree node, caching the result on its parent with the given key
	 * @param string|int $key
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function cachedExpand( $key, $root, $flags = 0 );

	/**
	 * Expand a document tree node
	 * @param string|PPNode $root
	 * @param int $flags
	 * @return string
	 */
	public function expand( $root, $flags = 0 );

	/**
	 * Implode with flags for expand()
	 * @param string $sep
	 * @param int $flags
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implodeWithFlags( $sep, $flags /*, ... */ );

	/**
	 * Implode with no flags specified
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return string
	 */
	public function implode( $sep /*, ... */ );

	/**
	 * Makes an object that, when expand()ed, will be the same as one obtained
	 * with implode()
	 * @param string $sep
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualImplode( $sep /*, ... */ );

	/**
	 * Virtual implode with brackets
	 * @param string $start
	 * @param string $sep
	 * @param string $end
	 * @param string|PPNode $args,...
	 * @return PPNode
	 */
	public function virtualBracketedImplode( $start, $sep, $end /*, ... */ );

	/**
	 * Returns true if there are no arguments in this frame
	 *
	 * @return bool
	 */
	public function isEmpty();

	/**
	 * Returns all arguments of this frame
	 * @return array
	 */
	public function getArguments();

	/**
	 * Returns all numbered arguments of this frame
	 * @return array
	 */
	public function getNumberedArguments();

	/**
	 * Returns all named arguments of this frame
	 * @return array
	 */
	public function getNamedArguments();

	/**
	 * Get an argument to this frame by name
	 * @param string $name
	 * @return string|bool Presumably the argument's value, or false if there
	 *   is no such argument (this was previously annotated as plain "bool");
	 *   confirm against the implementations
	 */
	public function getArgument( $name );

	/**
	 * Returns true if the infinite loop check is OK, false if a loop is detected
	 *
	 * @param Title $title
	 * @return bool
	 */
	public function loopCheck( $title );

	/**
	 * Return true if the frame is a template frame
	 * @return bool
	 */
	public function isTemplate();

	/**
	 * Set the "volatile" flag.
	 *
	 * Note that this is somewhat of a "hack" in order to make extensions
	 * with side effects (such as Cite) work with the PHP parser. New
	 * extensions should be written in a way that they do not need this
	 * function, because other parsers (such as Parsoid) are not guaranteed
	 * to respect it, and it may be removed in the future.
	 *
	 * @param bool $flag
	 */
	public function setVolatile( $flag = true );

	/**
	 * Get the "volatile" flag.
	 *
	 * Callers should avoid caching the result of an expansion if it has the
	 * volatile flag set.
	 *
	 * @see self::setVolatile()
	 * @return bool
	 */
	public function isVolatile();

	/**
	 * Get the TTL of the frame's output.
	 *
	 * This is the maximum amount of time, in seconds, that this frame's
	 * output should be cached for. A value of null indicates that no
	 * maximum has been specified.
	 *
	 * Note that this TTL only applies to caching frames as parts of pages.
	 * It is not relevant to caching the entire rendered output of a page.
	 *
	 * @return int|null
	 */
	public function getTTL();

	/**
	 * Set the TTL of the output of this frame and all of its ancestors.
	 * Has no effect if the new TTL is greater than the one already set.
	 * Note that it is the caller's responsibility to change the cache
	 * expiry of the page as a whole, if such behavior is desired.
	 *
	 * @see self::getTTL()
	 * @param int $ttl
	 */
	public function setTTL( $ttl );

	/**
	 * Get the title of this frame
	 *
	 * @return Title
	 */
	public function getTitle();
}
321
/**
 * There are three types of nodes:
 *     * Tree nodes, which have a name and contain other nodes as children
 *     * Array nodes, which also contain other nodes but aren't considered part of a tree
 *     * Leaf nodes, which contain the actual data
 *
 * This interface provides access to the tree structure and to the contents of array nodes,
 * but it does not provide access to the internal structure of leaf nodes. Access to leaf
 * data is provided via two means:
 *     * PPFrame::expand(), which provides expanded text
 *     * The PPNode::split*() functions, which provide metadata about certain types of tree node
 * @ingroup Parser
 */
interface PPNode {
	/**
	 * Get an array-type node containing the children of this node.
	 * Returns false if this is not a tree node.
	 * @return PPNode|bool
	 */
	public function getChildren();

	/**
	 * Get the first child of a tree node. False if there isn't one.
	 *
	 * @return PPNode|bool
	 */
	public function getFirstChild();

	/**
	 * Get the next sibling of any node. False if there isn't one
	 * @return PPNode|bool
	 */
	public function getNextSibling();

	/**
	 * Get all children of this tree node which have a given name.
	 * Returns an array-type node, or false if this is not a tree node.
	 * @param string $type
	 * @return bool|PPNode
	 */
	public function getChildrenOfType( $type );

	/**
	 * Returns the length of the array, or false if this is not an array-type node
	 * @return int|bool
	 */
	public function getLength();

	/**
	 * Returns an item of an array-type node
	 * @param int $i
	 * @return bool|PPNode
	 */
	public function item( $i );

	/**
	 * Get the name of this node. The following names are defined here:
	 *
	 *    h             A heading node.
	 *    template      A double-brace node.
	 *    tplarg        A triple-brace node.
	 *    title         The first argument to a template or tplarg node.
	 *    part          Subsequent arguments to a template or tplarg node.
	 *    #nodelist     An array-type node
	 *
	 * The subclass may define various other names for tree and leaf nodes.
	 * @return string
	 */
	public function getName();

	/**
	 * Split a "<part>" node into an associative array containing:
	 *    name          PPNode name
	 *    index         String index
	 *    value         PPNode value
	 * @return array
	 */
	public function splitArg();

	/**
	 * Split an "<ext>" node into an associative array containing name, attr, inner and close
	 * All values in the resulting array are PPNodes. Inner and close are optional.
	 * @return array
	 */
	public function splitExt();

	/**
	 * Split an "<h>" node
	 * @return array
	 */
	public function splitHeading();
}