Merge "StringUtils: Add a utility for checking if a string is a valid regex"
[lhc/web/wiklou.git] / includes / parser / PPFrame_DOM.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @ingroup Parser
20 */
21
/**
 * An expansion frame, used as a context to expand the result of preprocessToObj()
 *
 * This class is the argument-less frame: getArguments() and friends return
 * empty arrays, isEmpty() is always true and isTemplate() is always false.
 *
 * @deprecated since 1.34, use PPFrame_Hash
 * @ingroup Parser
 * @phan-file-suppress PhanUndeclaredMethod
 */
// phpcs:ignore Squiz.Classes.ValidClassName.NotCamelCaps
class PPFrame_DOM implements PPFrame {

	/**
	 * @var Preprocessor
	 */
	public $preprocessor;

	/**
	 * @var Parser
	 */
	public $parser;

	/**
	 * @var Title
	 */
	public $title;

	/**
	 * Prefixed DB keys looked up by level in getPDBK(). Entry 0 is this
	 * frame's own title, or false when the parser has no title.
	 * @var array
	 */
	public $titleCache;

	/**
	 * Hashtable listing templates which are disallowed for expansion in this frame,
	 * having been encountered previously in parent frames.
	 * Keys are prefixed DB keys (see loopCheck()).
	 * @var array
	 */
	public $loopCheckHash;

	/**
	 * Recursion depth of this frame, top = 0
	 * Note that this is NOT the same as expansion depth in expand()
	 * @var int
	 */
	public $depth;

	/**
	 * @var bool
	 */
	private $volatile = false;

	/**
	 * Lowest TTL passed to setTTL() so far, or null if none was set.
	 * @var int|null
	 */
	private $ttl = null;

	/**
	 * Initialised to an empty array by the constructor and not read by this
	 * class itself (presumably used by subclasses; verify against them).
	 * @var array
	 */
	protected $childExpansionCache;

	/**
	 * Construct a new preprocessor frame.
	 * @param Preprocessor $preprocessor The parent preprocessor
	 */
	public function __construct( $preprocessor ) {
		$this->preprocessor = $preprocessor;
		$this->parser = $preprocessor->parser;
		$this->title = $this->parser->getTitle();
		$this->titleCache = [ $this->title ? $this->title->getPrefixedDBkey() : false ];
		$this->loopCheckHash = [];
		$this->depth = 0;
		$this->childExpansionCache = [];
	}

	/**
	 * Create a new child frame
	 * $args is optionally a multi-root PPNode or array containing the template arguments
	 *
	 * Each argument node is split into numbered arguments (the <name> child
	 * carries an "index" attribute) and named arguments (the <name> child is
	 * expanded and trimmed to obtain the key). A later argument with the same
	 * key replaces the earlier one and triggers a duplicate-argument warning.
	 *
	 * @param bool|array|PPNode_DOM $args
	 * @param Title|bool $title Title of the child frame; false to reuse this frame's title
	 * @param int $indexOffset Subtracted from the index of every numbered argument
	 * @return PPTemplateFrame_DOM
	 */
	public function newChild( $args = false, $title = false, $indexOffset = 0 ) {
		$namedArgs = [];
		$numberedArgs = [];
		if ( $title === false ) {
			$title = $this->title;
		}
		if ( $args !== false ) {
			$xpath = false;
			if ( $args instanceof PPNode_DOM ) {
				$args = $args->node;
			}
			// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
			foreach ( $args as $arg ) {
				if ( $arg instanceof PPNode_DOM ) {
					$arg = $arg->node;
				}
				// Reuse the XPath object as long as the arguments share a document
				if ( !$xpath || $xpath->document !== $arg->ownerDocument ) {
					$xpath = new DOMXPath( $arg->ownerDocument );
				}

				$nameNode = $xpath->query( 'name', $arg )->item( 0 );
				$valueNode = $xpath->query( 'value', $arg )->item( 0 );
				if ( $nameNode->hasAttributes() ) {
					// Numbered parameter
					$index = $nameNode->attributes->getNamedItem( 'index' )->textContent;
					$index = $index - $indexOffset;
					if ( isset( $namedArgs[$index] ) || isset( $numberedArgs[$index] ) ) {
						$this->warnDuplicateArg( $title, $index );
					}
					$numberedArgs[$index] = $valueNode;
					unset( $namedArgs[$index] );
				} else {
					// Named parameter
					$name = trim( $this->expand( $nameNode, PPFrame::STRIP_COMMENTS ) );
					if ( isset( $namedArgs[$name] ) || isset( $numberedArgs[$name] ) ) {
						$this->warnDuplicateArg( $title, $name );
					}
					$namedArgs[$name] = $valueNode;
					unset( $numberedArgs[$name] );
				}
			}
		}
		return new PPTemplateFrame_DOM( $this->preprocessor, $this, $numberedArgs, $namedArgs, $title );
	}

	/**
	 * Record that a template argument was passed more than once: add the
	 * 'duplicate-args-warning' to the parser output and the
	 * 'duplicate-args-category' tracking category.
	 *
	 * @param Title|bool $title Title of the child frame being created
	 * @param string|int $argName Name or index of the duplicated argument
	 */
	private function warnDuplicateArg( $title, $argName ) {
		$this->parser->getOutput()->addWarning( wfMessage( 'duplicate-args-warning',
			wfEscapeWikiText( $this->title ),
			wfEscapeWikiText( $title ),
			wfEscapeWikiText( $argName ) )->text() );
		$this->parser->addTrackingCategory( 'duplicate-args-category' );
	}

	/**
	 * Expand $root without caching; $key is ignored in this implementation.
	 *
	 * @throws MWException
	 * @param string|int $key
	 * @param string|PPNode_DOM|DOMNode|DOMNodeList $root
	 * @param int $flags
	 * @return string
	 */
	public function cachedExpand( $key, $root, $flags = 0 ) {
		// we don't have a parent, so we don't have a cache
		return $this->expand( $root, $flags );
	}

	/**
	 * Expand a document tree node (or list of nodes) to a string.
	 *
	 * The tree is walked iteratively with three parallel stacks (iterators,
	 * output buffers and list indexes) instead of recursing for every child.
	 * Strings are returned unchanged. When the parser's node-count or
	 * expansion-depth limit is exceeded, a limitation warning is recorded and
	 * an HTML error span is returned instead of the expansion.
	 *
	 * @throws MWException If a node of an unsupported type is encountered
	 * @param string|PPNode_DOM|DOMNode|DOMNodeList $root
	 * @param int $flags Bitmask of PPFrame::* flags (NO_TEMPLATES, NO_ARGS,
	 *   STRIP_COMMENTS, RECOVER_COMMENTS, NO_IGNORE, NO_TAGS)
	 * @return string
	 */
	public function expand( $root, $flags = 0 ) {
		static $expansionDepth = 0;
		if ( is_string( $root ) ) {
			return $root;
		}

		if ( ++$this->parser->mPPNodeCount > $this->parser->mOptions->getMaxPPNodeCount() ) {
			$this->parser->limitationWarn( 'node-count-exceeded',
				$this->parser->mPPNodeCount,
				$this->parser->mOptions->getMaxPPNodeCount()
			);
			return '<span class="error">Node-count limit exceeded</span>';
		}

		if ( $expansionDepth > $this->parser->mOptions->getMaxPPExpandDepth() ) {
			$this->parser->limitationWarn( 'expansion-depth-exceeded',
				$expansionDepth,
				$this->parser->mOptions->getMaxPPExpandDepth()
			);
			return '<span class="error">Expansion depth limit exceeded</span>';
		}
		++$expansionDepth;
		try {
			if ( $expansionDepth > $this->parser->mHighestExpansionDepth ) {
				$this->parser->mHighestExpansionDepth = $expansionDepth;
			}

			if ( $root instanceof PPNode_DOM ) {
				$root = $root->node;
			}
			if ( $root instanceof DOMDocument ) {
				$root = $root->documentElement;
			}

			// Level 0 is a sentinel that only collects the final output;
			// the actual work starts at level 1 with $root.
			$outStack = [ '', '' ];
			$iteratorStack = [ false, $root ];
			$indexStack = [ 0, 0 ];

			while ( count( $iteratorStack ) > 1 ) {
				$level = count( $outStack ) - 1;
				$iteratorNode =& $iteratorStack[$level];
				$out =& $outStack[$level];
				$index =& $indexStack[$level];

				if ( $iteratorNode instanceof PPNode_DOM ) {
					$iteratorNode = $iteratorNode->node;
				}

				if ( is_array( $iteratorNode ) ) {
					if ( $index >= count( $iteratorNode ) ) {
						// All done with this iterator
						$iteratorStack[$level] = false;
						$contextNode = false;
					} else {
						$contextNode = $iteratorNode[$index];
						$index++;
					}
				} elseif ( $iteratorNode instanceof DOMNodeList ) {
					if ( $index >= $iteratorNode->length ) {
						// All done with this iterator
						$iteratorStack[$level] = false;
						$contextNode = false;
					} else {
						$contextNode = $iteratorNode->item( $index );
						$index++;
					}
				} else {
					// Copy to $contextNode and then delete from iterator stack,
					// because this is not an iterator but we do have to execute it once
					$contextNode = $iteratorStack[$level];
					$iteratorStack[$level] = false;
				}

				if ( $contextNode instanceof PPNode_DOM ) {
					$contextNode = $contextNode->node;
				}

				$newIterator = false;

				if ( $contextNode === false ) {
					// nothing to do
				} elseif ( is_string( $contextNode ) ) {
					$out .= $contextNode;
				} elseif ( is_array( $contextNode ) || $contextNode instanceof DOMNodeList ) {
					$newIterator = $contextNode;
				} elseif ( $contextNode instanceof DOMNode ) {
					if ( $contextNode->nodeType == XML_TEXT_NODE ) {
						$out .= $contextNode->nodeValue;
					} elseif ( $contextNode->nodeName == 'template' ) {
						# Double-brace expansion
						$xpath = new DOMXPath( $contextNode->ownerDocument );
						$titles = $xpath->query( 'title', $contextNode );
						$title = $titles->item( 0 );
						$parts = $xpath->query( 'part', $contextNode );
						if ( $flags & PPFrame::NO_TEMPLATES ) {
							$newIterator = $this->virtualBracketedImplode( '{{', '|', '}}', $title, $parts );
						} else {
							$lineStart = $contextNode->getAttribute( 'lineStart' );
							$params = [
								'title' => new PPNode_DOM( $title ),
								'parts' => new PPNode_DOM( $parts ),
								'lineStart' => $lineStart ];
							$ret = $this->parser->braceSubstitution( $params, $this );
							if ( isset( $ret['object'] ) ) {
								$newIterator = $ret['object'];
							} else {
								$out .= $ret['text'];
							}
						}
					} elseif ( $contextNode->nodeName == 'tplarg' ) {
						# Triple-brace expansion
						$xpath = new DOMXPath( $contextNode->ownerDocument );
						$titles = $xpath->query( 'title', $contextNode );
						$title = $titles->item( 0 );
						$parts = $xpath->query( 'part', $contextNode );
						if ( $flags & PPFrame::NO_ARGS ) {
							$newIterator = $this->virtualBracketedImplode( '{{{', '|', '}}}', $title, $parts );
						} else {
							$params = [
								'title' => new PPNode_DOM( $title ),
								'parts' => new PPNode_DOM( $parts ) ];
							$ret = $this->parser->argSubstitution( $params, $this );
							if ( isset( $ret['object'] ) ) {
								$newIterator = $ret['object'];
							} else {
								$out .= $ret['text'];
							}
						}
					} elseif ( $contextNode->nodeName == 'comment' ) {
						# HTML-style comment
						# Remove it in HTML, pre+remove and STRIP_COMMENTS modes
						# Not in RECOVER_COMMENTS mode (msgnw) though.
						if ( ( $this->parser->ot['html']
							|| ( $this->parser->ot['pre'] && $this->parser->mOptions->getRemoveComments() )
							|| ( $flags & PPFrame::STRIP_COMMENTS )
							) && !( $flags & PPFrame::RECOVER_COMMENTS )
						) {
							// The comment is dropped: nothing is appended to the output
						} elseif ( $this->parser->ot['wiki'] && !( $flags & PPFrame::RECOVER_COMMENTS ) ) {
							# Add a strip marker in PST mode so that pstPass2() can
							# run some old-fashioned regexes on the result.
							# Not in RECOVER_COMMENTS mode (extractSections) though.
							$out .= $this->parser->insertStripItem( $contextNode->textContent );
						} else {
							# Recover the literal comment in RECOVER_COMMENTS and pre+no-remove
							$out .= $contextNode->textContent;
						}
					} elseif ( $contextNode->nodeName == 'ignore' ) {
						# Output suppression used by <includeonly> etc.
						# OT_WIKI will only respect <ignore> in substed templates.
						# The other output types respect it unless NO_IGNORE is set.
						# extractSections() sets NO_IGNORE and so never respects it.
						if ( ( !isset( $this->parent ) && $this->parser->ot['wiki'] )
							|| ( $flags & PPFrame::NO_IGNORE )
						) {
							$out .= $contextNode->textContent;
						}
						// Otherwise the content is suppressed: nothing is appended
					} elseif ( $contextNode->nodeName == 'ext' ) {
						# Extension tag
						$xpath = new DOMXPath( $contextNode->ownerDocument );
						$names = $xpath->query( 'name', $contextNode );
						$attrs = $xpath->query( 'attr', $contextNode );
						$inners = $xpath->query( 'inner', $contextNode );
						$closes = $xpath->query( 'close', $contextNode );
						if ( $flags & PPFrame::NO_TAGS ) {
							// Rebuild the tag source text instead of running the extension
							$s = '<' . $this->expand( $names->item( 0 ), $flags );
							if ( $attrs->length > 0 ) {
								$s .= $this->expand( $attrs->item( 0 ), $flags );
							}
							if ( $inners->length > 0 ) {
								$s .= '>' . $this->expand( $inners->item( 0 ), $flags );
								if ( $closes->length > 0 ) {
									$s .= $this->expand( $closes->item( 0 ), $flags );
								}
							} else {
								$s .= '/>';
							}
							$out .= $s;
						} else {
							$params = [
								'name' => new PPNode_DOM( $names->item( 0 ) ),
								'attr' => $attrs->length > 0 ? new PPNode_DOM( $attrs->item( 0 ) ) : null,
								'inner' => $inners->length > 0 ? new PPNode_DOM( $inners->item( 0 ) ) : null,
								'close' => $closes->length > 0 ? new PPNode_DOM( $closes->item( 0 ) ) : null,
							];
							$out .= $this->parser->extensionSubstitution( $params, $this );
						}
					} elseif ( $contextNode->nodeName == 'h' ) {
						# Heading
						$s = $this->expand( $contextNode->childNodes, $flags );

						# Insert a heading marker only for <h> children of <root>
						# This is to stop extractSections from going over multiple tree levels
						if ( $contextNode->parentNode->nodeName == 'root' && $this->parser->ot['html'] ) {
							# Insert heading index marker
							$headingIndex = $contextNode->getAttribute( 'i' );
							$titleText = $this->title->getPrefixedDBkey();
							$this->parser->mHeadings[] = [ $titleText, $headingIndex ];
							$serial = count( $this->parser->mHeadings ) - 1;
							$marker = Parser::MARKER_PREFIX . "-h-$serial-" . Parser::MARKER_SUFFIX;
							// The marker is spliced in after the first "level" bytes of the heading text
							$count = $contextNode->getAttribute( 'level' );
							$s = substr( $s, 0, $count ) . $marker . substr( $s, $count );
							$this->parser->mStripState->addGeneral( $marker, '' );
						}
						$out .= $s;
					} else {
						# Generic recursive expansion
						$newIterator = $contextNode->childNodes;
					}
				} else {
					throw new MWException( __METHOD__ . ': Invalid parameter type' );
				}

				if ( $newIterator !== false ) {
					if ( $newIterator instanceof PPNode_DOM ) {
						$newIterator = $newIterator->node;
					}
					$outStack[] = '';
					$iteratorStack[] = $newIterator;
					$indexStack[] = 0;
				} elseif ( $iteratorStack[$level] === false ) {
					// Return accumulated value to parent
					// With tail recursion
					// NOTE(review): $out keeps referring to the buffer of the level where
					// unwinding started. This appears to rely on already-finished ancestors
					// having no output of their own; keep the statement order as is.
					while ( $iteratorStack[$level] === false && $level > 0 ) {
						$outStack[$level - 1] .= $out;
						array_pop( $outStack );
						array_pop( $iteratorStack );
						array_pop( $indexStack );
						$level--;
					}
				}
			}
			return $outStack[0];
		} finally {
			// Always undo the increment above, including when an exception
			// unwinds through here, so that the static counter does not stay
			// inflated for later expansions in the same process.
			--$expansionDepth;
		}
	}

	/**
	 * Expand every node in $args with the given flags and join the results
	 * with $sep. Arrays and DOMNodeLists contribute each of their items.
	 *
	 * @param string $sep
	 * @param int $flags
	 * @param string|PPNode_DOM|DOMNode ...$args
	 * @return string
	 */
	public function implodeWithFlags( $sep, $flags, ...$args ) {
		$first = true;
		$s = '';
		foreach ( $args as $root ) {
			if ( $root instanceof PPNode_DOM ) {
				$root = $root->node;
			}
			if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
				$root = [ $root ];
			}
			foreach ( $root as $node ) {
				if ( $first ) {
					$first = false;
				} else {
					$s .= $sep;
				}
				$s .= $this->expand( $node, $flags );
			}
		}
		return $s;
	}

	/**
	 * Implode with no flags specified
	 * This previously called implodeWithFlags but has now been inlined to reduce stack depth
	 *
	 * @param string $sep
	 * @param string|PPNode_DOM|DOMNode ...$args
	 * @return string
	 */
	public function implode( $sep, ...$args ) {
		$first = true;
		$s = '';
		foreach ( $args as $root ) {
			if ( $root instanceof PPNode_DOM ) {
				$root = $root->node;
			}
			if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
				$root = [ $root ];
			}
			foreach ( $root as $node ) {
				if ( $first ) {
					$first = false;
				} else {
					$s .= $sep;
				}
				$s .= $this->expand( $node );
			}
		}
		return $s;
	}

	/**
	 * Makes an object that, when expand()ed, will be the same as one obtained
	 * with implode()
	 *
	 * The result is a flat array of the (unexpanded) nodes with $sep inserted
	 * between consecutive nodes.
	 *
	 * @param string $sep
	 * @param string|PPNode_DOM|DOMNode ...$args
	 * @return array
	 * @suppress PhanParamSignatureMismatch
	 */
	public function virtualImplode( $sep, ...$args ) {
		$out = [];
		$first = true;

		foreach ( $args as $root ) {
			if ( $root instanceof PPNode_DOM ) {
				$root = $root->node;
			}
			if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
				$root = [ $root ];
			}
			foreach ( $root as $node ) {
				if ( $first ) {
					$first = false;
				} else {
					$out[] = $sep;
				}
				$out[] = $node;
			}
		}
		return $out;
	}

	/**
	 * Virtual implode with brackets
	 *
	 * Same as virtualImplode(), with $start prepended and $end appended to
	 * the resulting array.
	 *
	 * @param string $start
	 * @param string $sep
	 * @param string $end
	 * @param string|PPNode_DOM|DOMNode ...$args
	 * @return array
	 * @suppress PhanParamSignatureMismatch
	 */
	public function virtualBracketedImplode( $start, $sep, $end, ...$args ) {
		$out = [ $start ];
		$first = true;

		foreach ( $args as $root ) {
			if ( $root instanceof PPNode_DOM ) {
				$root = $root->node;
			}
			if ( !is_array( $root ) && !( $root instanceof DOMNodeList ) ) {
				$root = [ $root ];
			}
			foreach ( $root as $node ) {
				if ( $first ) {
					$first = false;
				} else {
					$out[] = $sep;
				}
				$out[] = $node;
			}
		}
		$out[] = $end;
		return $out;
	}

	/**
	 * @return string Fixed debugging representation of this frame
	 */
	public function __toString() {
		return 'frame{}';
	}

	/**
	 * Get a prefixed DB key.
	 *
	 * @param int|bool $level False for this frame's own title, otherwise an
	 *   index into the title cache
	 * @return string|bool The prefixed DB key, or false if the frame has no
	 *   title or nothing is cached for $level
	 */
	public function getPDBK( $level = false ) {
		if ( $level === false ) {
			// The constructor tolerates a parser without a title, so do the same here
			return $this->title ? $this->title->getPrefixedDBkey() : false;
		}
		return $this->titleCache[$level] ?? false;
	}

	/**
	 * @return array Always empty in this implementation.
	 */
	public function getArguments() {
		return [];
	}

	/**
	 * @return array Always empty in this implementation.
	 */
	public function getNumberedArguments() {
		return [];
	}

	/**
	 * @return array Always empty in this implementation.
	 */
	public function getNamedArguments() {
		return [];
	}

	/**
	 * Returns true if there are no arguments in this frame
	 *
	 * @return bool Always true in this implementation.
	 */
	public function isEmpty() {
		return true;
	}

	/**
	 * @param int|string $name
	 * @return bool Always false in this implementation.
	 */
	public function getArgument( $name ) {
		return false;
	}

	/**
	 * Returns true if the infinite loop check is OK, false if a loop is detected
	 *
	 * @param Title $title
	 * @return bool
	 */
	public function loopCheck( $title ) {
		return !isset( $this->loopCheckHash[$title->getPrefixedDBkey()] );
	}

	/**
	 * Return true if the frame is a template frame
	 *
	 * @return bool Always false in this implementation.
	 */
	public function isTemplate() {
		return false;
	}

	/**
	 * Get a title of frame
	 *
	 * @return Title
	 */
	public function getTitle() {
		return $this->title;
	}

	/**
	 * Set the volatile flag
	 *
	 * @param bool $flag
	 */
	public function setVolatile( $flag = true ) {
		$this->volatile = $flag;
	}

	/**
	 * Get the volatile flag
	 *
	 * @return bool
	 */
	public function isVolatile() {
		return $this->volatile;
	}

	/**
	 * Set the TTL
	 *
	 * The stored TTL is only ever lowered: a value is kept when no TTL has
	 * been set yet or when it is smaller than the current one. Null is ignored.
	 *
	 * @param int|null $ttl
	 */
	public function setTTL( $ttl ) {
		if ( $ttl !== null && ( $this->ttl === null || $ttl < $this->ttl ) ) {
			$this->ttl = $ttl;
		}
	}

	/**
	 * Get the TTL
	 *
	 * @return int|null
	 */
	public function getTTL() {
		return $this->ttl;
	}
}