Merge "Improve Doxygen template used by mwdocgen.php"
[lhc/web/wiklou.git] / includes / tidy / Balancer.php
1 <?php
2 /**
3 * An implementation of the tree building portion of the HTML5 parsing
4 * spec.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Parser
23 * @since 1.27
24 * @author C. Scott Ananian, 2016
25 */
26 namespace MediaWiki\Tidy;
27
28 use Wikimedia\Assert\Assert;
29 use Wikimedia\Assert\ParameterAssertionException;
30 use \ExplodeIterator;
31 use \IteratorAggregate;
32 use \ReverseArrayIterator;
33 use \Sanitizer;
34
35 // A note for future librarization[1] -- this file is a good candidate
36 // for splitting into an independent library, except that it is currently
37 // highly optimized for MediaWiki use. It only implements the portions
38 // of the HTML5 tree builder used by tags supported by MediaWiki, and
39 // does not contain a true tokenizer pass, instead relying on
40 // comment stripping, attribute normalization, and escaping done by
41 // the MediaWiki Sanitizer. It also deliberately avoids building
42 // a true DOM in memory, instead serializing elements to an output string
43 // as soon as possible (usually as soon as the tag is closed) to reduce
44 // its memory footprint.
45
46 // We've been gradually lifting some of these restrictions to handle
47 // non-sanitized output generated by extensions, but we shortcut the tokenizer
48 // for speed (primarily by splitting on `<`) and so rely on syntactic
49 // well-formedness.
50
51 // On the other hand, I've been pretty careful to note with comments in the
52 // code the places where this implementation omits features of the spec or
53 // depends on the MediaWiki Sanitizer. Perhaps in the future we'll want to
54 // implement the missing pieces and make this a standalone PHP HTML5 parser.
55 // In order to do so, some sort of MediaWiki-specific API will need
56 // to be added to (a) allow the Balancer to bypass the tokenizer,
57 // and (b) support on-the-fly flattening instead of DOM node creation.
58
59 // [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
60
/**
 * Constant tables used by the HTML5 tree building code in this file.
 *
 * Every "set" is an associative array keyed first by namespace URI and
 * then by lower-cased tag name; membership is expressed by the key being
 * present (the value is always true).
 *
 * @ingroup Parser
 * @since 1.27
 */
class BalanceSets {
	const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
	const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
	const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';

	/** @var array Set named "unsupported"; HTML namespace only. */
	public static $unsupportedSet = [
		self::HTML_NAMESPACE => [
			'html' => true,
			'head' => true,
			'body' => true,
			'frameset' => true,
			'frame' => true,
			'plaintext' => true,
			'isindex' => true,
			'xmp' => true,
			'iframe' => true,
			'noembed' => true,
			'noscript' => true,
			'script' => true,
			'title' => true
		]
	];

	/**
	 * @var array Elements serialized without children or an end tag
	 *  (see BalanceElement::__toString()).
	 */
	public static $emptyElementSet = [
		self::HTML_NAMESPACE => [
			'area' => true, 'base' => true,
			'basefont' => true, 'bgsound' => true,
			'br' => true, 'col' => true,
			'command' => true, 'embed' => true,
			'frame' => true, 'hr' => true,
			'img' => true, 'input' => true,
			'keygen' => true, 'link' => true,
			'meta' => true, 'param' => true,
			'source' => true, 'track' => true,
			'wbr' => true
		]
	];

	/**
	 * @var array Elements whose leading linefeed is doubled on
	 *  serialization (see BalanceElement::__toString()).
	 */
	public static $extraLinefeedSet = [
		self::HTML_NAMESPACE => [
			'pre' => true,
			'textarea' => true,
			'listing' => true,
		]
	];

	/** @var array The six HTML heading elements. */
	public static $headingSet = [
		self::HTML_NAMESPACE => [
			'h1' => true, 'h2' => true,
			'h3' => true, 'h4' => true,
			'h5' => true, 'h6' => true
		]
	];

	/** @var array Set named "special"; spans the HTML, SVG and MathML namespaces. */
	public static $specialSet = [
		self::HTML_NAMESPACE => [
			'address' => true, 'applet' => true, 'area' => true, 'article' => true,
			'aside' => true, 'base' => true, 'basefont' => true, 'bgsound' => true,
			'blockquote' => true, 'body' => true, 'br' => true, 'button' => true,
			'caption' => true, 'center' => true, 'col' => true, 'colgroup' => true,
			'dd' => true, 'details' => true, 'dir' => true, 'div' => true,
			'dl' => true, 'dt' => true, 'embed' => true, 'fieldset' => true,
			'figcaption' => true, 'figure' => true, 'footer' => true, 'form' => true,
			'frame' => true, 'frameset' => true, 'h1' => true, 'h2' => true,
			'h3' => true, 'h4' => true, 'h5' => true, 'h6' => true,
			'head' => true, 'header' => true, 'hgroup' => true, 'hr' => true,
			'html' => true, 'iframe' => true, 'img' => true, 'input' => true,
			'isindex' => true, 'li' => true, 'link' => true, 'listing' => true,
			'main' => true, 'marquee' => true, 'menu' => true, 'menuitem' => true,
			'meta' => true, 'nav' => true, 'noembed' => true, 'noframes' => true,
			'noscript' => true, 'object' => true, 'ol' => true, 'p' => true,
			'param' => true, 'plaintext' => true, 'pre' => true, 'script' => true,
			'section' => true, 'select' => true, 'source' => true, 'style' => true,
			'summary' => true, 'table' => true, 'tbody' => true, 'td' => true,
			'template' => true, 'textarea' => true, 'tfoot' => true, 'th' => true,
			'thead' => true, 'title' => true, 'tr' => true, 'track' => true,
			'ul' => true, 'wbr' => true, 'xmp' => true
		],
		self::SVG_NAMESPACE => [
			'foreignobject' => true,
			'desc' => true,
			'title' => true
		],
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true,
			'ms' => true, 'mtext' => true,
			'annotation-xml' => true
		]
	];

	/** @var array The address, div and p elements. */
	public static $addressDivPSet = [
		self::HTML_NAMESPACE => [
			'address' => true,
			'div' => true,
			'p' => true
		]
	];

	/**
	 * @var array Table, table section and table row elements; consulted
	 *  by BalanceStack when foster parent mode is enabled.
	 */
	public static $tableSectionRowSet = [
		self::HTML_NAMESPACE => [
			'table' => true,
			'thead' => true,
			'tbody' => true,
			'tfoot' => true,
			'tr' => true
		]
	];

	/**
	 * @var array Elements closed by BalanceStack::generateImpliedEndTags()
	 *  in its default (non-thorough) mode.
	 */
	public static $impliedEndTagsSet = [
		self::HTML_NAMESPACE => [
			'dd' => true, 'dt' => true,
			'li' => true, 'optgroup' => true,
			'option' => true, 'p' => true,
			'rb' => true, 'rp' => true,
			'rt' => true, 'rtc' => true
		]
	];

	/**
	 * @var array Elements closed by BalanceStack::generateImpliedEndTags()
	 *  in thorough mode.
	 */
	public static $thoroughImpliedEndTagsSet = [
		self::HTML_NAMESPACE => [
			'caption' => true, 'colgroup' => true,
			'dd' => true, 'dt' => true,
			'li' => true, 'optgroup' => true,
			'option' => true, 'p' => true,
			'rb' => true, 'rp' => true,
			'rt' => true, 'rtc' => true,
			'tbody' => true, 'td' => true,
			'tfoot' => true, 'th' => true,
			'thead' => true, 'tr' => true
		]
	];

	/** @var array The td and th elements. */
	public static $tableCellSet = [
		self::HTML_NAMESPACE => [
			'td' => true,
			'th' => true
		]
	];

	/** @var array Set named "table context". */
	public static $tableContextSet = [
		self::HTML_NAMESPACE => [
			'table' => true,
			'template' => true,
			'html' => true
		]
	];

	/** @var array Set named "table body context". */
	public static $tableBodyContextSet = [
		self::HTML_NAMESPACE => [
			'tbody' => true,
			'tfoot' => true,
			'thead' => true,
			'template' => true,
			'html' => true
		]
	];

	/** @var array Set named "table row context". */
	public static $tableRowContextSet = [
		self::HTML_NAMESPACE => [
			'tr' => true,
			'template' => true,
			'html' => true
		]
	];

	/**
	 * @var array Form-associated elements.
	 * @see https://html.spec.whatwg.org/multipage/forms.html#form-associated-element
	 */
	public static $formAssociatedSet = [
		self::HTML_NAMESPACE => [
			'button' => true, 'fieldset' => true,
			'input' => true, 'keygen' => true,
			'object' => true, 'output' => true,
			'select' => true, 'textarea' => true,
			'img' => true
		]
	];

	/**
	 * @var array Boundary elements for BalanceStack::inScope(); also the
	 *  base from which the button and list item scope sets are derived.
	 */
	public static $inScopeSet = [
		self::HTML_NAMESPACE => [
			'applet' => true, 'caption' => true,
			'html' => true, 'marquee' => true,
			'object' => true, 'table' => true,
			'td' => true, 'template' => true,
			'th' => true
		],
		self::SVG_NAMESPACE => [
			'foreignobject' => true,
			'desc' => true,
			'title' => true
		],
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true,
			'ms' => true, 'mtext' => true,
			'annotation-xml' => true
		]
	];

	/** @var array|null Cache for inListItemScopeSet(); null until first use. */
	private static $inListItemScopeSet = null;

	/**
	 * Return the list item scope set: a copy of $inScopeSet with the HTML
	 * 'ol' and 'ul' elements added. Built on first call and cached.
	 * @return array
	 */
	public static function inListItemScopeSet() {
		if ( self::$inListItemScopeSet !== null ) {
			return self::$inListItemScopeSet;
		}
		$set = self::$inScopeSet;
		$set[self::HTML_NAMESPACE]['ol'] = true;
		$set[self::HTML_NAMESPACE]['ul'] = true;
		self::$inListItemScopeSet = $set;
		return self::$inListItemScopeSet;
	}

	/** @var array|null Cache for inButtonScopeSet(); null until first use. */
	private static $inButtonScopeSet = null;

	/**
	 * Return the button scope set: a copy of $inScopeSet with the HTML
	 * 'button' element added. Built on first call and cached.
	 * @return array
	 */
	public static function inButtonScopeSet() {
		if ( self::$inButtonScopeSet !== null ) {
			return self::$inButtonScopeSet;
		}
		$set = self::$inScopeSet;
		$set[self::HTML_NAMESPACE]['button'] = true;
		self::$inButtonScopeSet = $set;
		return self::$inButtonScopeSet;
	}

	/** @var array Boundary elements for BalanceStack::inTableScope(). */
	public static $inTableScopeSet = [
		self::HTML_NAMESPACE => [
			'html' => true,
			'table' => true,
			'template' => true
		]
	];

	/**
	 * @var array Elements which do NOT terminate the search in
	 *  BalanceStack::inSelectScope(); everything else does.
	 */
	public static $inInvertedSelectScopeSet = [
		self::HTML_NAMESPACE => [
			'option' => true,
			'optgroup' => true
		]
	];

	/** @var array MathML text integration points. */
	public static $mathmlTextIntegrationPointSet = [
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true,
			'ms' => true, 'mtext' => true
		]
	];

	/** @var array SVG elements which are HTML integration points. */
	public static $htmlIntegrationPointSet = [
		self::SVG_NAMESPACE => [
			'foreignobject' => true, 'desc' => true, 'title' => true
		]
	];

	/**
	 * @var array For tidy compatibility: elements in which inserted text
	 *  is first wrapped in an 'mw:p-wrap' element
	 *  (see BalanceStack::insertText()).
	 */
	public static $tidyPWrapSet = [
		self::HTML_NAMESPACE => [
			'body' => true,
			'blockquote' => true,
			// We parse with <body> as the fragment context, but the top-level
			// element on the stack is actually <html>. We could use the
			// "adjusted current node" everywhere to work around this, but it's
			// easier just to add <html> to the p-wrap set.
			'html' => true,
		],
	];

	/**
	 * @var array For tidy compatibility: elements which may be inserted
	 *  without closing an open 'mw:p-wrap' element
	 *  (see BalanceStack::insertElement()).
	 */
	public static $tidyInlineSet = [
		self::HTML_NAMESPACE => [
			'a' => true, 'abbr' => true, 'acronym' => true,
			'applet' => true, 'b' => true, 'basefont' => true,
			'bdo' => true, 'big' => true, 'br' => true,
			'button' => true, 'cite' => true, 'code' => true,
			'dfn' => true, 'em' => true, 'font' => true,
			'i' => true, 'iframe' => true, 'img' => true,
			'input' => true, 'kbd' => true, 'label' => true,
			'legend' => true, 'map' => true, 'object' => true,
			'param' => true, 'q' => true, 'rb' => true,
			'rbc' => true, 'rp' => true, 'rt' => true,
			'rtc' => true, 'ruby' => true, 's' => true,
			'samp' => true, 'select' => true, 'small' => true,
			'span' => true, 'strike' => true, 'strong' => true,
			'sub' => true, 'sup' => true, 'textarea' => true,
			'tt' => true, 'u' => true, 'var' => true,
		],
	];
}
298
/**
 * A BalanceElement is a simplified version of a DOM Node. The main
 * difference is that we only keep BalanceElements around for nodes
 * currently on the BalanceStack of open elements. As soon as an
 * element is closed, with some minor exceptions relating to the
 * tree builder "adoption agency algorithm", the element and all its
 * children are serialized to a string using the flatten() method.
 * This keeps our memory usage low.
 *
 * @ingroup Parser
 * @since 1.27
 */
class BalanceElement {
	/**
	 * The namespace of the element.
	 * @var string $namespaceURI
	 */
	public $namespaceURI;
	/**
	 * The lower-cased name of the element.
	 * @var string $localName
	 */
	public $localName;
	/**
	 * Attributes for the element, in array form
	 * @var array $attribs
	 */
	public $attribs;

	/**
	 * Parent of this element (a BalanceElement), null if it has no parent,
	 * or the string "flat" if this element has already been flattened
	 * into its parent.
	 * @var BalanceElement|string|null $parent
	 */
	public $parent;

	/**
	 * An array of children of this element. Typically only the last
	 * child will be an actual BalanceElement object; the rest will
	 * be strings, representing either text nodes or flattened
	 * BalanceElement objects.
	 * @var array $children
	 */
	public $children;

	/**
	 * A unique string identifier for Noah's Ark purposes, lazy initialized
	 * @var string|null $noahKey
	 */
	private $noahKey;

	/**
	 * The next active formatting element in the list, or null if this is the
	 * end of the AFE list or if the element is not in the AFE list.
	 */
	public $nextAFE;

	/**
	 * The previous active formatting element in the list, or null if this is
	 * the start of the list or if the element is not in the AFE list.
	 */
	public $prevAFE;

	/**
	 * The next element in the Noah's Ark species bucket.
	 */
	public $nextNoah;

	/**
	 * Set to the empty string by the constructor and not read anywhere in
	 * this class. Declared explicitly (and last, so property order is the
	 * same as before) because assigning an undeclared property creates a
	 * dynamic property, which is deprecated as of PHP 8.2.
	 * NOTE(review): possibly unused; confirm no external readers before
	 * removing it.
	 * @var string $contents
	 */
	public $contents;

	/**
	 * Make a new BalanceElement corresponding to the HTML DOM Element
	 * with the given localname, namespace, and attributes.
	 *
	 * @param string $namespaceURI The namespace of the element.
	 * @param string $localName The lowercased name of the tag.
	 * @param array $attribs Attributes of the element
	 */
	public function __construct( $namespaceURI, $localName, array $attribs ) {
		$this->localName = $localName;
		$this->namespaceURI = $namespaceURI;
		$this->attribs = $attribs;
		$this->contents = '';
		$this->parent = null;
		$this->children = [];
	}

	/**
	 * Remove the given child from this element and clear its parent link.
	 * @param BalanceElement $elt
	 */
	private function removeChild( BalanceElement $elt ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't removeChild after flattening $this"
		);
		Assert::parameter(
			$elt->parent === $this, 'elt', 'must have $this as a parent'
		);
		$idx = array_search( $elt, $this->children, true );
		Assert::parameter( $idx !== false, '$elt', 'must be a child of $this' );
		$elt->parent = null;
		array_splice( $this->children, $idx, 1 );
	}

	/**
	 * Find $a in the list of children and insert $b before it.
	 * If $b is an element which already has a parent, it is removed from
	 * that parent first.
	 * @param BalanceElement $a
	 * @param BalanceElement|string $b
	 */
	public function insertBefore( BalanceElement $a, $b ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't insertBefore after flattening."
		);
		$idx = array_search( $a, $this->children, true );
		Assert::parameter( $idx !== false, '$a', 'must be a child of $this' );
		if ( !is_string( $b ) ) {
			Assert::parameter( $b->parent !== 'flat', '$b', "Can't be flat" );
			$oldParent = $b->parent;
			if ( $oldParent !== null ) {
				$oldParent->removeChild( $b );
				if ( $oldParent === $this && $b !== $a ) {
					// $b was a sibling of $a; removing it may have shifted
					// $a one slot to the left, so the index computed above
					// can be stale. Look $a up again.
					$idx = array_search( $a, $this->children, true );
				}
			}
			$b->parent = $this;
		}
		array_splice( $this->children, $idx, 0, [ $b ] );
	}

	/**
	 * Append $elt to the end of the list of children.
	 * If $elt is an element which already has a parent, it is removed from
	 * that parent first.
	 * @param BalanceElement|string $elt
	 */
	public function appendChild( $elt ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't appendChild after flattening."
		);
		if ( is_string( $elt ) ) {
			array_push( $this->children, $elt );
			return;
		}
		// Remove $elt from parent, if it had one.
		if ( $elt->parent !== null ) {
			$elt->parent->removeChild( $elt );
		}
		array_push( $this->children, $elt );
		$elt->parent = $this;
	}

	/**
	 * Transfer all of the children of $elt to $this, leaving $elt with
	 * no children.
	 * @param BalanceElement $elt
	 */
	public function adoptChildren( BalanceElement $elt ) {
		Assert::precondition(
			$elt->parent !== 'flat', "Can't adoptChildren after flattening."
		);
		foreach ( $elt->children as $child ) {
			if ( !is_string( $child ) ) {
				// This is an optimization which avoids an O(n^2) set of
				// array_splice operations.
				$child->parent = null;
			}
			$this->appendChild( $child );
		}
		$elt->children = [];
	}

	/**
	 * Flatten this node and all of its children into a string, as specified
	 * by the HTML serialization specification, and replace this node
	 * in its parent by that string.
	 *
	 * When $config['tidyCompat'] is set, element children are flattened
	 * first, an 'mw:p-wrap' element is renamed to 'p' (and serialized as
	 * the empty string if it contains only whitespace), and an
	 * attribute-less 'tr' or 'li' containing only whitespace is given the
	 * class 'mw-empty-elt'.
	 *
	 * @param array $config Balancer configuration; see Balancer::__construct().
	 * @return string The flattened form which replaced this node in its parent.
	 *
	 * @see __toString()
	 */
	public function flatten( array $config ) {
		Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
		Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
		$idx = array_search( $this, $this->parent->children, true );
		Assert::parameter(
			$idx !== false, '$this', 'must be a child of its parent'
		);
		$tidyCompat = $config['tidyCompat'];
		if ( $tidyCompat ) {
			// $blank stays true while every child seen so far consists
			// solely of HTML whitespace.
			$blank = true;
			foreach ( $this->children as $elt ) {
				if ( !is_string( $elt ) ) {
					$elt = $elt->flatten( $config );
				}
				if ( $blank && preg_match( '/[^\t\n\f\r ]/', $elt ) ) {
					$blank = false;
				}
			}
			if ( $this->isHtmlNamed( 'mw:p-wrap' ) ) {
				$this->localName = 'p';
			} elseif ( $blank ) {
				// Add 'mw-empty-elt' class so elements can be hidden via CSS
				// for compatibility with legacy tidy.
				if ( !count( $this->attribs ) &&
					( $this->localName === 'tr' || $this->localName === 'li' )
				) {
					$this->attribs = [ 'class' => "mw-empty-elt" ];
				}
				// Only a blank p-wrap is dropped; other blank elements
				// are still serialized.
				$blank = false;
			}
			$flat = $blank ? '' : "{$this}";
		} else {
			$flat = "{$this}";
		}
		$this->parent->children[$idx] = $flat;
		$this->parent = 'flat'; // for assertion checking
		return $flat;
	}

	/**
	 * Serialize this node and all of its children to a string, as specified
	 * by the HTML serialization specification.
	 *
	 * @return string The serialization of the BalanceElement
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#serialising-html-fragments
	 */
	public function __toString() {
		$encAttribs = '';
		foreach ( $this->attribs as $name => $value ) {
			$encValue = Sanitizer::encodeAttribute( $value );
			$encAttribs .= " $name=\"$encValue\"";
		}
		if ( !$this->isA( BalanceSets::$emptyElementSet ) ) {
			$out = "<{$this->localName}{$encAttribs}>";
			// Offset of the first byte following the start tag.
			$len = strlen( $out );
			// flatten children
			foreach ( $this->children as $elt ) {
				$out .= "{$elt}";
			}
			$out .= "</{$this->localName}>";
			if (
				$this->isA( BalanceSets::$extraLinefeedSet ) &&
				$out[$len] === "\n"
			) {
				// Double the linefeed after pre/listing/textarea
				// according to the HTML5 fragment serialization algorithm.
				$out = substr( $out, 0, $len + 1 ) .
					substr( $out, $len );
			}
		} else {
			$out = "<{$this->localName}{$encAttribs} />";
			Assert::invariant(
				count( $this->children ) === 0,
				"Empty elements shouldn't have children."
			);
		}
		return $out;
	}

	// Utility functions on BalanceElements.

	/**
	 * Determine if $this represents a specific HTML tag, is a member of
	 * a tag set, or is equal to another BalanceElement.
	 *
	 * @param BalanceElement|array|string $set The target BalanceElement,
	 *   set (from the BalanceSets class), or string (HTML tag name).
	 * @return bool
	 */
	public function isA( $set ) {
		if ( $set instanceof BalanceElement ) {
			return $this === $set;
		} elseif ( is_array( $set ) ) {
			return isset( $set[$this->namespaceURI] ) &&
				isset( $set[$this->namespaceURI][$this->localName] );
		} else {
			// assume this is an HTML element name.
			return $this->isHtml() && $this->localName === $set;
		}
	}

	/**
	 * Determine if this element is an HTML element with the specified name
	 * @param string $tagName
	 * @return bool
	 */
	public function isHtmlNamed( $tagName ) {
		return $this->namespaceURI === BalanceSets::HTML_NAMESPACE
			&& $this->localName === $tagName;
	}

	/**
	 * Determine if $this represents an element in the HTML namespace.
	 *
	 * @return bool
	 */
	public function isHtml() {
		return $this->namespaceURI === BalanceSets::HTML_NAMESPACE;
	}

	/**
	 * Determine if $this represents a MathML text integration point,
	 * as defined in the HTML5 specification.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#mathml-text-integration-point
	 */
	public function isMathmlTextIntegrationPoint() {
		return $this->isA( BalanceSets::$mathmlTextIntegrationPointSet );
	}

	/**
	 * Determine if $this represents an HTML integration point,
	 * as defined in the HTML5 specification.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#html-integration-point
	 */
	public function isHtmlIntegrationPoint() {
		if ( $this->isA( BalanceSets::$htmlIntegrationPointSet ) ) {
			return true;
		}
		// A MathML annotation-xml element qualifies when its encoding
		// attribute names an HTML media type (compared case-insensitively).
		if (
			$this->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
			$this->localName === 'annotation-xml' &&
			isset( $this->attribs['encoding'] ) &&
			( strcasecmp( $this->attribs['encoding'], 'text/html' ) === 0 ||
			strcasecmp( $this->attribs['encoding'], 'application/xhtml+xml' ) === 0 )
		) {
			return true;
		}
		return false;
	}

	/**
	 * Get a string key for the Noah's Ark algorithm. The key is built from
	 * the namespace, the local name and the attributes sorted by name, so
	 * attribute order does not affect it. The key is computed once and
	 * cached.
	 *
	 * @return string
	 */
	public function getNoahKey() {
		if ( $this->noahKey === null ) {
			$attribs = $this->attribs;
			ksort( $attribs );
			$this->noahKey = serialize( [ $this->namespaceURI, $this->localName, $attribs ] );
		}
		return $this->noahKey;
	}
}
637
638 /**
639 * The "stack of open elements" as defined in the HTML5 tree builder
640 * spec. This contains methods to ensure that content (start tags, text)
641 * are inserted at the correct place in the output string, and to
642 * flatten BalanceElements as they are closed to avoid holding onto
643 * a complete DOM tree for the document in memory.
644 *
645 * The stack defines a PHP iterator to traverse it in "reverse order",
646 * that is, the most-recently-added element is visited first in a
647 * foreach loop.
648 *
649 * @ingroup Parser
650 * @since 1.27
651 * @see https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
652 */
653 class BalanceStack implements IteratorAggregate {
/**
 * Backing storage for the stack. Index 0 holds the root element;
 * the last entry is the current node.
 * @var BalanceElement[] $elements
 */
private $elements = [];
/**
 * Foster parent mode determines how nodes are inserted into the
 * stack.
 * @var bool $fosterParentMode
 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
 */
public $fosterParentMode = false;
/**
 * Configuration options governing flattening.
 * @var array $config
 * @see Balancer::__construct()
 */
private $config;
/**
 * Reference to the current element: the most recently pushed element
 * still on the stack, or null once the stack has been emptied.
 * @var BalanceElement|null $currentNode
 */
public $currentNode;

/**
 * Create a new BalanceStack with a single BalanceElement on it,
 * representing the root &lt;html&gt; node.
 * @param array $config Balancer configuration; see Balancer::__construct().
 */
public function __construct( array $config ) {
	$this->config = $config;
	// The stack always starts out holding a root <html> element.
	$root = new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', [] );
	$this->elements[] = $root;
	$this->currentNode = $root;
}
691
/**
 * Return a string representing the output of the tree builder:
 * the concatenation of all the children of the root &lt;html&gt; node.
 * Children which are still elements are flattened along the way.
 * @return string
 */
public function getOutput() {
	// The enclosing '<html>....</html>' is deliberately left out.
	$pieces = [];
	foreach ( $this->elements[0]->children as $child ) {
		if ( is_string( $child ) ) {
			$pieces[] = $child;
		} else {
			$pieces[] = $child->flatten( $this->config );
		}
	}
	return implode( '', $pieces );
}
706
/**
 * Insert a comment at the appropriate place for inserting a node.
 * The comment is wrapped in its delimiters and handed to insertText()
 * with the comment flag set.
 * @param string $value Content of the comment.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-comment
 */
public function insertComment( $value ) {
	// Comments are treated as text nodes, except that they are flagged
	// so that tidy p-wrapping is not triggered by them.
	$comment = '<!--' . $value . '-->';
	return $this->insertText( $comment, true );
}
716
/**
 * Insert text at the appropriate place for inserting a node.
 *
 * Text is foster-parented when foster parent mode is on and the current
 * node is a table, table section or row. Otherwise, in tidy-compat mode,
 * non-comment text arriving in a p-wrap context first opens an
 * 'mw:p-wrap' element and is then inserted into it. In all other cases
 * the text is appended to the current node.
 *
 * @param string $value
 * @param bool $isComment True if $value is a serialized comment, which
 *   never triggers tidy p-wrapping.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
 */
public function insertText( $value, $isComment = false ) {
	$node = $this->currentNode;
	if (
		$this->fosterParentMode &&
		$node->isA( BalanceSets::$tableSectionRowSet )
	) {
		$this->fosterParent( $value );
		return;
	}
	$needsPWrap = $this->config['tidyCompat'] && !$isComment &&
		$node->isA( BalanceSets::$tidyPWrapSet );
	if ( !$needsPWrap ) {
		$node->appendChild( $value );
		return;
	}
	// Open the wrapper, then retry: the wrapper is now the current node.
	$this->insertHTMLElement( 'mw:p-wrap', [] );
	return $this->insertText( $value );
}
738
/**
 * Create a BalanceElement in the given namespace and insert it at the
 * appropriate place, pushing it on to the open elements stack.
 * @param string $namespaceURI The element namespace
 * @param string $tag The tag name
 * @param array $attribs Attributes of the element, in array form. (This
 *   is passed straight to BalanceElement::__construct(), which only
 *   accepts an array.)
 * @return BalanceElement
 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-foreign-element
 */
public function insertForeignElement( $namespaceURI, $tag, $attribs ) {
	return $this->insertElement(
		new BalanceElement( $namespaceURI, $tag, $attribs )
	);
}
753
/**
 * Insert an HTML element at the appropriate place, pushing it on to
 * the open elements stack. Shorthand for insertForeignElement() with
 * the HTML namespace.
 * @param string $tag The tag name
 * @param array $attribs Attributes of the element, in array form. (The
 *   value ends up in BalanceElement::__construct(), which only accepts
 *   an array.)
 * @return BalanceElement
 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-an-html-element
 */
public function insertHTMLElement( $tag, $attribs ) {
	return $this->insertForeignElement(
		BalanceSets::HTML_NAMESPACE, $tag, $attribs
	);
}
767
/**
 * Insert an element at the appropriate place and push it on to the
 * open elements stack, making it the current node.
 * @param BalanceElement $elt
 * @return BalanceElement The element now on top of the stack (the value
 *   returned by fosterParent() when the element was foster-parented).
 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
 */
public function insertElement( BalanceElement $elt ) {
	// Tidy compatibility: anything outside the tidy inline set closes
	// an open p-wrap before being inserted.
	$insidePWrap = $this->currentNode->isHtmlNamed( 'mw:p-wrap' );
	if ( $insidePWrap && !$elt->isA( BalanceSets::$tidyInlineSet ) ) {
		$this->pop();
	}
	$fostering = $this->fosterParentMode &&
		$this->currentNode->isA( BalanceSets::$tableSectionRowSet );
	if ( $fostering ) {
		$elt = $this->fosterParent( $elt );
	} else {
		$this->currentNode->appendChild( $elt );
	}
	Assert::invariant( $elt->parent !== null, "$elt must be in tree" );
	Assert::invariant( $elt->parent !== 'flat', "$elt must not have been previous flattened" );
	$this->elements[] = $elt;
	$this->currentNode = $elt;
	return $elt;
}
797
/**
 * Determine if the stack has $tag in scope, using
 * BalanceSets::$inScopeSet as the scope boundary.
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-scope
 */
public function inScope( $tag ) {
	$boundary = BalanceSets::$inScopeSet;
	return $this->inSpecificScope( $tag, $boundary );
}
807
/**
 * Determine if the stack has $tag in button scope, using
 * BalanceSets::inButtonScopeSet() as the scope boundary.
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-button-scope
 */
public function inButtonScope( $tag ) {
	$boundary = BalanceSets::inButtonScopeSet();
	return $this->inSpecificScope( $tag, $boundary );
}
817
/**
 * Determine if the stack has $tag in list item scope, using
 * BalanceSets::inListItemScopeSet() as the scope boundary.
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-list-item-scope
 */
public function inListItemScope( $tag ) {
	$boundary = BalanceSets::inListItemScopeSet();
	return $this->inSpecificScope( $tag, $boundary );
}
827
/**
 * Determine if the stack has $tag in table scope, using
 * BalanceSets::$inTableScopeSet as the scope boundary.
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-table-scope
 */
public function inTableScope( $tag ) {
	$boundary = BalanceSets::$inTableScopeSet;
	return $this->inSpecificScope( $tag, $boundary );
}
837
/**
 * Determine if the stack has $tag in select scope.
 *
 * The stack is walked from the current node towards the root; the
 * search succeeds at the first element matching $tag and fails at the
 * first element which is neither a match nor a member of
 * BalanceSets::$inInvertedSelectScopeSet.
 *
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-select-scope
 */
public function inSelectScope( $tag ) {
	// inSpecificScope() cannot express this, because here the listed
	// tags are the ones that do NOT end the search.
	$transparent = BalanceSets::$inInvertedSelectScopeSet;
	foreach ( $this as $node ) {
		if ( $node->isA( $tag ) ) {
			return true;
		}
		if ( $node->isA( $transparent ) ) {
			continue;
		}
		return false;
	}
	return false;
}
857
/**
 * Determine if the stack has $tag in a specific scope, $set.
 *
 * The stack is walked from the current node towards the root; the
 * search succeeds at the first element matching $tag and fails at the
 * first non-matching element which is a member of $set.
 *
 * @param BalanceElement|array|string $tag
 * @param BalanceElement|array|string $set
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-the-specific-scope
 */
public function inSpecificScope( $tag, $set ) {
	foreach ( $this as $node ) {
		$found = $node->isA( $tag );
		// The match test takes precedence over the boundary test.
		if ( $found || $node->isA( $set ) ) {
			return $found;
		}
	}
	return false;
}
876
/**
 * Generate implied end tags: pop the current node for as long as it
 * belongs to the implied end tag set and is not the excluded tag.
 * @param string|null $butnot HTML tag name at which to stop, or null
 *   for no exclusion.
 * @param bool $thorough True if we should generate end tags thoroughly.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#generate-implied-end-tags
 */
public function generateImpliedEndTags( $butnot = null, $thorough = false ) {
	if ( $thorough ) {
		$endTagSet = BalanceSets::$thoroughImpliedEndTagsSet;
	} else {
		$endTagSet = BalanceSets::$impliedEndTagsSet;
	}
	while ( $this->currentNode ) {
		$node = $this->currentNode;
		$excluded = $butnot !== null && $node->isHtmlNamed( $butnot );
		if ( $excluded || !$node->isA( $endTagSet ) ) {
			return;
		}
		$this->pop();
	}
}
897
/**
 * Return the adjusted current node: $fragmentContext when one is given
 * and only a single element is on the stack, otherwise the current
 * node.
 * @param mixed $fragmentContext The fragment context, or a falsy value
 *   if there is none.
 * @return mixed $fragmentContext or the current node.
 */
public function adjustedCurrentNode( $fragmentContext ) {
	if ( $fragmentContext && count( $this->elements ) === 1 ) {
		return $fragmentContext;
	}
	return $this->currentNode;
}
905
/**
 * Return an iterator over this stack which visits the current node
 * first, and the root node last.
 * @return ReverseArrayIterator
 */
public function getIterator() {
	$topDown = new ReverseArrayIterator( $this->elements );
	return $topDown;
}
914
/**
 * Return the BalanceElement at the given position $idx, where
 * position 0 represents the root element.
 * @param int $idx
 * @return BalanceElement
 */
public function node( $idx ) {
	$element = $this->elements[ $idx ];
	return $element;
}
924
/**
 * Replace the element at position $idx in the BalanceStack with $elt.
 * Neither the element being replaced nor its replacement may already
 * have been flattened. If $idx is the top of the stack, $elt also
 * becomes the current node.
 * @param int $idx
 * @param BalanceElement $elt
 */
public function replaceAt( $idx, BalanceElement $elt ) {
	$outgoing = $this->elements[$idx];
	Assert::precondition(
		$outgoing->parent !== 'flat',
		'Replaced element should not have already been flattened.'
	);
	Assert::precondition(
		$elt->parent !== 'flat',
		'New element should not have already been flattened.'
	);
	$this->elements[$idx] = $elt;
	$top = count( $this->elements ) - 1;
	if ( $idx === $top ) {
		$this->currentNode = $elt;
	}
}
944
/**
 * Return the position of the given BalanceElement, set, or
 * HTML tag name string in the BalanceStack. The search starts at the
 * current node, so the match closest to the top of the stack wins.
 * @param BalanceElement|array|string $tag
 * @return int The position of the match, or -1 if there is none.
 */
public function indexOf( $tag ) {
	$i = count( $this->elements );
	while ( $i-- > 0 ) {
		if ( $this->elements[$i]->isA( $tag ) ) {
			return $i;
		}
	}
	return -1;
}
959
/**
 * Return the number of elements currently in the BalanceStack.
 * @return int
 */
public function length() {
	$size = count( $this->elements );
	return $size;
}
967
/**
 * Take the current node off the BalanceStack and flatten it. The
 * current node becomes the new top of the stack, or null when the
 * stack is left empty. Elements named 'mw:p-wrap' are popped but not
 * flattened here.
 */
public function pop() {
	$popped = array_pop( $this->elements );
	$remaining = count( $this->elements );
	$this->currentNode = $remaining ? $this->elements[$remaining - 1] : null;
	if ( $popped->isHtmlNamed( 'mw:p-wrap' ) ) {
		return;
	}
	$popped->flatten( $this->config );
}
983
/**
 * Pop (and thereby flatten) every node from the top of the stack down
 * to and including the one at position $idx.
 * @param int $idx
 */
public function popTo( $idx ) {
	$remaining = count( $this->elements );
	while ( $remaining > $idx ) {
		$this->pop();
		$remaining--;
	}
}
994
/**
 * Pop elements off the stack until the first element with the given
 * HTML tag name (or matching the given set) has itself been popped.
 * If nothing matches, the whole stack is popped.
 * @param BalanceElement|array|string $tag
 */
public function popTag( $tag ) {
	while ( $this->currentNode ) {
		// Test before popping: pop() changes the current node.
		$isMatch = $this->currentNode->isA( $tag );
		$this->pop();
		if ( $isMatch ) {
			return;
		}
	}
}
1010
/**
 * Pop elements off the stack until the current node is a member of
 * $set; the matching element itself is left on the stack.
 * @param BalanceElement|array|string $set
 */
public function clearToContext( $set ) {
	// Stop at depth 1 rather than 0: the <html> element is never popped.
	$depth = count( $this->elements );
	while ( $depth > 1 && !$this->currentNode->isA( $set ) ) {
		$this->pop();
		$depth--;
	}
}
1025
/**
 * Remove the given $elt from the BalanceStack, optionally
 * flattening it in the process.
 *
 * When $elt was the top of the stack, the current node is updated to
 * the new top, or to null if the stack is left empty (the same
 * convention pop() uses).
 *
 * @param BalanceElement $elt The element to remove. Neither it nor its
 *   parent may already have been flattened, and it must be on the stack.
 * @param bool $flatten Whether to flatten the removed element.
 */
public function removeElement( BalanceElement $elt, $flatten = true ) {
	Assert::parameter(
		$elt->parent !== 'flat',
		'$elt',
		'$elt should not already have been flattened.'
	);
	Assert::parameter(
		$elt->parent->parent !== 'flat',
		'$elt',
		'The parent of $elt should not already have been flattened.'
	);
	$idx = array_search( $elt, $this->elements, true );
	Assert::parameter( $idx !== false, '$elt', 'must be in stack' );
	array_splice( $this->elements, $idx, 1 );
	$remaining = count( $this->elements );
	if ( $idx === $remaining ) {
		// We removed the top of the stack. Guard against an empty stack
		// instead of reading the non-existent index -1.
		$this->currentNode = $remaining > 0 ?
			$this->elements[$remaining - 1] : null;
	}
	if ( $flatten ) {
		// serialize $elt into its parent
		// otherwise, it will eventually serialize when the parent
		// is serialized, we just hold onto the memory for its
		// tree of objects a little longer.
		$elt->flatten( $this->config );
	}
	Assert::postcondition(
		array_search( $elt, $this->elements, true ) === false,
		'$elt should no longer be in open elements stack'
	);
}
1061
/**
 * Find $a in the BalanceStack and insert $b immediately after it
 * (that is, one position closer to the current node).
 *
 * @param BalanceElement $a Element already on the stack; the parameter
 *   assertion fails if it cannot be found.
 * @param BalanceElement $b Element to insert.
 */
public function insertAfter( BalanceElement $a, BalanceElement $b ) {
	$idx = $this->indexOf( $a );
	// indexOf() signals "not found" with -1, never with false.
	Assert::parameter( $idx >= 0, '$a', 'must be in stack' );
	if ( $idx === count( $this->elements ) - 1 ) {
		// $a is the current node, so $b becomes the new current node.
		array_push( $this->elements, $b );
		$this->currentNode = $b;
	} else {
		array_splice( $this->elements, $idx + 1, 0, [ $b ] );
	}
}
1077
1078 // Fostering and adoption.
1079
/**
 * Foster parent the given $elt in the stack of open elements.
 *
 * The foster parent is chosen from the last `template` or `table` on
 * the stack (falling back to the root element). When a table is used,
 * $elt is inserted into the table's parent just before the table.
 *
 * @param BalanceElement|string $elt Element or text to foster.
 * @return BalanceElement|string $elt itself, or the pre-existing
 *   'mw:p-wrap' element that was re-used in its place.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
 */
private function fosterParent( $elt ) {
	$tableIdx = $this->indexOf( 'table' );
	$templateIdx = $this->indexOf( 'template' );
	$insertionPoint = null;

	$templateWins = $templateIdx >= 0 &&
		( $tableIdx < 0 || $templateIdx > $tableIdx );
	if ( $templateWins ) {
		$fosterParent = $this->elements[$templateIdx];
	} elseif ( $tableIdx >= 0 ) {
		$fosterParent = $this->elements[$tableIdx]->parent;
		// Assume all tables have parents, since we're not running scripts!
		Assert::invariant(
			$fosterParent !== null, "All tables should have parents"
		);
		$insertionPoint = $this->elements[$tableIdx];
	} else {
		// No table or template: fall back to the `html` element.
		$fosterParent = $this->elements[0];
	}

	if ( $this->config['tidyCompat'] ) {
		if ( is_string( $elt ) ) {
			// Fostered text may need to be wrapped in a p-wrapper.
			if ( $fosterParent->isA( BalanceSets::$tidyPWrapSet ) ) {
				$this->insertHTMLElement( 'mw:p-wrap', [] );
				$this->insertText( $elt );
				return $elt;
			}
		} elseif ( $elt->isHtmlNamed( 'mw:p-wrap' ) ) {
			// Fostering a p-wrapper: if the sibling it would follow is
			// already a p-wrapper, hand that one back instead.
			if ( $insertionPoint ) {
				$pos = array_search(
					$insertionPoint, $fosterParent->children, true
				);
			} else {
				$pos = count( $fosterParent->children );
			}
			$previous = $pos > 0 ? $fosterParent->children[$pos - 1] : '';
			if (
				$previous instanceof BalanceElement &&
				$previous->isHtmlNamed( 'mw:p-wrap' )
			) {
				return $previous;
			}
		}
	}

	if ( $insertionPoint ) {
		$fosterParent->insertBefore( $insertionPoint, $elt );
	} else {
		$fosterParent->appendChild( $elt );
	}
	return $elt;
}
1136
/**
 * Run the "adoption agency algorithm" (AAA) for the given subject
 * tag name.
 * @param string $tag The subject tag name.
 * @param BalanceActiveFormattingElements $afe The current
 *   active formatting elements list.
 * @return bool True if the adoption agency algorithm "did something",
 *   false if more processing is required by the caller.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
 */
public function adoptionAgency( $tag, $afe ) {
	// Shortcut from the spec: the current node is the subject element
	// and it is not an active formatting element -- just pop it.
	$current = $this->currentNode;
	if ( $current->isHtmlNamed( $tag ) && !$afe->isInList( $current ) ) {
		$this->pop();
		return true; // no more handling required
	}

	// Outer loop: the spec aborts after eight iterations.
	for ( $outerCount = 0; $outerCount < 8; $outerCount++ ) {
		// The formatting element is the last entry in the active
		// formatting list, after the last marker (if any), whose tag
		// name is the subject.
		$formattingElt = $afe->findElementByTag( $tag );
		if ( !$formattingElt ) {
			// None: the caller must fall back to the "any other end
			// tag" handling.
			return false;
		}

		// Parse error: the formatting element is not an open element.
		// Drop it from the list and stop.
		$formattingIdx = $this->indexOf( $formattingElt );
		if ( $formattingIdx < 0 ) {
			$afe->remove( $formattingElt );
			return true; // true means no more handling required
		}

		// Parse error: open but not in scope. Ignore the token.
		if ( !$this->inScope( $formattingElt ) ) {
			return true;
		}

		// The furthest block is the first element in the special
		// category found below the formatting element in the stack.
		// There might not be one.
		$furthestBlock = null;
		$furthestBlockIdx = -1;
		$depth = $this->length();
		for ( $i = $formattingIdx + 1; $i < $depth; $i++ ) {
			$candidate = $this->node( $i );
			if ( $candidate->isA( BalanceSets::$specialSet ) ) {
				$furthestBlock = $candidate;
				$furthestBlockIdx = $i;
				break;
			}
		}

		// No furthest block: pop everything from the current node up
		// to and including the formatting element, remove it from the
		// active formatting list, and finish.
		if ( !$furthestBlock ) {
			$this->popTag( $formattingElt );
			$afe->remove( $formattingElt );
			return true;
		}

		// The common ancestor sits immediately above the formatting
		// element in the stack of open elements.
		$commonAncestor = $this->node( $formattingIdx - 1 );

		// The bookmark records the formatting element's position in
		// the active formatting list.
		$bookmark = new BalanceElement( '[bookmark]', '[bookmark]', [] );
		$afe->insertAfter( $formattingElt, $bookmark );

		// Node and last node both start out as the furthest block.
		$lastNode = $furthestBlock;
		$nodeIdx = $furthestBlockIdx;

		// Inner loop: walk up the stack from the furthest block until
		// the formatting element is reached.
		for ( $innerCount = 1; ; $innerCount++ ) {
			// Step to the element immediately above. Entries removed
			// below shift later entries down, so decrementing the
			// index still lands on the element that was above.
			$nodeIdx--;
			$node = $this->node( $nodeIdx );

			if ( $node === $formattingElt ) {
				break;
			}

			// After three inner iterations, nodes are also evicted
			// from the active formatting list.
			$inAfe = $afe->isInList( $node );
			if ( $inAfe && $innerCount > 3 ) {
				$afe->remove( $node );
				$inAfe = false;
			}

			// Nodes that are not active formatting elements just
			// leave the stack of open elements.
			if ( !$inAfe ) {
				// Don't flatten here, since we're about to relocate
				// parts of this $node.
				$this->removeElement( $node, false );
				continue;
			}

			// Otherwise clone the node and let the clone take its
			// place both in the active formatting list and in the
			// stack of open elements.
			$clone = new BalanceElement(
				$node->namespaceURI, $node->localName, $node->attribs );
			$afe->replace( $node, $clone );
			$this->replaceAt( $nodeIdx, $clone );

			// While last node is still the furthest block, the
			// bookmark moves to just after the clone.
			if ( $lastNode === $furthestBlock ) {
				$afe->remove( $bookmark );
				$afe->insertAfter( $clone, $bookmark );
			}

			// Re-parent last node under the clone; the clone then
			// becomes last node.
			$clone->appendChild( $lastNode );
			$lastNode = $clone;
		}

		// Place last node: foster parent it when the common ancestor
		// is a table, tbody, tfoot, thead, or tr element (and foster
		// parenting is enabled), else append it to the common ancestor.
		if (
			$this->fosterParentMode &&
			$commonAncestor->isA( BalanceSets::$tableSectionRowSet )
		) {
			$this->fosterParent( $lastNode );
		} else {
			$commonAncestor->appendChild( $lastNode );
		}

		// Build a replacement for the formatting element, give it all
		// of the furthest block's children, and make it the furthest
		// block's child.
		$replacement = new BalanceElement(
			$formattingElt->namespaceURI,
			$formattingElt->localName,
			$formattingElt->attribs
		);
		$replacement->adoptChildren( $furthestBlock );
		$furthestBlock->appendChild( $replacement );

		// In the active formatting list the replacement goes where
		// the bookmark is; the old formatting element is dropped.
		$afe->remove( $formattingElt );
		$afe->replace( $bookmark, $replacement );

		// In the stack of open elements the replacement goes
		// immediately below the furthest block; the old formatting
		// element is removed.
		$this->removeElement( $formattingElt );
		$this->insertAfter( $furthestBlock, $replacement );
	}

	return true;
}
1346
/**
 * Return the contents of the open elements stack as a string for
 * debugging: the local names of the elements, root first, separated
 * by single spaces.
 * @return string
 */
public function __toString() {
	$names = [];
	foreach ( $this->elements as $elt ) {
		$names[] = $elt->localName;
	}
	// Separator first: the reversed (array, separator) argument order
	// is no longer accepted by PHP 8.
	return implode( ' ', $names );
}
1359 }
1360
/**
 * A pseudo-element used as a marker in the list of active formatting
 * elements. It carries only the two link fields that thread it into
 * that list.
 *
 * @ingroup Parser
 * @since 1.27
 */
class BalanceMarker {
	/** The entry following this marker in the AFE list, if any */
	public $nextAFE = null;

	/** The entry preceding this marker in the AFE list, if any */
	public $prevAFE = null;
}
1371
/**
 * The list of active formatting elements, which is used to handle
 * mis-nested formatting element tags in the HTML5 tree builder
 * specification.
 *
 * The list is doubly linked through the prevAFE/nextAFE properties of
 * its members (BalanceElement and BalanceMarker objects). Elements are
 * additionally chained through nextNoah into per-segment buckets used
 * to enforce the "Noah's Ark clause".
 *
 * @ingroup Parser
 * @since 1.27
 * @see https://html.spec.whatwg.org/multipage/syntax.html#list-of-active-formatting-elements
 */
class BalanceActiveFormattingElements {
	/** The last (most recent) element in the list */
	private $tail;

	/** The first (least recent) element in the list */
	private $head;

	/**
	 * An array of arrays representing the population of elements in each bucket
	 * according to the Noah's Ark clause. The outer array is stack-like, with each
	 * integer-indexed element representing a segment of the list, bounded by
	 * markers. The first element represents the segment of the list before the
	 * first marker.
	 *
	 * The inner arrays are indexed by "Noah key", which is a string which uniquely
	 * identifies each bucket according to the rules in the spec. The value in
	 * the inner array is the first (least recently inserted) element in the bucket,
	 * and subsequent members of the bucket can be found by iterating through the
	 * singly-linked list via $node->nextNoah.
	 *
	 * This is optimised for the most common case of inserting into a bucket
	 * with zero members, and deleting a bucket containing one member. In the
	 * worst case, iteration through the list is still O(1) in the document
	 * size, since each bucket can have at most 3 members.
	 */
	private $noahTableStack = [ [] ];

	/**
	 * Break all links between list members so that no reference chains
	 * between them remain once the list goes away.
	 */
	public function __destruct() {
		for ( $node = $this->head; $node; $node = $next ) {
			$next = $node->nextAFE;
			$node->prevAFE = $node->nextAFE = null;
			// Markers have no nextNoah field; writing one would create
			// a dynamic property on them.
			if ( !( $node instanceof BalanceMarker ) ) {
				$node->nextNoah = null;
			}
		}
		$this->head = $this->tail = $this->noahTableStack = null;
	}

	/**
	 * Append a marker to the list and open a fresh Noah table for the
	 * segment that follows it.
	 */
	public function insertMarker() {
		$elt = new BalanceMarker;
		if ( $this->tail ) {
			$this->tail->nextAFE = $elt;
			$elt->prevAFE = $this->tail;
		} else {
			$this->head = $elt;
		}
		$this->tail = $elt;
		$this->noahTableStack[] = [];
	}

	/**
	 * Follow the steps required when the spec requires us to "push onto the
	 * list of active formatting elements".
	 * @param BalanceElement $elt
	 * @throws ParameterAssertionException If $elt is already in the list.
	 */
	public function push( BalanceElement $elt ) {
		// Must not be in the list already
		if ( $elt->prevAFE !== null || $this->head === $elt ) {
			throw new ParameterAssertionException( '$elt',
				'Cannot insert a node into the AFE list twice' );
		}

		// "Noah's Ark clause" -- if there are already three copies of
		// this element before we encounter a marker, then drop the last
		// one.
		$noahKey = $elt->getNoahKey();
		$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
		if ( !isset( $table[$noahKey] ) ) {
			$table[$noahKey] = $elt;
		} else {
			$count = 1;
			$head = $tail = $table[$noahKey];
			while ( $tail->nextNoah ) {
				$tail = $tail->nextNoah;
				$count++;
			}
			if ( $count >= 3 ) {
				// Evict the oldest member of the bucket. $tail is a
				// different element here, so it stays valid.
				$this->remove( $head );
			}
			$tail->nextNoah = $elt;
		}
		// Add to the main AFE list
		if ( $this->tail ) {
			$this->tail->nextAFE = $elt;
			$elt->prevAFE = $this->tail;
		} else {
			$this->head = $elt;
		}
		$this->tail = $elt;
	}

	/**
	 * Follow the steps required when the spec asks us to "clear the list of
	 * active formatting elements up to the last marker".
	 */
	public function clearToMarker() {
		// Iterate back through the list starting from the tail
		$tail = $this->tail;
		while ( $tail && !( $tail instanceof BalanceMarker ) ) {
			// Unlink the element
			$prev = $tail->prevAFE;
			$tail->prevAFE = null;
			if ( $prev ) {
				$prev->nextAFE = null;
			}
			$tail->nextNoah = null;
			$tail = $prev;
		}
		// If we finished on a marker, unlink it and pop it off the Noah table stack
		if ( $tail ) {
			$prev = $tail->prevAFE;
			if ( $prev ) {
				$prev->nextAFE = null;
			}
			$tail = $prev;
			array_pop( $this->noahTableStack );
		} else {
			// No marker: wipe the top-level Noah table (which is the only one)
			$this->noahTableStack[0] = [];
		}
		// If we removed all the elements, clear the head pointer
		if ( !$tail ) {
			$this->head = null;
		}
		$this->tail = $tail;
	}

	/**
	 * Find and return the last element with the specified tag between the
	 * end of the list and the last marker on the list.
	 * Used when parsing &lt;a&gt; "in body mode".
	 * @param string $tag Local name to look for.
	 * @return BalanceElement|null The element, or null if there is none.
	 */
	public function findElementByTag( $tag ) {
		$elt = $this->tail;
		while ( $elt && !( $elt instanceof BalanceMarker ) ) {
			if ( $elt->localName === $tag ) {
				return $elt;
			}
			$elt = $elt->prevAFE;
		}
		return null;
	}

	/**
	 * Determine whether an element is in the list of formatting elements.
	 * An element is a member if it is the head or has a predecessor.
	 * @param BalanceElement $elt
	 * @return bool
	 */
	public function isInList( BalanceElement $elt ) {
		return $this->head === $elt || $elt->prevAFE;
	}

	/**
	 * Find the element $elt in the list and remove it.
	 * Used when parsing &lt;a&gt; in body mode.
	 * @param BalanceElement $elt
	 * @throws ParameterAssertionException If $elt is not in the list.
	 */
	public function remove( BalanceElement $elt ) {
		if ( $this->head !== $elt && !$elt->prevAFE ) {
			throw new ParameterAssertionException( '$elt',
				"Attempted to remove an element which is not in the AFE list" );
		}
		// Update head and tail pointers
		if ( $this->head === $elt ) {
			$this->head = $elt->nextAFE;
		}
		if ( $this->tail === $elt ) {
			$this->tail = $elt->prevAFE;
		}
		// Update previous element
		if ( $elt->prevAFE ) {
			$elt->prevAFE->nextAFE = $elt->nextAFE;
		}
		// Update next element
		if ( $elt->nextAFE ) {
			$elt->nextAFE->prevAFE = $elt->prevAFE;
		}
		// Clear pointers so that isInList() etc. will work
		$elt->prevAFE = $elt->nextAFE = null;
		// Update Noah list
		$this->removeFromNoahList( $elt );
	}

	/**
	 * Append $elt to the bucket for its Noah key in the Noah table of
	 * the current (most recent) segment.
	 * @param BalanceElement $elt
	 */
	private function addToNoahList( BalanceElement $elt ) {
		$noahKey = $elt->getNoahKey();
		$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
		if ( !isset( $table[$noahKey] ) ) {
			$table[$noahKey] = $elt;
		} else {
			$tail = $table[$noahKey];
			while ( $tail->nextNoah ) {
				$tail = $tail->nextNoah;
			}
			$tail->nextNoah = $elt;
		}
	}

	/**
	 * Unlink $elt from the Noah bucket that holds it.
	 *
	 * The current segment's table is checked first; if $elt is not
	 * found there, the tables of earlier segments are searched too, so
	 * that removing an element that precedes a marker neither reads a
	 * missing bucket nor leaves a stale entry behind. Nothing happens
	 * if $elt is in no bucket at all.
	 * @param BalanceElement $elt
	 */
	private function removeFromNoahList( BalanceElement $elt ) {
		$key = $elt->getNoahKey();
		for ( $level = count( $this->noahTableStack ) - 1; $level >= 0; $level-- ) {
			if ( !isset( $this->noahTableStack[$level][$key] ) ) {
				continue;
			}
			$first = $this->noahTableStack[$level][$key];
			if ( $first === $elt ) {
				// $elt heads the bucket: promote its successor, or
				// drop the bucket if it was the only member.
				if ( $elt->nextNoah ) {
					$this->noahTableStack[$level][$key] = $elt->nextNoah;
					$elt->nextNoah = null;
				} else {
					unset( $this->noahTableStack[$level][$key] );
				}
				return;
			}
			for ( $prev = $first; $prev->nextNoah; $prev = $prev->nextNoah ) {
				if ( $prev->nextNoah === $elt ) {
					// Found it, unlink
					$prev->nextNoah = $elt->nextNoah;
					$elt->nextNoah = null;
					return;
				}
			}
		}
	}

	/**
	 * Find element $a in the list and replace it with element $b.
	 * $b is filed in the Noah table of the current segment.
	 * @param BalanceElement $a
	 * @param BalanceElement $b
	 * @throws ParameterAssertionException If $a is not in the list.
	 */
	public function replace( BalanceElement $a, BalanceElement $b ) {
		if ( $this->head !== $a && !$a->prevAFE ) {
			throw new ParameterAssertionException( '$a',
				"Attempted to replace an element which is not in the AFE list" );
		}
		// Update head and tail pointers
		if ( $this->head === $a ) {
			$this->head = $b;
		}
		if ( $this->tail === $a ) {
			$this->tail = $b;
		}
		// Update previous element
		if ( $a->prevAFE ) {
			$a->prevAFE->nextAFE = $b;
		}
		// Update next element
		if ( $a->nextAFE ) {
			$a->nextAFE->prevAFE = $b;
		}
		$b->prevAFE = $a->prevAFE;
		$b->nextAFE = $a->nextAFE;
		$a->nextAFE = $a->prevAFE = null;
		// Update Noah list
		$this->removeFromNoahList( $a );
		$this->addToNoahList( $b );
	}

	/**
	 * Find $a in the list and insert $b after it.
	 * $b is filed in the Noah table of the current segment.
	 * @param BalanceElement $a
	 * @param BalanceElement $b
	 * @throws ParameterAssertionException If $a is not in the list.
	 */
	public function insertAfter( BalanceElement $a, BalanceElement $b ) {
		if ( $this->head !== $a && !$a->prevAFE ) {
			throw new ParameterAssertionException( '$a',
				"Attempted to insert after an element which is not in the AFE list" );
		}
		if ( $this->tail === $a ) {
			$this->tail = $b;
		}
		if ( $a->nextAFE ) {
			$a->nextAFE->prevAFE = $b;
		}
		$b->nextAFE = $a->nextAFE;
		$b->prevAFE = $a;
		$a->nextAFE = $b;
		$this->addToNoahList( $b );
	}

	// @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
	/**
	 * Reconstruct the active formatting elements.
	 * @param BalanceStack $stack The open elements stack
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#reconstruct-the-active-formatting-elements
	 */
	// @codingStandardsIgnoreEnd
	public function reconstruct( $stack ) {
		$entry = $this->tail;
		// If there are no entries in the list of active formatting elements,
		// then there is nothing to reconstruct
		if ( !$entry ) {
			return;
		}
		// If the last is a marker, do nothing.
		if ( $entry instanceof BalanceMarker ) {
			return;
		}
		// Or if it is an open element, do nothing.
		if ( $stack->indexOf( $entry ) >= 0 ) {
			return;
		}

		// Loop backward through the list until we find a marker or an
		// open element
		$foundIt = false;
		while ( $entry->prevAFE ) {
			$entry = $entry->prevAFE;
			if ( $entry instanceof BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
				$foundIt = true;
				break;
			}
		}

		// Now loop forward, starting from the element after the current one (or
		// the first element if we didn't find a marker or open element),
		// recreating formatting elements and pushing them back onto the list
		// of open elements.
		if ( $foundIt ) {
			$entry = $entry->nextAFE;
		}
		do {
			$newElement = $stack->insertHTMLElement(
				$entry->localName,
				$entry->attribs );
			$this->replace( $entry, $newElement );
			$entry = $newElement->nextAFE;
		} while ( $entry );
	}

	/**
	 * Get a string representation of the AFE list, for debugging.
	 * One line is emitted per entry, and inconsistencies in the link
	 * structure are flagged inline.
	 * @return string
	 */
	public function __toString() {
		$prev = null;
		$s = '';
		for ( $node = $this->head; $node; $prev = $node, $node = $node->nextAFE ) {
			if ( $node instanceof BalanceMarker ) {
				$s .= "MARKER\n";
				continue;
			}
			$s .= $node->localName . '#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
			if ( $node->nextNoah ) {
				$s .= " (noah sibling: {$node->nextNoah->localName}#" .
					substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
					')';
			}
			if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
				$s .= " (reverse link is wrong!)";
			}
			$s .= "\n";
		}
		if ( $prev !== $this->tail ) {
			$s .= "(tail pointer is wrong!)\n";
		}
		return $s;
	}
}
1727
1728 /**
1729 * An implementation of the tree building portion of the HTML5 parsing
1730 * spec.
1731 *
1732 * This is used to balance and tidy output so that the result can
1733 * always be cleanly serialized/deserialized by an HTML5 parser. It
1734 * does *not* guarantee "conforming" output -- the HTML5 spec contains
1735 * a number of constraints which are not enforced by the HTML5 parsing
1736 * process. But the result will be free of gross errors: misnested or
1737 * unclosed tags, for example, and will be unchanged by spec-compliant
1738 * parsing followed by serialization.
1739 *
1740 * The tree building stage is structured as a state machine.
1741 * When comparing the implementation to
1742 * https://www.w3.org/TR/html5/syntax.html#tree-construction
1743 * note that each state is implemented as a function with a
1744 * name ending in `Mode` (because the HTML spec refers to them
1745 * as insertion modes). The current insertion mode is held by
1746 * the $parseMode property.
1747 *
1748 * The following simplifications have been made:
1749 * - We handle body content only (ie, we start `in body`.)
1750 * - The document is never in "quirks mode".
1751 * - All occurrences of < and > have been entity escaped, so we
1752 * can parse tags by simply splitting on those two characters.
1753 * (This also simplifies the handling of < inside <textarea>.)
1754 * The character < must not appear inside comments.
1755 * Similarly, all attributes have been "cleaned" and are double-quoted
1756 * and escaped.
1757 * - All null characters are assumed to have been removed.
1758 * - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
1759 * <frame>, <plaintext>, <isindex>, <xmp>, <iframe>,
1760 * <noembed>, <noscript>, <script>, <title>. As a result,
1761 * further simplifications can be made:
1762 * - `frameset-ok` is not tracked.
1763 * - `head element pointer` is not tracked (but presumed non-null)
1764 * - Tokenizer has only a single mode. (<textarea> wants RCDATA and
1765 * <style>/<noframes> want RAWTEXT modes which we only loosely emulate.)
1766 *
1767 * We generally mark places where we omit cases from the spec due to
1768 * disallowed elements with a comment: `// OMITTED: <element-name>`.
1769 *
1770 * The HTML spec keeps a flag during the parsing process to track
1771 * whether or not a "parse error" has been encountered. We don't
1772 * bother to track that flag, we just implement the error-handling
1773 * process as specified.
1774 *
1775 * @ingroup Parser
1776 * @since 1.27
1777 * @see https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
1778 */
1779 class Balancer {
1780 private $parseMode;
1781 private $bitsIterator;
1782 private $allowedHtmlElements;
1783 private $afe;
1784 private $stack;
1785 private $strict;
1786 private $allowComments;
1787 private $config;
1788
1789 private $textIntegrationMode;
1790 private $pendingTableText;
1791 private $originalInsertionMode;
1792 private $fragmentContext;
1793 private $formElementPointer;
1794 private $ignoreLinefeed;
1795 private $inRCDATA;
1796 private $inRAWTEXT;
1797
1798 /**
1799 * Valid HTML5 comments.
1800 * Regex borrowed from Tim Starling's "remex-html" project.
1801 */
1802 const VALID_COMMENT_REGEX = "~ !--
1803 ( # 1. Comment match detector
1804 > | -> | # Invalid short close
1805 ( # 2. Comment contents
1806 (?:
1807 (?! --> )
1808 (?! --!> )
1809 (?! --! \z )
1810 (?! -- \z )
1811 (?! - \z )
1812 .
1813 )*+
1814 )
1815 ( # 3. Comment close
1816 --> | # Normal close
1817 --!> | # Comment end bang
1818 ( # 4. Indicate matches requiring EOF
1819 --! | # EOF in comment end bang state
1820 -- | # EOF in comment end state
1821 - | # EOF in comment end dash state
1822 # EOF in comment state
1823 )
1824 )
1825 )
1826 ([^<]*) \z # 5. Non-tag text after the comment
1827 ~xs";
1828
/**
 * Create a new Balancer.
 * @param array $config Balancer configuration.  Includes:
 *     'strict' : boolean, defaults to false.
 *         When true, enforces syntactic constraints on input:
 *         all non-tag '<' must be escaped, all attributes must be
 *         separated by a single space and double-quoted.  This is
 *         consistent with the output of the Sanitizer.
 *     'allowedHtmlElements' : array, defaults to null.
 *         When present, the keys of this associative array give
 *         the acceptable HTML tag names.  When not present, no
 *         tag sanitization is done.
 *     'tidyCompat' : boolean, defaults to false.
 *         When true, the serialization algorithm is tweaked to
 *         provide historical compatibility with the old "tidy"
 *         program: <p>-wrapping is done to the children of
 *         <body> and <blockquote> elements, and empty elements
 *         are removed.
 *     'allowComments': boolean, defaults to true.
 *         When true, allows HTML comments in the input.
 *         The Sanitizer generally strips all comments, so if you
 *         are running on sanitized output you can set this to
 *         false to get a bit more performance.
 * @throws ParameterAssertionException If 'allowedHtmlElements' names an
 *   element listed in BalanceSets::$unsupportedSet for the HTML namespace.
 */
public function __construct( array $config = [] ) {
	// Caller-supplied keys win; missing keys get these defaults.
	$this->config = $config = $config + [
		'strict' => false,
		'allowedHtmlElements' => null,
		'tidyCompat' => false,
		'allowComments' => true,
	];
	$this->allowedHtmlElements = $config['allowedHtmlElements'];
	$this->strict = $config['strict'];
	$this->allowComments = $config['allowComments'];
	if ( $this->allowedHtmlElements !== null ) {
		// Sanity check! Both arrays are keyed by tag name, so the
		// overlap of their keys is the set of tags that were allowed
		// but cannot be handled.
		$bad = array_intersect_key(
			$this->allowedHtmlElements,
			BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE]
		);
		if ( count( $bad ) > 0 ) {
			// Separator first: the reversed argument order is no
			// longer accepted by PHP 8.
			$badstr = implode( ',', array_keys( $bad ) );
			throw new ParameterAssertionException(
				'$config',
				'Balance attempted with sanitization including ' .
				"unsupported elements: {$badstr}"
			);
		}
	}
}
1884
/**
 * Balance the HTML fragment in $text and return the resulting markup,
 * subject to the caveats listed in the class description.  The result
 * will typically be idempotent -- that is, rebalancing the output
 * would result in no change.
 *
 * @param string $text The markup to be balanced
 * @param callable|null $processingCallback Callback to do any variable or
 *   parameter replacements in HTML attributes values
 * @param array|bool $processingArgs Arguments for the processing callback
 * @return string The balanced markup
 */
public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
	// Start every run from a clean parser state.
	$this->parseMode = 'inBodyMode';
	$this->bitsIterator = new ExplodeIterator( '<', $text );
	$this->afe = new BalanceActiveFormattingElements();
	$this->stack = new BalanceStack( $this->config );
	$this->processingCallback = $processingCallback;
	$this->processingArgs = $processingArgs;

	$this->inRAWTEXT = false;
	$this->inRCDATA = false;
	$this->ignoreLinefeed = false;
	$this->textIntegrationMode = false;

	// The stack is constructed with an <html> element already on it.
	// Set this up as a fragment parsed with <body> as the context.
	$this->fragmentContext =
		new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', [] );
	$this->resetInsertionMode();

	// The form element pointer is the nearest `form` found by walking
	// up from the fragment context, if there is one.
	$this->formElementPointer = null;
	$ancestor = $this->fragmentContext;
	while ( $ancestor != null ) {
		if ( $ancestor->isHtmlNamed( 'form' ) ) {
			$this->formElementPointer = $ancestor;
			break;
		}
		$ancestor = $ancestor->parent;
	}

	// Everything before the first '<' is plain text, not a tag.
	$leadingText = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$this->insertToken( 'text', str_replace( '>', '&gt;', $leadingText ) );

	// Every remaining piece begins with a tag.
	while ( $this->bitsIterator->valid() ) {
		$this->advance();
	}
	$this->insertToken( 'eof', null );
	$balanced = $this->stack->getOutput();

	// Free memory before returning.
	$this->bitsIterator = null;
	$this->afe = null;
	$this->stack = null;
	$this->fragmentContext = null;
	$this->formElementPointer = null;
	return $balanced;
}
1941
/**
 * Pass a token to the tree builder, routing it either to the handler
 * for the current insertion mode or to the foreign-content handler.
 *
 * @param string $token One of "tag", "endtag", "text", "comment" or "eof"
 * @param string|null $value Tag name, text, or comment contents
 *   (null for "eof")
 * @param array|null $attribs Attributes, for "tag" tokens
 * @param bool $selfClose Whether a "tag" token was written as `<x/>`
 * @return bool|null Falsey when the token was rejected (for example an
 *   unsupported tag in non-strict mode); otherwise whatever the selected
 *   handler returns
 */
private function insertToken( $token, $value, $attribs = null, $selfClose = false ) {
	$isTagToken = ( $token === 'tag' || $token === 'endtag' );
	if ( $isTagToken ) {
		// validate tags against $unsupportedSet
		if ( isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] ) ) {
			// As described in "simplifications" above, these tags are
			// not supported in the balancer. Strict mode treats them as
			// an invariant violation; otherwise the tag is rejected.
			Assert::invariant(
				!$this->strict,
				"Unsupported $token <$value> found."
			);
			return false;
		}
	} elseif ( $token === 'text' && $value === '' ) {
		// Don't actually inject the empty string as a text token.
		return true;
	}

	// Support pre/listing/textarea by suppressing initial linefeed.
	// The flag is one-shot: it is cleared by whatever token comes next.
	if ( $this->ignoreLinefeed ) {
		$this->ignoreLinefeed = false;
		if ( $token === 'text' && $value[0] === "\n" ) {
			if ( $value === "\n" ) {
				// Nothing would be left, don't inject the empty string.
				return true;
			}
			$value = substr( $value, 1 );
		}
	}

	// Decide between the HTML insertion-mode rules and the foreign
	// content rules, based on the adjusted current node.
	$adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
	if (
		$this->stack->length() === 0 ||
		$adjusted->isHtml() ||
		$token === 'eof'
	) {
		$useHtmlRules = true;
	} elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
		// Text and every start tag except <mglyph>/<malignmark> are
		// treated as HTML here.
		$useHtmlRules = $token === 'text' || (
			$token === 'tag' &&
			$value !== 'mglyph' && $value !== 'malignmark'
		);
	} elseif (
		$adjusted->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
		$adjusted->localName === 'annotation-xml' &&
		$token === 'tag' && $value === 'svg'
	) {
		$useHtmlRules = true;
	} else {
		$useHtmlRules = $adjusted->isHtmlIntegrationPoint() &&
			( $token === 'tag' || $token === 'text' );
	}

	if ( !$useHtmlRules ) {
		return $this->insertForeignToken( $token, $value, $attribs, $selfClose );
	}
	// The current insertion mode is stored as the name of its handler.
	$handler = $this->parseMode;
	return $this->$handler( $token, $value, $attribs, $selfClose );
}
2013
/**
 * Process a token using the rules for foreign (non-HTML) content.
 * Called from insertToken() when the adjusted current node is not an
 * HTML element and no integration-point exception applies.
 *
 * @param string $token "text", "comment", "tag" or "endtag"
 * @param string|null $value Tag name, text, or comment contents
 * @param array|null $attribs Attributes, for "tag" tokens
 * @param bool $selfClose Whether a "tag" token was written as `<x/>`
 * @return bool|null True when the token was consumed here, the result of
 *   the delegated handler when it is reprocessed as HTML, or false for
 *   a token type this method does not handle
 */
private function insertForeignToken( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	} elseif ( $token === 'comment' ) {
		// Comments are inserted in foreign content just as they are in
		// HTML content; without this branch they were silently dropped.
		$this->stack->insertComment( $value );
		return true;
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
			case 'font':
				if ( isset( $attribs['color'] )
					|| isset( $attribs['face'] )
					|| isset( $attribs['size'] )
				) {
					// Only a <font> without these attributes stays
					// foreign ("any other start tag" below).
					break;
				}
				// otherwise, fall through
			case 'b':
			case 'big':
			case 'blockquote':
			case 'body':
			case 'br':
			case 'center':
			case 'code':
			case 'dd':
			case 'div':
			case 'dl':
			case 'dt':
			case 'em':
			case 'embed':
			case 'h1':
			case 'h2':
			case 'h3':
			case 'h4':
			case 'h5':
			case 'h6':
			case 'head':
			case 'hr':
			case 'i':
			case 'img':
			case 'li':
			case 'listing':
			case 'menu':
			case 'meta':
			case 'nobr':
			case 'ol':
			case 'p':
			case 'pre':
			case 'ruby':
			case 's':
			case 'small':
			case 'span':
			case 'strong':
			case 'strike':
			case 'sub':
			case 'sup':
			case 'table':
			case 'tt':
			case 'u':
			case 'ul':
			case 'var':
				if ( $this->fragmentContext ) {
					// When parsing a fragment these are handled as
					// "any other start tag" below.
					break;
				}
				// Pop foreign elements until we are back at an HTML
				// element or an integration point, then reprocess the
				// token from there.
				while ( true ) {
					$this->stack->pop();
					$node = $this->stack->currentNode;
					if (
						$node->isMathmlTextIntegrationPoint() ||
						$node->isHtmlIntegrationPoint() ||
						$node->isHtml()
					) {
						break;
					}
				}
				return $this->insertToken( $token, $value, $attribs, $selfClose );
		}
		// "Any other start tag": the new element inherits the namespace
		// of the adjusted current node.
		$adjusted = ( $this->fragmentContext && $this->stack->length() === 1 ) ?
			$this->fragmentContext : $this->stack->currentNode;
		$this->stack->insertForeignElement(
			$adjusted->namespaceURI, $value, $attribs
		);
		if ( $selfClose ) {
			$this->stack->pop();
		}
		return true;
	} elseif ( $token === 'endtag' ) {
		$first = true;
		foreach ( $this->stack as $i => $node ) {
			if ( $node->isHtml() && !$first ) {
				// process the end tag as HTML
				$func = $this->parseMode;
				return $this->$func( $token, $value, $attribs, $selfClose );
			} elseif ( $i === 0 ) {
				// Reached the last entry of the stack without a match:
				// ignore the end tag.
				return true;
			} elseif ( $node->localName === $value ) {
				$this->stack->popTag( $node );
				return true;
			}
			$first = false;
		}
	}
	// Token type not handled by the foreign content rules (or nothing on
	// the stack to match an end tag against).
	return false;
}
2115
/**
 * Grab the next "token" from $bitsIterator. This is either a open/close
 * tag or text or a comment, depending on whether the Sanitizer approves.
 *
 * Each chunk is the text that followed a '<' in the input. The chunk is
 * tried as a comment first (when comments are allowed and we are not
 * inside RCDATA/RAWTEXT content), then as an element tag. Anything that
 * is not accepted as a tag is emitted as text instead.
 */
private function advance() {
	$x = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$regs = [];
	// Handle comments. These won't be generated by mediawiki (they
	// are stripped in the Sanitizer) but may be generated by extensions.
	if (
		$this->allowComments &&
		!( $this->inRCDATA || $this->inRAWTEXT ) &&
		preg_match( Balancer::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
		// verify EOF condition where necessary: either group 4 did not
		// participate in the match (offset < 0) or this is the last chunk
		( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
	) {
		$contents = $regs[2][0];
		$rest = $regs[5][0];
		$this->insertToken( 'comment', $contents );
		$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
		return;
	}
	// $slash: Does the current element start with a '/'?
	// $t: Current element name
	// $attribStr: String between element name and >
	// $brace: Ending '>' or '/>'
	// $rest: Everything until the next element from the $bitsIterator
	if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
		list( /* $qbar */, $slash, $t, $attribStr, $brace, $rest ) = $regs;
		$t = strtolower( $t );
		if ( $this->strict ) {
			// Verify that attributes are all properly double-quoted
			Assert::invariant(
				preg_match(
					'/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
				),
				"Bad attribute string found"
			);
		}
	} else {
		Assert::invariant(
			!$this->strict, "< found which does not start a valid tag"
		);
		$slash = $t = $attribStr = $brace = $rest = null;
	}
	$goodTag = $t;
	if ( $this->inRCDATA ) {
		if ( $slash && $t === $this->inRCDATA ) {
			$this->inRCDATA = false;
		} else {
			// No tags allowed; this emulates the "rcdata" tokenizer mode.
			$goodTag = false;
		}
	}
	if ( $this->inRAWTEXT ) {
		if ( $slash && $t === $this->inRAWTEXT ) {
			$this->inRAWTEXT = false;
		} else {
			// No tags allowed, no entity-escaping done.
			$goodTag = false;
		}
	}
	$sanitize = $this->allowedHtmlElements !== null;
	if ( $sanitize ) {
		// The whitelist may only narrow the decision. Testing $goodTag
		// (rather than $t) keeps a tag that was already rejected by the
		// RCDATA/RAWTEXT checks above from being re-enabled here.
		$goodTag = $goodTag && isset( $this->allowedHtmlElements[$t] );
	}
	if ( $goodTag ) {
		if ( is_callable( $this->processingCallback ) ) {
			// The callback receives the attribute string by reference
			// and may rewrite it in place.
			call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
		}
		if ( $sanitize ) {
			$goodTag = Sanitizer::validateTag( $attribStr, $t );
		}
	}
	if ( $goodTag ) {
		$attribs = Sanitizer::decodeTagAttributes( $attribStr );
		if ( $sanitize ) {
			$attribs = Sanitizer::validateTagAttributes( $attribs, $t );
		}
		// The tree builder may still reject the tag (e.g. unsupported
		// elements), in which case it is serialized as text below.
		$goodTag = $this->insertToken(
			$slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
		);
	}
	if ( $goodTag ) {
		$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
	} elseif ( $this->inRAWTEXT ) {
		// Raw text content is passed through without any escaping.
		$this->insertToken( 'text', "<$x" );
	} else {
		// bad tag; serialize entire thing as text.
		$this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
	}
}
2212
/**
 * Make $mode the current insertion mode.
 *
 * @param string $mode Name of the handler method; must end in "Mode"
 * @return string The insertion mode that was active before the switch
 */
private function switchMode( $mode ) {
	$hasModeSuffix = substr( $mode, -4 ) === 'Mode';
	Assert::parameter( $hasModeSuffix, '$mode', 'should end in Mode' );

	$previousMode = $this->parseMode;
	$this->parseMode = $mode;
	return $previousMode;
}
2221
/**
 * Switch to insertion mode $mode, then run the given token through
 * insertToken() again so that it is handled under the new mode.
 *
 * @return bool|null Result of reprocessing the token
 */
private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfClose ) {
	$this->switchMode( $mode );
	$reprocessed = $this->insertToken( $token, $value, $attribs, $selfClose );
	return $reprocessed;
}
2226
/**
 * Choose the insertion mode that matches the current contents of the
 * stack of open elements. The stack is walked until an element that
 * determines the mode is found; if none is, 'inBodyMode' is selected
 * once the last entry (index 0) has been examined. When a fragment
 * context is set, it stands in for that last entry.
 */
private function resetInsertionMode() {
	// Elements whose presence maps directly onto one insertion mode.
	$modeByElement = [
		'tr' => 'inRowMode',
		'tbody' => 'inTableBodyMode',
		'tfoot' => 'inTableBodyMode',
		'thead' => 'inTableBodyMode',
		'caption' => 'inCaptionMode',
		'colgroup' => 'inColumnGroupMode',
		'table' => 'inTableMode',
		'body' => 'inBodyMode',
		// OMITTED: <frameset>
		// OMITTED: <html>
		// OMITTED: <head>
	];
	$last = false;
	foreach ( $this->stack as $i => $node ) {
		if ( $i === 0 ) {
			$last = true;
			if ( $this->fragmentContext ) {
				$node = $this->fragmentContext;
			}
		}
		if ( $node->isHtml() ) {
			$name = $node->localName;
			if ( isset( $modeByElement[$name] ) ) {
				$this->switchMode( $modeByElement[$name] );
				return;
			}
			if ( $name === 'select' ) {
				// A <select> nested in a <table> (with no <template>
				// encountered first) gets the in-table variant.
				// NOTE(review): $i === 0 marks the last stack entry
				// above, yet this walk starts from $i + 1 -- confirm
				// against BalanceStack::node() that it really visits
				// the ancestors of the <select>.
				$stackLength = $this->stack->length();
				for ( $j = $i + 1; $j < $stackLength - 1; $j++ ) {
					$ancestor = $this->stack->node( $stackLength - $j - 1 );
					if ( $ancestor->isHtmlNamed( 'template' ) ) {
						break;
					}
					if ( $ancestor->isHtmlNamed( 'table' ) ) {
						$this->switchMode( 'inSelectInTableMode' );
						return;
					}
				}
				$this->switchMode( 'inSelectMode' );
				return;
			}
			if ( $name === 'template' ) {
				// Resume the most recently pushed template mode.
				$this->switchMode(
					array_slice( $this->templateInsertionModes, -1 )[0]
				);
				return;
			}
			// OMITTED: <head>
			if ( !$last && $node->isA( BalanceSets::$tableCellSet ) ) {
				$this->switchMode( 'inCellMode' );
				return;
			}
		}
		if ( $last ) {
			$this->switchMode( 'inBodyMode' );
			return;
		}
	}
}
2296
/**
 * Finish the parse. Of the spec's "stop parsing" steps only step 2,
 * "pop all the nodes off the stack of open elements", is applicable
 * here -- and even then the top-most <html> element is kept on the
 * stack.
 */
private function stopParsing() {
	// Drop the list of active formatting elements before popping:
	// otherwise the element objects would stay live during
	// serialization, potentially using O(N^2) memory. This is safe
	// because popping the stack never reconstructs the active
	// formatting elements.
	$this->afe = null;
	// Pop everything except the <html> element.
	$this->stack->popTo( 1 );
}
2309
/**
 * Open the HTML element $value as a raw text element: insert it,
 * remember its name in $inRAWTEXT so that advance() stops recognizing
 * tags until the matching end tag, and switch to 'inTextMode' while
 * saving the mode to return to.
 *
 * @param string $value Tag name
 * @param array|null $attribs Attributes of the element
 * @return bool Always true
 */
private function parseRawText( $value, $attribs = null ) {
	$this->stack->insertHTMLElement( $value, $attribs );
	$this->inRAWTEXT = $value;
	$modeToRestore = $this->switchMode( 'inTextMode' );
	$this->originalInsertionMode = $modeToRestore;
	return true;
}
2316
/**
 * Insertion mode used while inside an element opened by parseRawText().
 * Text is appended to the current node. An end tag or end-of-input
 * closes the element and restores the saved insertion mode (an "eof"
 * is then reprocessed in that mode). Any other token is ignored.
 *
 * @return bool|null True, or the result of reprocessing an "eof" token
 */
private function inTextMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	}
	if ( $token === 'eof' ) {
		$this->stack->pop();
		return $this->switchModeAndReprocess(
			$this->originalInsertionMode, $token, $value, $attribs, $selfClose
		);
	}
	if ( $token === 'endtag' ) {
		$this->stack->pop();
		$this->switchMode( $this->originalInsertionMode );
	}
	return true;
}
2333
/**
 * Handle a token using the "in head" rules. Other insertion modes
 * delegate to this method for the elements that are handled the same
 * way wherever they appear (<style>, <template>, <link>, ...).
 *
 * @return bool|null True when the token was consumed or ignored,
 *   otherwise the result of reprocessing it through insertToken()
 */
private function inHeadMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}

	if ( $token === 'text' ) {
		// Leading whitespace is inserted here; anything after it is
		// handled at the bottom of the function.
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $matches ) ) {
			$this->stack->insertText( $matches[0] );
			$value = substr( $value, strlen( $matches[0] ) );
		}
		if ( strlen( $value ) === 0 ) {
			return true; // All text handled.
		}
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
			case 'meta':
				// OMITTED: in a full HTML parser, this might change the encoding.
				// falls through
			// OMITTED: <html>
			case 'base':
			case 'basefont':
			case 'bgsound':
			case 'link':
				// Void elements: insert and close immediately.
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;
			// OMITTED: <title>
			// OMITTED: <noscript>
			case 'noframes':
			case 'style':
				return $this->parseRawText( $value, $attribs );
			// OMITTED: <script>
			case 'template':
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->afe->insertMarker();
				// OMITTED: frameset_ok
				$this->switchMode( 'inTemplateMode' );
				// Record the mode just entered on the template mode stack.
				$this->templateInsertionModes[] = $this->parseMode;
				return true;
			// OMITTED: <head>
		}
	} elseif ( $token === 'endtag' ) {
		// OMITTED: <head>
		// OMITTED: <body>
		// OMITTED: <html>
		if ( $value === 'template' ) {
			if ( $this->stack->indexOf( $value ) < 0 ) {
				return true; // Ignore the token.
			}
			$this->stack->generateImpliedEndTags( null, true /* thorough */ );
			$this->stack->popTag( $value );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			return true;
		}
		if ( $value !== 'br' ) {
			// ignore any other end tag
			return true;
		}
		// </br> is handled at the bottom of the function.
	}

	// If not handled above
	$this->inHeadMode( 'endtag', 'head' ); // synthetic </head>
	// Then redo this one
	return $this->insertToken( $token, $value, $attribs, $selfClose );
}
2403
/**
 * Handle a token using the "in body" rules. Text, end-of-input and
 * comments are dealt with here; start and end tags are dispatched to
 * handleBodyStartTag() and handleBodyEndTag().
 *
 * @param string $token "text", "eof", "comment", "tag" or "endtag"
 * @param string|null $value Tag name, text, or comment contents
 * @param array|null $attribs Attributes, for "tag" tokens
 * @param bool $selfClose Whether a "tag" token was written as `<x/>`
 * @return bool|null True when handled here, or the result of whichever
 *   handler the token was delegated to
 */
private function inBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'text' ) {
		$this->afe->reconstruct( $this->stack );
		$this->stack->insertText( $value );
		return true;
	}
	if ( $token === 'eof' ) {
		if ( !empty( $this->templateInsertionModes ) ) {
			return $this->inTemplateMode( $token, $value, $attribs, $selfClose );
		}
		$this->stopParsing();
		return true;
	}
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}
	if ( $token === 'tag' ) {
		return $this->handleBodyStartTag( $value, $attribs, $selfClose );
	}
	if ( $token === 'endtag' ) {
		return $this->handleBodyEndTag( $value, $attribs, $selfClose );
	}
	Assert::invariant( false, "Bad token type: $token" );
}

/**
 * Helper for inBodyMode: if a <p> element is open in button scope,
 * close it by processing a synthetic </p>.
 */
private function closePElementInButtonScope() {
	if ( $this->stack->inButtonScope( 'p' ) ) {
		$this->inBodyMode( 'endtag', 'p' );
	}
}

/**
 * Helper for inBodyMode: handle a start tag using the "in body" rules.
 *
 * @param string $value Tag name
 * @param array|null $attribs Attributes of the tag
 * @param bool $selfClose Whether the tag was written as `<x/>`
 * @return bool|null True, or the result of a delegated handler
 */
private function handleBodyStartTag( $value, $attribs, $selfClose ) {
	switch ( $value ) {
		// OMITTED: <html>
		case 'base':
		case 'basefont':
		case 'bgsound':
		case 'link':
		case 'meta':
		case 'noframes':
		// OMITTED: <script>
		case 'style':
		case 'template':
		// OMITTED: <title>
			return $this->inHeadMode( 'tag', $value, $attribs, $selfClose );
		// OMITTED: <body>
		// OMITTED: <frameset>

		case 'address':
		case 'article':
		case 'aside':
		case 'blockquote':
		case 'center':
		case 'details':
		case 'dialog':
		case 'dir':
		case 'div':
		case 'dl':
		case 'fieldset':
		case 'figcaption':
		case 'figure':
		case 'footer':
		case 'header':
		case 'hgroup':
		case 'main':
		case 'menu':
		case 'nav':
		case 'ol':
		case 'p':
		case 'section':
		case 'summary':
		case 'ul':
			$this->closePElementInButtonScope();
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
			$this->closePElementInButtonScope();
			// Headings do not nest: close one that is still open.
			if ( $this->stack->currentNode->isA( BalanceSets::$headingSet ) ) {
				$this->stack->pop();
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'pre':
		case 'listing':
			$this->closePElementInButtonScope();
			$this->stack->insertHTMLElement( $value, $attribs );
			// A linefeed directly after the start tag is dropped.
			$this->ignoreLinefeed = true;
			// OMITTED: frameset_ok
			return true;

		case 'form':
			if (
				$this->formElementPointer &&
				$this->stack->indexOf( 'template' ) < 0
			) {
				return true; // in a form, not in a template.
			}
			$this->closePElementInButtonScope();
			$formElement = $this->stack->insertHTMLElement( $value, $attribs );
			if ( $this->stack->indexOf( 'template' ) < 0 ) {
				$this->formElementPointer = $formElement;
			}
			return true;

		case 'li':
			// OMITTED: frameset_ok
			// Close an open <li>, unless a special element other than
			// address/div/p is found on the stack first.
			foreach ( $this->stack as $node ) {
				if ( $node->isHtmlNamed( 'li' ) ) {
					$this->inBodyMode( 'endtag', 'li' );
					break;
				}
				if (
					$node->isA( BalanceSets::$specialSet ) &&
					!$node->isA( BalanceSets::$addressDivPSet )
				) {
					break;
				}
			}
			$this->closePElementInButtonScope();
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'dd':
		case 'dt':
			// OMITTED: frameset_ok
			// Same as <li> above, but closing an open <dd> or <dt>.
			foreach ( $this->stack as $node ) {
				if ( $node->isHtmlNamed( 'dd' ) ) {
					$this->inBodyMode( 'endtag', 'dd' );
					break;
				}
				if ( $node->isHtmlNamed( 'dt' ) ) {
					$this->inBodyMode( 'endtag', 'dt' );
					break;
				}
				if (
					$node->isA( BalanceSets::$specialSet ) &&
					!$node->isA( BalanceSets::$addressDivPSet )
				) {
					break;
				}
			}
			$this->closePElementInButtonScope();
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		// OMITTED: <plaintext>

		case 'button':
			if ( $this->stack->inScope( 'button' ) ) {
				// Close the open <button> first, then reprocess.
				$this->inBodyMode( 'endtag', 'button' );
				return $this->insertToken( 'tag', $value, $attribs, $selfClose );
			}
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'a':
			$activeElement = $this->afe->findElementByTag( 'a' );
			if ( $activeElement ) {
				$this->inBodyMode( 'endtag', 'a' );
				if ( $this->afe->isInList( $activeElement ) ) {
					$this->afe->remove( $activeElement );
					// Don't flatten here, since when we fall
					// through below we might foster parent
					// the new <a> tag inside this one.
					$this->stack->removeElement( $activeElement, false );
				}
			}
			// Falls through
		case 'b':
		case 'big':
		case 'code':
		case 'em':
		case 'font':
		case 'i':
		case 's':
		case 'small':
		case 'strike':
		case 'strong':
		case 'tt':
		case 'u':
			$this->afe->reconstruct( $this->stack );
			$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
			return true;

		case 'nobr':
			$this->afe->reconstruct( $this->stack );
			if ( $this->stack->inScope( 'nobr' ) ) {
				$this->inBodyMode( 'endtag', 'nobr' );
				$this->afe->reconstruct( $this->stack );
			}
			$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
			return true;

		case 'applet':
		case 'marquee':
		case 'object':
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->afe->insertMarker();
			// OMITTED: frameset_ok
			return true;

		case 'table':
			// The document is never in "quirks mode"; see simplifications
			// above.
			$this->closePElementInButtonScope();
			$this->stack->insertHTMLElement( $value, $attribs );
			// OMITTED: frameset_ok
			$this->switchMode( 'inTableMode' );
			return true;

		case 'area':
		case 'br':
		case 'embed':
		case 'img':
		case 'keygen':
		case 'wbr':
		case 'input':
			// Void elements: insert and close immediately.
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			// OMITTED: frameset_ok
			// (hence we don't need to examine the "type" attribute
			// of an <input> tag)
			return true;

		case 'menuitem':
		case 'param':
		case 'source':
		case 'track':
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;

		case 'hr':
			$this->closePElementInButtonScope();
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;

		case 'image':
			// warts!
			return $this->inBodyMode( 'tag', 'img', $attribs, $selfClose );

		// OMITTED: <isindex>

		case 'textarea':
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->ignoreLinefeed = true;
			$this->inRCDATA = $value; // emulate rcdata tokenizer mode
			// OMITTED: frameset_ok
			return true;

		// OMITTED: <xmp>
		// OMITTED: <iframe>
		// OMITTED: <noembed>
		// OMITTED: <noscript>

		case 'select':
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			$tableModes = [
				'inTableMode',
				'inCaptionMode',
				'inTableBodyMode',
				'inRowMode',
				'inCellMode',
			];
			$this->switchMode(
				in_array( $this->parseMode, $tableModes, true ) ?
					'inSelectInTableMode' : 'inSelectMode'
			);
			return true;

		case 'optgroup':
		case 'option':
			if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
				$this->inBodyMode( 'endtag', 'option' );
			}
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'rb':
		case 'rtc':
			if ( $this->stack->inScope( 'ruby' ) ) {
				$this->stack->generateImpliedEndTags();
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'rp':
		case 'rt':
			if ( $this->stack->inScope( 'ruby' ) ) {
				$this->stack->generateImpliedEndTags( 'rtc' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'math':
		case 'svg':
			$this->afe->reconstruct( $this->stack );
			// We skip the spec's "adjust MathML/SVG attributes" and
			// "adjust foreign attributes" steps, since the browser will
			// do this later when it parses the output and it doesn't affect
			// balancing.
			$namespace = ( $value === 'math' ) ?
				BalanceSets::MATHML_NAMESPACE : BalanceSets::SVG_NAMESPACE;
			$this->stack->insertForeignElement( $namespace, $value, $attribs );
			if ( $selfClose ) {
				// emit explicit </math> or </svg> tag.
				$this->stack->pop();
			}
			return true;

		case 'caption':
		case 'col':
		case 'colgroup':
		// OMITTED: <frame>
		case 'head':
		case 'tbody':
		case 'td':
		case 'tfoot':
		case 'th':
		case 'thead':
		case 'tr':
			// Ignore table tags if we're not inTableMode
			return true;
	}

	// Handle any other start tag here
	$this->afe->reconstruct( $this->stack );
	$this->stack->insertHTMLElement( $value, $attribs );
	return true;
}

/**
 * Helper for inBodyMode: handle an end tag using the "in body" rules.
 *
 * @param string $value Tag name
 * @param array|null $attribs Attributes passed along with the token
 * @param bool $selfClose Self-closing flag passed along with the token
 * @return bool|null True, or the result of a delegated handler
 */
private function handleBodyEndTag( $value, $attribs, $selfClose ) {
	switch ( $value ) {
		// </body>,</html> are unsupported.

		case 'template':
			return $this->inHeadMode( 'endtag', $value, $attribs, $selfClose );

		case 'address':
		case 'article':
		case 'aside':
		case 'blockquote':
		case 'button':
		case 'center':
		case 'details':
		case 'dialog':
		case 'dir':
		case 'div':
		case 'dl':
		case 'fieldset':
		case 'figcaption':
		case 'figure':
		case 'footer':
		case 'header':
		case 'hgroup':
		case 'listing':
		case 'main':
		case 'menu':
		case 'nav':
		case 'ol':
		case 'pre':
		case 'section':
		case 'summary':
		case 'ul':
			// Ignore if there is not a matching open tag
			if ( !$this->stack->inScope( $value ) ) {
				return true;
			}
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( $value );
			return true;

		case 'form':
			if ( $this->stack->indexOf( 'template' ) >= 0 ) {
				// Inside a template the form element pointer is not
				// consulted; just close the nearest <form> in scope.
				if ( !$this->stack->inScope( 'form' ) ) {
					return true;
				}
				$this->stack->generateImpliedEndTags();
				$this->stack->popTag( 'form' );
				return true;
			}
			$openform = $this->formElementPointer;
			$this->formElementPointer = null;
			if ( !$openform || !$this->stack->inScope( $openform ) ) {
				return true;
			}
			$this->stack->generateImpliedEndTags();
			// Don't flatten yet if we're removing a <form> element
			// out-of-order. (eg. `<form><div></form>`)
			$flatten = ( $this->stack->currentNode === $openform );
			$this->stack->removeElement( $openform, $flatten );
			return true;

		case 'p':
			if ( !$this->stack->inButtonScope( 'p' ) ) {
				// No open <p>: synthesize one, then reprocess the </p>.
				$this->inBodyMode( 'tag', 'p', [] );
				return $this->insertToken( 'endtag', $value, $attribs, $selfClose );
			}
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTag( $value );
			return true;

		case 'li':
			if ( !$this->stack->inListItemScope( $value ) ) {
				return true; // ignore
			}
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTag( $value );
			return true;

		case 'dd':
		case 'dt':
			if ( !$this->stack->inScope( $value ) ) {
				return true; // ignore
			}
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTag( $value );
			return true;

		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
			// Any open heading is matched, not just the same level.
			if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
				return true; // ignore
			}
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( BalanceSets::$headingSet );
			return true;

		case 'sarcasm':
			// Take a deep breath, then:
			break;

		case 'a':
		case 'b':
		case 'big':
		case 'code':
		case 'em':
		case 'font':
		case 'i':
		case 'nobr':
		case 's':
		case 'small':
		case 'strike':
		case 'strong':
		case 'tt':
		case 'u':
			if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
				return true; // If we did something, we're done.
			}
			break; // Go to the "any other end tag" case.

		case 'applet':
		case 'marquee':
		case 'object':
			if ( !$this->stack->inScope( $value ) ) {
				return true; // ignore
			}
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( $value );
			$this->afe->clearToMarker();
			return true;

		case 'br':
			// Turn </br> into <br>
			return $this->inBodyMode( 'tag', $value, [] );
	}

	// Any other end tag goes here
	foreach ( $this->stack as $i => $node ) {
		if ( $node->isHtmlNamed( $value ) ) {
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTo( $i ); // including $i
			break;
		}
		if ( $node->isA( BalanceSets::$specialSet ) ) {
			return true; // ignore this close token.
		}
	}
	return true;
}
2919
/**
 * Handle a token using the "in table" rules. Tokens that have no
 * table-specific handling are processed with the "in body" rules
 * while foster parenting is enabled on the stack.
 *
 * @param string $token "text", "eof", "comment", "tag" or "endtag"
 * @param string|null $value Tag name, text, or comment contents
 * @param array|null $attribs Attributes, for "tag" tokens
 * @param bool $selfClose Whether a "tag" token was written as `<x/>`
 * @return bool|null True, or the result of a delegated handler
 */
private function inTableMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'eof' ) {
		$this->stopParsing();
		return true;
	}
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}

	if ( $token === 'text' ) {
		if ( $this->textIntegrationMode ) {
			return $this->inBodyMode( $token, $value, $attribs, $selfClose );
		}
		if ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
			// Start collecting table text; inTableTextMode decides what
			// to do with it once a non-text token arrives.
			$this->pendingTableText = '';
			$this->originalInsertionMode = $this->parseMode;
			return $this->switchModeAndReprocess( 'inTableTextMode',
				$token, $value, $attribs, $selfClose );
		}
		// otherwise handled by the "anything else" case below.
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
			case 'caption':
				$this->afe->insertMarker();
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inCaptionMode' );
				return true;
			case 'colgroup':
				$this->stack->clearToContext( BalanceSets::$tableContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inColumnGroupMode' );
				return true;
			case 'col':
				// Synthesize the missing <colgroup>, then reprocess.
				$this->inTableMode( 'tag', 'colgroup', [] );
				return $this->insertToken( $token, $value, $attribs, $selfClose );
			case 'tbody':
			case 'tfoot':
			case 'thead':
				$this->stack->clearToContext( BalanceSets::$tableContextSet );
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->switchMode( 'inTableBodyMode' );
				return true;
			case 'td':
			case 'th':
			case 'tr':
				// Synthesize the missing <tbody>, then reprocess.
				$this->inTableMode( 'tag', 'tbody', [] );
				return $this->insertToken( $token, $value, $attribs, $selfClose );
			case 'table':
				if ( !$this->stack->inTableScope( $value ) ) {
					return true; // Ignore this tag.
				}
				// Close the open table first, then reprocess.
				$this->inTableMode( 'endtag', $value );
				return $this->insertToken( $token, $value, $attribs, $selfClose );

			case 'style':
			// OMITTED: <script>
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfClose );

			case 'input':
				if ( !isset( $attribs['type'] ) || strcasecmp( $attribs['type'], 'hidden' ) !== 0 ) {
					break; // Handle this as "everything else"
				}
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
				return true;

			case 'form':
				if (
					$this->formElementPointer ||
					$this->stack->indexOf( 'template' ) >= 0
				) {
					return true; // ignore this token
				}
				// The form is inserted and immediately closed again,
				// but stays recorded as the form element pointer.
				$this->formElementPointer =
					$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->popTag( $this->formElementPointer );
				return true;
		}
		// Everything else is handled by the "anything else" case below.
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'table' ) {
			if ( !$this->stack->inTableScope( $value ) ) {
				return true; // Ignore.
			}
			$this->stack->popTag( $value );
			$this->resetInsertionMode();
			return true;
		}
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}
		// OMITTED: <body>
		// OMITTED: <html>
		$ignoredEndTags = [
			'caption', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		];
		if ( in_array( $value, $ignoredEndTags, true ) ) {
			return true; // Ignore the token.
		}
		// Everything else is handled by the "anything else" case below.
	}

	// This is the "anything else" case:
	$this->stack->fosterParentMode = true;
	$this->inBodyMode( $token, $value, $attribs, $selfClose );
	$this->stack->fosterParentMode = false;
	return true;
}
3029
/**
 * Insertion-mode handler used while character data inside a table is
 * being buffered.
 *
 * Text tokens are appended to $this->pendingTableText. The first
 * non-text token flushes that buffer and is then reprocessed in the
 * insertion mode saved in $this->originalInsertionMode.
 *
 * @param string $token Token type; compared against 'text' here
 * @param string|null $value Token payload (the text, for 'text' tokens)
 * @param array|null $attribs Passed through untouched when reprocessing
 * @param bool $selfClose Passed through untouched when reprocessing
 * @return bool True for buffered text; otherwise whatever
 *   switchModeAndReprocess() returns
 */
private function inTableTextMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token !== 'text' ) {
		// A non-text token ends the run of buffered text: take the
		// buffer and reset it before doing anything else.
		$buffered = $this->pendingTableText;
		$this->pendingTableText = '';

		$onlyWhitespace = !preg_match( '/[^\x09\x0A\x0C\x0D\x20]/', $buffered );
		if ( $onlyWhitespace ) {
			// Nothing but whitespace: insert it in place.
			$this->stack->insertText( $buffered );
		} else {
			// Mirrors the "anything else" branch of inTableMode: the text
			// is handled by inBodyMode with foster parenting turned on.
			$this->stack->fosterParentMode = true;
			$this->inBodyMode( 'text', $buffered );
			$this->stack->fosterParentMode = false;
		}

		return $this->switchModeAndReprocess(
			$this->originalInsertionMode, $token, $value, $attribs, $selfClose
		);
	}

	// Still inside a run of text: keep accumulating.
	$this->pendingTableText .= $value;
	return true;
}
3051
3052 // helper for inCaptionMode
/**
 * Close the currently open <caption>, if there is one in table scope.
 *
 * On success this generates implied end tags, pops the stack through
 * the 'caption' element, clears the active formatting elements list
 * back to its last marker and switches to inTableMode.
 *
 * @return bool True if a caption was closed, false if no caption was
 *   in table scope (nothing is changed in that case)
 */
private function endCaption() {
	$captionOpen = $this->stack->inTableScope( 'caption' );
	if ( $captionOpen ) {
		$this->stack->generateImpliedEndTags();
		$this->stack->popTag( 'caption' );
		$this->afe->clearToMarker();
		$this->switchMode( 'inTableMode' );
		return true;
	}
	return false;
}
3063
/**
 * Insertion-mode handler for content inside a <caption>.
 *
 * Table-structure start tags and the </table> end tag implicitly close
 * the caption (via endCaption()) and are then reprocessed; </caption>
 * closes it explicitly; a fixed set of stray end tags is ignored.
 * Everything else is delegated to inBodyMode.
 *
 * @param string $token Token type ('tag', 'endtag', ...)
 * @param string|null $value Tag name for 'tag'/'endtag' tokens
 * @param array|null $attribs Attributes, forwarded on reprocessing
 * @param bool $selfClose Self-closing flag, forwarded on reprocessing
 * @return bool True when handled here, otherwise inBodyMode()'s result
 */
private function inCaptionMode( $token, $value, $attribs = null, $selfClose = false ) {
	// Set when the token should close the caption and then be reprocessed.
	$closesCaption = false;

	if ( $token === 'tag' ) {
		$closesCaption = in_array(
			$value,
			[ 'caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr' ],
			true
		);
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'caption' ) {
			$this->endCaption();
			return true;
		}
		// OMITTED: <html>
		$ignoredEndTags = [
			'body', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
		];
		if ( in_array( $value, $ignoredEndTags, true ) ) {
			// Ignore the token
			return true;
		}
		$closesCaption = ( $value === 'table' );
	}

	if ( $closesCaption ) {
		// Only reprocess when there really was a caption to close;
		// otherwise the token is dropped.
		if ( $this->endCaption() ) {
			$this->insertToken( $token, $value, $attribs, $selfClose );
		}
		return true;
	}

	// The Anything Else case
	return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
3110
/**
 * Insertion-mode handler for content inside a <colgroup>.
 *
 * Leading whitespace of text tokens, comments, <col> and <template>
 * are handled in place. Any other token closes the colgroup (when it
 * is the current node) and is reprocessed; if the current node is not
 * a colgroup the token is ignored.
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Text, comment body or tag name
 * @param array|null $attribs Attributes for 'tag' tokens
 * @param bool $selfClose Self-closing flag, forwarded on delegation
 * @return bool True when handled or ignored here, otherwise the result
 *   of the handler the token was delegated to
 */
private function inColumnGroupMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}
	if ( $token === 'eof' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfClose );
	}

	$isStartTag = ( $token === 'tag' );
	$isEndTag = ( $token === 'endtag' );

	if ( $token === 'text' ) {
		// Leading whitespace is inserted directly; only what remains
		// (if anything) goes through the "anything else" path below.
		$leading = [];
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $leading ) ) {
			$this->stack->insertText( $leading[0] );
			$value = substr( $value, strlen( $leading[0] ) );
		}
		if ( strlen( $value ) === 0 ) {
			return true; // All text handled.
		}
	} elseif ( ( $isStartTag || $isEndTag ) && $value === 'template' ) {
		return $this->inHeadMode( $token, $value, $attribs, $selfClose );
	} elseif ( $isStartTag && $value === 'col' ) {
		// OMITTED: <html>
		// <col> is inserted and immediately popped again.
		$this->stack->insertHTMLElement( $value, $attribs );
		$this->stack->pop();
		return true;
	} elseif ( $isEndTag && $value === 'col' ) {
		return true; // Ignore the token.
	} elseif ( $isEndTag && $value === 'colgroup' ) {
		if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
			$this->stack->pop();
			$this->switchMode( 'inTableMode' );
		}
		// When the current node is not a colgroup the token is ignored.
		return true;
	}

	// Anything else
	if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
		// Close the colgroup through the regular end-tag path, then
		// reprocess the token in whatever mode is now current.
		$this->inColumnGroupMode( 'endtag', 'colgroup' );
		return $this->insertToken( $token, $value, $attribs, $selfClose );
	}
	return true; // Ignore the token.
}
3161
3162 // Helper function for inTableBodyMode
/**
 * Close the currently open table section, if any.
 *
 * A section counts as open when 'tbody', 'thead' or 'tfoot' (checked in
 * that order) is in table scope. The stack is then cleared back to the
 * table body context, the current node is popped and the mode switches
 * to inTableMode.
 *
 * @return bool True if a section was closed, false if none was open
 */
private function endSection() {
	$sectionOpen = false;
	foreach ( [ 'tbody', 'thead', 'tfoot' ] as $sectionTag ) {
		if ( $this->stack->inTableScope( $sectionTag ) ) {
			$sectionOpen = true;
			break;
		}
	}
	if ( !$sectionOpen ) {
		return false;
	}

	$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
	$this->stack->pop();
	$this->switchMode( 'inTableMode' );
	return true;
}
/**
 * Insertion-mode handler for content inside <tbody>, <thead> or <tfoot>.
 *
 * Opens rows (synthesizing a <tr> for a bare cell), closes the section
 * on tokens that start another table part, ignores a fixed set of
 * stray end tags, and hands everything else to inTableMode.
 *
 * @param string $token Token type ('tag', 'endtag', ...)
 * @param string|null $value Tag name for 'tag'/'endtag' tokens
 * @param array|null $attribs Attributes for 'tag' tokens
 * @param bool $selfClose Self-closing flag, forwarded on reprocessing
 * @return bool True when handled here, otherwise inTableMode()'s result
 */
private function inTableBodyMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'tag' ) {
		if ( $value === 'tr' ) {
			$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inRowMode' );
			return true;
		}
		if ( $value === 'th' || $value === 'td' ) {
			// A cell without a row: open an attribute-less <tr> first,
			// then reprocess the cell token.
			$this->inTableBodyMode( 'tag', 'tr', [] );
			$this->insertToken( $token, $value, $attribs, $selfClose );
			return true;
		}
		$sectionEnders = [ 'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead' ];
		if ( in_array( $value, $sectionEnders, true ) ) {
			if ( $this->endSection() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'table' ) {
			// Close the section, then let the new mode see </table>.
			if ( $this->endSection() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
		if ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ], true ) ) {
			if ( $this->stack->inTableScope( $value ) ) {
				$this->endSection();
			}
			return true;
		}
		// OMITTED: <body>
		// OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup', 'td', 'th', 'tr' ], true ) ) {
			return true; // Ignore the token.
		}
	}

	// Anything else:
	return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
3228
3229 // Helper function for inRowMode
/**
 * Close the currently open <tr>, if there is one in table scope.
 *
 * On success the stack is cleared back to the table row context, the
 * current node is popped and the mode switches to inTableBodyMode.
 *
 * @return bool True if a row was closed, false if no 'tr' was in
 *   table scope
 */
private function endRow() {
	$rowOpen = $this->stack->inTableScope( 'tr' );
	if ( $rowOpen ) {
		$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
		$this->stack->pop();
		$this->switchMode( 'inTableBodyMode' );
		return true;
	}
	return false;
}
/**
 * Insertion-mode handler for content inside a <tr>.
 *
 * Opens cells, closes the row on tokens that belong to an outer table
 * part (reprocessing them afterwards), ignores a fixed set of stray
 * end tags, and hands everything else to inTableMode.
 *
 * @param string $token Token type ('tag', 'endtag', ...)
 * @param string|null $value Tag name for 'tag'/'endtag' tokens
 * @param array|null $attribs Attributes for 'tag' tokens
 * @param bool $selfClose Self-closing flag, forwarded on reprocessing
 * @return bool True when handled here, otherwise inTableMode()'s result
 */
private function inRowMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'tag' ) {
		if ( $value === 'th' || $value === 'td' ) {
			$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inCellMode' );
			// Marker in the active formatting elements list for the new cell.
			$this->afe->insertMarker();
			return true;
		}
		$rowEnders = [ 'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr' ];
		if ( in_array( $value, $rowEnders, true ) ) {
			if ( $this->endRow() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'tr' ) {
			$this->endRow();
			return true;
		}
		// OMITTED: <body>
		// OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup', 'td', 'th' ], true ) ) {
			return true; // Ignore the token.
		}
		$isSectionEnd = in_array( $value, [ 'tbody', 'tfoot', 'thead' ], true );
		if ( $value === 'table' || $isSectionEnd ) {
			// A section end tag only counts when that section is actually
			// in table scope; </table> always attempts to close the row.
			$mayCloseRow = !$isSectionEnd || $this->stack->inTableScope( $value );
			if ( $mayCloseRow && $this->endRow() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	}

	// Anything else:
	return $this->inTableMode( $token, $value, $attribs, $selfClose );
}
3294
3295 // Helper for inCellMode
/**
 * Close the currently open table cell, if any.
 *
 * Looks for 'td' and then 'th' in table scope and feeds the matching
 * end tag to inCellMode for the first one found.
 *
 * @return bool True if a cell end tag was processed, false if neither
 *   'td' nor 'th' was in table scope
 */
private function endCell() {
	foreach ( [ 'td', 'th' ] as $cellTag ) {
		if ( $this->stack->inTableScope( $cellTag ) ) {
			$this->inCellMode( 'endtag', $cellTag );
			return true;
		}
	}
	return false;
}
/**
 * Insertion-mode handler for content inside a <td> or <th>.
 *
 * Table-structure start tags close the cell and are reprocessed; the
 * cell's own end tag closes it; end tags of enclosing table parts close
 * the cell and are reprocessed when that part is in table scope.
 * Everything else is delegated to inBodyMode.
 *
 * @param string $token Token type ('tag', 'endtag', ...)
 * @param string|null $value Tag name for 'tag'/'endtag' tokens
 * @param array|null $attribs Attributes, forwarded on reprocessing
 * @param bool $selfClose Self-closing flag, forwarded on reprocessing
 * @return bool True when handled here, otherwise inBodyMode()'s result
 */
private function inCellMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'tag' ) {
		$cellEnders = [
			'caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr'
		];
		if ( in_array( $value, $cellEnders, true ) ) {
			if ( $this->endCell() ) {
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'td' || $value === 'th' ) {
			if ( $this->stack->inTableScope( $value ) ) {
				$this->stack->generateImpliedEndTags();
				$this->stack->popTag( $value );
				$this->afe->clearToMarker();
				$this->switchMode( 'inRowMode' );
			}
			return true;
		}
		// OMITTED: <body>
		// OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup' ], true ) ) {
			return true;
		}
		$outerEndTags = [ 'table', 'tbody', 'tfoot', 'thead', 'tr' ];
		if ( in_array( $value, $outerEndTags, true ) ) {
			if ( $this->stack->inTableScope( $value ) ) {
				// Close whichever cell kind is open, then let the row-level
				// handler see the end tag.
				$this->stack->generateImpliedEndTags();
				$this->stack->popTag( BalanceSets::$tableCellSet );
				$this->afe->clearToMarker();
				$this->switchMode( 'inRowMode' );
				$this->insertToken( $token, $value, $attribs, $selfClose );
			}
			return true;
		}
	}

	// Anything else:
	return $this->inBodyMode( $token, $value, $attribs, $selfClose );
}
3360
/**
 * Insertion-mode handler for content inside a <select>.
 *
 * Text and comments are inserted; <option>/<optgroup> are opened and
 * closed with implicit closing of a preceding sibling; a nested
 * <select> start tag is treated like </select>; <input>, <keygen> and
 * <textarea> close the select and are reprocessed. Tokens not
 * recognised here are ignored.
 *
 * @param string $token Token type ('text', 'eof', 'tag', 'endtag', 'comment')
 * @param string|null $value Text, comment body or tag name
 * @param array|null $attribs Attributes for 'tag' tokens
 * @param bool $selfClose Self-closing flag, forwarded on delegation
 * @return bool True when handled or ignored here, otherwise the result
 *   of the handler the token was delegated to
 */
private function inSelectMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	}
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}
	if ( $token === 'eof' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfClose );
	}

	if ( $token === 'tag' ) {
		switch ( $value ) {
			case 'script':
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfClose );

			case 'select':
				// treat it like endtag
				$this->inSelectMode( 'endtag', $value );
				return true;

			case 'textarea':
			case 'keygen':
			case 'input':
				if ( $this->stack->inSelectScope( 'select' ) ) {
					$this->inSelectMode( 'endtag', 'select' );
					return $this->insertToken( $token, $value, $attribs, $selfClose );
				}
				return true; // ignore token (fragment case)

			// OMITTED: <html>
			case 'optgroup':
			case 'option':
				// An open <option> is closed first; a new <optgroup>
				// additionally closes an open <optgroup>.
				if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
					$this->stack->pop();
				}
				if (
					$value === 'optgroup' &&
					$this->stack->currentNode->isHtmlNamed( 'optgroup' )
				) {
					$this->stack->pop();
				}
				$this->stack->insertHTMLElement( $value, $attribs );
				return true;
		}
	} elseif ( $token === 'endtag' ) {
		switch ( $value ) {
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfClose );

			case 'select':
				if ( $this->stack->inSelectScope( $value ) ) {
					$this->stack->popTag( $value );
					$this->resetInsertionMode();
				}
				// Otherwise: fragment case, nothing to close.
				return true;

			case 'option':
				if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
					$this->stack->pop();
				}
				return true;

			case 'optgroup':
				// An <option> sitting directly on top of an <optgroup> is
				// closed first so that the <optgroup> becomes the current node.
				$optionInsideGroup =
					$this->stack->currentNode->isHtmlNamed( 'option' ) &&
					$this->stack->length() >= 2 &&
					$this->stack->node( $this->stack->length() - 2 )->isHtmlNamed( 'optgroup' );
				if ( $optionInsideGroup ) {
					$this->stack->pop();
				}
				if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
					$this->stack->pop();
				}
				return true;
		}
	}

	// anything else: just ignore the token
	return true;
}
3436
/**
 * Insertion-mode handler for a <select> nested inside a table.
 *
 * Start tags of table parts close the select and are reprocessed; end
 * tags of table parts do the same, but only when that part is in table
 * scope (otherwise they are ignored). All other tokens are handled by
 * inSelectMode.
 *
 * @param string $token Token type ('tag', 'endtag', ...)
 * @param string|null $value Tag name for 'tag'/'endtag' tokens
 * @param array|null $attribs Attributes, forwarded on reprocessing
 * @param bool $selfClose Self-closing flag, forwarded on reprocessing
 * @return bool True when an end tag was ignored, otherwise the result
 *   of insertToken() or inSelectMode()
 */
private function inSelectInTableMode( $token, $value, $attribs = null, $selfClose = false ) {
	$isStartTag = ( $token === 'tag' );
	$isEndTag = ( $token === 'endtag' );

	// Only tag tokens can match; other token types always go to inSelectMode.
	$isTablePart = ( $isStartTag || $isEndTag ) && in_array(
		$value,
		[ 'caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th' ],
		true
	);

	if ( $isTablePart ) {
		if ( $isEndTag && !$this->stack->inTableScope( $value ) ) {
			return true;
		}
		// Close the select (</select> falls through to inSelectMode),
		// then reprocess the table token.
		$this->inSelectInTableMode( 'endtag', 'select' );
		return $this->insertToken( $token, $value, $attribs, $selfClose );
	}

	// anything else
	return $this->inSelectMode( $token, $value, $attribs, $selfClose );
}
3461
/**
 * Insertion-mode handler for content directly inside a <template>.
 *
 * Text and comments go to inBodyMode; a listed set of start tags and
 * the </template> end tag go to inHeadMode; table-related start tags
 * switch to the matching table insertion mode and are reprocessed;
 * any other start tag switches to inBodyMode and is reprocessed. Other
 * end tags are ignored. At end of input an open template is popped and
 * the 'eof' token is reprocessed.
 *
 * @param string $token Token type ('text', 'comment', 'eof', 'tag', 'endtag')
 * @param string|null $value Text, comment body or tag name
 * @param array|null $attribs Attributes for 'tag' tokens
 * @param bool $selfClose Self-closing flag, forwarded on delegation
 * @return bool|null True when handled or ignored here, otherwise the
 *   result of the handler the token was delegated to. An unknown token
 *   type trips Assert::invariant().
 */
private function inTemplateMode( $token, $value, $attribs = null, $selfClose = false ) {
	if ( $token === 'text' || $token === 'comment' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfClose );
	}

	if ( $token === 'eof' ) {
		if ( $this->stack->indexOf( 'template' ) >= 0 ) {
			// Unclosed template: close it, drop its insertion mode and
			// let the restored mode see the end of input.
			$this->stack->popTag( 'template' );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			$this->insertToken( $token, $value, $attribs, $selfClose );
		} else {
			$this->stopParsing();
		}
		return true;
	}

	if ( $token === 'tag' ) {
		// OMITTED: <script>
		// OMITTED: <title>
		$headTags = [
			'base', 'basefont', 'bgsound', 'link', 'meta', 'noframes', 'style', 'template'
		];
		if ( in_array( $value, $headTags, true ) ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}

		// Start tag => insertion mode in which it gets reprocessed.
		$modeForTag = [
			'caption' => 'inTableMode',
			'colgroup' => 'inTableMode',
			'tbody' => 'inTableMode',
			'tfoot' => 'inTableMode',
			'thead' => 'inTableMode',
			'col' => 'inColumnGroupMode',
			'tr' => 'inTableBodyMode',
			'td' => 'inRowMode',
			'th' => 'inRowMode',
		];
		$nextMode = isset( $modeForTag[$value] ) ? $modeForTag[$value] : 'inBodyMode';
		return $this->switchModeAndReprocess(
			$nextMode, $token, $value, $attribs, $selfClose
		);
	}

	if ( $token === 'endtag' ) {
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfClose );
		}
		// Every other end tag is ignored.
		return true;
	}

	Assert::invariant( false, "Bad token type: $token" );
}
3528 }