Balancer: remove redundant assignment
[lhc/web/wiklou.git] / includes / tidy / Balancer.php
1 <?php
2 /**
3 * An implementation of the tree building portion of the HTML5 parsing
4 * spec.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Parser
23 * @since 1.27
24 * @author C. Scott Ananian, 2016
25 */
26 namespace MediaWiki\Tidy;
27
28 use Wikimedia\Assert\Assert;
29 use Wikimedia\Assert\ParameterAssertionException;
30 use \ExplodeIterator;
31 use \IteratorAggregate;
32 use \ReverseArrayIterator;
33 use \Sanitizer;
34
35 // A note for future librarization[1] -- this file is a good candidate
36 // for splitting into an independent library, except that it is currently
37 // highly optimized for MediaWiki use. It only implements the portions
38 // of the HTML5 tree builder used by tags supported by MediaWiki, and
39 // does not contain a true tokenizer pass, instead relying on
40 // comment stripping, attribute normalization, and escaping done by
41 // the MediaWiki Sanitizer. It also deliberately avoids building
42 // a true DOM in memory, instead serializing elements to an output string
43 // as soon as possible (usually as soon as the tag is closed) to reduce
44 // its memory footprint.
45
46 // We've been gradually lifting some of these restrictions to handle
47 // non-sanitized output generated by extensions, but we shortcut the tokenizer
48 // for speed (primarily by splitting on `<`) and so rely on syntactic
49 // well-formedness.
50
51 // On the other hand, I've been pretty careful to note with comments in the
52 // code the places where this implementation omits features of the spec or
53 // depends on the MediaWiki Sanitizer. Perhaps in the future we'll want to
54 // implement the missing pieces and make this a standalone PHP HTML5 parser.
55 // In order to do so, some sort of MediaWiki-specific API will need
56 // to be added to (a) allow the Balancer to bypass the tokenizer,
57 // and (b) support on-the-fly flattening instead of DOM node creation.
58
59 // [1]: https://www.mediawiki.org/wiki/Library_infrastructure_for_MediaWiki
60
/**
 * Constants and tag-name lookup tables for the HTML5 tree building
 * algorithm.
 *
 * Every table is an associative array keyed first by namespace URI and
 * then by lower-cased tag name; the stored value is always `true`, so
 * membership is tested with isset().
 *
 * @ingroup Parser
 * @since 1.27
 */
class BalanceSets {
	const HTML_NAMESPACE = 'http://www.w3.org/1999/xhtml';
	const MATHML_NAMESPACE = 'http://www.w3.org/1998/Math/MathML';
	const SVG_NAMESPACE = 'http://www.w3.org/2000/svg';

	/** @var array HTML tags listed as unsupported. */
	public static $unsupportedSet = [
		self::HTML_NAMESPACE => [
			'html' => true,
			'head' => true,
			'body' => true,
			'frameset' => true,
			'frame' => true,
			'plaintext' => true,
			'isindex' => true,
			'xmp' => true,
			'iframe' => true,
			'noembed' => true,
			'noscript' => true,
			'script' => true,
			'title' => true
		]
	];

	/** @var array HTML elements serialized as self-closing tags without children. */
	public static $emptyElementSet = [
		self::HTML_NAMESPACE => [
			'area' => true, 'base' => true,
			'basefont' => true, 'bgsound' => true,
			'br' => true, 'col' => true,
			'command' => true, 'embed' => true,
			'frame' => true, 'hr' => true,
			'img' => true, 'input' => true,
			'keygen' => true, 'link' => true,
			'meta' => true, 'param' => true,
			'source' => true, 'track' => true,
			'wbr' => true
		]
	];

	/** @var array HTML elements whose leading linefeed is doubled on serialization. */
	public static $extraLinefeedSet = [
		self::HTML_NAMESPACE => [
			'pre' => true,
			'textarea' => true,
			'listing' => true,
		]
	];

	/** @var array The HTML heading elements. */
	public static $headingSet = [
		self::HTML_NAMESPACE => [
			'h1' => true, 'h2' => true,
			'h3' => true, 'h4' => true,
			'h5' => true, 'h6' => true
		]
	];

	/** @var array The "special" element category, across all three namespaces. */
	public static $specialSet = [
		self::HTML_NAMESPACE => [
			'address' => true, 'applet' => true, 'area' => true, 'article' => true,
			'aside' => true, 'base' => true, 'basefont' => true, 'bgsound' => true,
			'blockquote' => true, 'body' => true, 'br' => true, 'button' => true,
			'caption' => true, 'center' => true, 'col' => true, 'colgroup' => true,
			'dd' => true, 'details' => true, 'dir' => true, 'div' => true,
			'dl' => true, 'dt' => true, 'embed' => true, 'fieldset' => true,
			'figcaption' => true, 'figure' => true, 'footer' => true, 'form' => true,
			'frame' => true, 'frameset' => true, 'h1' => true, 'h2' => true,
			'h3' => true, 'h4' => true, 'h5' => true, 'h6' => true,
			'head' => true, 'header' => true, 'hgroup' => true, 'hr' => true,
			'html' => true, 'iframe' => true, 'img' => true, 'input' => true,
			'isindex' => true, 'li' => true, 'link' => true, 'listing' => true,
			'main' => true, 'marquee' => true, 'menu' => true, 'menuitem' => true,
			'meta' => true, 'nav' => true, 'noembed' => true, 'noframes' => true,
			'noscript' => true, 'object' => true, 'ol' => true, 'p' => true,
			'param' => true, 'plaintext' => true, 'pre' => true, 'script' => true,
			'section' => true, 'select' => true, 'source' => true, 'style' => true,
			'summary' => true, 'table' => true, 'tbody' => true, 'td' => true,
			'template' => true, 'textarea' => true, 'tfoot' => true, 'th' => true,
			'thead' => true, 'title' => true, 'tr' => true, 'track' => true,
			'ul' => true, 'wbr' => true, 'xmp' => true
		],
		self::SVG_NAMESPACE => [
			'foreignobject' => true,
			'desc' => true,
			'title' => true
		],
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true,
			'ms' => true, 'mtext' => true, 'annotation-xml' => true
		]
	];

	/** @var array The address/div/p trio. */
	public static $addressDivPSet = [
		self::HTML_NAMESPACE => [
			'address' => true,
			'div' => true,
			'p' => true
		]
	];

	/** @var array Table, table-section and table-row elements. */
	public static $tableSectionRowSet = [
		self::HTML_NAMESPACE => [
			'table' => true,
			'thead' => true,
			'tbody' => true,
			'tfoot' => true,
			'tr' => true
		]
	];

	/** @var array Elements closed when generating implied end tags. */
	public static $impliedEndTagsSet = [
		self::HTML_NAMESPACE => [
			'dd' => true, 'dt' => true,
			'li' => true, 'optgroup' => true,
			'option' => true, 'p' => true,
			'rb' => true, 'rp' => true,
			'rt' => true, 'rtc' => true
		]
	];

	/** @var array Elements closed when generating implied end tags "thoroughly". */
	public static $thoroughImpliedEndTagsSet = [
		self::HTML_NAMESPACE => [
			'caption' => true, 'colgroup' => true, 'dd' => true,
			'dt' => true, 'li' => true, 'optgroup' => true,
			'option' => true, 'p' => true, 'rb' => true,
			'rp' => true, 'rt' => true, 'rtc' => true,
			'tbody' => true, 'td' => true, 'tfoot' => true,
			'th' => true, 'thead' => true, 'tr' => true
		]
	];

	/** @var array The table cell elements. */
	public static $tableCellSet = [
		self::HTML_NAMESPACE => [
			'td' => true,
			'th' => true
		]
	];

	/** @var array Elements delimiting a table context. */
	public static $tableContextSet = [
		self::HTML_NAMESPACE => [
			'table' => true,
			'template' => true,
			'html' => true
		]
	];

	/** @var array Elements delimiting a table body context. */
	public static $tableBodyContextSet = [
		self::HTML_NAMESPACE => [
			'tbody' => true,
			'tfoot' => true,
			'thead' => true,
			'template' => true,
			'html' => true
		]
	];

	/** @var array Elements delimiting a table row context. */
	public static $tableRowContextSet = [
		self::HTML_NAMESPACE => [
			'tr' => true,
			'template' => true,
			'html' => true
		]
	];

	/**
	 * @var array Form-associated elements.
	 * @see https://html.spec.whatwg.org/multipage/forms.html#form-associated-element
	 */
	public static $formAssociatedSet = [
		self::HTML_NAMESPACE => [
			'button' => true, 'fieldset' => true, 'input' => true,
			'keygen' => true, 'object' => true, 'output' => true,
			'select' => true, 'textarea' => true, 'img' => true
		]
	];

	/** @var array Elements that terminate a plain "in scope" search. */
	public static $inScopeSet = [
		self::HTML_NAMESPACE => [
			'applet' => true, 'caption' => true, 'html' => true,
			'marquee' => true, 'object' => true, 'table' => true,
			'td' => true, 'template' => true, 'th' => true
		],
		self::SVG_NAMESPACE => [
			'foreignobject' => true,
			'desc' => true,
			'title' => true
		],
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true,
			'ms' => true, 'mtext' => true, 'annotation-xml' => true
		]
	];

	/** @var array|null Cache for inListItemScopeSet(); null until first use. */
	private static $inListItemScopeSet = null;

	/**
	 * Return the "list item scope" set: $inScopeSet plus HTML 'ol' and 'ul'.
	 * The set is built on first call and cached afterwards.
	 *
	 * @return array
	 */
	public static function inListItemScopeSet() {
		if ( self::$inListItemScopeSet !== null ) {
			return self::$inListItemScopeSet;
		}
		$set = self::$inScopeSet;
		$set[self::HTML_NAMESPACE]['ol'] = true;
		$set[self::HTML_NAMESPACE]['ul'] = true;
		self::$inListItemScopeSet = $set;
		return $set;
	}

	/** @var array|null Cache for inButtonScopeSet(); null until first use. */
	private static $inButtonScopeSet = null;

	/**
	 * Return the "button scope" set: $inScopeSet plus HTML 'button'.
	 * The set is built on first call and cached afterwards.
	 *
	 * @return array
	 */
	public static function inButtonScopeSet() {
		if ( self::$inButtonScopeSet !== null ) {
			return self::$inButtonScopeSet;
		}
		$set = self::$inScopeSet;
		$set[self::HTML_NAMESPACE]['button'] = true;
		self::$inButtonScopeSet = $set;
		return $set;
	}

	/** @var array Elements that terminate an "in table scope" search. */
	public static $inTableScopeSet = [
		self::HTML_NAMESPACE => [
			'html' => true,
			'table' => true,
			'template' => true
		]
	];

	/**
	 * @var array Elements that do NOT terminate an "in select scope"
	 *   search (the select scope is defined by inversion).
	 */
	public static $inInvertedSelectScopeSet = [
		self::HTML_NAMESPACE => [
			'option' => true,
			'optgroup' => true
		]
	];

	/** @var array MathML text integration points. */
	public static $mathmlTextIntegrationPointSet = [
		self::MATHML_NAMESPACE => [
			'mi' => true, 'mo' => true, 'mn' => true,
			'ms' => true, 'mtext' => true
		]
	];

	/** @var array SVG elements that are HTML integration points. */
	public static $htmlIntegrationPointSet = [
		self::SVG_NAMESPACE => [
			'foreignobject' => true, 'desc' => true, 'title' => true
		]
	];

	/** @var array Elements whose bare text gets p-wrapped, for tidy compatibility. */
	public static $tidyPWrapSet = [
		self::HTML_NAMESPACE => [
			'body' => true,
			'blockquote' => true,
			// Parsing uses <body> as the fragment context, yet the element
			// at the top level of the stack is really <html>. Using the
			// "adjusted current node" everywhere would also work, but
			// simply listing <html> here is easier.
			'html' => true,
		],
	];

	/** @var array Elements treated as inline, for tidy compatibility. */
	public static $tidyInlineSet = [
		self::HTML_NAMESPACE => [
			'a' => true, 'abbr' => true, 'acronym' => true,
			'applet' => true, 'b' => true, 'basefont' => true,
			'bdo' => true, 'big' => true, 'br' => true,
			'button' => true, 'cite' => true, 'code' => true,
			'dfn' => true, 'em' => true, 'font' => true,
			'i' => true, 'iframe' => true, 'img' => true,
			'input' => true, 'kbd' => true, 'label' => true,
			'legend' => true, 'map' => true, 'object' => true,
			'param' => true, 'q' => true, 'rb' => true,
			'rbc' => true, 'rp' => true, 'rt' => true,
			'rtc' => true, 'ruby' => true, 's' => true,
			'samp' => true, 'select' => true, 'small' => true,
			'span' => true, 'strike' => true, 'strong' => true,
			'sub' => true, 'sup' => true, 'textarea' => true,
			'tt' => true, 'u' => true, 'var' => true,
		],
	];
}
298
/**
 * A BalanceElement is a simplified version of a DOM Node. The main
 * difference is that we only keep BalanceElements around for nodes
 * currently on the BalanceStack of open elements. As soon as an
 * element is closed, with some minor exceptions relating to the
 * tree builder "adoption agency algorithm", the element and all its
 * children are serialized to a string using the flatten() method.
 * This keeps our memory usage low.
 *
 * @ingroup Parser
 * @since 1.27
 */
class BalanceElement {
	/**
	 * The namespace of the element.
	 * @var string $namespaceURI
	 */
	public $namespaceURI;
	/**
	 * The lower-cased name of the element.
	 * @var string $localName
	 */
	public $localName;
	/**
	 * Attributes for the element, in array form
	 * @var array $attribs
	 */
	public $attribs;

	/**
	 * Parent of this element: a BalanceElement while attached, null while
	 * detached, or the string "flat" once this element has been flattened
	 * into its parent.
	 * @var BalanceElement|string|null $parent
	 */
	public $parent;

	/**
	 * An array of children of this element. Typically only the last
	 * child will be an actual BalanceElement object; the rest will
	 * be strings, representing either text nodes or flattened
	 * BalanceElement objects.
	 * @var array $children
	 */
	public $children;

	/**
	 * A unique string identifier for Noah's Ark purposes, lazy initialized
	 * by getNoahKey().
	 * @var string|null $noahKey
	 */
	private $noahKey;

	/**
	 * The next active formatting element in the list, or null if this is the
	 * end of the AFE list or if the element is not in the AFE list.
	 */
	public $nextAFE;

	/**
	 * The previous active formatting element in the list, or null if this is
	 * the start of the list or if the element is not in the AFE list.
	 */
	public $prevAFE;

	/**
	 * The next element in the Noah's Ark species bucket.
	 */
	public $nextNoah;

	/**
	 * Make a new BalanceElement corresponding to the HTML DOM Element
	 * with the given localname, namespace, and attributes. The new element
	 * starts detached (null parent) and without children.
	 *
	 * @param string $namespaceURI The namespace of the element.
	 * @param string $localName The lowercased name of the tag.
	 * @param array $attribs Attributes of the element
	 */
	public function __construct( $namespaceURI, $localName, array $attribs ) {
		$this->localName = $localName;
		$this->namespaceURI = $namespaceURI;
		$this->attribs = $attribs;
		$this->parent = null;
		$this->children = [];
	}

	/**
	 * Remove the given child from this element, leaving it detached
	 * (its parent is reset to null).
	 * @param BalanceElement $elt
	 */
	private function removeChild( BalanceElement $elt ) {
		if ( $this->parent === 'flat' ) {
			// The message serializes $this, so only build it on failure.
			Assert::precondition(
				false, "Can't removeChild after flattening $this"
			);
		}
		Assert::parameter(
			$elt->parent === $this, 'elt', 'must have $this as a parent'
		);
		$idx = array_search( $elt, $this->children, true );
		Assert::parameter( $idx !== false, '$elt', 'must be a child of $this' );
		$elt->parent = null;
		array_splice( $this->children, $idx, 1 );
	}

	/**
	 * Find $a in the list of children and insert $b before it.
	 * If $b is an element that already has a parent, it is first removed
	 * from that parent.
	 * @param BalanceElement $a
	 * @param BalanceElement|string $b
	 */
	public function insertBefore( BalanceElement $a, $b ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't insertBefore after flattening."
		);
		$idx = array_search( $a, $this->children, true );
		Assert::parameter( $idx !== false, '$a', 'must be a child of $this' );
		if ( is_string( $b ) ) {
			array_splice( $this->children, $idx, 0, [ $b ] );
		} else {
			Assert::parameter( $b->parent !== 'flat', '$b', "Can't be flat" );
			if ( $b->parent !== null ) {
				$b->parent->removeChild( $b );
			}
			array_splice( $this->children, $idx, 0, [ $b ] );
			$b->parent = $this;
		}
	}

	/**
	 * Append $elt to the end of the list of children.
	 * If $elt is an element that already has a parent, it is first removed
	 * from that parent.
	 * @param BalanceElement|string $elt
	 */
	public function appendChild( $elt ) {
		Assert::precondition(
			$this->parent !== 'flat', "Can't appendChild after flattening."
		);
		if ( is_string( $elt ) ) {
			array_push( $this->children, $elt );
			return;
		}
		// Remove $elt from parent, if it had one.
		if ( $elt->parent !== null ) {
			$elt->parent->removeChild( $elt );
		}
		array_push( $this->children, $elt );
		$elt->parent = $this;
	}

	/**
	 * Transfer all of the children of $elt to $this, leaving $elt with
	 * no children.
	 * @param BalanceElement $elt
	 */
	public function adoptChildren( BalanceElement $elt ) {
		Assert::precondition(
			$elt->parent !== 'flat', "Can't adoptChildren after flattening."
		);
		foreach ( $elt->children as $child ) {
			if ( !is_string( $child ) ) {
				// This is an optimization which avoids an O(n^2) set of
				// array_splice operations.
				$child->parent = null;
			}
			$this->appendChild( $child );
		}
		$elt->children = [];
	}

	/**
	 * Flatten this node and all of its children into a string, as specified
	 * by the HTML serialization specification, and replace this node
	 * in its parent by that string. Afterwards the parent of this node is
	 * the marker string "flat".
	 *
	 * @param bool $tidyCompat When true, apply the tidy-compatibility
	 *   rules: 'mw:p-wrap' elements are renamed to 'p' (and dropped
	 *   entirely when their content is blank), and attribute-less blank
	 *   'tr'/'li' elements get the 'mw-empty-elt' class.
	 * @return string The flattened representation.
	 * @see __toString()
	 */
	public function flatten( $tidyCompat = false ) {
		Assert::parameter( $this->parent !== null, '$this', 'must be a child' );
		Assert::parameter( $this->parent !== 'flat', '$this', 'already flat' );
		$idx = array_search( $this, $this->parent->children, true );
		Assert::parameter(
			$idx !== false, '$this', 'must be a child of its parent'
		);
		if ( $tidyCompat ) {
			// Flatten any remaining element children, noting whether
			// anything other than whitespace is present.
			$blank = true;
			foreach ( $this->children as $elt ) {
				if ( !is_string( $elt ) ) {
					$elt = $elt->flatten( $tidyCompat );
				}
				if ( $blank && preg_match( '/[^\t\n\f\r ]/', $elt ) ) {
					$blank = false;
				}
			}
			if ( $this->isHtmlNamed( 'mw:p-wrap' ) ) {
				$this->localName = 'p';
			} elseif ( $blank ) {
				// Add 'mw-empty-elt' class so elements can be hidden via CSS
				// for compatibility with legacy tidy.
				if ( !count( $this->attribs ) &&
					( $this->localName === 'tr' || $this->localName === 'li' )
				) {
					$this->attribs = [ 'class' => "mw-empty-elt" ];
				}
				// Only blank p-wrap elements are dropped from the output.
				$blank = false;
			}
			$flat = $blank ? '' : "{$this}";
		} else {
			$flat = "{$this}";
		}
		$this->parent->children[$idx] = $flat;
		$this->parent = 'flat'; // for assertion checking
		return $flat;
	}

	/**
	 * Serialize this node and all of its children to a string, as specified
	 * by the HTML serialization specification.
	 *
	 * @return string The serialization of the BalanceElement
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#serialising-html-fragments
	 */
	public function __toString() {
		$encAttribs = '';
		foreach ( $this->attribs as $name => $value ) {
			$encValue = Sanitizer::encodeAttribute( $value );
			$encAttribs .= " $name=\"$encValue\"";
		}
		if ( !$this->isA( BalanceSets::$emptyElementSet ) ) {
			$out = "<{$this->localName}{$encAttribs}>";
			// Offset of the first character following the start tag.
			$len = strlen( $out );
			// flatten children
			foreach ( $this->children as $elt ) {
				$out .= "{$elt}";
			}
			$out .= "</{$this->localName}>";
			if (
				$this->isA( BalanceSets::$extraLinefeedSet ) &&
				$out[$len] === "\n"
			) {
				// Double the linefeed after pre/listing/textarea
				// according to the HTML5 fragment serialization algorithm.
				$out = substr( $out, 0, $len + 1 ) .
					substr( $out, $len );
			}
		} else {
			$out = "<{$this->localName}{$encAttribs} />";
			Assert::invariant(
				count( $this->children ) === 0,
				"Empty elements shouldn't have children."
			);
		}
		return $out;
	}

	// Utility functions on BalanceElements.

	/**
	 * Determine if $this represents a specific HTML tag, is a member of
	 * a tag set, or is equal to another BalanceElement.
	 *
	 * @param BalanceElement|array|string $set The target BalanceElement,
	 *   set (from the BalanceSets class), or string (HTML tag name).
	 * @return bool
	 */
	public function isA( $set ) {
		if ( $set instanceof BalanceElement ) {
			return $this === $set;
		} elseif ( is_array( $set ) ) {
			return isset( $set[$this->namespaceURI] ) &&
				isset( $set[$this->namespaceURI][$this->localName] );
		} else {
			// assume this is an HTML element name.
			return $this->isHtml() && $this->localName === $set;
		}
	}

	/**
	 * Determine if this element is an HTML element with the specified name
	 * @param string $tagName
	 * @return bool
	 */
	public function isHtmlNamed( $tagName ) {
		return $this->namespaceURI === BalanceSets::HTML_NAMESPACE
			&& $this->localName === $tagName;
	}

	/**
	 * Determine if $this represents an element in the HTML namespace.
	 *
	 * @return bool
	 */
	public function isHtml() {
		return $this->namespaceURI === BalanceSets::HTML_NAMESPACE;
	}

	/**
	 * Determine if $this represents a MathML text integration point,
	 * as defined in the HTML5 specification.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#mathml-text-integration-point
	 */
	public function isMathmlTextIntegrationPoint() {
		return $this->isA( BalanceSets::$mathmlTextIntegrationPointSet );
	}

	/**
	 * Determine if $this represents an HTML integration point,
	 * as defined in the HTML5 specification.
	 *
	 * @return bool
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#html-integration-point
	 */
	public function isHtmlIntegrationPoint() {
		if ( $this->isA( BalanceSets::$htmlIntegrationPointSet ) ) {
			return true;
		}
		// A MathML annotation-xml element also qualifies when its encoding
		// attribute names an HTML media type (compared case-insensitively).
		if (
			$this->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
			$this->localName === 'annotation-xml' &&
			isset( $this->attribs['encoding'] ) &&
			( strcasecmp( $this->attribs['encoding'], 'text/html' ) === 0 ||
			strcasecmp( $this->attribs['encoding'], 'application/xhtml+xml' ) === 0 )
		) {
			return true;
		}
		return false;
	}

	/**
	 * Get a string key for the Noah's Ark algorithm. The key is built from
	 * the namespace, the local name and the attributes sorted by name, so
	 * it does not depend on attribute order. It is computed once and cached.
	 *
	 * @return string
	 */
	public function getNoahKey() {
		if ( $this->noahKey === null ) {
			$attribs = $this->attribs;
			ksort( $attribs );
			$this->noahKey = serialize( [ $this->namespaceURI, $this->localName, $attribs ] );
		}
		return $this->noahKey;
	}
}
634
635 /**
636 * The "stack of open elements" as defined in the HTML5 tree builder
637 * spec. This contains methods to ensure that content (start tags, text)
638 * are inserted at the correct place in the output string, and to
639 * flatten BalanceElements as they are closed to avoid holding onto
640 * a complete DOM tree for the document in memory.
641 *
642 * The stack defines a PHP iterator to traverse it in "reverse order",
643 * that is, the most-recently-added element is visited first in a
644 * foreach loop.
645 *
646 * @ingroup Parser
647 * @since 1.27
648 * @see https://html.spec.whatwg.org/multipage/syntax.html#the-stack-of-open-elements
649 */
650 class BalanceStack implements IteratorAggregate {
651 /**
652 * Backing storage for the stack.
653 * @var array $elements
654 */
655 private $elements = [];
656 /**
657 * Foster parent mode determines how nodes are inserted into the
658 * stack.
659 * @var bool $fosterParentMode
660 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
661 */
662 public $fosterParentMode = false;
663 /**
664 * Tidy compatibility mode, determines behavior of body/blockquote
665 */
666 public $tidyCompat = false;
667 /**
668 * Reference to the current element
669 */
670 public $currentNode;
671
/**
 * Build a BalanceStack whose only entry is a BalanceElement standing in
 * for the root &lt;html&gt; node; that element is also the current node.
 */
public function __construct() {
	// The root <html> element always sits at the bottom of the stack.
	$root = new BalanceElement( BalanceSets::HTML_NAMESPACE, 'html', [] );
	$this->elements[] = $root;
	$this->currentNode = $root;
}
684
/**
 * Produce the tree builder output: the concatenation of every child of
 * the root &lt;html&gt; node. Children which are still BalanceElements
 * are flattened on the way (honouring the tidy compatibility flag).
 * @return string
 */
public function getOutput() {
	// Only the children are emitted; the outer '<html>....</html>' is not.
	$pieces = [];
	foreach ( $this->elements[0]->children as $child ) {
		if ( is_string( $child ) ) {
			$pieces[] = $child;
		} else {
			$pieces[] = $child->flatten( $this->tidyCompat );
		}
	}
	return implode( '', $pieces );
}
699
/**
 * Insert a comment at the appropriate place for inserting a node.
 * The comment is wrapped in its delimiters and handed to insertText()
 * flagged as a comment, which exempts it from tidy p-wrapping.
 * @param string $value Content of the comment.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-comment
 */
public function insertComment( $value ) {
	$comment = '<!--' . $value . '-->';
	return $this->insertText( $comment, true );
}
709
/**
 * Insert text at the appropriate place for inserting a node.
 *
 * Three cases are handled, in order:
 * - in foster parent mode with a table/section/row element as current
 *   node, the text is handed to fosterParent();
 * - in tidy compatibility mode, non-comment text directly inside an
 *   element of the tidy p-wrap set first opens an 'mw:p-wrap' element
 *   and is then inserted into it;
 * - otherwise the text is appended to the current node.
 *
 * @param string $value The text to insert.
 * @param bool $isComment True when $value is a serialized comment; such
 *   text never triggers tidy p-wrapping.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
 */
public function insertText( $value, $isComment = false ) {
	if (
		$this->fosterParentMode &&
		$this->currentNode->isA( BalanceSets::$tableSectionRowSet )
	) {
		$this->fosterParent( $value );
	} elseif (
		$this->tidyCompat && !$isComment &&
		$this->currentNode->isA( BalanceSets::$tidyPWrapSet )
	) {
		// Open the wrapper, then retry: the wrapper is now the current
		// node and is not itself in the p-wrap set.
		$this->insertHTMLElement( 'mw:p-wrap', [] );
		return $this->insertText( $value );
	} else {
		$this->currentNode->appendChild( $value );
	}
}
731
/**
 * Create a BalanceElement in the given namespace and insert it at the
 * appropriate place, pushing it on to the open elements stack.
 * @param string $namespaceURI The element namespace
 * @param string $tag The tag name
 * @param array $attribs Attributes of the element, in array form (they
 *   are passed to the BalanceElement constructor, which requires an array).
 * @return BalanceElement
 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-a-foreign-element
 */
public function insertForeignElement( $namespaceURI, $tag, $attribs ) {
	$elt = new BalanceElement( $namespaceURI, $tag, $attribs );
	return $this->insertElement( $elt );
}
746
/**
 * Create an element in the HTML namespace and insert it at the
 * appropriate place, pushing it on to the open elements stack.
 * @param string $tag The tag name
 * @param array $attribs Attributes of the element, in array form.
 * @return BalanceElement
 * @see https://html.spec.whatwg.org/multipage/syntax.html#insert-an-html-element
 */
public function insertHTMLElement( $tag, $attribs ) {
	$htmlNs = BalanceSets::HTML_NAMESPACE;
	return $this->insertForeignElement( $htmlNs, $tag, $attribs );
}
760
/**
 * Insert an element at the appropriate place and push it on to the
 * open elements stack, making it the current node.
 *
 * If the current node is an 'mw:p-wrap' element and $elt is not in the
 * tidy inline set, the wrapper is popped first. In foster parent mode
 * with a table/section/row element as current node the element is
 * placed by fosterParent(); otherwise it is appended to the current node.
 *
 * @param BalanceElement $elt
 * @return BalanceElement The element that was pushed on to the stack.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#appropriate-place-for-inserting-a-node
 */
public function insertElement( BalanceElement $elt ) {
	if (
		$this->currentNode->isHtmlNamed( 'mw:p-wrap' ) &&
		!$elt->isA( BalanceSets::$tidyInlineSet )
	) {
		// Tidy compatibility.
		$this->pop();
	}
	if (
		$this->fosterParentMode &&
		$this->currentNode->isA( BalanceSets::$tableSectionRowSet )
	) {
		$elt = $this->fosterParent( $elt );
	} else {
		$this->currentNode->appendChild( $elt );
	}
	// The invariant messages serialize $elt, so they are only built once
	// a check has actually failed rather than on every insertion.
	if ( $elt->parent === null ) {
		Assert::invariant( false, "$elt must be in tree" );
	}
	if ( $elt->parent === 'flat' ) {
		Assert::invariant( false, "$elt must not have been previous flattened" );
	}
	array_push( $this->elements, $elt );
	$this->currentNode = $elt;
	return $elt;
}
790
/**
 * Check whether $tag is in scope on the stack, using the default
 * scope-terminating set (BalanceSets::$inScopeSet).
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-scope
 */
public function inScope( $tag ) {
	$boundary = BalanceSets::$inScopeSet;
	return $this->inSpecificScope( $tag, $boundary );
}
800
/**
 * Check whether $tag is in button scope on the stack, i.e. using the
 * scope-terminating set returned by BalanceSets::inButtonScopeSet().
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-button-scope
 */
public function inButtonScope( $tag ) {
	$boundary = BalanceSets::inButtonScopeSet();
	return $this->inSpecificScope( $tag, $boundary );
}
810
/**
 * Check whether $tag is in list item scope on the stack, i.e. using the
 * scope-terminating set returned by BalanceSets::inListItemScopeSet().
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-list-item-scope
 */
public function inListItemScope( $tag ) {
	$boundary = BalanceSets::inListItemScopeSet();
	return $this->inSpecificScope( $tag, $boundary );
}
820
/**
 * Check whether $tag is in table scope on the stack, i.e. using
 * BalanceSets::$inTableScopeSet as the scope-terminating set.
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-table-scope
 */
public function inTableScope( $tag ) {
	$boundary = BalanceSets::$inTableScopeSet;
	return $this->inSpecificScope( $tag, $boundary );
}
830
/**
 * Check whether $tag is in select scope on the stack.
 *
 * Select scope is defined by an *inverted* set: the search continues
 * only through elements listed in BalanceSets::$inInvertedSelectScopeSet
 * and stops at anything else, so inSpecificScope() cannot be reused.
 *
 * @param BalanceElement|array|string $tag
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-select-scope
 */
public function inSelectScope( $tag ) {
	$passThrough = BalanceSets::$inInvertedSelectScopeSet;
	foreach ( $this as $node ) {
		if ( $node->isA( $tag ) ) {
			return true;
		}
		if ( $node->isA( $passThrough ) ) {
			// Still inside the scope; look at the next element down.
			continue;
		}
		return false;
	}
	return false;
}
850
/**
 * Check whether $tag is in the scope delimited by $set.
 *
 * The stack is iterated via getIterator(); the result is true if an
 * element matching $tag is met before any element matching $set, and
 * false otherwise (including when neither is found).
 *
 * @param BalanceElement|array|string $tag
 * @param BalanceElement|array|string $set
 * @return bool
 * @see https://html.spec.whatwg.org/multipage/syntax.html#has-an-element-in-the-specific-scope
 */
public function inSpecificScope( $tag, $set ) {
	foreach ( $this as $node ) {
		if ( $node->isA( $tag ) ) {
			return true;
		} elseif ( $node->isA( $set ) ) {
			// Hit the scope boundary before finding the target.
			break;
		}
	}
	return false;
}
869
/**
 * Generate implied end tags: keep popping the current node for as long
 * as it belongs to the implied-end-tag set.
 * @param string|null $butnot Name of an HTML tag at which popping stops
 *   even though it is in the set; null for no such exception.
 * @param bool $thorough True to use the larger "thorough" set.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#generate-implied-end-tags
 */
public function generateImpliedEndTags( $butnot = null, $thorough = false ) {
	if ( $thorough ) {
		$closable = BalanceSets::$thoroughImpliedEndTagsSet;
	} else {
		$closable = BalanceSets::$impliedEndTagsSet;
	}
	while ( $this->currentNode ) {
		$node = $this->currentNode;
		$excluded = $butnot !== null && $node->isHtmlNamed( $butnot );
		if ( $excluded || !$node->isA( $closable ) ) {
			return;
		}
		$this->pop();
	}
}
890
/**
 * Return the adjusted current node: the fragment context when one is
 * given and the stack holds exactly one element, otherwise the current
 * node.
 * @param mixed $fragmentContext The fragment context, or a falsy value
 *   when there is none.
 * @return mixed $fragmentContext or the current node.
 */
public function adjustedCurrentNode( $fragmentContext ) {
	if ( $fragmentContext && count( $this->elements ) === 1 ) {
		return $fragmentContext;
	}
	return $this->currentNode;
}
898
/**
 * Provide the iterator used when this stack is traversed with foreach.
 * The current node is visited first and the root node last.
 * @return ReverseArrayIterator
 */
public function getIterator() {
	$reversed = new ReverseArrayIterator( $this->elements );
	return $reversed;
}
907
/**
 * Look up the BalanceElement stored at stack position $idx; position 0
 * is the root element.
 * @param int $idx
 * @return BalanceElement
 */
public function node( $idx ) {
	$found = $this->elements[ $idx ];
	return $found;
}
917
/**
 * Put $elt in the BalanceStack at position $idx, in place of the element
 * currently there. Neither element may already have been flattened.
 * When $idx is the top of the stack, $elt also becomes the current node.
 * @param int $idx
 * @param BalanceElement $elt
 */
public function replaceAt( $idx, BalanceElement $elt ) {
	$previous = $this->elements[$idx];
	Assert::precondition(
		$previous->parent !== 'flat',
		'Replaced element should not have already been flattened.'
	);
	Assert::precondition(
		$elt->parent !== 'flat',
		'New element should not have already been flattened.'
	);
	$this->elements[$idx] = $elt;
	$top = count( $this->elements ) - 1;
	if ( $idx === $top ) {
		$this->currentNode = $elt;
	}
}
937
/**
 * Find the position in the BalanceStack of the element closest to the
 * top which matches the given BalanceElement, set, or HTML tag name.
 * @param BalanceElement|array|string $tag
 * @return int The position (0 being the root), or -1 when nothing matches.
 */
public function indexOf( $tag ) {
	// Scan from the top of the stack down to the root.
	$i = count( $this->elements );
	while ( $i-- > 0 ) {
		if ( $this->elements[$i]->isA( $tag ) ) {
			return $i;
		}
	}
	return -1;
}
952
/**
 * Report how many elements are currently on the BalanceStack.
 * @return int
 */
public function length() {
	$size = count( $this->elements );
	return $size;
}
960
/**
 * Take the current node off the BalanceStack and make the element below
 * it (or null, if the stack is now empty) the current node. The removed
 * element is flattened, unless it is an 'mw:p-wrap' element, which is
 * left unflattened here.
 */
public function pop() {
	$popped = array_pop( $this->elements );
	$remaining = count( $this->elements );
	$this->currentNode = $remaining ? $this->elements[ $remaining - 1 ] : null;
	if ( $popped->isHtmlNamed( 'mw:p-wrap' ) ) {
		return;
	}
	$popped->flatten( $this->tidyCompat );
}
976
/**
 * Pop (and thereby flatten) every node from the top of the
 * BalanceStack down to and including position $idx.
 * @param int $idx
 */
public function popTo( $idx ) {
	// Number of entries sitting at position $idx or above it.
	$excess = count( $this->elements ) - $idx;
	while ( $excess-- > 0 ) {
		$this->pop();
	}
}
987
/**
 * Pop elements off the stack until the first element with the
 * specified HTML tagname (or matching the given set) has been popped;
 * the matching element itself is popped too. If nothing matches, the
 * whole stack is emptied.
 * @param BalanceElement|array|string $tag
 */
public function popTag( $tag ) {
	while ( $this->currentNode ) {
		// Test before popping: pop() moves the current node pointer.
		$matched = $this->currentNode->isA( $tag );
		$this->pop();
		if ( $matched ) {
			break;
		}
	}
}
1003
/**
 * Pop elements off the stack until the current node is a member of
 * the specified set; that element is *not* popped.
 * @param BalanceElement|array|string $set
 */
public function clearToContext( $set ) {
	// The bottom entry (the <html> element) is never popped, so at
	// most count - 1 entries may be removed.
	$poppable = count( $this->elements ) - 1;
	while ( $poppable > 0 && !$this->currentNode->isA( $set ) ) {
		$this->pop();
		$poppable--;
	}
}
1018
/**
 * Take the given $elt out of the BalanceStack, wherever it sits,
 * optionally flattening it in the process.
 * @param BalanceElement $elt The element to remove.
 * @param bool $flatten Whether to flatten the removed element.
 */
public function removeElement( BalanceElement $elt, $flatten = true ) {
	Assert::parameter(
		$elt->parent !== 'flat',
		'$elt',
		'$elt should not already have been flattened.'
	);
	Assert::parameter(
		$elt->parent->parent !== 'flat',
		'$elt',
		'The parent of $elt should not already have been flattened.'
	);
	$position = array_search( $elt, $this->elements, true );
	Assert::parameter( $position !== false, '$elt', 'must be in stack' );
	array_splice( $this->elements, $position, 1 );
	// If the removed entry was the top of the stack, the entry below it
	// becomes the current node.
	$wasTop = ( $position === count( $this->elements ) );
	if ( $wasTop ) {
		$this->currentNode = $this->elements[$position - 1];
	}
	if ( $flatten ) {
		// Serialize $elt into its parent right away. Without this it
		// would still be serialized when the parent is, but its object
		// tree would be kept in memory a little longer.
		$elt->flatten( $this->tidyCompat );
	}
	Assert::postcondition(
		!in_array( $elt, $this->elements, true ),
		'$elt should no longer be in open elements stack'
	);
}
1054
/**
 * Find $a in the BalanceStack and insert $b after it.
 * If $a is the current node, $b becomes the new current node.
 * @param BalanceElement $a Element that must already be on the stack.
 * @param BalanceElement $b Element to insert directly after $a.
 * @throws ParameterAssertionException if $a is not on the stack.
 */
public function insertAfter( BalanceElement $a, BalanceElement $b ) {
	$idx = $this->indexOf( $a );
	// indexOf() signals "not found" with -1, never with false, so the
	// check has to be on the sign of the index. Comparing against false
	// let a missing $a slip through and spliced $b in at position 0.
	Assert::parameter( $idx >= 0, '$a', 'must be in stack' );
	if ( $idx === count( $this->elements ) - 1 ) {
		// $a is on top of the stack: pushing $b makes it the current node.
		array_push( $this->elements, $b );
		$this->currentNode = $b;
	} else {
		array_splice( $this->elements, $idx + 1, 0, [ $b ] );
	}
}
1070
1071 // Fostering and adoption.
1072
/**
 * Foster parent the given $elt in the stack of open elements.
 * @param BalanceElement|string $elt An element, or a string of text.
 * @return BalanceElement|string Normally $elt itself; in tidy-compatible
 *   mode an already existing `mw:p-wrap` sibling may be returned instead
 *   of inserting a second wrapper.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#foster-parent
 */
private function fosterParent( $elt ) {
	$tableIdx = $this->indexOf( 'table' );
	$templateIdx = $this->indexOf( 'template' );
	$before = null;

	// Pick the foster parent: a template that sits above the last table
	// (or any template when there is no table) takes priority; otherwise
	// the parent of the last table, inserting just before that table;
	// otherwise the root element.
	$templateWins = $templateIdx >= 0 &&
		( $tableIdx < 0 || $templateIdx > $tableIdx );
	if ( $templateWins ) {
		$parent = $this->elements[$templateIdx];
	} elseif ( $tableIdx >= 0 ) {
		$before = $this->elements[$tableIdx];
		$parent = $before->parent;
		// Assume all tables have parents, since we're not running scripts!
		Assert::invariant(
			$parent !== null, "All tables should have parents"
		);
	} else {
		$parent = $this->elements[0]; // the `html` element.
	}

	if ( $this->tidyCompat ) {
		if ( is_string( $elt ) ) {
			// Fostered text: wrap it in a p-wrapper if the parent asks for one.
			if ( $parent->isA( BalanceSets::$tidyPWrapSet ) ) {
				$this->insertHTMLElement( 'mw:p-wrap', [] );
				$this->insertText( $elt );
				return $elt;
			}
		} elseif ( $elt->isHtmlNamed( 'mw:p-wrap' ) ) {
			// Fostered p-wrapper: if the child immediately preceding the
			// insertion point is itself a p-wrapper, reuse that one.
			$idx = $before ?
				array_search( $before, $parent->children, true ) :
				count( $parent->children );
			$prior = $idx > 0 ? $parent->children[$idx - 1] : '';
			if (
				$prior instanceof BalanceElement &&
				$prior->isHtmlNamed( 'mw:p-wrap' )
			) {
				return $prior;
			}
		}
	}

	if ( $before ) {
		$parent->insertBefore( $before, $elt );
	} else {
		$parent->appendChild( $elt );
	}
	return $elt;
}
1129
/**
 * Run the "adoption agency algorithm" (AAA) for the given subject
 * tag name.
 * @param string $tag The subject tag name.
 * @param BalanceActiveFormattingElements $afe The current
 *   active formatting elements list.
 * @return bool True if the adoption agency algorithm "did something",
 *   false if more processing is required by the caller.
 * @see https://html.spec.whatwg.org/multipage/syntax.html#adoption-agency-algorithm
 */
public function adoptionAgency( $tag, $afe ) {
	// Shortcut from the spec: if the current node is an HTML element
	// named like the subject and is not in the list of active formatting
	// elements, just pop it and stop.
	if (
		$this->currentNode->isHtmlNamed( $tag ) &&
		!$afe->isInList( $this->currentNode )
	) {
		$this->pop();
		return true; // no more handling required
	}

	// Outer loop: the spec caps it at eight iterations.
	for ( $outer = 0; $outer < 8; $outer++ ) {
		// The formatting element is the last entry of the active
		// formatting list, after the last marker (if any), that has the
		// same tag name as the token.
		$formatting = $afe->findElementByTag( $tag );

		// No such entry: let the caller act as for "any other end tag".
		if ( !$formatting ) {
			return false; // false means handle by the default case
		}

		// Entry exists but is not on the stack of open elements: parse
		// error; drop it from the list and stop.
		$fmtIndex = $this->indexOf( $formatting );
		if ( $fmtIndex < 0 ) {
			$afe->remove( $formatting );
			return true; // true means no more handling required
		}

		// Entry is on the stack but not in scope: parse error; ignore
		// the token.
		if ( !$this->inScope( $formatting ) ) {
			return true;
		}

		// The furthest block is the topmost node below the formatting
		// element in the stack that belongs to the special category.
		// There might not be one.
		$furthest = null;
		$furthestIndex = -1;
		$depth = $this->length();
		for ( $i = $fmtIndex + 1; $i < $depth; $i++ ) {
			$candidate = $this->node( $i );
			if ( $candidate->isA( BalanceSets::$specialSet ) ) {
				$furthest = $candidate;
				$furthestIndex = $i;
				break;
			}
		}

		// Without a furthest block: pop everything from the current node
		// up to and including the formatting element, remove the
		// formatting element from the active formatting list, and stop.
		if ( !$furthest ) {
			$this->popTag( $formatting );
			$afe->remove( $formatting );
			return true;
		}

		// The common ancestor is the element immediately above the
		// formatting element in the stack of open elements.
		$ancestor = $this->node( $fmtIndex - 1 );

		// A bookmark remembers the position of the formatting element
		// in the active formatting list relative to its neighbours.
		$bookmark = new BalanceElement( '[bookmark]', '[bookmark]', [] );
		$afe->insertAfter( $formatting, $bookmark );

		// Node and last node both start out as the furthest block.
		$last = $furthest;
		$cursor = $furthestIndex;

		// Inner loop; $inner counts the iterations starting from one.
		for ( $inner = 1; ; $inner++ ) {
			// Step to the element immediately above node in the stack.
			// If node was removed from the stack in the previous
			// iteration, decrementing the index still lands on the
			// element that used to be above it.
			$node = $this->node( --$cursor );

			// Reached the formatting element: the inner loop is done.
			if ( $node === $formatting ) {
				break;
			}

			// After more than three iterations, entries found in the
			// active formatting list are removed from it.
			$listed = $afe->isInList( $node );
			if ( $inner > 3 && $listed ) {
				$afe->remove( $node );
				$listed = false;
			}

			// A node that is not in the active formatting list is taken
			// off the stack of open elements, and the loop restarts.
			if ( !$listed ) {
				// Don't flatten here, since we're about to relocate
				// parts of this $node.
				$this->removeElement( $node, false );
				continue;
			}

			// Create a fresh element from node's token (common ancestor
			// being the intended parent) and substitute it for node in
			// both the active formatting list and the stack.
			$clone = new BalanceElement(
				$node->namespaceURI, $node->localName, $node->attribs );
			$afe->replace( $node, $clone );
			$this->replaceAt( $cursor, $clone );

			// While last node is still the furthest block, the bookmark
			// moves to sit right after the new element in the list.
			if ( $last === $furthest ) {
				$afe->remove( $bookmark );
				$afe->insertAfter( $clone, $bookmark );
			}

			// Insert last node into the new element (removing it from
			// its previous parent, if any), then let last node be the
			// new element.
			$clone->appendChild( $last );
			$last = $clone;
		}

		// If the common ancestor is a table, tbody, tfoot, thead, or tr
		// element, foster parent whatever last node ended up being;
		// otherwise append it to the common ancestor.
		if (
			$this->fosterParentMode &&
			$ancestor->isA( BalanceSets::$tableSectionRowSet )
		) {
			$this->fosterParent( $last );
		} else {
			$ancestor->appendChild( $last );
		}

		// Create an element for the token the formatting element was
		// created for, with the furthest block as intended parent.
		$replacement = new BalanceElement(
			$formatting->namespaceURI, $formatting->localName, $formatting->attribs );

		// Move all children of the furthest block into the new element,
		// then append the new element to the furthest block.
		$replacement->adoptChildren( $furthest );
		$furthest->appendChild( $replacement );

		// In the active formatting list: drop the formatting element and
		// put the new element where the bookmark is.
		$afe->remove( $formatting );
		$afe->replace( $bookmark, $replacement );

		// In the stack of open elements: drop the formatting element and
		// put the new element immediately below the furthest block.
		$this->removeElement( $formatting );
		$this->insertAfter( $furthest, $replacement );
	}

	return true;
}
1351
/**
 * Return the contents of the open elements stack as a string for
 * debugging: the local names of all elements, root first, separated
 * by single spaces.
 * @return string
 */
public function __toString() {
	$names = [];
	foreach ( $this->elements as $elt ) {
		$names[] = $elt->localName;
	}
	// Separator first: the reversed (array, separator) argument order is
	// a legacy form that PHP 8 no longer accepts.
	return implode( ' ', $names );
}
1364 }
1365
/**
 * A pseudo-element used as a marker in the list of active formatting
 * elements. It only carries the two link fields needed to sit in that
 * doubly-linked list.
 *
 * @ingroup Parser
 * @since 1.27
 */
class BalanceMarker {
	/** Following entry in the active formatting elements list, if any */
	public $nextAFE = null;

	/** Preceding entry in the active formatting elements list, if any */
	public $prevAFE = null;
}
1376
/**
 * The list of active formatting elements, which is used to handle
 * mis-nested formatting element tags in the HTML5 tree builder
 * specification.
 *
 * The list is doubly linked through the prevAFE/nextAFE properties of
 * its entries, which are BalanceElement objects and BalanceMarker
 * objects. BalanceElement entries are additionally chained into
 * per-segment "Noah" buckets through their nextNoah property.
 *
 * @ingroup Parser
 * @since 1.27
 * @see https://html.spec.whatwg.org/multipage/syntax.html#list-of-active-formatting-elements
 */
class BalanceActiveFormattingElements {
	/** The last (most recent) element in the list */
	private $tail;

	/** The first (least recent) element in the list */
	private $head;

	/**
	 * An array of arrays representing the population of elements in each bucket
	 * according to the Noah's Ark clause. The outer array is stack-like, with each
	 * integer-indexed element representing a segment of the list, bounded by
	 * markers. The first element represents the segment of the list before the
	 * first marker.
	 *
	 * The inner arrays are indexed by "Noah key", which is a string which uniquely
	 * identifies each bucket according to the rules in the spec. The value in
	 * the inner array is the first (least recently inserted) element in the bucket,
	 * and subsequent members of the bucket can be found by iterating through the
	 * singly-linked list via $node->nextNoah.
	 *
	 * This is optimised for the most common case of inserting into a bucket
	 * with zero members, and deleting a bucket containing one member. In the
	 * worst case, iteration through the list is still O(1) in the document
	 * size, since each bucket can have at most 3 members.
	 */
	private $noahTableStack = [ [] ];

	/**
	 * Unlink every entry of the list and drop the head, tail and Noah
	 * table references.
	 */
	public function __destruct() {
		for ( $node = $this->head; $node; $node = $next ) {
			$next = $node->nextAFE;
			$node->prevAFE = $node->nextAFE = null;
			// BalanceMarker declares no nextNoah property; writing to it
			// would create a dynamic property on the marker.
			if ( !( $node instanceof BalanceMarker ) ) {
				$node->nextNoah = null;
			}
		}
		$this->head = $this->tail = $this->noahTableStack = null;
	}

	/**
	 * Append a marker to the list and open a new, empty Noah table for
	 * the list segment that follows it.
	 */
	public function insertMarker() {
		$this->appendToList( new BalanceMarker );
		$this->noahTableStack[] = [];
	}

	/**
	 * Follow the steps required when the spec requires us to "push onto the
	 * list of active formatting elements".
	 * @param BalanceElement $elt
	 * @throws ParameterAssertionException if $elt is already in the list.
	 */
	public function push( BalanceElement $elt ) {
		// Must not be in the list already
		if ( $elt->prevAFE !== null || $this->head === $elt ) {
			throw new ParameterAssertionException( '$elt',
				'Cannot insert a node into the AFE list twice' );
		}

		// "Noah's Ark clause" -- if the current segment already holds
		// three entries with the same Noah key, the least recently
		// inserted of them is removed before the new one is added.
		$noahKey = $elt->getNoahKey();
		$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
		if ( !isset( $table[$noahKey] ) ) {
			$table[$noahKey] = $elt;
		} else {
			$count = 1;
			$head = $tail = $table[$noahKey];
			while ( $tail->nextNoah ) {
				$tail = $tail->nextNoah;
				$count++;
			}
			if ( $count >= 3 ) {
				$this->remove( $head );
			}
			$tail->nextNoah = $elt;
		}
		// Add to the main AFE list
		$this->appendToList( $elt );
	}

	/**
	 * Link $node in as the new last entry of the main list.
	 * @param BalanceElement|BalanceMarker $node
	 */
	private function appendToList( $node ) {
		if ( $this->tail ) {
			$this->tail->nextAFE = $node;
			$node->prevAFE = $this->tail;
		} else {
			$this->head = $node;
		}
		$this->tail = $node;
	}

	/**
	 * Follow the steps required when the spec asks us to "clear the list of
	 * active formatting elements up to the last marker".
	 */
	public function clearToMarker() {
		// Iterate back through the list starting from the tail
		$tail = $this->tail;
		while ( $tail && !( $tail instanceof BalanceMarker ) ) {
			// Unlink the element
			$prev = $tail->prevAFE;
			$tail->prevAFE = null;
			if ( $prev ) {
				$prev->nextAFE = null;
			}
			$tail->nextNoah = null;
			$tail = $prev;
		}
		// If we finished on a marker, unlink it and pop it off the Noah table stack
		if ( $tail ) {
			$prev = $tail->prevAFE;
			if ( $prev ) {
				$prev->nextAFE = null;
			}
			$tail = $prev;
			array_pop( $this->noahTableStack );
		} else {
			// No marker: wipe the top-level Noah table (which is the only one)
			$this->noahTableStack[0] = [];
		}
		// If we removed all the elements, clear the head pointer
		if ( !$tail ) {
			$this->head = null;
		}
		$this->tail = $tail;
	}

	/**
	 * Find and return the last element with the specified tag between the
	 * end of the list and the last marker on the list.
	 * Used when parsing &lt;a&gt; "in body mode".
	 * @param string $tag Local name to look for.
	 * @return BalanceElement|null The matching element, or null if none.
	 */
	public function findElementByTag( $tag ) {
		$elt = $this->tail;
		while ( $elt && !( $elt instanceof BalanceMarker ) ) {
			if ( $elt->localName === $tag ) {
				return $elt;
			}
			$elt = $elt->prevAFE;
		}
		return null;
	}

	/**
	 * Determine whether an element is in the list of formatting elements.
	 * An element counts as listed when it is the head of the list or has
	 * a predecessor link.
	 * @param BalanceElement $elt
	 * @return bool
	 */
	public function isInList( BalanceElement $elt ) {
		return $this->head === $elt || $elt->prevAFE;
	}

	/**
	 * Find the element $elt in the list and remove it.
	 * Used when parsing &lt;a&gt; in body mode.
	 * @param BalanceElement $elt
	 * @throws ParameterAssertionException if $elt is not in the list.
	 */
	public function remove( BalanceElement $elt ) {
		if ( $this->head !== $elt && !$elt->prevAFE ) {
			throw new ParameterAssertionException( '$elt',
				"Attempted to remove an element which is not in the AFE list" );
		}
		// Update head and tail pointers
		if ( $this->head === $elt ) {
			$this->head = $elt->nextAFE;
		}
		if ( $this->tail === $elt ) {
			$this->tail = $elt->prevAFE;
		}
		// Update previous element
		if ( $elt->prevAFE ) {
			$elt->prevAFE->nextAFE = $elt->nextAFE;
		}
		// Update next element
		if ( $elt->nextAFE ) {
			$elt->nextAFE->prevAFE = $elt->prevAFE;
		}
		// Clear pointers so that isInList() etc. will work
		$elt->prevAFE = $elt->nextAFE = null;
		// Update Noah list
		$this->removeFromNoahList( $elt );
	}

	/**
	 * Append $elt to the bucket for its Noah key in the Noah table of
	 * the newest list segment.
	 * @param BalanceElement $elt
	 */
	private function addToNoahList( BalanceElement $elt ) {
		$noahKey = $elt->getNoahKey();
		$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
		if ( !isset( $table[$noahKey] ) ) {
			$table[$noahKey] = $elt;
		} else {
			$tail = $table[$noahKey];
			while ( $tail->nextNoah ) {
				$tail = $tail->nextNoah;
			}
			$tail->nextNoah = $elt;
		}
	}

	/**
	 * Unlink $elt from the bucket for its Noah key in the Noah table of
	 * the newest list segment. Does nothing if $elt is not found there.
	 * NOTE(review): only the newest segment's table is consulted; an
	 * element that belongs to an older segment is left untouched here.
	 * @param BalanceElement $elt
	 */
	private function removeFromNoahList( BalanceElement $elt ) {
		$table =& $this->noahTableStack[ count( $this->noahTableStack ) - 1 ];
		$key = $elt->getNoahKey();
		if ( !isset( $table[$key] ) ) {
			// No bucket for this key in the current segment. Reading the
			// missing entry and walking from it would dereference null.
			return;
		}
		$noahElt = $table[$key];
		if ( $noahElt === $elt ) {
			// $elt is the first member: promote its successor, or drop
			// the bucket when it was the only member.
			if ( $noahElt->nextNoah ) {
				$table[$key] = $noahElt->nextNoah;
				$noahElt->nextNoah = null;
			} else {
				unset( $table[$key] );
			}
			return;
		}
		// Otherwise walk the chain looking for the member preceding $elt.
		for ( $prev = $noahElt; $prev->nextNoah; $prev = $prev->nextNoah ) {
			if ( $prev->nextNoah === $elt ) {
				// Found it, unlink
				$prev->nextNoah = $elt->nextNoah;
				$elt->nextNoah = null;
				return;
			}
		}
	}

	/**
	 * Find element $a in the list and replace it with element $b
	 * @param BalanceElement $a Element currently in the list.
	 * @param BalanceElement $b Element taking over $a's position.
	 * @throws ParameterAssertionException if $a is not in the list.
	 */
	public function replace( BalanceElement $a, BalanceElement $b ) {
		if ( $this->head !== $a && !$a->prevAFE ) {
			throw new ParameterAssertionException( '$a',
				"Attempted to replace an element which is not in the AFE list" );
		}
		// Update head and tail pointers
		if ( $this->head === $a ) {
			$this->head = $b;
		}
		if ( $this->tail === $a ) {
			$this->tail = $b;
		}
		// Update previous element
		if ( $a->prevAFE ) {
			$a->prevAFE->nextAFE = $b;
		}
		// Update next element
		if ( $a->nextAFE ) {
			$a->nextAFE->prevAFE = $b;
		}
		$b->prevAFE = $a->prevAFE;
		$b->nextAFE = $a->nextAFE;
		$a->nextAFE = $a->prevAFE = null;
		// Update Noah list
		$this->removeFromNoahList( $a );
		$this->addToNoahList( $b );
	}

	/**
	 * Find $a in the list and insert $b after it.
	 * @param BalanceElement $a Element currently in the list.
	 * @param BalanceElement $b Element to link in directly after $a.
	 * @throws ParameterAssertionException if $a is not in the list.
	 */
	public function insertAfter( BalanceElement $a, BalanceElement $b ) {
		if ( $this->head !== $a && !$a->prevAFE ) {
			throw new ParameterAssertionException( '$a',
				"Attempted to insert after an element which is not in the AFE list" );
		}
		if ( $this->tail === $a ) {
			$this->tail = $b;
		}
		if ( $a->nextAFE ) {
			$a->nextAFE->prevAFE = $b;
		}
		$b->nextAFE = $a->nextAFE;
		$b->prevAFE = $a;
		$a->nextAFE = $b;
		$this->addToNoahList( $b );
	}

	// @codingStandardsIgnoreStart Generic.Files.LineLength.TooLong
	/**
	 * Reconstruct the active formatting elements.
	 * @param BalanceStack $stack The open elements stack
	 * @see https://html.spec.whatwg.org/multipage/syntax.html#reconstruct-the-active-formatting-elements
	 */
	// @codingStandardsIgnoreEnd
	public function reconstruct( $stack ) {
		$entry = $this->tail;
		// If there are no entries in the list of active formatting elements,
		// then there is nothing to reconstruct
		if ( !$entry ) {
			return;
		}
		// If the last is a marker, do nothing.
		if ( $entry instanceof BalanceMarker ) {
			return;
		}
		// Or if it is an open element, do nothing.
		if ( $stack->indexOf( $entry ) >= 0 ) {
			return;
		}

		// Loop backward through the list until we find a marker or an
		// open element
		$foundit = false;
		while ( $entry->prevAFE ) {
			$entry = $entry->prevAFE;
			if ( $entry instanceof BalanceMarker || $stack->indexOf( $entry ) >= 0 ) {
				$foundit = true;
				break;
			}
		}

		// Now loop forward, starting from the element after the current one (or
		// the first element if we didn't find a marker or open element),
		// recreating formatting elements and pushing them back onto the list
		// of open elements.
		if ( $foundit ) {
			$entry = $entry->nextAFE;
		}
		do {
			$newElement = $stack->insertHTMLElement(
				$entry->localName,
				$entry->attribs );
			$this->replace( $entry, $newElement );
			$entry = $newElement->nextAFE;
		} while ( $entry );
	}

	/**
	 * Get a string representation of the AFE list, for debugging.
	 * One line is produced per entry, head first; broken back links and
	 * a wrong tail pointer are reported inline.
	 * @return string
	 */
	public function __toString() {
		$prev = null;
		$s = '';
		for ( $node = $this->head; $node; $prev = $node, $node = $node->nextAFE ) {
			if ( $node instanceof BalanceMarker ) {
				$s .= "MARKER\n";
				continue;
			}
			$s .= $node->localName . '#' . substr( md5( spl_object_hash( $node ) ), 0, 8 );
			if ( $node->nextNoah ) {
				$s .= " (noah sibling: {$node->nextNoah->localName}#" .
					substr( md5( spl_object_hash( $node->nextNoah ) ), 0, 8 ) .
					')';
			}
			if ( $node->nextAFE && $node->nextAFE->prevAFE !== $node ) {
				$s .= " (reverse link is wrong!)";
			}
			$s .= "\n";
		}
		if ( $prev !== $this->tail ) {
			$s .= "(tail pointer is wrong!)\n";
		}
		return $s;
	}
}
1732
1733 /**
1734 * An implementation of the tree building portion of the HTML5 parsing
1735 * spec.
1736 *
1737 * This is used to balance and tidy output so that the result can
1738 * always be cleanly serialized/deserialized by an HTML5 parser. It
1739 * does *not* guarantee "conforming" output -- the HTML5 spec contains
1740 * a number of constraints which are not enforced by the HTML5 parsing
1741 * process. But the result will be free of gross errors: misnested or
1742 * unclosed tags, for example, and will be unchanged by spec-compliant
1743 * parsing followed by serialization.
1744 *
1745 * The tree building stage is structured as a state machine.
1746 * When comparing the implementation to
1747 * https://www.w3.org/TR/html5/syntax.html#tree-construction
1748 * note that each state is implemented as a function with a
1749 * name ending in `Mode` (because the HTML spec refers to them
1750 * as insertion modes). The current insertion mode is held by
1751 * the $parseMode property.
1752 *
1753 * The following simplifications have been made:
1754 * - We handle body content only (ie, we start `in body`.)
1755 * - The document is never in "quirks mode".
1756 * - All occurrences of < and > have been entity escaped, so we
1757 * can parse tags by simply splitting on those two characters.
1758 * (This also simplifies the handling of < inside <textarea>.)
1759 * The character < must not appear inside comments.
1760 * Similarly, all attributes have been "cleaned" and are double-quoted
1761 * and escaped.
1762 * - All null characters are assumed to have been removed.
1763 * - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
1764 * <frame>, <plaintext>, <isindex>, <xmp>, <iframe>,
1765 * <noembed>, <noscript>, <script>, <title>. As a result,
1766 * further simplifications can be made:
1767 * - `frameset-ok` is not tracked.
1768 * - `head element pointer` is not tracked (but presumed non-null)
1769 * - Tokenizer has only a single mode. (<textarea> wants RCDATA and
1770 * <style>/<noframes> want RAWTEXT modes which we only loosely emulate.)
1771 *
1772 * We generally mark places where we omit cases from the spec due to
1773 * disallowed elements with a comment: `// OMITTED: <element-name>`.
1774 *
1775 * The HTML spec keeps a flag during the parsing process to track
1776 * whether or not a "parse error" has been encountered. We don't
1777 * bother to track that flag, we just implement the error-handling
1778 * process as specified.
1779 *
1780 * @ingroup Parser
1781 * @since 1.27
1782 * @see https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
1783 */
1784 class Balancer {
/** Name of the current insertion mode; balance() starts in 'inBodyMode' */
private $parseMode;
/** ExplodeIterator over the input text split on '<' (set by balance()) */
private $bitsIterator;
/** Value of the 'allowedHtmlElements' configuration option */
private $allowedHtmlElements;
/** BalanceActiveFormattingElements for the current balance() run */
private $afe;
/** BalanceStack (stack of open elements) for the current balance() run */
private $stack;
/** Value of the 'strict' configuration option */
private $strict;
/** Value of the 'tidyCompat' configuration option */
private $tidyCompat;
/** Value of the 'allowComments' configuration option */
private $allowComments;
/**
 * Callback and arguments handed to balance(). They were previously
 * assigned there without being declared on the class.
 */
private $processingCallback;
private $processingArgs;

// Parser state flags; balance() resets textIntegrationMode,
// ignoreLinefeed, inRCDATA and inRAWTEXT to false on every run.
private $textIntegrationMode;
private $pendingTableText;
private $originalInsertionMode;
/** BalanceElement used as fragment context; balance() uses a <body> element */
private $fragmentContext;
/** First `form` element found walking up from the fragment context, or null */
private $formElementPointer;
private $ignoreLinefeed;
private $inRCDATA;
private $inRAWTEXT;

/**
 * Valid HTML5 comments.
 * Regex borrowed from Tim Starling's "remex-html" project.
 * The pattern uses the `x` flag, so the whitespace and `#` comments
 * inside it are not part of what is matched.
 */
const VALID_COMMENT_REGEX = "~ !--
	(                             # 1. Comment match detector
		> | -> | # Invalid short close
		(                         # 2. Comment contents
			(?:
				(?! --> )
				(?! --!> )
				(?! --! \z )
				(?! -- \z )
				(?! - \z )
				.
			)*+
		)
		(                         # 3. Comment close
			--> |   # Normal close
			--!> |  # Comment end bang
			(       # 4. Indicate matches requiring EOF
				--! |   # EOF in comment end bang state
				-- |    # EOF in comment end state
				- |     # EOF in comment end dash state
				        # EOF in comment state
			)
		)
	)
	([^<]*) \z                    # 5. Non-tag text after the comment
	~xs";
1833
/**
 * Create a new Balancer.
 * @param array $config Balancer configuration.  Includes:
 *     'strict' : boolean, defaults to false.
 *         When true, enforces syntactic constraints on input:
 *         all non-tag '<' must be escaped, all attributes must be
 *         separated by a single space and double-quoted.  This is
 *         consistent with the output of the Sanitizer.
 *     'allowedHtmlElements' : array, defaults to null.
 *         When present, the keys of this associative array give
 *         the acceptable HTML tag names.  When not present, no
 *         tag sanitization is done.
 *     'tidyCompat' : boolean, defaults to false.
 *         When true, the serialization algorithm is tweaked to
 *         provide historical compatibility with the old "tidy"
 *         program: <p>-wrapping is done to the children of
 *         <body> and <blockquote> elements, and empty elements
 *         are removed.
 *     'allowComments': boolean, defaults to true.
 *         When true, allows HTML comments in the input.
 *         The Sanitizer generally strips all comments, so if you
 *         are running on sanitized output you can set this to
 *         false to get a bit more performance.
 * @throws ParameterAssertionException if 'allowedHtmlElements' names an
 *     element the balancer does not support.
 */
public function __construct( array $config = [] ) {
	// Caller-supplied options win; missing ones fall back to defaults.
	$config = $config + [
		'strict' => false,
		'allowedHtmlElements' => null,
		'tidyCompat' => false,
		'allowComments' => true,
	];
	$this->allowedHtmlElements = $config['allowedHtmlElements'];
	$this->strict = $config['strict'];
	$this->tidyCompat = $config['tidyCompat'];
	$this->allowComments = $config['allowComments'];
	if ( $this->allowedHtmlElements !== null ) {
		// Sanity check: none of the allowed tag names may be in the
		// set of unsupported HTML elements. Only the keys matter, so a
		// plain key intersection is sufficient.
		$bad = array_intersect_key(
			$this->allowedHtmlElements,
			BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE]
		);
		if ( count( $bad ) > 0 ) {
			// Separator first: the reversed argument order is a legacy
			// form that PHP 8 no longer accepts.
			$badstr = implode( ',', array_keys( $bad ) );
			throw new ParameterAssertionException(
				'$config',
				'Balance attempted with sanitization including ' .
				"unsupported elements: {$badstr}"
			);
		}
	}
}
1890
/**
 * Return a balanced HTML string for the HTML fragment given by $text,
 * subject to the caveats listed in the class description.  The result
 * will typically be idempotent -- that is, rebalancing the output
 * would result in no change.
 *
 * @param string $text The markup to be balanced
 * @param callable $processingCallback Callback to do any variable or
 *                 parameter replacements in HTML attributes values
 * @param array|bool $processingArgs Arguments for the processing callback
 * @return string The balanced markup
 */
public function balance( $text, $processingCallback = null, $processingArgs = [] ) {
	// Fresh per-run state.
	$this->parseMode = 'inBodyMode';
	$this->bitsIterator = new ExplodeIterator( '<', $text );
	$this->afe = new BalanceActiveFormattingElements();
	$this->stack = new BalanceStack();
	$this->stack->tidyCompat = $this->tidyCompat;
	$this->processingCallback = $processingCallback;
	$this->processingArgs = $processingArgs;

	$this->inRAWTEXT = false;
	$this->inRCDATA = false;
	$this->ignoreLinefeed = false;
	$this->textIntegrationMode = false;

	// The stack is constructed with an <html> element already on it.
	// Set this up as a fragment parsed with <body> as the context.
	$this->fragmentContext =
		new BalanceElement( BalanceSets::HTML_NAMESPACE, 'body', [] );
	$this->resetInsertionMode();

	// The form element pointer is the nearest `form` element found by
	// walking up from the fragment context, if there is one.
	$this->formElementPointer = null;
	$e = $this->fragmentContext;
	while ( $e != null ) {
		if ( $e->isHtmlNamed( 'form' ) ) {
			$this->formElementPointer = $e;
			break;
		}
		$e = $e->parent;
	}

	// The piece before the first '<' is text, not a tag.
	$leadingText = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$this->insertToken( 'text', str_replace( '>', '&gt;', $leadingText ) );
	// Every remaining piece starts with a tag.
	while ( $this->bitsIterator->valid() ) {
		$this->advance();
	}
	$this->insertToken( 'eof', null );
	$result = $this->stack->getOutput();

	// Free memory before returning.
	$this->bitsIterator = null;
	$this->afe = null;
	$this->stack = null;
	$this->fragmentContext = null;
	$this->formElementPointer = null;
	return $result;
}
1948
/**
 * Pass a token to the tree builder. The $token will be one of the
 * strings "tag", "endtag", "text", "comment", or "eof".
 *
 * The token is routed either to the foreign-content handler or to the
 * handler named by the current parse mode, depending on the adjusted
 * current node.
 *
 * @param string $token Token type
 * @param string|null $value Tag name, text or comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfclose Whether a start tag was written as self-closing
 * @return bool|null False when an unsupported tag is rejected, true when
 *   an empty text token is skipped, otherwise whatever the selected
 *   handler returns
 */
private function insertToken( $token, $value, $attribs = null, $selfclose = false ) {
	$isTagToken = ( $token === 'tag' || $token === 'endtag' );
	if ( $isTagToken ) {
		// validate tags against $unsupportedSet
		if ( isset( BalanceSets::$unsupportedSet[BalanceSets::HTML_NAMESPACE][$value] ) ) {
			// As described in "simplifications" above, these tags are
			// not supported in the balancer.
			Assert::invariant(
				!$this->strict,
				"Unsupported $token <$value> found."
			);
			return false;
		}
	} elseif ( $token === 'text' && $value === '' ) {
		// Don't actually inject the empty string as a text token.
		return true;
	}

	// Support pre/listing/textarea by suppressing initial linefeed.
	// The flag is one-shot: it is cleared by whatever token comes next.
	if ( $this->ignoreLinefeed ) {
		$this->ignoreLinefeed = false;
		if ( $token === 'text' && $value[0] === "\n" ) {
			if ( $value === "\n" ) {
				// Nothing would be left, don't inject the empty string.
				return true;
			}
			$value = substr( $value, 1 );
		}
	}

	// Decide whether the HTML rules or the foreign-content rules apply.
	$adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );

	if (
		$this->stack->length() === 0 ||
		$adjusted->isHtml() ||
		$token === 'eof'
	) {
		$useHtmlRules = true;
	} elseif ( $adjusted->isMathmlTextIntegrationPoint() ) {
		// Text, and every start tag except <mglyph>/<malignmark>, is
		// handled as HTML here; everything else stays foreign.
		$useHtmlRules = (
			$token === 'text' ||
			( $token === 'tag' && $value !== 'mglyph' && $value !== 'malignmark' )
		);
	} elseif (
		$adjusted->namespaceURI === BalanceSets::MATHML_NAMESPACE &&
		$adjusted->localName === 'annotation-xml' &&
		$token === 'tag' && $value === 'svg'
	) {
		$useHtmlRules = true;
	} elseif (
		$adjusted->isHtmlIntegrationPoint() &&
		( $token === 'tag' || $token === 'text' )
	) {
		$useHtmlRules = true;
	} else {
		$useHtmlRules = false;
	}

	if ( !$useHtmlRules ) {
		return $this->insertForeignToken( $token, $value, $attribs, $selfclose );
	}
	// The parse mode doubles as the name of the method handling it.
	$handler = $this->parseMode;
	return $this->$handler( $token, $value, $attribs, $selfclose );
}
2020
/**
 * Handle a token while in foreign (non-HTML) content.
 *
 * Text and comments are inserted at the current node. Certain HTML start
 * tags break out of foreign content and are reprocessed as HTML (unless
 * a fragment context is set); every other start tag creates an element
 * in the namespace of the adjusted current node. End tags close the
 * nearest matching foreign element, or are handed to the current parse
 * mode once an HTML element is reached on the stack.
 *
 * @param string $token Token type
 * @param string|null $value Tag name, text or comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfclose Whether a start tag was written as self-closing
 * @return bool|null True when the token was consumed, or the result of
 *   reprocessing; null if nothing handled the token
 */
private function insertForeignToken( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	} elseif ( $token === 'comment' ) {
		// Comments are kept in foreign content, exactly as the HTML
		// insertion modes keep them; previously they fell through
		// every branch here and were silently discarded.
		$this->stack->insertComment( $value );
		return true;
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
		case 'font':
			if ( isset( $attribs['color'] )
				|| isset( $attribs['face'] )
				|| isset( $attribs['size'] )
			) {
				// Only a <font> without these attributes stays
				// foreign ("any other start tag" below).
				break;
			}
			// otherwise, fall through
		case 'b':
		case 'big':
		case 'blockquote':
		case 'body':
		case 'br':
		case 'center':
		case 'code':
		case 'dd':
		case 'div':
		case 'dl':
		case 'dt':
		case 'em':
		case 'embed':
		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
		case 'head':
		case 'hr':
		case 'i':
		case 'img':
		case 'li':
		case 'listing':
		case 'menu':
		case 'meta':
		case 'nobr':
		case 'ol':
		case 'p':
		case 'pre':
		case 'ruby':
		case 's':
		case 'small':
		case 'span':
		case 'strong':
		case 'strike':
		case 'sub':
		case 'sup':
		case 'table':
		case 'tt':
		case 'u':
		case 'ul':
		case 'var':
			if ( $this->fragmentContext ) {
				// With a fragment context set, treat these like
				// "any other start tag" below.
				break;
			}
			// Pop foreign elements until we are back at an integration
			// point or an HTML element, then reprocess the tag.
			while ( true ) {
				$this->stack->pop();
				$node = $this->stack->currentNode;
				if (
					$node->isMathmlTextIntegrationPoint() ||
					$node->isHtmlIntegrationPoint() ||
					$node->isHtml()
				) {
					break;
				}
			}
			return $this->insertToken( $token, $value, $attribs, $selfclose );
		}
		// "Any other start tag"
		$adjusted = ( $this->fragmentContext && $this->stack->length()===1 ) ?
			$this->fragmentContext : $this->stack->currentNode;
		$this->stack->insertForeignElement(
			$adjusted->namespaceURI, $value, $attribs
		);
		if ( $selfclose ) {
			$this->stack->pop();
		}
		return true;
	} elseif ( $token === 'endtag' ) {
		$first = true;
		foreach ( $this->stack as $i => $node ) {
			if ( $node->isHtml() && !$first ) {
				// process the end tag as HTML
				$func = $this->parseMode;
				return $this->$func( $token, $value, $attribs, $selfclose );
			} elseif ( $i === 0 ) {
				// Reached the last entry without a match: ignore.
				return true;
			} elseif ( $node->localName === $value ) {
				$this->stack->popTag( $node );
				return true;
			}
			$first = false;
		}
	}
}
2122
/**
 * Grab the next "token" from $bitsIterator. This is either a open/close
 * tag or text or a comment, depending on whether the Sanitizer approves.
 *
 * Each piece from the iterator is the text that followed one '<' in the
 * input. A piece that does not form an acceptable comment or tag is
 * emitted as text instead, with the '<' restored (escaped, except in
 * RAWTEXT mode).
 */
private function advance() {
	$x = $this->bitsIterator->current();
	$this->bitsIterator->next();
	$regs = [];
	// Handle comments. These won't be generated by mediawiki (they
	// are stripped in the Sanitizer) but may be generated by extensions.
	if (
		$this->allowComments &&
		!( $this->inRCDATA || $this->inRAWTEXT ) &&
		preg_match( Balancer::VALID_COMMENT_REGEX, $x, $regs, PREG_OFFSET_CAPTURE ) &&
		// verify EOF condition where necessary
		( $regs[4][1] < 0 || !$this->bitsIterator->valid() )
	) {
		$contents = $regs[2][0];
		$rest = $regs[5][0];
		$this->insertToken( 'comment', $contents );
		$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
		return;
	}
	// $slash: Does the current element start with a '/'?
	// $t: Current element name
	// $attribStr: String between element name and >
	// $brace: Ending '>' or '/>'
	// $rest: Everything until the next element from the $bitsIterator
	if ( preg_match( Sanitizer::ELEMENT_BITS_REGEX, $x, $regs ) ) {
		list( /* $qbar */, $slash, $t, $attribStr, $brace, $rest ) = $regs;
		$t = strtolower( $t );
		if ( $this->strict ) {
			// Verify that attributes are all properly double-quoted
			Assert::invariant(
				preg_match(
					'/^( [:_A-Z0-9][-.:_A-Z0-9]*="[^"]*")*[ ]*$/i', $attribStr
				),
				"Bad attribute string found"
			);
		}
	} else {
		Assert::invariant(
			!$this->strict, "< found which does not start a valid tag"
		);
		$slash = $t = $attribStr = $brace = $rest = null;
	}
	$goodtag = $t;
	if ( $this->inRCDATA ) {
		if ( $slash && $t === $this->inRCDATA ) {
			$this->inRCDATA = false;
		} else {
			// No tags allowed; this emulates the "rcdata" tokenizer mode.
			$goodtag = false;
		}
	}
	if ( $this->inRAWTEXT ) {
		if ( $slash && $t === $this->inRAWTEXT ) {
			$this->inRAWTEXT = false;
		} else {
			// No tags allowed, no entity-escaping done.
			$goodtag = false;
		}
	}
	$sanitize = $this->allowedHtmlElements !== null;
	if ( $sanitize ) {
		// Narrow the verdict, never widen it: a tag already rejected by
		// the RCDATA/RAWTEXT checks above must stay rejected even when
		// its name is on the whitelist.
		$goodtag = $goodtag && isset( $this->allowedHtmlElements[$t] );
	}
	if ( $goodtag ) {
		if ( is_callable( $this->processingCallback ) ) {
			// The callback receives the attribute string by reference
			// and may rewrite it in place.
			call_user_func_array( $this->processingCallback, [ &$attribStr, $this->processingArgs ] );
		}
		if ( $sanitize ) {
			$goodtag = Sanitizer::validateTag( $attribStr, $t );
		}
	}
	if ( $goodtag ) {
		$attribs = Sanitizer::decodeTagAttributes( $attribStr );
		if ( $sanitize ) {
			$attribs = Sanitizer::validateTagAttributes( $attribs, $t );
		}
		// The tree builder has the final say on whether the tag is kept.
		$goodtag = $this->insertToken(
			$slash ? 'endtag' : 'tag', $t, $attribs, $brace === '/>'
		);
	}
	if ( $goodtag ) {
		// Escaping once is enough; the output of the replacement
		// contains no '>' for a second pass to find.
		$this->insertToken( 'text', str_replace( '>', '&gt;', $rest ) );
	} elseif ( $this->inRAWTEXT ) {
		$this->insertToken( 'text', "<$x" );
	} else {
		// bad tag; serialize entire thing as text.
		$this->insertToken( 'text', '&lt;' . str_replace( '>', '&gt;', $x ) );
	}
}
2219
/**
 * Change the current parse mode.
 *
 * @param string $mode Name of the new mode; must end in "Mode"
 * @return string The mode that was active before the switch
 */
private function switchMode( $mode ) {
	$hasModeSuffix = ( substr( $mode, -4 )==='Mode' );
	Assert::parameter( $hasModeSuffix, '$mode', 'should end in Mode' );

	$previousMode = $this->parseMode;
	$this->parseMode = $mode;

	return $previousMode;
}
2228
/**
 * Switch to another parse mode, then feed the given token through the
 * tree builder again so that it is handled under the new mode.
 *
 * @param string $mode Name of the new mode; must end in "Mode"
 * @param string $token Token type
 * @param string|null $value Tag name, text or comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfclose Whether a start tag was written as self-closing
 * @return bool|null Result of reprocessing the token
 */
private function switchModeAndReprocess( $mode, $token, $value, $attribs, $selfclose ) {
	// The previous mode returned by switchMode() is not needed here.
	$this->switchMode( $mode );

	$outcome = $this->insertToken( $token, $value, $attribs, $selfclose );
	return $outcome;
}
2233
/**
 * Choose the parse mode that matches the current stack of open elements.
 *
 * The stack is walked until an element that determines the mode is
 * found. For the entry with key 0 the fragment context, when set, stands
 * in for the node itself; if nothing has matched by then, the mode
 * becomes 'inBodyMode'.
 */
private function resetInsertionMode() {
	// Elements that map straight onto a single mode.
	$directModes = [
		'tr' => 'inRowMode',
		'tbody' => 'inTableBodyMode',
		'tfoot' => 'inTableBodyMode',
		'thead' => 'inTableBodyMode',
		'caption' => 'inCaptionMode',
		'colgroup' => 'inColumnGroupMode',
		'table' => 'inTableMode',
		'body' => 'inBodyMode',
		// OMITTED: <frameset>
		// OMITTED: <html>
		// OMITTED: <head>
	];

	$last = false;
	foreach ( $this->stack as $i => $node ) {
		if ( $i === 0 ) {
			$last = true;
			if ( $this->fragmentContext ) {
				$node = $this->fragmentContext;
			}
		}

		if ( $node->isHtml() ) {
			$name = $node->localName;

			if ( $name === 'select' ) {
				// A <select> nested in a <table> (with no <template>
				// found first) gets its own mode.
				// NOTE(review): the index arithmetic below depends on how
				// BalanceStack numbers its iterator keys and node()
				// positions, neither of which is visible here -- confirm
				// that the ancestors of $node are the ones visited.
				$stacklen = $this->stack->length();
				for ( $j = $i + 1; $j < $stacklen-1; $j++ ) {
					$ancestor = $this->stack->node( $stacklen-$j-1 );
					if ( $ancestor->isHtmlNamed( 'template' ) ) {
						break;
					}
					if ( $ancestor->isHtmlNamed( 'table' ) ) {
						$this->switchMode( 'inSelectInTableMode' );
						return;
					}
				}
				$this->switchMode( 'inSelectMode' );
				return;
			}

			if ( $name === 'template' ) {
				// Resume the most recently pushed template mode.
				$this->switchMode(
					array_slice( $this->templateInsertionModes, -1 )[0]
				);
				return;
			}

			if ( isset( $directModes[$name] ) ) {
				$this->switchMode( $directModes[$name] );
				return;
			}

			// OMITTED: <head>
			if ( !$last && $node->isA( BalanceSets::$tableCellSet ) ) {
				$this->switchMode( 'inCellMode' );
				return;
			}
		}

		if ( $last ) {
			$this->switchMode( 'inBodyMode' );
			return;
		}
	}
}
2303
/**
 * Finish the parse: drop the active formatting elements list and unwind
 * the stack of open elements.
 */
private function stopParsing() {
	// Of the spec's "stop parsing" steps only step 2 applies here:
	// "pop all the nodes off the stack of open elements".
	// The top-most <html> element is deliberately left on the stack.

	// The AFE list has to go first. While it exists it keeps element
	// objects live during serialization, potentially using O(N^2)
	// memory. Dropping it is safe because popping the stack never
	// results in reconstructing the active formatting elements.
	$this->afe = null;

	$this->stack->popTo( 1 );
}
2316
/**
 * Open an element whose content is raw text: insert it, flag the
 * tokenizer emulation as being in RAWTEXT for that tag name, and switch
 * to 'inTextMode', remembering the mode to return to afterwards.
 *
 * @param string $value Tag name of the raw text element
 * @param array|null $attribs Attributes of the element
 * @return bool Always true
 */
private function parseRawText( $value, $attribs = null ) {
	$this->stack->insertHTMLElement( $value, $attribs );

	// advance() leaves RAWTEXT again once it sees the matching end tag.
	$this->inRAWTEXT = $value;

	$modeToRestore = $this->switchMode( 'inTextMode' );
	$this->originalInsertionMode = $modeToRestore;

	return true;
}
2323
/**
 * Handler for the 'inTextMode' parse mode.
 *
 * Text is inserted as-is. An end tag or end of input closes the current
 * element and restores the original insertion mode (end of input is then
 * reprocessed under that mode). Any other token is ignored.
 *
 * @param string $token Token type
 * @param string|null $value Tag name, text or comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfclose Whether a start tag was written as self-closing
 * @return bool|null True, or the result of reprocessing an 'eof' token
 */
private function inTextMode( $token, $value, $attribs = null, $selfclose = false ) {
	switch ( $token ) {
	case 'text':
		$this->stack->insertText( $value );
		break;

	case 'eof':
		// Close the open element, then let the original mode see the
		// end of input as well.
		$this->stack->pop();
		return $this->switchModeAndReprocess(
			$this->originalInsertionMode, $token, $value, $attribs, $selfclose
		);

	case 'endtag':
		$this->stack->pop();
		$this->switchMode( $this->originalInsertionMode );
		break;
	}
	return true;
}
2340
/**
 * Handler implementing the "in head" rules for the tokens this balancer
 * supports.
 *
 * Anything not consumed by one of the branches is preceded by a
 * synthetic </head> and then reprocessed through insertToken().
 *
 * @param string $token Token type
 * @param string|null $value Tag name, text or comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfclose Whether a start tag was written as self-closing
 * @return bool|null True when the token was consumed, otherwise the
 *   result of reprocessing it
 */
private function inHeadMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}

	if ( $token === 'text' ) {
		// Leading whitespace is inserted right away; whatever remains
		// is dealt with at the bottom of the function.
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $leading ) ) {
			$this->stack->insertText( $leading[0] );
			$value = substr( $value, strlen( $leading[0] ) );
		}
		if ( strlen( $value ) === 0 ) {
			return true; // All text handled.
		}
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
		// OMITTED: <html>
		// OMITTED: in a full HTML parser, <meta> might change the encoding.
		case 'meta':
		case 'base':
		case 'basefont':
		case 'bgsound':
		case 'link':
			// Inserted and closed again immediately.
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;

		// OMITTED: <title>
		// OMITTED: <noscript>
		case 'noframes':
		case 'style':
			return $this->parseRawText( $value, $attribs );

		// OMITTED: <script>
		case 'template':
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->afe->insertMarker();
			// OMITTED: frameset_ok
			$this->switchMode( 'inTemplateMode' );
			// Record the mode just entered on the template mode stack.
			$this->templateInsertionModes[] = $this->parseMode;
			return true;

		// OMITTED: <head>
		}
	} elseif ( $token === 'endtag' ) {
		// OMITTED: <head>
		// OMITTED: <body>
		// OMITTED: <html>
		if ( $value === 'template' ) {
			if ( $this->stack->indexOf( $value ) < 0 ) {
				return true; // Ignore the token.
			}
			$this->stack->generateImpliedEndTags( null, true /* thorough */ );
			$this->stack->popTag( $value );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			return true;
		} elseif ( $value !== 'br' ) {
			// ignore any other end tag
			return true;
		}
		// </br> is handled at the bottom of the function.
	}

	// If not handled above
	$this->inHeadMode( 'endtag', 'head' ); // synthetic </head>
	// Then redo this one
	return $this->insertToken( $token, $value, $attribs, $selfclose );
}
2410
/**
 * Handler implementing the "in body" rules for the tokens this balancer
 * supports.
 *
 * @param string $token Token type: 'text', 'eof', 'comment', 'tag' or
 *   'endtag'; anything else fails an invariant assertion
 * @param string|null $value Tag name, text or comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfclose Whether a start tag was written as self-closing
 * @return bool|null True when the token was consumed, or the result of
 *   the handler the token was delegated to
 */
private function inBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' ) {
		$this->afe->reconstruct( $this->stack );
		$this->stack->insertText( $value );
		return true;
	}

	if ( $token === 'eof' ) {
		if ( !empty( $this->templateInsertionModes ) ) {
			return $this->inTemplateMode( $token, $value, $attribs, $selfclose );
		}
		$this->stopParsing();
		return true;
	}

	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}

	if ( $token === 'tag' ) {
		switch ( $value ) {
		// OMITTED: <html>
		case 'base':
		case 'basefont':
		case 'bgsound':
		case 'link':
		case 'meta':
		case 'noframes':
		// OMITTED: <script>
		case 'style':
		case 'template':
		// OMITTED: <title>
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		// OMITTED: <body>
		// OMITTED: <frameset>

		case 'address':
		case 'article':
		case 'aside':
		case 'blockquote':
		case 'center':
		case 'details':
		case 'dialog':
		case 'dir':
		case 'div':
		case 'dl':
		case 'fieldset':
		case 'figcaption':
		case 'figure':
		case 'footer':
		case 'header':
		case 'hgroup':
		case 'main':
		case 'menu':
		case 'nav':
		case 'ol':
		case 'p':
		case 'section':
		case 'summary':
		case 'ul':
			// Close a <p> that is open in button scope first.
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			// Headings do not nest: drop one that is still current.
			if ( $this->stack->currentNode->isA( BalanceSets::$headingSet ) ) {
				$this->stack->pop();
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'pre':
		case 'listing':
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			// insertToken() will swallow a linefeed that comes next.
			$this->ignoreLinefeed = true;
			// OMITTED: frameset_ok
			return true;

		case 'form':
			if (
				$this->formElementPointer &&
				$this->stack->indexOf( 'template' ) < 0
			) {
				return true; // in a form, not in a template.
			}
			if ( $this->stack->inButtonScope( "p" ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$formElement = $this->stack->insertHTMLElement( $value, $attribs );
			if ( $this->stack->indexOf( 'template' ) < 0 ) {
				$this->formElementPointer = $formElement;
			}
			return true;

		case 'li':
			// OMITTED: frameset_ok
			// Close an open <li>, searching no further than the first
			// "special" element other than address/div/p.
			foreach ( $this->stack as $node ) {
				if ( $node->isHtmlNamed( 'li' ) ) {
					$this->inBodyMode( 'endtag', 'li' );
					break;
				}
				if (
					$node->isA( BalanceSets::$specialSet ) &&
					!$node->isA( BalanceSets::$addressDivPSet )
				) {
					break;
				}
			}
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'dd':
		case 'dt':
			// OMITTED: frameset_ok
			// Same search as for <li>, for an open <dd> or <dt>.
			foreach ( $this->stack as $node ) {
				if ( $node->isHtmlNamed( 'dd' ) ) {
					$this->inBodyMode( 'endtag', 'dd' );
					break;
				}
				if ( $node->isHtmlNamed( 'dt' ) ) {
					$this->inBodyMode( 'endtag', 'dt' );
					break;
				}
				if (
					$node->isA( BalanceSets::$specialSet ) &&
					!$node->isA( BalanceSets::$addressDivPSet )
				) {
					break;
				}
			}
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		// OMITTED: <plaintext>

		case 'button':
			if ( $this->stack->inScope( 'button' ) ) {
				// Close the open button, then try this tag again.
				$this->inBodyMode( 'endtag', 'button' );
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			}
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'a':
			$openAnchor = $this->afe->findElementByTag( 'a' );
			if ( $openAnchor ) {
				$this->inBodyMode( 'endtag', 'a' );
				if ( $this->afe->isInList( $openAnchor ) ) {
					$this->afe->remove( $openAnchor );
					// Don't flatten here, since when we fall
					// through below we might foster parent
					// the new <a> tag inside this one.
					$this->stack->removeElement( $openAnchor, false );
				}
			}
			// Falls through
		case 'b':
		case 'big':
		case 'code':
		case 'em':
		case 'font':
		case 'i':
		case 's':
		case 'small':
		case 'strike':
		case 'strong':
		case 'tt':
		case 'u':
			$this->afe->reconstruct( $this->stack );
			$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
			return true;

		case 'nobr':
			$this->afe->reconstruct( $this->stack );
			if ( $this->stack->inScope( 'nobr' ) ) {
				$this->inBodyMode( 'endtag', 'nobr' );
				$this->afe->reconstruct( $this->stack );
			}
			$this->afe->push( $this->stack->insertHTMLElement( $value, $attribs ), $attribs );
			return true;

		case 'applet':
		case 'marquee':
		case 'object':
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->afe->insertMarker();
			// OMITTED: frameset_ok
			return true;

		case 'table':
			// The document is never in "quirks mode"; see simplifications
			// above.
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			// OMITTED: frameset_ok
			$this->switchMode( 'inTableMode' );
			return true;

		case 'area':
		case 'br':
		case 'embed':
		case 'img':
		case 'keygen':
		case 'wbr':
		case 'input':
			// Inserted and closed again immediately.
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			// OMITTED: frameset_ok
			// (hence we don't need to examine an <input> tag's "type"
			// attribute)
			return true;

		case 'menuitem':
		case 'param':
		case 'source':
		case 'track':
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;

		case 'hr':
			if ( $this->stack->inButtonScope( 'p' ) ) {
				$this->inBodyMode( 'endtag', 'p' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;

		case 'image':
			// warts!
			return $this->inBodyMode( $token, 'img', $attribs, $selfclose );

		// OMITTED: <isindex>

		case 'textarea':
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->ignoreLinefeed = true;
			$this->inRCDATA = $value; // emulate rcdata tokenizer mode
			// OMITTED: frameset_ok
			return true;

		// OMITTED: <xmp>
		// OMITTED: <iframe>
		// OMITTED: <noembed>
		// OMITTED: <noscript>

		case 'select':
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			// Which select mode applies depends on whether one of the
			// table-related modes is currently active.
			$tableModes = [
				'inTableMode',
				'inCaptionMode',
				'inTableBodyMode',
				'inRowMode',
				'inCellMode',
			];
			if ( in_array( $this->parseMode, $tableModes, true ) ) {
				$this->switchMode( 'inSelectInTableMode' );
			} else {
				$this->switchMode( 'inSelectMode' );
			}
			return true;

		case 'optgroup':
		case 'option':
			if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
				$this->inBodyMode( 'endtag', 'option' );
			}
			$this->afe->reconstruct( $this->stack );
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'rb':
		case 'rtc':
			if ( $this->stack->inScope( 'ruby' ) ) {
				$this->stack->generateImpliedEndTags();
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'rp':
		case 'rt':
			if ( $this->stack->inScope( 'ruby' ) ) {
				$this->stack->generateImpliedEndTags( 'rtc' );
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			return true;

		case 'math':
		case 'svg':
			$this->afe->reconstruct( $this->stack );
			// We skip the spec's "adjust MathML/SVG attributes" and
			// "adjust foreign attributes" steps, since the browser will
			// do this later when it parses the output and it doesn't affect
			// balancing.
			$foreignNamespace = ( $value === 'math' )
				? BalanceSets::MATHML_NAMESPACE
				: BalanceSets::SVG_NAMESPACE;
			$this->stack->insertForeignElement(
				$foreignNamespace, $value, $attribs
			);
			if ( $selfclose ) {
				// emit explicit </math> or </svg> tag.
				$this->stack->pop();
			}
			return true;

		case 'caption':
		case 'col':
		case 'colgroup':
		// OMITTED: <frame>
		case 'head':
		case 'tbody':
		case 'td':
		case 'tfoot':
		case 'th':
		case 'thead':
		case 'tr':
			// Ignore table tags if we're not inTableMode
			return true;
		}

		// Handle any other start tag here
		$this->afe->reconstruct( $this->stack );
		$this->stack->insertHTMLElement( $value, $attribs );
		return true;
	}

	if ( $token === 'endtag' ) {
		switch ( $value ) {
		// </body>,</html> are unsupported.

		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );

		case 'address':
		case 'article':
		case 'aside':
		case 'blockquote':
		case 'button':
		case 'center':
		case 'details':
		case 'dialog':
		case 'dir':
		case 'div':
		case 'dl':
		case 'fieldset':
		case 'figcaption':
		case 'figure':
		case 'footer':
		case 'header':
		case 'hgroup':
		case 'listing':
		case 'main':
		case 'menu':
		case 'nav':
		case 'ol':
		case 'pre':
		case 'section':
		case 'summary':
		case 'ul':
			// Ignore if there is not a matching open tag
			if ( !$this->stack->inScope( $value ) ) {
				return true;
			}
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( $value );
			return true;

		case 'form':
			if ( $this->stack->indexOf( 'template' ) < 0 ) {
				// Outside a template the form element pointer decides
				// which element gets closed.
				$openForm = $this->formElementPointer;
				$this->formElementPointer = null;
				if ( !$openForm || !$this->stack->inScope( $openForm ) ) {
					return true;
				}
				$this->stack->generateImpliedEndTags();
				// Don't flatten yet if we're removing a <form> element
				// out-of-order. (eg. `<form><div></form>`)
				$isCurrent = ( $this->stack->currentNode === $openForm );
				$this->stack->removeElement( $openForm, $isCurrent );
				return true;
			}
			if ( !$this->stack->inScope( 'form' ) ) {
				return true;
			}
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( 'form' );
			return true;

		case 'p':
			if ( !$this->stack->inButtonScope( 'p' ) ) {
				// No <p> to close: open an empty one, then reprocess
				// this end tag.
				$this->inBodyMode( 'tag', 'p', [] );
				return $this->insertToken( $token, $value, $attribs, $selfclose );
			}
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTag( $value );
			return true;

		case 'li':
			if ( !$this->stack->inListItemScope( $value ) ) {
				return true; // ignore
			}
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTag( $value );
			return true;

		case 'dd':
		case 'dt':
			if ( !$this->stack->inScope( $value ) ) {
				return true; // ignore
			}
			$this->stack->generateImpliedEndTags( $value );
			$this->stack->popTag( $value );
			return true;

		case 'h1':
		case 'h2':
		case 'h3':
		case 'h4':
		case 'h5':
		case 'h6':
			// The whole heading set is checked and popped here, not
			// only the level named by the end tag.
			if ( !$this->stack->inScope( BalanceSets::$headingSet ) ) {
				return true; // ignore
			}
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( BalanceSets::$headingSet );
			return true;

		case 'sarcasm':
			// Take a deep breath, then:
			break;

		case 'a':
		case 'b':
		case 'big':
		case 'code':
		case 'em':
		case 'font':
		case 'i':
		case 'nobr':
		case 's':
		case 'small':
		case 'strike':
		case 'strong':
		case 'tt':
		case 'u':
			if ( $this->stack->adoptionAgency( $value, $this->afe ) ) {
				return true; // If we did something, we're done.
			}
			break; // Go to the "any other end tag" case.

		case 'applet':
		case 'marquee':
		case 'object':
			if ( !$this->stack->inScope( $value ) ) {
				return true; // ignore
			}
			$this->stack->generateImpliedEndTags();
			$this->stack->popTag( $value );
			$this->afe->clearToMarker();
			return true;

		case 'br':
			// Turn </br> into <br>
			return $this->inBodyMode( 'tag', $value, [] );
		}

		// Any other end tag goes here
		foreach ( $this->stack as $i => $node ) {
			if ( $node->isHtmlNamed( $value ) ) {
				$this->stack->generateImpliedEndTags( $value );
				$this->stack->popTo( $i ); // including $i
				break;
			} elseif ( $node->isA( BalanceSets::$specialSet ) ) {
				return true; // ignore this close token.
			}
		}
		return true;
	}

	Assert::invariant( false, "Bad token type: $token" );
}
2926
/**
 * Handler implementing the "in table" rules for the tokens this balancer
 * supports.
 *
 * Tokens with no table-specific handling are processed by inBodyMode()
 * while foster parenting is switched on for the stack.
 *
 * @param string $token Token type
 * @param string|null $value Tag name, text or comment contents
 * @param array|null $attribs Attributes, for tag tokens
 * @param bool $selfclose Whether a start tag was written as self-closing
 * @return bool|null True when the token was consumed, or the result of
 *   the handler the token was delegated to
 */
private function inTableMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'eof' ) {
		$this->stopParsing();
		return true;
	}
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}

	if ( $token === 'text' ) {
		if ( $this->textIntegrationMode ) {
			return $this->inBodyMode( $token, $value, $attribs, $selfclose );
		}
		if ( $this->stack->currentNode->isA( BalanceSets::$tableSectionRowSet ) ) {
			// Collect the text in inTableTextMode, which returns to
			// the current mode when a non-text token arrives.
			$this->pendingTableText = '';
			$this->originalInsertionMode = $this->parseMode;
			return $this->switchModeAndReprocess( 'inTableTextMode',
				$token, $value, $attribs, $selfclose );
		}
		// fall through to the "anything else" clause.
	} elseif ( $token === 'tag' ) {
		switch ( $value ) {
		case 'caption':
			$this->afe->insertMarker();
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inCaptionMode' );
			return true;

		case 'colgroup':
			$this->stack->clearToContext( BalanceSets::$tableContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inColumnGroupMode' );
			return true;

		case 'col':
			// Open an implied <colgroup>, then reprocess the <col>.
			$this->inTableMode( 'tag', 'colgroup', [] );
			return $this->insertToken( $token, $value, $attribs, $selfclose );

		case 'tbody':
		case 'tfoot':
		case 'thead':
			$this->stack->clearToContext( BalanceSets::$tableContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inTableBodyMode' );
			return true;

		case 'td':
		case 'th':
		case 'tr':
			// Open an implied <tbody>, then reprocess the tag.
			$this->inTableMode( 'tag', 'tbody', [] );
			return $this->insertToken( $token, $value, $attribs, $selfclose );

		case 'table':
			if ( !$this->stack->inTableScope( $value ) ) {
				return true; // Ignore this tag.
			}
			// Close the open table, then reprocess the new <table>.
			$this->inTableMode( 'endtag', $value );
			return $this->insertToken( $token, $value, $attribs, $selfclose );

		case 'style':
		// OMITTED: <script>
		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );

		case 'input':
			$isHiddenInput = isset( $attribs['type'] ) &&
				strcasecmp( $attribs['type'], 'hidden' ) === 0;
			if ( !$isHiddenInput ) {
				break; // Handle this as "everything else"
			}
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->pop();
			return true;

		case 'form':
			if (
				$this->formElementPointer ||
				$this->stack->indexOf( 'template' ) >= 0
			) {
				return true; // ignore this token
			}
			// Insert the form, remember it, and close it right away.
			$this->formElementPointer =
				$this->stack->insertHTMLElement( $value, $attribs );
			$this->stack->popTag( $this->formElementPointer );
			return true;
		}
		// Fall through for "anything else" clause.
	} elseif ( $token === 'endtag' ) {
		switch ( $value ) {
		case 'table':
			if ( !$this->stack->inTableScope( $value ) ) {
				return true; // Ignore.
			}
			$this->stack->popTag( $value );
			$this->resetInsertionMode();
			return true;

		// OMITTED: <body>
		// OMITTED: <html>
		case 'caption':
		case 'col':
		case 'colgroup':
		case 'tbody':
		case 'td':
		case 'tfoot':
		case 'th':
		case 'thead':
		case 'tr':
			return true; // Ignore the token.

		case 'template':
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		}
		// Fall through for "anything else" clause.
	}

	// This is the "anything else" case: process the token with the
	// inBodyMode rules while foster parenting is enabled. The result of
	// inBodyMode() is not propagated.
	$this->stack->fosterParentMode = true;
	$this->inBodyMode( $token, $value, $attribs, $selfclose );
	$this->stack->fosterParentMode = false;
	return true;
}
3036
/**
 * Token handler for the table-text mode: buffers character data seen
 * while inside a table and flushes the buffer when any other token
 * arrives.
 *
 * Text tokens are only appended to $this->pendingTableText. Any other
 * token empties the buffer first: buffered text that contains a
 * character other than tab, LF, FF, CR or space is passed to
 * inBodyMode() with foster parenting switched on, whereas
 * whitespace-only text is inserted directly. The token that triggered
 * the flush is then reprocessed under $this->originalInsertionMode.
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Character data for 'text' tokens; otherwise
 *   forwarded untouched
 * @param array|null $attribs Forwarded untouched to the reprocessing call
 * @param bool $selfclose Forwarded untouched to the reprocessing call
 * @return bool True when the text was buffered; otherwise the result of
 *   switchModeAndReprocess()
 */
private function inTableTextMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token !== 'text' ) {
		// Take ownership of the buffered text and reset the buffer
		// before anything else gets a chance to run.
		$buffered = $this->pendingTableText;
		$this->pendingTableText = '';

		$onlyWhitespace = !preg_match( '/[^\x09\x0A\x0C\x0D\x20]/', $buffered );
		if ( $onlyWhitespace ) {
			$this->stack->insertText( $buffered );
		} else {
			// This should match the "anything else" case of inTableMode.
			$this->stack->fosterParentMode = true;
			$this->inBodyMode( 'text', $buffered );
			$this->stack->fosterParentMode = false;
		}

		// Hand the current (non-text) token back to the mode we came from.
		return $this->switchModeAndReprocess(
			$this->originalInsertionMode, $token, $value, $attribs, $selfclose
		);
	}

	$this->pendingTableText .= $value;
	return true;
}
3058
/**
 * Helper for inCaptionMode(): closes the currently open caption.
 *
 * When a 'caption' element is in table scope, this generates implied
 * end tags, pops the caption through popTag(), clears $this->afe back
 * to its marker and switches the parser to inTableMode.
 *
 * @return bool False if no 'caption' is in table scope (nothing was
 *   changed); true once the caption has been closed
 */
private function endCaption() {
	if ( $this->stack->inTableScope( 'caption' ) ) {
		$this->stack->generateImpliedEndTags();
		$this->stack->popTag( 'caption' );
		$this->afe->clearToMarker();
		$this->switchMode( 'inTableMode' );
		return true;
	}
	return false;
}
3070
/**
 * Token handler for the caption insertion mode.
 *
 * Start tags of table-structure elements, and the 'table' end tag,
 * first try to close the caption via endCaption() and, if that
 * succeeded, reprocess the token. The 'caption' end tag just closes
 * the caption. End tags of other table-structure elements (and
 * 'body') are ignored. Everything else is delegated to inBodyMode().
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Tag name for 'tag'/'endtag' tokens; otherwise
 *   forwarded untouched
 * @param array|null $attribs Attributes of the token, forwarded untouched
 * @param bool $selfclose Forwarded untouched
 * @return bool True for tokens handled or ignored here; otherwise the
 *   result of inBodyMode()
 */
private function inCaptionMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'tag' ) {
		$captionClosers = [
			'caption', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		];
		if ( in_array( $value, $captionClosers, true ) ) {
			if ( $this->endCaption() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
		// Any other start tag: "anything else" below.
	} elseif ( $token === 'endtag' ) {
		if ( $value === 'caption' || $value === 'table' ) {
			$closed = $this->endCaption();
			// Only </table> is reprocessed after the caption is closed.
			if ( $closed && $value === 'table' ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}

		// OMITTED: <html>
		$ignoredEndTags = [
			'body', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		];
		if ( in_array( $value, $ignoredEndTags, true ) ) {
			// Ignore the token
			return true;
		}
		// Any other end tag: "anything else" below.
	}

	// The Anything Else case
	return $this->inBodyMode( $token, $value, $attribs, $selfclose );
}
3117
/**
 * Token handler for the column-group insertion mode.
 *
 * Comments are inserted, 'eof' is delegated to inBodyMode(), and
 * 'template' start/end tags are delegated to inHeadMode(). Leading
 * whitespace of a text token is inserted as text; a 'col' start tag
 * is inserted and immediately popped; a 'col' end tag is ignored; a
 * 'colgroup' end tag pops the current node and returns to inTableMode
 * when the current node is a colgroup. Any other token (including the
 * non-whitespace remainder of a text token) closes the colgroup, if
 * one is current, and is then reprocessed.
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Tag name, text or comment content, depending
 *   on $token
 * @param array|null $attribs Attributes for 'tag' tokens
 * @param bool $selfclose Forwarded untouched
 * @return bool True for tokens handled or ignored here; otherwise the
 *   result of the handler the token was forwarded to
 */
private function inColumnGroupMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}
	if ( $token === 'eof' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfclose );
	}

	if ( $token === 'text' ) {
		// Peel off and insert any leading whitespace.
		if ( preg_match( '/^[\x09\x0A\x0C\x0D\x20]+/', $value, $leading ) ) {
			$whitespace = $leading[0];
			$this->stack->insertText( $whitespace );
			$value = substr( $value, strlen( $whitespace ) );
		}
		if ( strlen( $value ) === 0 ) {
			return true; // All text handled.
		}
		// The non-whitespace remainder is "anything else".
	} elseif ( $token === 'tag' || $token === 'endtag' ) {
		// OMITTED: <html>
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		}
		if ( $value === 'col' ) {
			if ( $token === 'tag' ) {
				$this->stack->insertHTMLElement( $value, $attribs );
				$this->stack->pop();
			}
			// A </col> end tag is simply ignored.
			return true;
		}
		if ( $token === 'endtag' && $value === 'colgroup' ) {
			if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
				$this->stack->pop();
				$this->switchMode( 'inTableMode' );
			}
			// Otherwise: ignore the token.
			return true;
		}
		// Any other tag is "anything else".
	}

	// Anything else
	if ( $this->stack->currentNode->isHtmlNamed( 'colgroup' ) ) {
		// Act as if </colgroup> had been seen, then reprocess.
		$this->inColumnGroupMode( 'endtag', 'colgroup' );
		return $this->insertToken( $token, $value, $attribs, $selfclose );
	}
	return true; // Ignore the token.
}
3168
/**
 * Helper for inTableBodyMode(): closes the currently open table
 * section.
 *
 * If a 'tbody', 'thead' or 'tfoot' is in table scope, the stack is
 * cleared back to the table body context set, the current node is
 * popped and the parser switches to inTableMode.
 *
 * @return bool False if none of the section elements is in table
 *   scope (nothing was changed); true once the section is closed
 */
private function endSection() {
	$sectionOpen = false;
	foreach ( [ 'tbody', 'thead', 'tfoot' ] as $section ) {
		if ( $this->stack->inTableScope( $section ) ) {
			$sectionOpen = true;
			break;
		}
	}
	if ( !$sectionOpen ) {
		return false;
	}

	$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
	$this->stack->pop();
	$this->switchMode( 'inTableMode' );
	return true;
}
/**
 * Token handler for the table-body insertion mode.
 *
 * A 'tr' start tag opens a row and switches to inRowMode; 'td'/'th'
 * start tags first synthesize a 'tr' start tag and are then
 * reprocessed. Start tags of other table-structure elements, and the
 * 'table' end tag, close the current section via endSection() and are
 * reprocessed if that succeeded. End tags of section elements close
 * the section when that element is in table scope. A fixed set of
 * other end tags is ignored; everything else goes to inTableMode().
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Tag name for 'tag'/'endtag' tokens; otherwise
 *   forwarded untouched
 * @param array|null $attribs Attributes for 'tag' tokens
 * @param bool $selfclose Forwarded untouched
 * @return bool True for tokens handled or ignored here; otherwise the
 *   result of inTableMode()
 */
private function inTableBodyMode( $token, $value, $attribs = null, $selfclose = false ) {
	$isStartTag = ( $token === 'tag' );
	$isEndTag = ( $token === 'endtag' );

	// Tokens that end the current section and are then processed again.
	$sectionEnders = [ 'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead' ];
	if (
		( $isStartTag && in_array( $value, $sectionEnders, true ) ) ||
		( $isEndTag && $value === 'table' )
	) {
		if ( $this->endSection() ) {
			$this->insertToken( $token, $value, $attribs, $selfclose );
		}
		return true;
	}

	if ( $isStartTag ) {
		if ( $value === 'tr' ) {
			$this->stack->clearToContext( BalanceSets::$tableBodyContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inRowMode' );
			return true;
		}
		if ( $value === 'th' || $value === 'td' ) {
			// Open an implicit row, then let the cell be handled again.
			$this->inTableBodyMode( 'tag', 'tr', [] );
			$this->insertToken( $token, $value, $attribs, $selfclose );
			return true;
		}
	} elseif ( $isEndTag ) {
		if ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ], true ) ) {
			if ( $this->stack->inTableScope( $value ) ) {
				$this->endSection();
			}
			return true;
		}
		// OMITTED: <body>
		// OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup', 'td', 'th', 'tr' ], true ) ) {
			return true; // Ignore the token.
		}
	}

	// Anything else:
	return $this->inTableMode( $token, $value, $attribs, $selfclose );
}
3235
/**
 * Helper for inRowMode(): closes the currently open table row.
 *
 * If a 'tr' is in table scope, the stack is cleared back to the table
 * row context set, the current node is popped and the parser switches
 * to inTableBodyMode.
 *
 * @return bool False if no 'tr' is in table scope (nothing was
 *   changed); true once the row is closed
 */
private function endRow() {
	if ( $this->stack->inTableScope( 'tr' ) ) {
		$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
		$this->stack->pop();
		$this->switchMode( 'inTableBodyMode' );
		return true;
	}
	return false;
}
/**
 * Token handler for the row insertion mode.
 *
 * 'td'/'th' start tags open a cell, switch to inCellMode and insert a
 * marker into $this->afe. Start tags of other table-structure
 * elements, and the 'table' end tag, close the row via endRow() and
 * are reprocessed if that succeeded. The 'tr' end tag just closes the
 * row. Section end tags close the row and are reprocessed only when
 * the named section is in table scope. A fixed set of other end tags
 * is ignored; everything else goes to inTableMode().
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Tag name for 'tag'/'endtag' tokens; otherwise
 *   forwarded untouched
 * @param array|null $attribs Attributes for 'tag' tokens
 * @param bool $selfclose Forwarded untouched
 * @return bool True for tokens handled or ignored here; otherwise the
 *   result of inTableMode()
 */
private function inRowMode( $token, $value, $attribs = null, $selfclose = false ) {
	$isStartTag = ( $token === 'tag' );
	$isEndTag = ( $token === 'endtag' );

	// Tokens that end the current row and are then processed again.
	$rowEnders = [ 'caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr' ];
	if (
		( $isStartTag && in_array( $value, $rowEnders, true ) ) ||
		( $isEndTag && $value === 'table' )
	) {
		if ( $this->endRow() ) {
			$this->insertToken( $token, $value, $attribs, $selfclose );
		}
		return true;
	}

	if ( $isStartTag ) {
		if ( $value === 'th' || $value === 'td' ) {
			$this->stack->clearToContext( BalanceSets::$tableRowContextSet );
			$this->stack->insertHTMLElement( $value, $attribs );
			$this->switchMode( 'inCellMode' );
			$this->afe->insertMarker();
			return true;
		}
	} elseif ( $isEndTag ) {
		if ( $value === 'tr' ) {
			$this->endRow();
			return true;
		}
		if ( in_array( $value, [ 'tbody', 'tfoot', 'thead' ], true ) ) {
			// Both conditions must hold before the token is reprocessed;
			// endRow() is only attempted when the section is in scope.
			if ( $this->stack->inTableScope( $value ) ) {
				if ( $this->endRow() ) {
					$this->insertToken( $token, $value, $attribs, $selfclose );
				}
			}
			return true;
		}
		// OMITTED: <body>
		// OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup', 'td', 'th' ], true ) ) {
			return true; // Ignore the token.
		}
	}

	// Anything else:
	return $this->inTableMode( $token, $value, $attribs, $selfclose );
}
3301
/**
 * Helper for inCellMode(): closes the currently open table cell by
 * feeding a synthetic end tag to inCellMode().
 *
 * 'td' is tried first, then 'th'; only the first one found in table
 * scope is closed.
 *
 * @return bool True if a 'td' or 'th' was in table scope and its end
 *   tag was processed; false otherwise
 */
private function endCell() {
	foreach ( [ 'td', 'th' ] as $cellName ) {
		if ( $this->stack->inTableScope( $cellName ) ) {
			$this->inCellMode( 'endtag', $cellName );
			return true;
		}
	}
	return false;
}
/**
 * Token handler for the cell insertion mode.
 *
 * Start tags of table-structure elements close the open cell via
 * endCell() and are reprocessed if that succeeded. A 'td'/'th' end
 * tag closes that cell when it is in table scope. End tags of
 * 'table', 'tbody', 'tfoot', 'thead' and 'tr' close whichever cell is
 * open and are then reprocessed, again only when the named element is
 * in table scope. 'caption', 'col' and 'colgroup' end tags are
 * ignored; everything else goes to inBodyMode().
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Tag name for 'tag'/'endtag' tokens; otherwise
 *   forwarded untouched
 * @param array|null $attribs Attributes of the token, forwarded untouched
 * @param bool $selfclose Forwarded untouched
 * @return bool True for tokens handled or ignored here; otherwise the
 *   result of inBodyMode()
 */
private function inCellMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'tag' ) {
		$cellEnders = [
			'caption', 'col', 'colgroup', 'tbody', 'td',
			'tfoot', 'th', 'thead', 'tr',
		];
		if ( in_array( $value, $cellEnders, true ) ) {
			if ( $this->endCell() ) {
				$this->insertToken( $token, $value, $attribs, $selfclose );
			}
			return true;
		}
	} elseif ( $token === 'endtag' ) {
		// OMITTED: <body>
		// OMITTED: <html>
		if ( in_array( $value, [ 'caption', 'col', 'colgroup' ], true ) ) {
			return true;
		}

		$isCellTag = ( $value === 'td' || $value === 'th' );
		$isOuterTag = in_array(
			$value, [ 'table', 'tbody', 'tfoot', 'thead', 'tr' ], true
		);
		if ( $isCellTag || $isOuterTag ) {
			if ( $this->stack->inTableScope( $value ) ) {
				$this->stack->generateImpliedEndTags();
				// A cell end tag pops that very cell; an outer end tag
				// pops whichever kind of cell happens to be open.
				$this->stack->popTag(
					$isCellTag ? $value : BalanceSets::$tableCellSet
				);
				$this->afe->clearToMarker();
				$this->switchMode( 'inRowMode' );
				if ( $isOuterTag ) {
					// The outer end tag still has to be handled itself.
					$this->insertToken( $token, $value, $attribs, $selfclose );
				}
			}
			return true;
		}
	}

	// Anything else:
	return $this->inBodyMode( $token, $value, $attribs, $selfclose );
}
3367
/**
 * Token handler for the select insertion mode.
 *
 * Text and comments are inserted; 'eof' is delegated to inBodyMode();
 * 'script'/'template' start tags and the 'template' end tag are
 * delegated to inHeadMode(). 'option' and 'optgroup' tags open or
 * close the corresponding elements, popping an open option/optgroup
 * where needed. A 'select' start tag is treated like its end tag.
 * 'input', 'keygen' and 'textarea' start tags close the select (when
 * one is in select scope) and are then reprocessed. Every token not
 * covered above is ignored.
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Tag name, text or comment content, depending
 *   on $token
 * @param array|null $attribs Attributes for 'tag' tokens
 * @param bool $selfclose Forwarded untouched
 * @return bool True for tokens handled or ignored here; otherwise the
 *   result of the handler the token was forwarded to
 */
private function inSelectMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' ) {
		$this->stack->insertText( $value );
		return true;
	}
	if ( $token === 'comment' ) {
		$this->stack->insertComment( $value );
		return true;
	}
	if ( $token === 'eof' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfclose );
	}

	if ( $token === 'tag' ) {
		// OMITTED: <html>
		switch ( $value ) {
			case 'script':
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );

			case 'option':
			case 'optgroup':
				// Both implicitly close an open <option>; only a new
				// <optgroup> additionally closes an open <optgroup>.
				if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
					$this->stack->pop();
				}
				if (
					$value === 'optgroup' &&
					$this->stack->currentNode->isHtmlNamed( 'optgroup' )
				) {
					$this->stack->pop();
				}
				$this->stack->insertHTMLElement( $value, $attribs );
				return true;

			case 'select':
				$this->inSelectMode( 'endtag', $value ); // treat it like endtag
				return true;

			case 'input':
			case 'keygen':
			case 'textarea':
				if ( $this->stack->inSelectScope( 'select' ) ) {
					$this->inSelectMode( 'endtag', 'select' );
					return $this->insertToken( $token, $value, $attribs, $selfclose );
				}
				return true; // ignore token (fragment case)
		}
	} elseif ( $token === 'endtag' ) {
		switch ( $value ) {
			case 'template':
				return $this->inHeadMode( $token, $value, $attribs, $selfclose );

			case 'select':
				if ( $this->stack->inSelectScope( $value ) ) {
					$this->stack->popTag( $value );
					$this->resetInsertionMode();
				}
				// Otherwise: fragment case, nothing to do.
				return true;

			case 'option':
				if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
					$this->stack->pop();
				}
				return true;

			case 'optgroup':
				// An <option> sitting directly on top of an <optgroup>
				// is closed first.
				if ( $this->stack->currentNode->isHtmlNamed( 'option' ) ) {
					if (
						$this->stack->length() >= 2 &&
						$this->stack->node( $this->stack->length() - 2 )->isHtmlNamed( 'optgroup' )
					) {
						$this->stack->pop();
					}
				}
				if ( $this->stack->currentNode->isHtmlNamed( 'optgroup' ) ) {
					$this->stack->pop();
				}
				return true;
		}
	}

	// anything else: just ignore the token
	return true;
}
3443
/**
 * Token handler for the select-in-table insertion mode.
 *
 * A start tag of a table-structure element closes the select (by
 * processing a synthetic 'select' end tag) and is then reprocessed.
 * An end tag of such an element does the same, but only if that
 * element is in table scope; otherwise it is ignored. All remaining
 * tokens are handled exactly as in inSelectMode().
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Tag name for 'tag'/'endtag' tokens; otherwise
 *   forwarded untouched
 * @param array|null $attribs Attributes of the token, forwarded untouched
 * @param bool $selfclose Forwarded untouched
 * @return bool True for an ignored end tag; otherwise the result of
 *   insertToken() or inSelectMode()
 */
private function inSelectInTableMode( $token, $value, $attribs = null, $selfclose = false ) {
	$isStartTag = ( $token === 'tag' );
	$tableTags = [
		'caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th',
	];

	if (
		( $isStartTag || $token === 'endtag' ) &&
		in_array( $value, $tableTags, true )
	) {
		// Start tags always break out of the select; end tags only do
		// so when they actually match something in table scope.
		if ( $isStartTag || $this->stack->inTableScope( $value ) ) {
			$this->inSelectInTableMode( 'endtag', 'select' );
			return $this->insertToken( $token, $value, $attribs, $selfclose );
		}
		return true;
	}

	// anything else
	return $this->inSelectMode( $token, $value, $attribs, $selfclose );
}
3468
/**
 * Token handler for the template insertion mode.
 *
 * Text and comments are delegated to inBodyMode(). On 'eof' parsing
 * stops if no 'template' is on the stack; otherwise the template is
 * popped, $this->afe is cleared to its marker, the innermost template
 * insertion mode is dropped, the insertion mode is reset and the
 * 'eof' token is reprocessed. Start tags are routed either to
 * inHeadMode() or, after a mode switch, to the table, column-group,
 * table-body, row or body handler depending on the tag name. Of the
 * end tags only 'template' is handled (by inHeadMode()); all others
 * are ignored. Any other token type trips an invariant assertion.
 *
 * @param string $token Token type ('text', 'tag', 'endtag', 'eof', 'comment')
 * @param string|null $value Tag name, text or comment content, depending
 *   on $token
 * @param array|null $attribs Attributes of the token, forwarded untouched
 * @param bool $selfclose Forwarded untouched
 * @return bool True for tokens handled or ignored here; otherwise the
 *   result of the handler the token was forwarded to
 */
private function inTemplateMode( $token, $value, $attribs = null, $selfclose = false ) {
	if ( $token === 'text' || $token === 'comment' ) {
		return $this->inBodyMode( $token, $value, $attribs, $selfclose );
	}

	if ( $token === 'eof' ) {
		if ( $this->stack->indexOf( 'template' ) >= 0 ) {
			// Unwind the open template, then let the eof be seen again
			// by whichever mode is now current.
			$this->stack->popTag( 'template' );
			$this->afe->clearToMarker();
			array_pop( $this->templateInsertionModes );
			$this->resetInsertionMode();
			$this->insertToken( $token, $value, $attribs, $selfclose );
		} else {
			$this->stopParsing();
		}
		return true;
	}

	if ( $token === 'endtag' ) {
		if ( $value === 'template' ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		}
		return true;
	}

	if ( $token === 'tag' ) {
		// OMITTED: <script>
		// OMITTED: <title>
		$headTags = [
			'base', 'basefont', 'bgsound', 'link', 'meta',
			'noframes', 'style', 'template',
		];
		if ( in_array( $value, $headTags, true ) ) {
			return $this->inHeadMode( $token, $value, $attribs, $selfclose );
		}

		// Mode in which each table-related start tag gets reprocessed;
		// every start tag not listed here is reprocessed in body mode.
		$modeForTag = [
			'caption' => 'inTableMode',
			'colgroup' => 'inTableMode',
			'tbody' => 'inTableMode',
			'tfoot' => 'inTableMode',
			'thead' => 'inTableMode',
			'col' => 'inColumnGroupMode',
			'tr' => 'inTableBodyMode',
			'td' => 'inRowMode',
			'th' => 'inRowMode',
		];
		$nextMode = isset( $modeForTag[$value] ) ? $modeForTag[$value] : 'inBodyMode';
		return $this->switchModeAndReprocess(
			$nextMode, $token, $value, $attribs, $selfclose
		);
	}

	Assert::invariant( false, "Bad token type: $token" );
}
3535 }