Merge "Drop index oi_name_archive_name on table oldimage"
[lhc/web/wiklou.git] / includes / tidy / Balancer.php
index 95cbe09..3467b49 100644 (file)
@@ -75,7 +75,7 @@ class BalanceSets {
                self::HTML_NAMESPACE => [
                        'html' => true, 'head' => true, 'body' => true, 'frameset' => true,
                        'frame' => true,
-                       'plaintext' => true, 'isindex' => true,
+                       'plaintext' => true,
                        'xmp' => true, 'iframe' => true, 'noembed' => true,
                        'noscript' => true, 'script' => true,
                        'title' => true
@@ -119,9 +119,9 @@ class BalanceSets {
                        'h2' => true, 'h3' => true, 'h4' => true, 'h5' => true,
                        'h6' => true, 'head' => true, 'header' => true, 'hgroup' => true,
                        'hr' => true, 'html' => true, 'iframe' => true, 'img' => true,
-                       'input' => true, 'isindex' => true, 'li' => true, 'link' => true,
+                       'input' => true, 'li' => true, 'link' => true,
                        'listing' => true, 'main' => true, 'marquee' => true,
-                       'menu' => true, 'menuitem' => true, 'meta' => true, 'nav' => true,
+                       'menu' => true, 'meta' => true, 'nav' => true,
                        'noembed' => true, 'noframes' => true, 'noscript' => true,
                        'object' => true, 'ol' => true, 'p' => true, 'param' => true,
                        'plaintext' => true, 'pre' => true, 'script' => true,
@@ -156,7 +156,8 @@ class BalanceSets {
 
        public static $impliedEndTagsSet = [
                self::HTML_NAMESPACE => [
-                       'dd' => true, 'dt' => true, 'li' => true, 'optgroup' => true,
+                       'dd' => true, 'dt' => true, 'li' => true,
+                       'menuitem' => true, 'optgroup' => true,
                        'option' => true, 'p' => true, 'rb' => true, 'rp' => true,
                        'rt' => true, 'rtc' => true
                ]
@@ -498,6 +499,16 @@ class BalanceElement {
                                        $this->attribs = [ 'class' => "mw-empty-elt" ];
                                }
                                $blank = false;
+                       } elseif (
+                               $this->isA( BalanceSets::$extraLinefeedSet ) &&
+                               count( $this->children ) > 0 &&
+                               substr( $this->children[0], 0, 1 ) == "\n"
+                       ) {
+                               // Double the linefeed after pre/listing/textarea
+                               // according to the (old) HTML5 fragment serialization
+                               // algorithm (see https://github.com/whatwg/html/issues/944)
+                               // to ensure this will round-trip.
+                               array_unshift( $this->children, "\n" );
                        }
                        $flat = $blank ? '' : "{$this}";
                } else {
@@ -529,15 +540,6 @@ class BalanceElement {
                                $out .= "{$elt}";
                        }
                        $out .= "</{$this->localName}>";
-                       if (
-                               $this->isA( BalanceSets::$extraLinefeedSet ) &&
-                               $out[$len] === "\n"
-                       ) {
-                               // Double the linefeed after pre/listing/textarea
-                               // according to the HTML5 fragment serialization algorithm.
-                               $out = substr( $out, 0, $len + 1 ) .
-                                       substr( $out, $len );
-                       }
                } else {
                        $out = "<{$this->localName}{$encAttribs} />";
                        Assert::invariant(
@@ -1770,7 +1772,7 @@ class BalanceActiveFormattingElements {
  *   and escaped.
  * - All null characters are assumed to have been removed.
  * - The following elements are disallowed: <html>, <head>, <body>, <frameset>,
- *   <frame>, <plaintext>, <isindex>, <xmp>, <iframe>,
+ *   <frame>, <plaintext>, <xmp>, <iframe>,
  *   <noembed>, <noscript>, <script>, <title>.  As a result,
  *   further simplifications can be made:
  *   - `frameset-ok` is not tracked.
@@ -1822,7 +1824,7 @@ class Balancer {
         * Regex borrowed from Tim Starling's "remex-html" project.
         */
        const VALID_COMMENT_REGEX = "~ !--
-               (                             # 1. Comment match detector
+               (                           # 1. Comment match detector
                        > | -> | # Invalid short close
                        (                         # 2. Comment contents
                                (?:
@@ -1837,15 +1839,15 @@ class Balancer {
                        (                         # 3. Comment close
                                --> |   # Normal close
                                --!> |  # Comment end bang
-                               (                     # 4. Indicate matches requiring EOF
-                                       --! |   # EOF in comment end bang state
-                                       -- |    # EOF in comment end state
-                                       -  |    # EOF in comment end dash state
-                                               # EOF in comment state
+                               (                       # 4. Indicate matches requiring EOF
+                                       --! |                   # EOF in comment end bang state
+                                       -- |                    # EOF in comment end state
+                                       -  |                    # EOF in comment end dash state
+                                       (?#nothing)             # EOF in comment state
                                )
                        )
                )
-               ([^<]*) \z                    # 5. Non-tag text after the comment
+               ([^<]*) \z                  # 5. Non-tag text after the comment
                ~xs";
 
        /**
@@ -1865,7 +1867,9 @@ class Balancer {
         *         provide historical compatibility with the old "tidy"
         *         program: <p>-wrapping is done to the children of
         *         <body> and <blockquote> elements, and empty elements
-        *         are removed.
+        *         are removed.  The <pre>/<listing>/<textarea> serialization
+        *         is also tweaked to allow lossless round trips.
+        *         (See: https://github.com/whatwg/html/issues/944)
         *     'allowComments': boolean, defaults to true.
         *         When true, allows HTML comments in the input.
         *         The Sanitizer generally strips all comments, so if you
@@ -1997,6 +2001,7 @@ class Balancer {
                // Some hoops we have to jump through
                $adjusted = $this->stack->adjustedCurrentNode( $this->fragmentContext );
 
+               // The spec calls this the "tree construction dispatcher".
                $isForeign = true;
                if (
                        $this->stack->length() === 0 ||
@@ -2037,6 +2042,9 @@ class Balancer {
                if ( $token === 'text' ) {
                        $this->stack->insertText( $value );
                        return true;
+               } elseif ( $token === 'comment' ) {
+                       $this->stack->insertComment( $value );
+                       return true;
                } elseif ( $token === 'tag' ) {
                        switch ( $value ) {
                        case 'font':
@@ -2468,7 +2476,6 @@ class Balancer {
                        case 'header':
                        case 'hgroup':
                        case 'main':
-                       case 'menu':
                        case 'nav':
                        case 'ol':
                        case 'p':
@@ -2481,6 +2488,16 @@ class Balancer {
                                $this->stack->insertHTMLElement( $value, $attribs );
                                return true;
 
+                       case 'menu':
+                               if ( $this->stack->inButtonScope( "p" ) ) {
+                                       $this->inBodyMode( 'endtag', 'p' );
+                               }
+                               if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
+                                       $this->stack->pop();
+                               }
+                               $this->stack->insertHTMLElement( $value, $attribs );
+                               return true;
+
                        case 'h1':
                        case 'h2':
                        case 'h3':
@@ -2656,7 +2673,6 @@ class Balancer {
                                // (hence we don't need to examine the tag's "type" attribute)
                                return true;
 
-                       case 'menuitem':
                        case 'param':
                        case 'source':
                        case 'track':
@@ -2668,6 +2684,9 @@ class Balancer {
                                if ( $this->stack->inButtonScope( 'p' ) ) {
                                        $this->inBodyMode( 'endtag', 'p' );
                                }
+                               if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
+                                       $this->stack->pop();
+                               }
                                $this->stack->insertHTMLElement( $value, $attribs );
                                $this->stack->pop();
                                return true;
@@ -2676,8 +2695,6 @@ class Balancer {
                                // warts!
                                return $this->inBodyMode( $token, 'img', $attribs, $selfClose );
 
-                       // OMITTED: <isindex>
-
                        case 'textarea':
                                $this->stack->insertHTMLElement( $value, $attribs );
                                $this->ignoreLinefeed = true;
@@ -2715,6 +2732,14 @@ class Balancer {
                                $this->stack->insertHTMLElement( $value, $attribs );
                                return true;
 
+                       case 'menuitem':
+                               if ( $this->stack->currentNode->isHtmlNamed( 'menuitem' ) ) {
+                                       $this->stack->pop();
+                               }
+                               $this->afe->reconstruct( $this->stack );
+                               $this->stack->insertHTMLElement( $value, $attribs );
+                               return true;
+
                        case 'rb':
                        case 'rtc':
                                if ( $this->stack->inScope( 'ruby' ) ) {