Revert "Preprocessor: Don't allow unclosed extension tags (matching until end of...
authorLegoktm <legoktm.wikipedia@gmail.com>
Thu, 4 Feb 2016 00:38:35 +0000 (00:38 +0000)
committerLegoktm <legoktm.wikipedia@gmail.com>
Thu, 4 Feb 2016 00:38:35 +0000 (00:38 +0000)
This reverts commit f51d0d9a819f8f1c181350ced2f015ce97985fcc.

Breaks templates with non-closed </noinclude> tags, which
were previously acceptable.

Bug: T125754
Change-Id: I8bafb15eefac4e1d3e727c1c84782636d8b82c2b

includes/parser/Preprocessor_DOM.php
includes/parser/Preprocessor_Hash.php
tests/parser/parserTests.txt
tests/phpunit/includes/parser/PreprocessorTest.php

index 817f153..4ca3a87 100644 (file)
@@ -237,8 +237,6 @@ class Preprocessor_DOM extends Preprocessor {
                $inHeading = false;
                // True if there are no more greater-than (>) signs right of $i
                $noMoreGT = false;
-               // Map of tag name => true if there are no more closing tags of given type right of $i
-               $noMoreClosingTag = array();
                // True to ignore all input up to the next <onlyinclude>
                $findOnlyinclude = $enableOnlyinclude;
                // Do a line-start run without outputting an LF character
@@ -459,21 +457,17 @@ class Preprocessor_DOM extends Preprocessor {
                                } else {
                                        $attrEnd = $tagEndPos;
                                        // Find closing tag
-                                       if (
-                                               !isset( $noMoreClosingTag[$name] ) &&
-                                               preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+                                       if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
                                                        $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 )
                                        ) {
                                                $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
                                                $i = $matches[0][1] + strlen( $matches[0][0] );
                                                $close = '<close>' . htmlspecialchars( $matches[0][0] ) . '</close>';
                                        } else {
-                                               // No end tag -- don't match the tag, treat opening tag as literal and resume parsing.
-                                               $i = $tagEndPos + 1;
-                                               $accum .= htmlspecialchars( substr( $text, $tagStartPos, $tagEndPos + 1 - $tagStartPos ) );
-                                               // Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
-                                               $noMoreClosingTag[$name] = true;
-                                               continue;
+                                               // No end tag -- let it run out to the end of the text.
+                                               $inner = substr( $text, $tagEndPos + 1 );
+                                               $i = $lengthText;
+                                               $close = '';
                                        }
                                }
                                // <includeonly> and <noinclude> just become <ignore> tags
index 28c49fd..50eaefb 100644 (file)
@@ -160,8 +160,6 @@ class Preprocessor_Hash extends Preprocessor {
                $inHeading = false;
                // True if there are no more greater-than (>) signs right of $i
                $noMoreGT = false;
-               // Map of tag name => true if there are no more closing tags of given type right of $i
-               $noMoreClosingTag = array();
                // True to ignore all input up to the next <onlyinclude>
                $findOnlyinclude = $enableOnlyinclude;
                // Do a line-start run without outputting an LF character
@@ -382,21 +380,17 @@ class Preprocessor_Hash extends Preprocessor {
                                } else {
                                        $attrEnd = $tagEndPos;
                                        // Find closing tag
-                                       if (
-                                               !isset( $noMoreClosingTag[$name] ) &&
-                                               preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
+                                       if ( preg_match( "/<\/" . preg_quote( $name, '/' ) . "\s*>/i",
                                                        $text, $matches, PREG_OFFSET_CAPTURE, $tagEndPos + 1 )
                                        ) {
                                                $inner = substr( $text, $tagEndPos + 1, $matches[0][1] - $tagEndPos - 1 );
                                                $i = $matches[0][1] + strlen( $matches[0][0] );
                                                $close = $matches[0][0];
                                        } else {
-                                               // No end tag -- don't match the tag, treat opening tag as literal and resume parsing.
-                                               $i = $tagEndPos + 1;
-                                               $accum->addLiteral( substr( $text, $tagStartPos, $tagEndPos + 1 - $tagStartPos ) );
-                                               // Cache results, otherwise we have O(N^2) performance for input like <foo><foo><foo>...
-                                               $noMoreClosingTag[$name] = true;
-                                               continue;
+                                               // No end tag -- let it run out to the end of the text.
+                                               $inner = substr( $text, $tagEndPos + 1 );
+                                               $i = $lengthText;
+                                               $close = null;
                                        }
                                }
                                // <includeonly> and <noinclude> just become <ignore> tags
index 438fe31..cd2b769 100644 (file)
@@ -2520,6 +2520,7 @@ Barack Obama <President> of the United States
 </p>
 !! end
 
+## PHP parser discards the "<pre " string
 !! test
 Handle broken pre-like tags (bug 64025)
 !! options
@@ -2530,13 +2531,8 @@ parsoid=wt2html
 <table><pre </table>
 !! html/php
 <pre>x</pre>
-<table>&lt;pre </table>
+<table><pre></pre></table>
 
-!! html/php+tidy
-<pre>
-x
-</pre>
-<p>&lt;pre</p>
 !! html/parsoid
 <pre about="#mwt1" typeof="mw:Transclusion" data-parsoid='{"a":{"&lt;pre":null},"sa":{"&lt;pre":""},"stx":"html","pi":[[{"k":"1","spc":["","","",""]}]]}' data-mw='{"parts":[{"template":{"target":{"wt":"echo","href":"./Template:Echo"},"params":{"1":{"wt":"&lt;pre &lt;pre>x&lt;/pre>"}},"i":0}}]}'>x</pre>
 
@@ -10880,8 +10876,6 @@ Un-closed <includeonly>
 !! wikitext
 <includeonly>
 !! html
-<p>&lt;includeonly&gt;
-</p>
 !! end
 
 ## We used to, but no longer wt2wt this test since the default serializer
index b940230..1ebba1a 100644 (file)
@@ -48,7 +48,7 @@ class PreprocessorTest extends MediaWikiTestCase {
                        array( "<noinclude> Foo bar </noinclude>", "<root><ignore>&lt;noinclude&gt;</ignore> Foo bar <ignore>&lt;/noinclude&gt;</ignore></root>" ),
                        array( "<noinclude>\n{{Foo}}\n</noinclude>", "<root><ignore>&lt;noinclude&gt;</ignore>\n<template lineStart=\"1\"><title>Foo</title></template>\n<ignore>&lt;/noinclude&gt;</ignore></root>" ),
                        array( "<noinclude>\n{{Foo}}\n</noinclude>\n", "<root><ignore>&lt;noinclude&gt;</ignore>\n<template lineStart=\"1\"><title>Foo</title></template>\n<ignore>&lt;/noinclude&gt;</ignore>\n</root>" ),
-                       array( "<gallery>foo bar", "<root>&lt;gallery&gt;foo bar</root>" ),
+                       array( "<gallery>foo bar", "<root><ext><name>gallery</name><attr></attr><inner>foo bar</inner></ext></root>" ),
                        array( "<{{foo}}>", "<root>&lt;<template><title>foo</title></template>&gt;</root>" ),
                        array( "<{{{foo}}}>", "<root>&lt;<tplarg><title>foo</title></tplarg>&gt;</root>" ),
                        array( "<gallery></gallery</gallery>", "<root><ext><name>gallery</name><attr></attr><inner>&lt;/gallery</inner><close>&lt;/gallery&gt;</close></ext></root>" ),