Allow stop characters as quoted attribute delimiters
author: Arlo Breault <abreault@wikimedia.org>
Wed, 26 Aug 2015 21:33:12 +0000 (14:33 -0700)
committer: Arlo Breault <abreault@wikimedia.org>
Wed, 26 Aug 2015 21:33:12 +0000 (14:33 -0700)
 * Matches Parsoid in Ibdaa51f94eadc640278594a3eb5dd43356c286ea

Change-Id: I51b6861d7232d857b75881966091ae1e662c13a9

includes/Sanitizer.php
tests/parser/parserTests.txt

index fbf3234..3591504 100644 (file)
@@ -346,8 +346,8 @@ class Sanitizer {
                                  ($space*=$space*
                                        (?:
                                         # The attribute value: quoted or alone
-                                         \"([^<\"]*)\"
-                                        | '([^<']*)'
+                                         \"([^<\"]*)(?:\"|\$)
+                                        | '([^<']*)(?:'|\$)
                                         |  ([a-zA-Z0-9!#$%&()*,\\-.\\/:;<>?@[\\]^_`{|}~]+)
                                        )
                                )?(?=$space|\$)/sx";
index f70a1b0..8fe8c61 100644 (file)
@@ -6859,9 +6859,6 @@ parsoid=wt2html,wt2wt
 </tbody></table>
 !! end
 
-
-# PHP throws away the (semi-broken) "foo" class here; Parsoid
-# preserves it.
 !!test
 Parsoid: Recover better from broken table attributes
 !!options
@@ -6872,7 +6869,7 @@ parsoid=wt2html
 foo
 |}
 !!html/php+tidy
-<table>
+<table class="foo">
 <tr>
 <td class="bar">
 <p>foo</p>
@@ -20616,7 +20613,7 @@ __TOC__
 </div>
 
 <h2><span class="mw-headline" id="Hello"><sup class="in-h2">Hello</sup></span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=1" title="Edit section: Hello">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
-<h2><span class="mw-headline" id="b.22.3EEvilbye"><sup> b"&gt;Evilbye</sup></span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=2" title="Edit section: b&quot;&gt;Evilbye">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
+<h2><span class="mw-headline" id="b.22.3EEvilbye"><sup class="a"> b"&gt;Evilbye</sup></span><span class="mw-editsection"><span class="mw-editsection-bracket">[</span><a href="/index.php?title=Parser_test&amp;action=edit&amp;section=2" title="Edit section: b&quot;&gt;Evilbye">edit</a><span class="mw-editsection-bracket">]</span></span></h2>
 
 !! end
 
@@ -23407,20 +23404,9 @@ HTML tag with broken attribute value quoting
 !! wikitext
 <span title="Hello world>Foo</span>
 !! html/php
-<p><span>Foo</span>
-</p>
-!! html/parsoid
 <p><span title="Hello world">Foo</span>
 </p>
-!! end
-
-!! test
-Parsoid-only: HTML tag with broken attribute value quoting
-!! options
-parsoid
-!! wikitext
-<span title="Hello world>Foo</span>
-!! html
+!! html/parsoid
 <p><span title="Hello world">Foo</span>
 </p>
 !! end
@@ -23434,7 +23420,7 @@ Table with broken attribute value quoting
 !! html/php
 <table>
 <tr>
-<td>Foo
+<td title="Hello world">Foo
 </td></tr></table>
 
 !! html/parsoid
@@ -23455,9 +23441,9 @@ Table with broken attribute value quoting on consecutive lines
 !! html/php
 <table>
 <tr>
-<td>Foo
+<td title="Hello world">Foo
 </td>
-<td>Bar
+<td style="color:red">Bar
 </td></tr></table>
 
 !! html/parsoid