Add \b to regexes in BlockLevelPass to avoid confusing tr & track
authorBrion Vibber <brion@pobox.com>
Fri, 19 May 2017 22:45:48 +0000 (00:45 +0200)
committerBrion Vibber <brion@pobox.com>
Fri, 19 May 2017 22:53:05 +0000 (00:53 +0200)
With TimedMediaHandler in video.js mode, videos can be inline,
without a wrapper div.

Previously, in this mode two paragraphs where one contained a
video would end up merged into one paragraph, due to BlockLevelPass
matching "<track .../>" against "<tr" in its regexes.

Added \b to a couple of the regexes to protect against such errors,
and corrected a parser test case that had bad output listed, where
"<link .../>" matched against "<li".

Bug: T165817
Change-Id: I06e82b881f5ebddae5e7df7fb940adfa54f6b659

includes/parser/BlockLevelPass.php
tests/parser/parserTests.txt

index 2023d13..599fbf6 100644 (file)
@@ -286,14 +286,14 @@ class BlockLevelPass {
                                # @todo consider using a stack for nestable elements like span, table and div
                                $openMatch = preg_match(
                                        '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
-                                               . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
+                                               . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)\\b/iS',
                                        $t
                                );
                                $closeMatch = preg_match(
                                        '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
                                                . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
                                                . Parser::MARKER_PREFIX
-                                               . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
+                                               . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)\\b/iS',
                                        $t
                                );
 
index 2d107e7..368dc0d 100644 (file)
@@ -17552,10 +17552,10 @@ Sanitizer: Validating that <meta> and <link> work, but only for Microdata
 <p>    <meta itemprop="hello" content="world" />
        &lt;meta http-equiv="refresh" content="5"&gt;
        <meta itemprop="hello" content="5" />
-</p>
        <link itemprop="hello" href="http&#58;//example.org" />
        &lt;link rel="stylesheet" href="<a rel="nofollow" class="external free" href="http://example.org">http://example.org</a>"&gt;
        <link itemprop="hello" href="http&#58;//example.org" />
+</p>
 </div>
 
 !! end