Terminate free external link on &nbsp; (and numeric versions of <>)
author C. Scott Ananian <cscott@cscott.net>
Wed, 23 Sep 2015 19:16:24 +0000 (15:16 -0400)
committer C. Scott Ananian <cscott@cscott.net>
Wed, 23 Sep 2015 20:00:52 +0000 (16:00 -0400)
Bug: T84937
Change-Id: Ic74d8d069e08c0597c7b26755e0d942bf3a510cc

RELEASE-NOTES-1.26
includes/parser/Parser.php
tests/parser/parserTests.txt

index d5b521e..aa32c9f 100644 (file)
@@ -203,6 +203,8 @@ changes to languages because of Phabricator reports.
 * DeferredUpdates::addHTMLCacheUpdate() was removed.
 * The default name of the 'suppress' group page has been changed from
   'Project:Oversight' to 'Project:Suppress'.
+* (T84937) Free external links ("autolinked" urls) will now be terminated
+  by &nbsp; and HTML entity encodings of &nbsp;, <, and >.
 
 == Compatibility ==
 
index 677da63..288e486 100644 (file)
@@ -1474,8 +1474,9 @@ class Parser {
                # The characters '<' and '>' (which were escaped by
                # removeHTMLtags()) should not be included in
                # URLs, per RFC 2396.
+               # Make &nbsp; terminate a URL as well (bug T84937)
                $m2 = array();
-               if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
+               if ( preg_match( '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
                        $trail = substr( $url, $m2[0][1] ) . $trail;
                        $url = substr( $url, 0, $m2[0][1] );
                }
index c8c63f3..4c6c9a5 100644 (file)
@@ -4646,6 +4646,9 @@ http://example.com?
 http://example.com)
 http://example.com/url_with_(brackets)
 (http://example.com/url_without_brackets)
+http://example.com/url_with_entity&amp;
+http://example.com/url_with_entity&#x26;
+http://example.com/url_with_entity&#038;
 http://example.com/url_with_entity&nbsp;
 http://example.com/url_with_entity&#xA0;
 http://example.com/url_with_entity&#160;
@@ -4663,12 +4666,15 @@ http://example.com/url_with_entity&#60;
 <a rel="nofollow" class="external free" href="http://example.com">http://example.com</a>)
 <a rel="nofollow" class="external free" href="http://example.com/url_with_(brackets)">http://example.com/url_with_(brackets)</a>
 (<a rel="nofollow" class="external free" href="http://example.com/url_without_brackets">http://example.com/url_without_brackets</a>)
-<a rel="nofollow" class="external free" href="http://example.com/url_with_entity ">http://example.com/url_with_entity </a>
-<a rel="nofollow" class="external free" href="http://example.com/url_with_entity ">http://example.com/url_with_entity </a>
-<a rel="nofollow" class="external free" href="http://example.com/url_with_entity ">http://example.com/url_with_entity </a>
+<a rel="nofollow" class="external free" href="http://example.com/url_with_entity&amp;">http://example.com/url_with_entity&amp;</a>
+<a rel="nofollow" class="external free" href="http://example.com/url_with_entity&amp;">http://example.com/url_with_entity&amp;</a>
+<a rel="nofollow" class="external free" href="http://example.com/url_with_entity&amp;">http://example.com/url_with_entity&amp;</a>
+<a rel="nofollow" class="external free" href="http://example.com/url_with_entity">http://example.com/url_with_entity</a>&#160;
+<a rel="nofollow" class="external free" href="http://example.com/url_with_entity">http://example.com/url_with_entity</a>&#xa0;
+<a rel="nofollow" class="external free" href="http://example.com/url_with_entity">http://example.com/url_with_entity</a>&#160;
 <a rel="nofollow" class="external free" href="http://example.com/url_with_entity">http://example.com/url_with_entity</a>&lt;
-<a rel="nofollow" class="external free" href="http://example.com/url_with_entity%3C">http://example.com/url_with_entity%3C</a>
-<a rel="nofollow" class="external free" href="http://example.com/url_with_entity%3C">http://example.com/url_with_entity%3C</a>
+<a rel="nofollow" class="external free" href="http://example.com/url_with_entity">http://example.com/url_with_entity</a>&#x3c;
+<a rel="nofollow" class="external free" href="http://example.com/url_with_entity">http://example.com/url_with_entity</a>&#60;
 </p>
 !! html/parsoid
 <p><a rel="mw:ExtLink" href="http://example.com">http://example.com</a>,