From 0e1b52a40e15f229e4920945350b77ce09b1c9b2 Mon Sep 17 00:00:00 2001 From: Arlo Breault Date: Mon, 26 Jun 2017 19:20:31 -0400 Subject: [PATCH] Make multiple colons escaping interlanguage links invalid, consistently * Right now, one or two are permitted. This patch limits it to one. The current behaviour seems more a byproduct of refactoring than an explicit goal. * Note that this will break links on a handful of pages surfaced in Parsoid's roundtrip testing. Change-Id: Icabd34bbf15781bb891bd8e0c079d1a65eb28595 --- includes/parser/Parser.php | 14 +++++++------- includes/title/MediaWikiTitleCodec.php | 3 ++- tests/parser/parserTests.txt | 13 +++++++++++++ 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/includes/parser/Parser.php b/includes/parser/Parser.php index 9ea65e013d..0cc2aac87c 100644 --- a/includes/parser/Parser.php +++ b/includes/parser/Parser.php @@ -2240,12 +2240,6 @@ class Parser { $link = $origLink; } - $noforce = ( substr( $origLink, 0, 1 ) !== ':' ); - if ( !$noforce ) { - # Strip off leading ':' - $link = substr( $link, 1 ); - } - $unstrip = $this->mStripState->unstripNoWiki( $link ); $nt = is_string( $unstrip ) ? Title::newFromText( $unstrip ) : null; if ( $nt === null ) { @@ -2256,6 +2250,8 @@ class Parser { $ns = $nt->getNamespace(); $iw = $nt->getInterwiki(); + $noforce = ( substr( $origLink, 0, 1 ) !== ':' ); + if ( $might_be_img ) { # if this is actually an invalid link if ( $ns == NS_FILE && $noforce ) { # but might be an image $found = false; @@ -2300,6 +2296,10 @@ class Parser { $wasblank = ( $text == '' ); if ( $wasblank ) { $text = $link; + if ( !$noforce ) { + # Strip off leading ':' + $text = substr( $text, 1 ); + } } else { # T6598 madness. Handle the quotes only if they come from the alternate part # [[Lista d''e paise d''o munno]] -> Lista d''e paise d''o munno @@ -2324,7 +2324,7 @@ class Parser { } $s = rtrim( $s . $prefix ); - $s .= trim( $trail, "\n" ) == '' ? '': $prefix . $trail; + $s .= trim( $trail, "\n" ) == '' ? '' : $prefix . $trail; continue; } diff --git a/includes/title/MediaWikiTitleCodec.php b/includes/title/MediaWikiTitleCodec.php index 0fff97cb26..dd8b97546b 100644 --- a/includes/title/MediaWikiTitleCodec.php +++ b/includes/title/MediaWikiTitleCodec.php @@ -301,7 +301,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser { # Initial colon indicates main namespace rather than specified default # but should not create invalid {ns,title} pairs such as {0,Project:Foo} - if ( $dbkey !== '' && ':' == $dbkey[0] ) { + if ( $dbkey !== '' && $dbkey[0] == ':' ) { $parts['namespace'] = NS_MAIN; $dbkey = substr( $dbkey, 1 ); # remove the colon but continue processing $dbkey = trim( $dbkey, '_' ); # remove any subsequent whitespace @@ -368,6 +368,7 @@ class MediaWikiTitleCodec implements TitleFormatter, TitleParser { if ( $dbkey !== '' && $dbkey[0] == ':' ) { $parts['namespace'] = NS_MAIN; $dbkey = substr( $dbkey, 1 ); + $dbkey = trim( $dbkey, '_' ); } } # If there's no recognized interwiki or namespace, diff --git a/tests/parser/parserTests.txt b/tests/parser/parserTests.txt index 44bcdffeb4..f7629cd472 100644 --- a/tests/parser/parserTests.txt +++ b/tests/parser/parserTests.txt @@ -8741,6 +8741,19 @@ Blah blah blah zh : Chinese

!! end +!! test +Multiple colons escaping interlanguage links +!! wikitext +[[:es:Spanish]] +[[::es:Spanish]] +[[:::es:Spanish]] +!! html/php +

es:Spanish +[[::es:Spanish]] +[[:::es:Spanish]] +

+!! end + ## parsoid html2wt will normalize the space to _ !! test Space and question mark encoding in interlanguage links (T95473) -- 2.20.1