* Don't URL-decode in the title attribute for URL links; it can produce false
author: Brion Vibber <brion@users.mediawiki.org>
Fri, 17 Mar 2006 01:02:14 +0000 (01:02 +0000)
committer: Brion Vibber <brion@users.mediawiki.org>
Fri, 17 Mar 2006 01:02:14 +0000 (01:02 +0000)
  results that don't code back to their original values.

RELEASE-NOTES
includes/Linker.php
includes/Parser.php
maintenance/parserTests.txt

index 8f6a4e1..0d9e3d6 100644 (file)
@@ -695,6 +695,12 @@ fully support the editing toolbar, but was found to be too confusing.
 * (bug 5236) Load wikibits.js before site-customized javascript
 * (bug 4119) Workaround for <nowiki> following link in Walloon; remove capitals
   from linktrail, as they're not used anywhere else.
+* (bug 4781) Output links with the percent-encoding they're supplied with;
+  save the normalization for internal link storage. The normalization is a bit
+  buggy and can make incorrect foldings in the query string and such, so isn't
+  reliable beyond the hostname where it's used for the spam bulk checker.
+* Don't URL-decode in the title attribute for URL links; it can produce false
+  results that don't code back to their original values.
 
 
 === Caveats ===
index 32b7cb4..3065457 100644 (file)
@@ -30,6 +30,17 @@ class Linker {
        function getExternalLinkAttributes( $link, $text, $class='' ) {
                global $wgContLang;
 
+               $link = htmlspecialchars( $link );
+
+               $r = ($class != '') ? " class='$class'" : " class='external'";
+
+               $r .= " title=\"{$link}\"";
+               return $r;
+       }
+
+       function getInterwikiLinkAttributes( $link, $text, $class='' ) {
+               global $wgContLang;
+
                $same = ($link == $text);
                $link = urldecode( $link );
                $link = $wgContLang->checkTitleEncoding( $link );
@@ -194,7 +205,7 @@ class Linker {
                        $u = $nt->getFullURL();
                        $link = $nt->getPrefixedURL();
                        if ( '' == $text ) { $text = $nt->getPrefixedText(); }
-                       $style = $this->getExternalLinkAttributes( $link, $text, 'extiw' );
+                       $style = $this->getInterwikiLinkAttributes( $link, $text, 'extiw' );
 
                        $inside = '';
                        if ( '' != $trail ) {
index 3b5d119..195d280 100644 (file)
@@ -1132,9 +1132,6 @@ class Parser
                        # Replace &amp; from obsolete syntax with &.
                        # All HTML entities will be escaped by makeExternalLink()
                        $url = str_replace( '&amp;', '&', $url );
-                       # Replace unnecessary URL escape codes with the referenced character
-                       # This prevents spammers from hiding links from the filters
-                       $url = Parser::replaceUnusualEscapes( $url );
 
                        # Process the trail (i.e. everything after this link up until start of the next link),
                        # replacing any non-bracketed links
@@ -1146,8 +1143,11 @@ class Parser
                        # This was changed in August 2004
                        $s .= $sk->makeExternalLink( $url, $text, false, $linktype ) . $dtrail . $trail;
 
-                       # Register link in the output object
-                       $this->mOutput->addExternalLink( $url );
+                       # Register link in the output object.
+                       # Replace unnecessary URL escape codes with the referenced character
+                       # This prevents spammers from hiding links from the filters
+                       $pasteurized = Parser::replaceUnusualEscapes( $url );
+                       $this->mOutput->addExternalLink( $pasteurized );
                }
 
                wfProfileOut( $fname );
@@ -1203,16 +1203,16 @@ class Parser
                                # All HTML entities will be escaped by makeExternalLink()
                                # or maybeMakeExternalImage()
                                $url = str_replace( '&amp;', '&', $url );
-                               # Replace unnecessary URL escape codes with their equivalent characters
-                               $url = Parser::replaceUnusualEscapes( $url );
 
                                # Is this an external image?
                                $text = $this->maybeMakeExternalImage( $url );
                                if ( $text === false ) {
                                        # Not an image, make a link
                                        $text = $sk->makeExternalLink( $url, $wgContLang->markNoConversion($url), true, 'free' );
-                                       # Register it in the output object
-                                       $this->mOutput->addExternalLink( $url );
+                                       # Register it in the output object...
+                                       # Replace unnecessary URL escape codes with their equivalent characters
+                                       $pasteurized = Parser::replaceUnusualEscapes( $url );
+                                       $this->mOutput->addExternalLink( $pasteurized );
                                }
                                $s .= $text . $trail;
                        } else {
@@ -1228,6 +1228,10 @@ class Parser
         * @param string
         * @return string
         * @static
+        * @fixme This can merge genuinely required bits in the path or query string,
+        *        breaking legit URLs. A proper fix would treat the various parts of
+        *        the URL differently; as a workaround, just use the output for
+        *        statistical records, not for actual linking/output.
         */
        function replaceUnusualEscapes( $url ) {
                return preg_replace_callback( '/%[0-9A-Fa-f]{2}/',
index d312561..c0d8f6f 100644 (file)
@@ -735,6 +735,62 @@ http://www.example.com/?title=Ben-Hur_%281959_film%29
 !! end
 
 
+!! test
+Bug 4781: %26 in autonumber URL
+!! input
+[http://www.example.com/?title=AT%26T]
+!! result
+<p><a href="http://www.example.com/?title=AT%26T" class='external autonumber' title="http://www.example.com/?title=AT%26T" rel="nofollow">[1]</a>
+</p>
+!! end
+
+!! test
+Bug 4781, 5267: %25 in autonumber URL
+!! input
+[http://www.example.com/?title=100%25_Bran]
+!! result
+<p><a href="http://www.example.com/?title=100%25_Bran" class='external autonumber' title="http://www.example.com/?title=100%25_Bran" rel="nofollow">[1]</a>
+</p>
+!! end
+
+!! test
+Bug 4781, 5267: %28, %29 in autonumber URL
+!! input
+[http://www.example.com/?title=Ben-Hur_%281959_film%29]
+!! result
+<p><a href="http://www.example.com/?title=Ben-Hur_%281959_film%29" class='external autonumber' title="http://www.example.com/?title=Ben-Hur_%281959_film%29" rel="nofollow">[1]</a>
+</p>
+!! end
+
+
+!! test
+Bug 4781: %26 in bracketed URL
+!! input
+[http://www.example.com/?title=AT%26T link]
+!! result
+<p><a href="http://www.example.com/?title=AT%26T" class='external text' title="http://www.example.com/?title=AT%26T" rel="nofollow">link</a>
+</p>
+!! end
+
+!! test
+Bug 4781, 5267: %25 in bracketed URL
+!! input
+[http://www.example.com/?title=100%25_Bran link]
+!! result
+<p><a href="http://www.example.com/?title=100%25_Bran" class='external text' title="http://www.example.com/?title=100%25_Bran" rel="nofollow">link</a>
+</p>
+!! end
+
+!! test
+Bug 4781, 5267: %28, %29 in bracketed URL
+!! input
+[http://www.example.com/?title=Ben-Hur_%281959_film%29 link]
+!! result
+<p><a href="http://www.example.com/?title=Ben-Hur_%281959_film%29" class='external text' title="http://www.example.com/?title=Ben-Hur_%281959_film%29" rel="nofollow">link</a>
+</p>
+!! end
+
+
 ###
 ### Quotes
 ###