(bug 32951) Do not register absolute internal externals
author: umherirrender <umherirrender_de.wp@web.de>
Sat, 29 Sep 2012 10:36:33 +0000 (12:36 +0200)
committer: Gerrit Code Review <gerrit@wikimedia.org>
Wed, 3 Oct 2012 23:15:38 +0000 (23:15 +0000)
When $wgRegisterInternalExternals = false and the server is protocol-relative,
http/https links to the own server should not be stored in the externallinks table

Also fix detection of own links where the server name is followed by a
query, an anchor, or nothing at all

Now also detected:
//localhost
//localhost?query
//localhost#anchor

Already detected before this change:
//localhost/path

Change-Id: Idd03d309cc3b71728a8cbea460efa12b10348d64

RELEASE-NOTES-1.21
includes/parser/ParserOutput.php
tests/phpunit/includes/parser/ParserOutputTest.php [new file with mode: 0644]

index 0548946..50c3b2d 100644 (file)
@@ -20,6 +20,8 @@ production.
 * (bug 40352) fixDoubleRedirects.php should support interwiki redirects.
 * (bug 9237) SpecialBrokenRedirect should not list interwiki redirects.
 * (bug 34960) Drop unused fields rc_moved_to_ns and rc_moved_to_title from recentchanges table.
+* (bug 32951) Do not register internal external links that use an absolute
+  protocol when the server is protocol-relative.
 
 === API changes in 1.21 ===
 * (bug 35693) ApiQueryImageInfo now suppresses errors when unserializing metadata.
index 41b4a38..be629d3 100644 (file)
@@ -150,11 +150,35 @@ class ParserOutput extends CacheTime {
                return (bool)$this->mNewSection;
        }
 
+       /**
+        * Checks whether a URL points to the given server (case-insensitive match at the start of the URL)
+        *
+        * @param $internal String the server to check against, e.g. 'http://example.org' or '//example.org'
+        * @param $url String the URL to check
+        * @return bool true if $url starts with $internal followed by '/', '?', '#' or the end of the URL
+        */
+       static function isLinkInternal( $internal, $url ) {
+               return (bool)preg_match( '/^' .
+                       # If the server is protocol-relative, also accept an explicit http: or https: prefix
+                       ( substr( $internal, 0, 2 ) === '//' ? '(?:https?:)?' : '' ) .
+                       preg_quote( $internal, '/' ) .
+                       # The server must be followed by a query, path or anchor delimiter, or by the end of the URL
+                       '(?:[\?\/\#]|$)/i',
+                       $url
+               );
+       }
+
        function addExternalLink( $url ) {
                # Links pointing to our own server are not registered unless $wgRegisterInternalExternals is set
                global $wgServer, $wgRegisterInternalExternals;
-               if( $wgRegisterInternalExternals or stripos($url,$wgServer.'/')!==0)
+
+               $registerExternalLink = true;
+               if( !$wgRegisterInternalExternals ) {
+                       $registerExternalLink = !self::isLinkInternal( $wgServer, $url );
+               }
+               if( $registerExternalLink ) {
                        $this->mExternalLinks[$url] = 1;
+               }
        }
 
        /**
diff --git a/tests/phpunit/includes/parser/ParserOutputTest.php b/tests/phpunit/includes/parser/ParserOutputTest.php
new file mode 100644 (file)
index 0000000..2244fdb
--- /dev/null
@@ -0,0 +1,38 @@
+<?php
+
+class ParserOutputTest extends MediaWikiTestCase {
+
+       function dataIsLinkInternal() {
+               return array(
+                       // Each row is: expected result, server, url. Different domains never match
+                       array( false, 'http://example.org', 'http://mediawiki.org' ),
+                       // Identical server and url match
+                       array( true, 'http://example.org', 'http://example.org' ),
+                       array( true, 'https://example.org', 'https://example.org' ),
+                       array( true, '//example.org', '//example.org' ),
+                       // Matching is case-insensitive
+                       array( true, 'http://example.org', 'http://EXAMPLE.ORG' ),
+                       // A trailing path, query or fragment does not affect the result
+                       array( true, 'http://example.org', 'http://example.org/wiki/Main_Page' ),
+                       array( true, 'http://example.org', 'http://example.org?my=query' ),
+                       array( true, 'http://example.org', 'http://example.org#its-a-fragment' ),
+                       // A server with an absolute protocol does not match the other protocol
+                       array( false, 'http://example.org', 'https://example.org' ),
+                       array( false, 'https://example.org', 'http://example.org' ),
+                       // Protocol-relative servers match both http and https links
+                       array( true, '//example.org', 'http://example.org' ),
+                       array( true, '//example.org', 'https://example.org' ),
+                       // But they do not match other protocols such as irc
+                       array( false, '//example.org', 'irc://example.org' ),
+               );
+       }
+
+       /**
+        * Checks that ParserOutput::isLinkInternal( $server, $url ) returns $shouldMatch
+        * @dataProvider dataIsLinkInternal
+        */
+       function testIsLinkInternal( $shouldMatch, $server, $url ) {
+
+               $this->assertEquals( $shouldMatch, ParserOutput::isLinkInternal( $server, $url ) );
+       }
+}