Avoid a redirect loop when the request URL is not normalized
authorTim Starling <tstarling@wikimedia.org>
Tue, 10 Jul 2018 07:04:31 +0000 (17:04 +1000)
committerTim Starling <tstarling@wikimedia.org>
Mon, 16 Jul 2018 05:55:59 +0000 (15:55 +1000)
If the request URL was not normalized, for example having a double slash
in it, this could cause it to fail to match in the PathRouter. But the
canonicalizing redirect was using the normalized URL, causing a redirect
loop exception.

So:

* If the PathRouter fails to match with the original URL, try matching
  against the normalized URL. This allows it to still work for
  normalized URLs with a double slash in the title part of the path.
* Have WebRequest::getFullRequestURL() always return the URL without
  removing dot segments or interpreting double slashes. Just append
  the path to the server.
* Make MediaWikiTest.php use WebRequest instead of FauxRequest, allowing
  it to reproduce the exception in question. Add relevant test.
* Add tests for the new PathRouter behaviour.

Bug: T100782
Change-Id: Ic0f3a0060904abc364f75dae920480b81175d52f

includes/GlobalFunctions.php
includes/PathRouter.php
includes/WebRequest.php
tests/phpunit/includes/MediaWikiTest.php
tests/phpunit/includes/PathRouterTest.php
tests/phpunit/includes/api/format/ApiFormatBaseTest.php

index 3ea020f..2c14fa7 100644 (file)
@@ -582,6 +582,19 @@ function wfExpandUrl( $url, $defaultProto = PROTO_CURRENT ) {
        return false;
 }
 
+/**
+ * Get the wiki's "server", i.e. the protocol and host part of the URL, with a
+ * protocol specified using a PROTO_* constant as in wfExpandUrl()
+ *
+ * @since 1.32
+ * @param string|int|null $proto One of the PROTO_* constants.
+ * @return string The URL
+ */
+function wfGetServerUrl( $proto ) {
+       $url = wfExpandUrl( '/', $proto );
+       return substr( $url, 0, -1 );
+}
+
 /**
  * This function will reassemble a URL parsed with wfParseURL.  This is useful
  * if you need to edit part of a URL and put it back together.
index f24e298..faf4db4 100644 (file)
@@ -240,6 +240,28 @@ class PathRouter {
                // matches are tested first
                $this->sortByWeight();
 
+               $matches = $this->internalParse( $path );
+               if ( is_null( $matches ) ) {
+                       // Try with the normalized path (T100782)
+                       $path = wfRemoveDotSegments( $path );
+                       $path = preg_replace( '#/+#', '/', $path );
+                       $matches = $this->internalParse( $path );
+               }
+
+               // We know the difference between null (no matches) and
+               // array() (a match with no data) but our WebRequest caller
+               // expects array() even when we have no matches so return
+               // a array() when we have null
+               return is_null( $matches ) ? [] : $matches;
+       }
+
+       /**
+        * Match a path against each defined pattern
+        *
+        * @param string $path
+        * @return array|null
+        */
+       protected function internalParse( $path ) {
                $matches = null;
 
                foreach ( $this->patterns as $pattern ) {
@@ -248,12 +270,7 @@ class PathRouter {
                                break;
                        }
                }
-
-               // We know the difference between null (no matches) and
-               // array() (a match with no data) but our WebRequest caller
-               // expects array() even when we have no matches so return
-               // a array() when we have null
-               return is_null( $matches ) ? [] : $matches;
+               return $matches;
        }
 
        /**
index f3edebc..e31d3f9 100644 (file)
@@ -856,7 +856,7 @@ class WebRequest {
         * @return string
         */
        public function getFullRequestURL() {
-               return wfExpandUrl( $this->getRequestURL(), PROTO_CURRENT );
+               return wfGetServerUrl( PROTO_CURRENT ) .  $this->getRequestURL();
        }
 
        /**
index 7d7e637..d79d2cf 100644 (file)
@@ -1,6 +1,8 @@
 <?php
 
 class MediaWikiTest extends MediaWikiTestCase {
+       private $oldServer, $oldGet, $oldPost;
+
        protected function setUp() {
                parent::setUp();
 
@@ -11,6 +13,18 @@ class MediaWikiTest extends MediaWikiTestCase {
                        'wgArticlePath' => '/wiki/$1',
                        'wgActionPaths' => [],
                ] );
+
+               // phpcs:disable MediaWiki.Usage.SuperGlobalsUsage.SuperGlobals
+               $this->oldServer = $_SERVER;
+               $this->oldGet = $_GET;
+               $this->oldPost = $_POST;
+       }
+
+       protected function tearDown() {
+               parent::tearDown();
+               $_SERVER = $this->oldServer;
+               $_GET = $this->oldGet;
+               $_POST = $this->oldPost;
        }
 
        public static function provideTryNormaliseRedirect() {
@@ -113,6 +127,13 @@ class MediaWikiTest extends MediaWikiTestCase {
                                'title' => 'Foo_Bar',
                                'redirect' => false,
                        ],
+                       [
+                               // Path with double slash prefix (T100782)
+                               'url' => 'http://example.org//wiki/Double_slash',
+                               'query' => [],
+                               'title' => 'Double_slash',
+                               'redirect' => false,
+                       ],
                ];
        }
 
@@ -122,11 +143,13 @@ class MediaWikiTest extends MediaWikiTestCase {
         */
        public function testTryNormaliseRedirect( $url, $query, $title, $expectedRedirect = false ) {
                // Set SERVER because interpolateTitle() doesn't use getRequestURL(),
-               // whereas tryNormaliseRedirect does().
+               // whereas tryNormaliseRedirect does(). Also, using WebRequest allows
+               // us to test some quirks in that class.
                $_SERVER['REQUEST_URI'] = $url;
+               $_POST = [];
+               $_GET = $query;
+               $req = new WebRequest;
 
-               $req = new FauxRequest( $query );
-               $req->setRequestURL( $url );
                // This adds a virtual 'title' query parameter. Normally called from Setup.php
                $req->interpolateTitle();
 
index 15c4791..d891675 100644 (file)
@@ -145,6 +145,58 @@ class PathRouterTest extends MediaWikiTestCase {
                                [ 'title' => "Title_With Space" ]
                        ],
 
+                       // Double slash and dot expansion
+                       'Double slash in prefix' => [
+                               '/wiki/$1',
+                               '//wiki/Foo',
+                               [ 'title' => 'Foo' ]
+                       ],
+                       'Double slash at start of $1' => [
+                               '/wiki/$1',
+                               '/wiki//Foo',
+                               [ 'title' => '/Foo' ]
+                       ],
+                       'Double slash in middle of $1' => [
+                               '/wiki/$1',
+                               '/wiki/.hack//SIGN',
+                               [ 'title' => '.hack//SIGN' ]
+                       ],
+                       'Dots removed 1' => [
+                               '/wiki/$1',
+                               '/x/../wiki/Foo',
+                               [ 'title' => 'Foo' ]
+                       ],
+                       'Dots removed 2' => [
+                               '/wiki/$1',
+                               '/./wiki/Foo',
+                               [ 'title' => 'Foo' ]
+                       ],
+                       'Dots retained 1' => [
+                               '/wiki/$1',
+                               '/wiki/../wiki/Foo',
+                               [ 'title' => '../wiki/Foo' ]
+                       ],
+                       'Dots retained 2' => [
+                               '/wiki/$1',
+                               '/wiki/./Foo',
+                               [ 'title' => './Foo' ]
+                       ],
+                       'Triple slash' => [
+                               '/wiki/$1',
+                               '///wiki/Foo',
+                               [ 'title' => 'Foo' ]
+                       ],
+                       // '..' only traverses one slash, see e.g. RFC 3986
+                       'Dots traversing double slash 1' => [
+                               '/wiki/$1',
+                               '/a//b/../../wiki/Foo',
+                               []
+                       ],
+                       'Dots traversing double slash 2' => [
+                               '/wiki/$1',
+                               '/a//b/../../../wiki/Foo',
+                               [ 'title' => 'Foo' ]
+                       ],
                ];
 
                // Make sure the router doesn't break on special characters like $ used in regexp replacements
index d4aa805..03359f8 100644 (file)
@@ -10,6 +10,13 @@ class ApiFormatBaseTest extends ApiFormatTestBase {
 
        protected $printerName = 'mockbase';
 
+       protected function setUp() {
+               parent::setUp();
+               $this->setMwGlobals( [
+                       'wgServer' => 'http://example.org'
+               ] );
+       }
+
        public function getMockFormatter( ApiMain $main = null, $format, $methods = [] ) {
                if ( $main === null ) {
                        $context = new RequestContext;
@@ -352,7 +359,7 @@ class ApiFormatBaseTest extends ApiFormatTestBase {
        public function testHtmlHeader( $post, $registerNonHtml, $expect ) {
                $context = new RequestContext;
                $request = new FauxRequest( [ 'a' => 1, 'b' => 2 ], $post );
-               $request->setRequestURL( 'http://example.org/wx/api.php' );
+               $request->setRequestURL( '/wx/api.php' );
                $context->setRequest( $request );
                $context->setLanguage( 'qqx' );
                $main = new ApiMain( $context );