SECURITY: Work around PHP bug in parse_url
author Brad Jorsch <bjorsch@wikimedia.org>
Mon, 17 Dec 2018 18:20:12 +0000 (13:20 -0500)
committer Reedy <reedy@wikimedia.org>
Thu, 19 Dec 2019 13:24:03 +0000 (13:24 +0000)
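PHP's parse_url() gets confused by URLs with a query portion but no path
(e.g. "//evil.com?example.org/foo/bar"): on affected versions the "?" and
the query text end up in the 'host' component instead of 'query'.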

Bug: T212067
Change-Id: I15c15161a668115d68eb2e2f8004826b47148fc1

RELEASE-NOTES-1.31
includes/GlobalFunctions.php
tests/phpunit/includes/GlobalFunctions/wfParseUrlTest.php

index 41b8184..52f2b78 100644 (file)
@@ -16,6 +16,7 @@ THIS IS NOT A RELEASE YET
 * (T233342) rdbms: Log debug message traces as 'exception.trace' instead of
   'trace'.
 * (T226751) media: Log and fail gracefully on invalid EXIF coordinates.
 * (T233342) rdbms: Log debug message traces as 'exception.trace' instead of
   'trace'.
 * (T226751) media: Log and fail gracefully on invalid EXIF coordinates.
+* (T212067) Work around PHP bug in parse_url.
 
 == MediaWiki 1.31.5 ==
 
 
 == MediaWiki 1.31.5 ==
 
index 0152209..c025553 100644 (file)
@@ -811,6 +811,18 @@ function wfParseUrl( $url ) {
        Wikimedia\suppressWarnings();
        $bits = parse_url( $url );
        Wikimedia\restoreWarnings();
        Wikimedia\suppressWarnings();
        $bits = parse_url( $url );
        Wikimedia\restoreWarnings();
+
+       // T212067: PHP < 5.6.28, 7.0.0–7.0.12, and HHVM (all relevant versions) screw up parsing
+       // the query part of pathless URLs
+       if ( isset( $bits['host'] ) && strpos( $bits['host'], '?' ) !== false ) {
+               list( $host, $query ) = explode( '?', $bits['host'], 2 );
+               $bits['host'] = $host;
+               $bits['query'] = $query
+                       . ( $bits['path'] ?? '' )
+                       . ( isset( $bits['query'] ) ? '?' . $bits['query'] : '' );
+               unset( $bits['path'] );
+       }
+
        // parse_url() returns an array without scheme for some invalid URLs, e.g.
        // parse_url("%0Ahttp://example.com") == [ 'host' => '%0Ahttp', 'path' => 'example.com' ]
        if ( !$bits || !isset( $bits['scheme'] ) ) {
        // parse_url() returns an array without scheme for some invalid URLs, e.g.
        // parse_url("%0Ahttp://example.com") == [ 'host' => '%0Ahttp', 'path' => 'example.com' ]
        if ( !$bits || !isset( $bits['scheme'] ) ) {
index b20cfb5..25a2342 100644 (file)
@@ -152,6 +152,46 @@ class WfParseUrlTest extends MediaWikiTestCase {
                                'invalid://test/',
                                false
                        ],
                                'invalid://test/',
                                false
                        ],
+                       // T212067
+                       [
+                               '//evil.com?example.org/foo/bar',
+                               [
+                                       'scheme' => '',
+                                       'delimiter' => '//',
+                                       'host' => 'evil.com',
+                                       'query' => 'example.org/foo/bar',
+                               ]
+                       ],
+                       [
+                               '//evil.com?example.org/foo/bar?baz#quux',
+                               [
+                                       'scheme' => '',
+                                       'delimiter' => '//',
+                                       'host' => 'evil.com',
+                                       'query' => 'example.org/foo/bar?baz',
+                                       'fragment' => 'quux',
+                               ]
+                       ],
+                       [
+                               '//evil.com?example.org?baz#quux',
+                               [
+                                       'scheme' => '',
+                                       'delimiter' => '//',
+                                       'host' => 'evil.com',
+                                       'query' => 'example.org?baz',
+                                       'fragment' => 'quux',
+                               ]
+                       ],
+                       [
+                               '//evil.com?example.org#quux',
+                               [
+                                       'scheme' => '',
+                                       'delimiter' => '//',
+                                       'host' => 'evil.com',
+                                       'query' => 'example.org',
+                                       'fragment' => 'quux',
+                               ]
+                       ],
                ];
        }
 }
                ];
        }
 }