WebRequest: Optimise WebRequest::getPathInfo()
author Timo Tijhof <krinklemail@gmail.com>
Mon, 2 Sep 2019 01:03:02 +0000 (02:03 +0100)
committer Aaron Schulz <aschulz@wikimedia.org>
Tue, 3 Sep 2019 18:32:21 +0000 (18:32 +0000)
Called for all PHP web requests from WebStart.php via
WebRequest::interpolateTitle.

* Use isset() instead of empty() where we only need to check
  that the key is supported.

* Don't import global `$wgUsePathInfo` in the common case.

* Migrate from deprecated wrapper Wikimedia\suppressWarnings
  to AtEase::suppressWarnings.

* Use strpos() instead of preg_match(). Consistently faster,
  albeit not by much (for 100 iterations: 0.04ms vs 0.23ms).

* Don't create unused $matches array for the common case.

Bug: T189966
Change-Id: I0de126953c25f3629cb85a0d4e46598baf261c15

includes/WebRequest.php

index bbaa10f..9b8f5a6 100644 (file)
@@ -27,6 +27,7 @@ use MediaWiki\MediaWikiServices;
 use MediaWiki\Session\Session;
 use MediaWiki\Session\SessionId;
 use MediaWiki\Session\SessionManager;
+use Wikimedia\AtEase\AtEase;
 
 // The point of this class is to be a wrapper around super globals
 // phpcs:disable MediaWiki.Usage.SuperGlobalsUsage.SuperGlobals
@@ -117,77 +118,79 @@ class WebRequest {
         * @return array Any query arguments found in path matches.
         */
        public static function getPathInfo( $want = 'all' ) {
-               global $wgUsePathInfo;
                // PATH_INFO is mangled due to https://bugs.php.net/bug.php?id=31892
                // And also by Apache 2.x, double slashes are converted to single slashes.
                // So we will use REQUEST_URI if possible.
-               $matches = [];
-               if ( !empty( $_SERVER['REQUEST_URI'] ) ) {
+               if ( isset( $_SERVER['REQUEST_URI'] ) ) {
                        // Slurp out the path portion to examine...
                        $url = $_SERVER['REQUEST_URI'];
                        if ( !preg_match( '!^https?://!', $url ) ) {
                                $url = 'http://unused' . $url;
                        }
-                       Wikimedia\suppressWarnings();
+                       AtEase::suppressWarnings();
                        $a = parse_url( $url );
-                       Wikimedia\restoreWarnings();
-                       if ( $a ) {
-                               $path = $a['path'] ?? '';
-
-                               global $wgScript;
-                               if ( $path == $wgScript && $want !== 'all' ) {
-                                       // Script inside a rewrite path?
-                                       // Abort to keep from breaking...
-                                       return $matches;
-                               }
+                       AtEase::restoreWarnings();
+                       if ( !$a ) {
+                               return [];
+                       }
+                       $path = $a['path'] ?? '';
 
-                               $router = new PathRouter;
+                       global $wgScript;
+                       if ( $path == $wgScript && $want !== 'all' ) {
+                               // Script inside a rewrite path?
+                               // Abort to keep from breaking...
+                               return [];
+                       }
 
-                               // Raw PATH_INFO style
-                               $router->add( "$wgScript/$1" );
+                       $router = new PathRouter;
 
-                               if ( isset( $_SERVER['SCRIPT_NAME'] )
-                                       && preg_match( '/\.php/', $_SERVER['SCRIPT_NAME'] )
-                               ) {
-                                       # Check for SCRIPT_NAME, we handle index.php explicitly
-                                       # But we do have some other .php files such as img_auth.php
-                                       # Don't let root article paths clober the parsing for them
-                                       $router->add( $_SERVER['SCRIPT_NAME'] . "/$1" );
-                               }
-
-                               global $wgArticlePath;
-                               if ( $wgArticlePath ) {
-                                       $router->add( $wgArticlePath );
-                               }
+                       // Raw PATH_INFO style
+                       $router->add( "$wgScript/$1" );
 
-                               global $wgActionPaths;
-                               if ( $wgActionPaths ) {
-                                       $router->add( $wgActionPaths, [ 'action' => '$key' ] );
-                               }
+                       if ( isset( $_SERVER['SCRIPT_NAME'] )
+                               && strpos( $_SERVER['SCRIPT_NAME'], '.php' ) !== false
+                       ) {
+                               // Check for SCRIPT_NAME, we handle index.php explicitly
+                               // But we do have some other .php files such as img_auth.php
+                               // Don't let root article paths clober the parsing for them
+                               $router->add( $_SERVER['SCRIPT_NAME'] . "/$1" );
+                       }
 
-                               global $wgVariantArticlePath;
-                               if ( $wgVariantArticlePath ) {
-                                       $router->add( $wgVariantArticlePath,
-                                               [ 'variant' => '$2' ],
-                                               [ '$2' => MediaWikiServices::getInstance()->getContentLanguage()->
-                                               getVariants() ]
-                                       );
-                               }
+                       global $wgArticlePath;
+                       if ( $wgArticlePath ) {
+                               $router->add( $wgArticlePath );
+                       }
 
-                               Hooks::run( 'WebRequestPathInfoRouter', [ $router ] );
+                       global $wgActionPaths;
+                       if ( $wgActionPaths ) {
+                               $router->add( $wgActionPaths, [ 'action' => '$key' ] );
+                       }
 
-                               $matches = $router->parse( $path );
+                       global $wgVariantArticlePath;
+                       if ( $wgVariantArticlePath ) {
+                               $router->add( $wgVariantArticlePath,
+                                       [ 'variant' => '$2' ],
+                                       [ '$2' => MediaWikiServices::getInstance()->getContentLanguage()->
+                                       getVariants() ]
+                               );
                        }
-               } elseif ( $wgUsePathInfo ) {
-                       if ( isset( $_SERVER['ORIG_PATH_INFO'] ) && $_SERVER['ORIG_PATH_INFO'] != '' ) {
-                               // Mangled PATH_INFO
-                               // https://bugs.php.net/bug.php?id=31892
-                               // Also reported when ini_get('cgi.fix_pathinfo')==false
-                               $matches['title'] = substr( $_SERVER['ORIG_PATH_INFO'], 1 );
-
-                       } elseif ( isset( $_SERVER['PATH_INFO'] ) && $_SERVER['PATH_INFO'] != '' ) {
-                               // Regular old PATH_INFO yay
-                               $matches['title'] = substr( $_SERVER['PATH_INFO'], 1 );
+
+                       Hooks::run( 'WebRequestPathInfoRouter', [ $router ] );
+
+                       $matches = $router->parse( $path );
+               } else {
+                       global $wgUsePathInfo;
+                       $matches = [];
+                       if ( $wgUsePathInfo ) {
+                               if ( !empty( $_SERVER['ORIG_PATH_INFO'] ) ) {
+                                       // Mangled PATH_INFO
+                                       // https://bugs.php.net/bug.php?id=31892
+                                       // Also reported when ini_get('cgi.fix_pathinfo')==false
+                                       $matches['title'] = substr( $_SERVER['ORIG_PATH_INFO'], 1 );
+                               } elseif ( !empty( $_SERVER['PATH_INFO'] ) ) {
+                                       // Regular old PATH_INFO yay
+                                       $matches['title'] = substr( $_SERVER['PATH_INFO'], 1 );
+                               }
                        }
                }