Short-circuit WebRequest::getGPCVal() for printable ASCII strings
authorAaron Schulz <aschulz@wikimedia.org>
Wed, 31 Jul 2019 22:48:45 +0000 (18:48 -0400)
committerAaron Schulz <aschulz@wikimedia.org>
Fri, 23 Aug 2019 14:53:38 +0000 (14:53 +0000)
This avoids accessing the content Language instance in many cases

Change-Id: I82bd66496180f947c1af16c2728a1548df03dcf9

includes/WebRequest.php

index 6593e49..defe07e 100644 (file)
@@ -395,18 +395,25 @@ class WebRequest {
                # https://www.php.net/variables.external#language.variables.external.dot-in-names
                # Work around PHP *feature* to avoid *bugs* elsewhere.
                $name = strtr( $name, '.', '_' );
-               if ( isset( $arr[$name] ) ) {
-                       $data = $arr[$name];
+
+               if ( !isset( $arr[$name] ) ) {
+                       return $default;
+               }
+
+               $data = $arr[$name];
+               # Optimisation: Skip UTF-8 normalization and legacy transcoding for simple ASCII strings.
+               $isAsciiStr = ( is_string( $data ) && preg_match( '/[^\x20-\x7E]/', $data ) === 0 );
+               if ( !$isAsciiStr ) {
                        if ( isset( $_GET[$name] ) && is_string( $data ) ) {
                                # Check for alternate/legacy character encoding.
-                               $contLang = MediaWikiServices::getInstance()->getContentLanguage();
-                               $data = $contLang->checkTitleEncoding( $data );
+                               $data = MediaWikiServices::getInstance()
+                                       ->getContentLanguage()
+                                       ->checkTitleEncoding( $data );
                        }
                        $data = $this->normalizeUnicode( $data );
-                       return $data;
-               } else {
-                       return $default;
                }
+
+               return $data;
        }
 
        /**