* Add preprocessor tests that Bergi supplied for Bug #28642
[lhc/web/wiklou.git] / includes / WebRequest.php
index 8960669..41077d3 100644 (file)
  */
 class WebRequest {
        protected $data, $headers = array();
-       private $_response;
+
+       /**
+        * Lazy-init response object
+        * @var WebResponse
+        */
+       private $response;
 
        public function __construct() {
                /// @todo Fixme: this preemptive de-quoting can interfere with other web libraries
@@ -50,6 +55,75 @@ class WebRequest {
                $this->data = $_POST + $_GET;
        }
 
+       /**
+        * Extract the PATH_INFO variable even when it isn't a reasonable
+        * value. On some large webhosts, PATH_INFO includes the script
+        * path as well as everything after it.
+        *
+        * REQUEST_URI is preferred whenever it is non-empty: its path is
+        * matched in turn against "$wgScript/$1", $wgArticlePath,
+        * $wgActionPaths and the per-variant expansions of
+        * $wgVariantArticlePath, stopping at the first pattern that matches.
+        * ORIG_PATH_INFO and then PATH_INFO are only used as fallbacks when
+        * REQUEST_URI is empty or unset.
+        *
+        * @param $want string: If this is not 'all', then the function
+        * will return an empty array when the request path is exactly
+        * $wgScript (the script may be inside a rewrite path). This is only
+        * consulted on the REQUEST_URI branch.
+        *
+        * @return Array: Values extracted from the path; empty if nothing
+        * matched. The PATH_INFO fallbacks set only the 'title' key; the
+        * REQUEST_URI branch returns whatever extractTitle() produced.
+        */
+       static public function getPathInfo( $want = 'all' ) {
+               // PATH_INFO is mangled due to http://bugs.php.net/bug.php?id=31892
+               // And also by Apache 2.x, double slashes are converted to single slashes.
+               // So we will use REQUEST_URI if possible.
+               $matches = array();
+               if ( !empty( $_SERVER['REQUEST_URI'] ) ) {
+                       // Slurp out the path portion to examine...
+                       $url = $_SERVER['REQUEST_URI'];
+                       // If there is no scheme, prepend a dummy scheme and host
+                       // before handing the value to parse_url().
+                       if ( !preg_match( '!^https?://!', $url ) ) {
+                               $url = 'http://unused' . $url;
+                       }
+                       $a = parse_url( $url );
+                       // Skip extraction entirely when parse_url() gives a falsy
+                       // result; $matches then stays empty.
+                       if( $a ) {
+                               $path = isset( $a['path'] ) ? $a['path'] : '';
+
+                               global $wgScript;
+                               if( $path == $wgScript && $want !== 'all' ) {
+                                       // Script inside a rewrite path?
+                                       // Abort to keep from breaking...
+                                       return $matches;
+                               }
+                               // Raw PATH_INFO style
+                               $matches = self::extractTitle( $path, "$wgScript/$1" );
+
+                               // Each later pattern is tried only if nothing has matched so far.
+                               global $wgArticlePath;
+                               if( !$matches && $wgArticlePath ) {
+                                       $matches = self::extractTitle( $path, $wgArticlePath );
+                               }
+
+                               global $wgActionPaths;
+                               if( !$matches && $wgActionPaths ) {
+                                       $matches = self::extractTitle( $path, $wgActionPaths, 'action' );
+                               }
+
+                               global $wgVariantArticlePath, $wgContLang;
+                               if( !$matches && $wgVariantArticlePath ) {
+                                       // Build one concrete path pattern per language variant
+                                       // by substituting the variant code for '$2'.
+                                       $variantPaths = array();
+                                       foreach( $wgContLang->getVariants() as $variant ) {
+                                               $variantPaths[$variant] =
+                                                       str_replace( '$2', $variant, $wgVariantArticlePath );
+                                       }
+                                       $matches = self::extractTitle( $path, $variantPaths, 'variant' );
+                               }
+                       }
+               } elseif ( isset( $_SERVER['ORIG_PATH_INFO'] ) && $_SERVER['ORIG_PATH_INFO'] != '' ) {
+                       // Mangled PATH_INFO
+                       // http://bugs.php.net/bug.php?id=31892
+                       // Also reported when ini_get('cgi.fix_pathinfo')==false
+                       // substr( ..., 1 ) drops the first character of the value.
+                       $matches['title'] = substr( $_SERVER['ORIG_PATH_INFO'], 1 );
+
+               } elseif ( isset( $_SERVER['PATH_INFO'] ) && ($_SERVER['PATH_INFO'] != '') ) {
+                       // Regular old PATH_INFO yay
+                       $matches['title'] = substr( $_SERVER['PATH_INFO'], 1 );
+               }
+
+               return $matches;
+       }
+
        /**
         * Check for title, action, and/or variant data in the URL
         * and interpolate it into the GET variables.
@@ -60,61 +134,13 @@ class WebRequest {
        public function interpolateTitle() {
                global $wgUsePathInfo;
 
-               if ( $wgUsePathInfo ) {
-                       // PATH_INFO is mangled due to http://bugs.php.net/bug.php?id=31892
-                       // And also by Apache 2.x, double slashes are converted to single slashes.
-                       // So we will use REQUEST_URI if possible.
-                       $matches = array();
-
-                       if ( !empty( $_SERVER['REQUEST_URI'] ) ) {
-                               // Slurp out the path portion to examine...
-                               $url = $_SERVER['REQUEST_URI'];
-                               if ( !preg_match( '!^https?://!', $url ) ) {
-                                       $url = 'http://unused' . $url;
-                               }
-                               $a = parse_url( $url );
-                               if( $a ) {
-                                       $path = isset( $a['path'] ) ? $a['path'] : '';
-
-                                       global $wgScript;
-                                       if( $path == $wgScript ) {
-                                               // Script inside a rewrite path?
-                                               // Abort to keep from breaking...
-                                               return;
-                                       }
-                                       // Raw PATH_INFO style
-                                       $matches = $this->extractTitle( $path, "$wgScript/$1" );
-
-                                       global $wgArticlePath;
-                                       if( !$matches && $wgArticlePath ) {
-                                               $matches = $this->extractTitle( $path, $wgArticlePath );
-                                       }
-
-                                       global $wgActionPaths;
-                                       if( !$matches && $wgActionPaths ) {
-                                               $matches = $this->extractTitle( $path, $wgActionPaths, 'action' );
-                                       }
+               // bug 16019: title interpolation on API queries is useless and sometimes harmful
+               if ( defined( 'MW_API' ) ) {
+                       return;
+               }
 
-                                       global $wgVariantArticlePath, $wgContLang;
-                                       if( !$matches && $wgVariantArticlePath ) {
-                                               $variantPaths = array();
-                                               foreach( $wgContLang->getVariants() as $variant ) {
-                                                       $variantPaths[$variant] =
-                                                               str_replace( '$2', $variant, $wgVariantArticlePath );
-                                               }
-                                               $matches = $this->extractTitle( $path, $variantPaths, 'variant' );
-                                       }
-                               }
-                       } elseif ( isset( $_SERVER['ORIG_PATH_INFO'] ) && $_SERVER['ORIG_PATH_INFO'] != '' ) {
-                               // Mangled PATH_INFO
-                               // http://bugs.php.net/bug.php?id=31892
-                               // Also reported when ini_get('cgi.fix_pathinfo')==false
-                               $matches['title'] = substr( $_SERVER['ORIG_PATH_INFO'], 1 );
-
-                       } elseif ( isset( $_SERVER['PATH_INFO'] ) && ($_SERVER['PATH_INFO'] != '') ) {
-                               // Regular old PATH_INFO yay
-                               $matches['title'] = substr( $_SERVER['PATH_INFO'], 1 );
-                       }
+               if ( $wgUsePathInfo ) {
+                       $matches = self::getPathInfo( 'title' );
                        foreach( $matches as $key => $val) {
                                $this->data[$key] = $_GET[$key] = $_REQUEST[$key] = $val;
                        }
@@ -131,7 +157,7 @@ class WebRequest {
         *             passed on as the value of this URL parameter
         * @return array of URL variables to interpolate; empty if no match
         */
-       private function extractTitle( $path, $bases, $key=false ) {
+       private static function extractTitle( $path, $bases, $key=false ) {
                foreach( (array)$bases as $keyValue => $base ) {
                        // Find the part after $wgArticlePath
                        $base = str_replace( '$1', '', $base );
@@ -345,7 +371,20 @@ class WebRequest {
         * @return Boolean
         */
        public function getBool( $name, $default = false ) {
-               return $this->getVal( $name, $default ) ? true : false;
+               return (bool)$this->getVal( $name, $default );
+       }
+
+       /**
+        * Fetch a boolean value from the input or return $default if not set.
+        * Unlike getBool, the string "false" (compared case-insensitively) will
+        * result in boolean false, which is useful when interpreting information
+        * sent from JavaScript.
+        *
+        * @param $name String
+        * @param $default Boolean
+        * @return Boolean
+        */
+       public function getFuzzyBool( $name, $default = false ) {
+               if ( !$this->getBool( $name, $default ) ) {
+                       return false;
+               }
+               // getBool() can be true purely because of a truthy $default while the
+               // parameter itself is absent; in that case getVal() may give null, and
+               // null must not be handed to strcasecmp() (deprecated since PHP 8.1).
+               $val = $this->getVal( $name );
+               return $val === null || strcasecmp( $val, 'false' ) !== 0;
         }
 
        /**
@@ -403,6 +442,16 @@ class WebRequest {
                return $retVal;
        }
 
+       /**
+        * Get the values passed in the query string.
+        *
+        * This returns the $_GET superglobal as it currently stands; no
+        * transformation is performed on the values. Note that
+        * interpolateTitle() also writes path-derived values into $_GET, so
+        * those appear here as well once it has run.
+        *
+        * @return Array
+        */
+        public function getQueryValues() {
+               return $_GET;
+        }
+
        /**
         * Returns true if the present request was reached by a POST operation,
         * false otherwise (GET, HEAD, or command-line).
@@ -453,10 +502,12 @@ class WebRequest {
         * @return String
         */
        public function getRequestURL() {
-               if( isset( $_SERVER['REQUEST_URI']) && strlen($_SERVER['REQUEST_URI']) ) {
+               if( isset( $_SERVER['REQUEST_URI'] ) && strlen( $_SERVER['REQUEST_URI'] ) ) {
                        $base = $_SERVER['REQUEST_URI'];
-               } elseif( isset( $_SERVER['SCRIPT_NAME'] ) ) {
+               } elseif ( isset( $_SERVER['HTTP_X_ORIGINAL_URL'] ) && strlen( $_SERVER['HTTP_X_ORIGINAL_URL'] ) ) {
                        // Probably IIS; doesn't set REQUEST_URI
+                       $base = $_SERVER['HTTP_X_ORIGINAL_URL'];
+               } elseif( isset( $_SERVER['SCRIPT_NAME'] ) ) {
                        $base = $_SERVER['SCRIPT_NAME'];
                        if( isset( $_SERVER['QUERY_STRING'] ) && $_SERVER['QUERY_STRING'] != '' ) {
                                $base .= '?' . $_SERVER['QUERY_STRING'];
@@ -464,8 +515,8 @@ class WebRequest {
                } else {
                        // This shouldn't happen!
                        throw new MWException( "Web server doesn't provide either " .
-                               "REQUEST_URI or SCRIPT_NAME. Report details of your " .
-                               "web server configuration to http://bugzilla.wikimedia.org/" );
+                               "REQUEST_URI, HTTP_X_ORIGINAL_URL or SCRIPT_NAME. Report details " .
+                               "of your web server configuration to http://bugzilla.wikimedia.org/" );
                }
                // User-agents should not send a fragment with the URI, but
                // if they do, and the web server passes it on to us, we
@@ -475,7 +526,7 @@ class WebRequest {
                if( $hash !== false ) {
                        $base = substr( $base, 0, $hash );
                }
-               if( $base{0} == '/' ) {
+               if( $base[0] == '/' ) {
                        return $base;
                } else {
                        // We may get paths with a host prepended; strip it.
@@ -500,23 +551,7 @@ class WebRequest {
         * @return String
         */
        public function appendQuery( $query ) {
-               global $wgTitle;
-               $basequery = '';
-               foreach( $_GET as $var => $val ) {
-                       if ( $var == 'title' )
-                               continue;
-                       if ( is_array( $val ) )
-                               /* This will happen given a request like
-                                * http://en.wikipedia.org/w/index.php?title[]=Special:Userlogin&returnto[]=Main_Page
-                                */
-                               continue;
-                       $basequery .= '&' . urlencode( $var ) . '=' . urlencode( $val );
-               }
-               $basequery .= '&' . $query;
-
-               # Trim the extra &
-               $basequery = substr( $basequery, 1 );
-               return $wgTitle->getLocalURL( $basequery );
+               return $this->appendQueryArray( wfCgiToArray( $query ) );
        }
 
        /**
@@ -543,7 +578,7 @@ class WebRequest {
         */
        public function appendQueryArray( $array, $onlyquery = false ) {
                global $wgTitle;
-               $newquery = $_GET;
+               $newquery = $this->getQueryValues();
                unset( $newquery['title'] );
                $newquery = array_merge( $newquery, $array );
                $query = wfArrayToCGI( $newquery );
@@ -563,15 +598,23 @@ class WebRequest {
                global $wgUser;
 
                $limit = $this->getInt( 'limit', 0 );
-               if( $limit < 0 ) $limit = 0;
+               if( $limit < 0 ) {
+                       $limit = 0;
+               }
                if( ( $limit == 0 ) && ( $optionname != '' ) ) {
                        $limit = (int)$wgUser->getOption( $optionname );
                }
-               if( $limit <= 0 ) $limit = $deflimit;
-               if( $limit > 5000 ) $limit = 5000; # We have *some* limits...
+               if( $limit <= 0 ) {
+                       $limit = $deflimit;
+               }
+               if( $limit > 5000 ) {
+                       $limit = 5000; # We have *some* limits...
+               }
 
                $offset = $this->getInt( 'offset', 0 );
-               if( $offset < 0 ) $offset = 0;
+               if( $offset < 0 ) {
+                       $offset = 0;
+               }
 
                return array( $limit, $offset );
        }
@@ -625,10 +668,10 @@ class WebRequest {
                $file = new WebRequestUpload( $this, $key );
                return $file->getName();
        }
-       
+
        /**
         * Return a WebRequestUpload object corresponding to the key
-        * 
+        *
         * @param $key string
         * @return WebRequestUpload
         */
@@ -639,14 +682,51 @@ class WebRequest {
        /**
         * Return a handle to WebResponse style object, for setting cookies,
         * headers and other stuff, for Request being worked on.
+        *
+        * @return WebResponse
         */
        public function response() {
                /* Lazy initialization of response object for this request */
-               if ( !is_object( $this->_response ) ) {
+               if ( !is_object( $this->response ) ) {
                        $class = ( $this instanceof FauxRequest ) ? 'FauxResponse' : 'WebResponse';
-                       $this->_response = new $class();
+                       $this->response = new $class();
+               }
+               return $this->response;
+       }
+
+       /**
+        * Initialise the header list
+        *
+        * Fills $this->headers once; later calls return immediately as long as
+        * at least one header was collected. Header names are stored in upper
+        * case: taken from apache_request_headers() when that function exists,
+        * otherwise rebuilt from the HTTP_* entries of $_SERVER (with '_' turned
+        * back into '-') plus CONTENT_LENGTH.
+        */
+       private function initHeaders() {
+               if ( count( $this->headers ) ) {
+                       return;
+               }
+
+               if ( function_exists( 'apache_request_headers' ) ) {
+                       foreach ( apache_request_headers() as $tempName => $tempValue ) {
+                               $this->headers[ strtoupper( $tempName ) ] = $tempValue;
+                       }
+               } else {
+                       foreach ( $_SERVER as $name => $value ) {
+                               if ( substr( $name, 0, 5 ) === 'HTTP_' ) {
+                                       // Strip the 'HTTP_' prefix and restore hyphens
+                                       $name = str_replace( '_', '-', substr( $name, 5 ) );
+                                       $this->headers[$name] = $value;
+                               } elseif ( $name === 'CONTENT_LENGTH' ) {
+                                       // Content-Length is exposed without the HTTP_ prefix
+                                       $this->headers['CONTENT-LENGTH'] = $value;
+                               }
+                       }
                 }
-               return $this->_response;
+       }
+
+       /**
+        * Get an array containing all request headers
+        *
+        * The header list is initialised on first use via initHeaders(), which
+        * stores header names in upper case (e.g. 'CONTENT-LENGTH').
+        *
+        * @return Array mapping header name to its value
+        */
+       public function getAllHeaders() {
+               $this->initHeaders();
+               return $this->headers;
        }
 
        /**
@@ -654,28 +734,12 @@ class WebRequest {
         * @param $name String: case-insensitive header name
         */
        public function getHeader( $name ) {
+               $this->initHeaders();
                $name = strtoupper( $name );
-               if ( function_exists( 'apache_request_headers' ) ) {
-                       if ( !$this->headers ) {
-                               foreach ( apache_request_headers() as $tempName => $tempValue ) {
-                                       $this->headers[ strtoupper( $tempName ) ] = $tempValue;
-                               }
-                       }
-                       if ( isset( $this->headers[$name] ) ) {
-                               return $this->headers[$name];
-                       } else {
-                               return false;
-                       }
+               if ( isset( $this->headers[$name] ) ) {
+                       return $this->headers[$name];
                } else {
-                       $name = 'HTTP_' . str_replace( '-', '_', $name );
-                       if ( $name === 'HTTP_CONTENT_LENGTH' && !isset( $_SERVER[$name] ) ) {
-                               $name = 'CONTENT_LENGTH';
-                       }
-                       if ( isset( $_SERVER[$name] ) ) {
-                               return $_SERVER[$name];
-                       } else {
-                               return false;
-                       }
+                       return false;
                }
        }
 
@@ -686,8 +750,9 @@ class WebRequest {
         * @return Mixed
         */
        public function getSessionData( $key ) {
-               if( !isset( $_SESSION[$key] ) )
+               if( !isset( $_SESSION[$key] ) ) {
                        return null;
+               }
                return $_SESSION[$key];
        }
 
@@ -712,10 +777,27 @@ class WebRequest {
         * but only by prefixing it with the script name and maybe some other stuff,
         * the extension is not mangled. So this should be a reasonably portable
         * way to perform this security check.
+        *
+        * Also checks for anything that looks like a file extension at the end of
+        * QUERY_STRING, since IE 6 and earlier will use this to get the file type
+        * if there was no dot before the question mark (bug 28235).
         */
        public function isPathInfoBad() {
                global $wgScriptExtension;
 
+               if ( isset( $_SERVER['QUERY_STRING'] ) 
+                       && preg_match( '/\.[a-z0-9]{1,4}(#|\?|$)/i', $_SERVER['QUERY_STRING'] ) )
+               {
+                       // Bug 28235
+                       // Block only Internet Explorer, and requests with missing UA 
+                       // headers that could be IE users behind a privacy proxy.
+                       if ( !isset( $_SERVER['HTTP_USER_AGENT'] ) 
+                               || preg_match( '/; *MSIE/', $_SERVER['HTTP_USER_AGENT'] ) )
+                       {
+                               return true;
+                       }
+               }
+
                if ( !isset( $_SERVER['PATH_INFO'] ) ) {
                        return false;
                }
@@ -727,7 +809,7 @@ class WebRequest {
                $ext = substr( $pi, $dotPos );
                return !in_array( $ext, array( $wgScriptExtension, '.php', '.php5' ) );
        }
-       
+
        /**
         * Parse the Accept-Language header sent by the client into an array
         * @return array( languageCode => q-value ) sorted by q-value in descending order
@@ -740,15 +822,15 @@ class WebRequest {
                if ( !$acceptLang ) {
                        return array();
                }
-               
+
                // Return the language codes in lower case
                $acceptLang = strtolower( $acceptLang );
-               
+
                // Break up string into pieces (languages and q factors)
                $lang_parse = null;
                preg_match_all( '/([a-z]{1,8}(-[a-z]{1,8})?|\*)\s*(;\s*q\s*=\s*(1|0(\.[0-9]+)?)?)?/',
                        $acceptLang, $lang_parse );
-               
+
                if ( !count( $lang_parse[1] ) ) {
                        return array();
                }
@@ -777,10 +859,10 @@ class WebRequestUpload {
        protected $request;
        protected $doesExist;
        protected $fileInfo;
-       
+
        /**
         * Constructor. Should only be called by WebRequest
-        * 
+        *
         * @param $request WebRequest The associated request
         * @param $key string Key in $_FILES array (name of form field)
         */
@@ -791,26 +873,26 @@ class WebRequestUpload {
                        $this->fileInfo = $_FILES[$key];
                }
        }
-       
+
        /**
         * Return whether a file with this name was uploaded.
-        * 
+        *
         * @return bool
         */
        public function exists() {
                return $this->doesExist;
        }
-       
+
        /**
         * Return the original filename of the uploaded file
-        * 
+        *
         * @return mixed Filename or null if non-existent
         */
        public function getName() {
                if ( !$this->exists() ) {
                        return null;
                }
-               
+
                global $wgContLang;
                $name = $this->fileInfo['name'];
 
@@ -821,51 +903,51 @@ class WebRequestUpload {
                wfDebug( __METHOD__ . ": {$this->fileInfo['name']} normalized to '$name'\n" );
                return $name;
        }
-       
+
        /**
         * Return the file size of the uploaded file
-        * 
+        *
         * @return int File size or zero if non-existent
         */
        public function getSize() {
                if ( !$this->exists() ) {
                        return 0;
                }
-               
+
                return $this->fileInfo['size'];
        }
-       
+
        /**
         * Return the path to the temporary file
-        * 
+        *
         * @return mixed Path or null if non-existent
         */
        public function getTempName() {
                if ( !$this->exists() ) {
                        return null;
                }
-               
+
                return $this->fileInfo['tmp_name'];
        }
-       
+
        /**
         * Return the upload error. See link for explanation
         * http://www.php.net/manual/en/features.file-upload.errors.php
-        * 
+        *
         * @return int One of the UPLOAD_ constants, 0 if non-existent
         */
        public function getError() {
                if ( !$this->exists() ) {
                        return 0; # UPLOAD_ERR_OK
                }
-               
+
                return $this->fileInfo['error'];
        }
-       
+
        /**
         * Returns whether this upload failed because of overflow of a maximum set
         * in php.ini
-        * 
+        *
         * @return bool
         */
        public function isIniSizeOverflow() {
@@ -879,7 +961,7 @@ class WebRequestUpload {
                        # post_max_size is exceeded
                        return true;
                }
-               
+
                return false;
        }
 }
@@ -923,6 +1005,14 @@ class FauxRequest extends WebRequest {
                return $this->data;
        }
 
+       /**
+        * Get the values that would have been passed in the query string.
+        *
+        * @return Array The request data, or an empty array when this faux
+        *   request is flagged as posted.
+        */
+       public function getQueryValues() {
+               return $this->wasPosted ? array() : $this->data;
+       }
+
        public function wasPosted() {
                return $this->wasPosted;
        }
@@ -935,10 +1025,6 @@ class FauxRequest extends WebRequest {
                $this->notImplemented( __METHOD__ );
        }
 
-       public function appendQuery( $query ) {
-               $this->notImplemented( __METHOD__ );
-       }
-
        public function getHeader( $name ) {
                return isset( $this->headers[$name] ) ? $this->headers[$name] : false;
        }
@@ -956,6 +1042,10 @@ class FauxRequest extends WebRequest {
                $this->session[$key] = $data;
        }
 
+       /**
+        * Get the whole session array held by this faux request, i.e. the
+        * same $this->session store that setSessionData() writes into.
+        *
+        * @return Mixed The contents of $this->session
+        */
+       public function getSessionArray() {
+               return $this->session;
+       }
+
        public function isPathInfoBad() {
                return false;
        }