'scheme', 'password' => 'pass' );

	/**
	 * parse_url() work-alike, but non-broken.  Differences:
	 *
	 * 1) Does not raise warnings on bad URLs (just returns false)
	 * 2) Handles protocols that don't use :// (e.g., mailto: and news: , as well as
	 *    protocol-relative URLs) correctly
	 * 3) Adds a "delimiter" element to the array, either '://', ':' or '//' (see (2))
	 *
	 * Only schemes listed in $wgUrlProtocols (as "scheme://" or "scheme:") are accepted;
	 * any other detected scheme makes this method return false.
	 *
	 * @param $url String: a URL to parse
	 * @return Array|bool: bits of the URL in an associative array, per PHP docs,
	 *   or false if the URL could not be parsed or uses an unknown scheme
	 */
	protected static function parseUri( $url ) {
		global $wgUrlProtocols; // Allow all protocols defined in DefaultSettings/LocalSettings.php

		// Protocol-relative URLs are handled really badly by parse_url(). It's so bad that the easiest
		// way to handle them is to just prepend 'http:' and strip the protocol out later
		$wasRelative = substr( $url, 0, 2 ) === '//';
		if ( $wasRelative ) {
			$url = "http:$url";
		}

		wfSuppressWarnings();
		$bits = parse_url( $url );
		wfRestoreWarnings();

		// parse_url() returns an array without scheme for some invalid URLs, e.g.
		// parse_url("%0Ahttp://example.com") == array( 'host' => '%0Ahttp', 'path' => 'example.com' )
		// The parentheses only make the existing operator precedence explicit.
		if ( !$bits || ( !isset( $bits['scheme'] ) && strpos( $url, "://" ) !== false ) ) {
			wfDebug( __METHOD__ . ": Invalid URL: $url" );
			return false;
		}
		$scheme = isset( $bits['scheme'] ) ? $bits['scheme'] : null;

		// most of the protocols are followed by ://, but mailto: and sometimes news: not, check for it
		if ( in_array( $scheme . '://', $wgUrlProtocols ) ) {
			$bits['delimiter'] = '://';
		} elseif ( !is_null( $scheme ) ) {
			if ( !in_array( $scheme . ':', $wgUrlProtocols ) ) {
				// NOTE: this also rejects scheme-less "user:password@host" input, for which
				// parse_url() reports the user as the scheme.
				wfDebug( __METHOD__ . ": Invalid scheme in URL: $scheme" );
				return false;
			}

			$bits['delimiter'] = ':';
			// parse_url detects for news: and mailto: the host part of an url as path
			// We have to correct this wrong detection
			if ( isset( $bits['path'] ) ) {
				$bits['host'] = $bits['path'];
				$bits['path'] = '';
			}
		}

		/* Provide an empty host for eg. file:/// urls (see bug 28627) */
		if ( !isset( $bits['host'] ) && $scheme === 'file' ) {
			$bits['host'] = '';

			/* parse_url loses the third / for file:///c:/ urls (but not on variants) */
			if ( isset( $bits['path'] ) && substr( $bits['path'], 0, 1 ) !== '/' ) {
				$bits['path'] = '/' . $bits['path'];
			}
		}

		// If the URL was protocol-relative, fix scheme and delimiter
		if ( $wasRelative ) {
			$bits['scheme'] = '';
			$bits['delimiter'] = '//';
		}

		return $bits;
	}

	/**
	 * Create a Uri from a string, an array of components or another Uri.
	 *
	 * If a string is given and it cannot be parsed, the object is left with
	 * no components set.
	 *
	 * @param $uri mixed URI string or array
	 */
	public function __construct( $uri ) {
		$this->components = array();
		$this->setUri( $uri );
	}

	/**
	 * Set the Uri to the value of some other URI.
	 *
	 * @param $uri mixed URI string, array of components, or Uri object
	 * @return bool|null false if a string was given and could not be parsed;
	 *   nothing (null) otherwise
	 * @throws MWException if $uri is neither a string, an array nor a Uri
	 */
	public function setUri( $uri ) {
		if ( is_string( $uri ) ) {
			$parsed = self::parseUri( $uri );
			if ( $parsed === false ) {
				return false;
			}
			$this->setComponents( $parsed );
		} elseif ( is_array( $uri ) ) {
			$this->setComponents( $uri );
		} elseif ( $uri instanceof Uri ) {
			$this->setComponents( $uri->getComponents() );
		} else {
			// Describe the type rather than interpolating the value itself: an object
			// without __toString() cannot be converted to a string, which would raise
			// an unrelated error instead of the exception below.
			$type = is_object( $uri ) ? get_class( $uri ) : gettype( $uri );
			throw new MWException( __METHOD__ . ": $type is not of a valid type." );
		}
	}

	/**
	 * Set the components of this Uri, replacing any previously set components.
	 * Aliases are converted to their canonical names (and logged via wfDebug).
	 *
	 * @param $components Array The components to set on this Uri.
	 * @throws MWException if a key is neither a valid component nor an alias
	 */
	public function setComponents( array $components ) {
		foreach ( $components as $name => $value ) {
			if ( isset( self::$componentAliases[$name] ) ) {
				$canonical = self::$componentAliases[$name];
				wfDebug( __METHOD__ . ": Converting alias $name to canonical $canonical." );
				$components[$canonical] = $value;
				unset( $components[$name] );
			} elseif ( !in_array( $name, self::$validComponents, true ) ) {
				// Strict comparison: with a loose in_array(), an integer key such as 0
				// compares equal to any non-numeric string on PHP < 8 and would pass.
				throw new MWException( __METHOD__ . ": $name is not a valid component." );
			}
		}

		$this->components = $components;
	}

	/**
	 * Return the components for this Uri
	 * @return Array
	 */
	public function getComponents() {
		return $this->components;
	}

	/**
	 * Return the value of a specific component
	 *
	 * @param $name string The name (or alias) of the component to return
	 * @return mixed|null The component's value, or null if it is unset or empty
	 *   (null, '', false or an empty array). Values such as "0" are returned as-is.
	 * @throws MWException if $name is not a valid component
	 */
	public function getComponent( $name ) {
		if ( isset( self::$componentAliases[$name] ) ) {
			// Component is an alias. Get the actual name.
			$alias = $name;
			$name = self::$componentAliases[$name];
			wfDebug( __METHOD__ . ": Converting alias $alias to canonical $name." );
		}

		if ( !in_array( $name, self::$validComponents, true ) ) {
			// Component is invalid
			throw new MWException( __METHOD__ . ": $name is not a valid component." );
		}

		$value = isset( $this->components[$name] ) ? $this->components[$name] : null;
		// Deliberately not empty(): that would also discard legitimate values
		// such as a "0" query string, a "0" path or port 0.
		if ( $value === null || $value === '' || $value === false || $value === array() ) {
			// Component is empty
			return null;
		}

		// Component is valid and has a value.
		return $value;
	}

	/**
	 * Set a component for this Uri
	 * @param $name string The name (or alias) of the component to set
	 * @param $value string|null The value to set
	 * @throws MWException if $name is not a valid component
	 */
	public function setComponent( $name, $value ) {
		if ( isset( self::$componentAliases[$name] ) ) {
			$alias = $name;
			$name = self::$componentAliases[$name];
			wfDebug( __METHOD__ . ": Converting alias $alias to canonical $name." );
		} elseif ( !in_array( $name, self::$validComponents, true ) ) {
			throw new MWException( __METHOD__ . ": $name is not a valid component." );
		}
		$this->components[$name] = $value;
	}

	// Convenience getters: each returns the named component, or null when it is empty.
	public function getProtocol() { return $this->getComponent( 'scheme' ); }
	public function getUser() { return $this->getComponent( 'user' ); }
	public function getPassword() { return $this->getComponent( 'pass' ); }
	public function getHost() { return $this->getComponent( 'host' ); }
	public function getPort() { return $this->getComponent( 'port' ); }
	public function getPath() { return $this->getComponent( 'path' ); }
	public function getQueryString() { return $this->getComponent( 'query' ); }
	public function getFragment() { return $this->getComponent( 'fragment' ); }

	// Convenience setters: each stores the given value under the named component.
	public function setProtocol( $scheme ) { $this->setComponent( 'scheme', $scheme ); }
	public function setUser( $user ) { $this->setComponent( 'user', $user ); }
	public function setPassword( $pass ) { $this->setComponent( 'pass', $pass ); }
	public function setHost( $host ) { $this->setComponent( 'host', $host ); }
	public function setPort( $port ) { $this->setComponent( 'port', $port ); }
	public function setPath( $path ) { $this->setComponent( 'path', $path ); }
	public function setFragment( $fragment ) { $this->setComponent( 'fragment', $fragment ); }

	/**
	 * Gets the protocol-authority delimiter of a URI (e.g. ://, : or //).
	 * @return string|null The stored delimiter if there is one; otherwise '://'
	 *   when both a protocol and an authority are present; otherwise null
	 */
	public function getDelimiter() {
		$delimiter = $this->getComponent( 'delimiter' );
		if ( $delimiter !== null ) {
			// A specific delimiter is set, so return it.
			return $delimiter;
		}

		if ( $this->getAuthority() !== null && $this->getProtocol() !== null ) {
			// If the URI has a protocol and a body (i.e., some sort of host, etc.)
			// the default delimiter is "://", e.g., "http://test.com".
			return '://';
		}

		return null;
	}

	/**
	 * Gets query portion of a URI in array format.
	 * @return Array The query string as converted by wfCgiToArray()
	 */
	public function getQuery() {
		return wfCgiToArray( $this->getQueryString() );
	}

	/**
	 * Sets query portion of a URI.
	 * @param string|array $query Query string, or an array that is converted
	 *   to a query string with wfArrayToCGI()
	 */
	public function setQuery( $query ) {
		if ( is_array( $query ) ) {
			$query = wfArrayToCGI( $query );
		}
		$this->setComponent( 'query', $query );
	}

	/**
	 * Extend the query -- supply query parameters to override or add to ours
	 * @param Array|string $parameters query parameters to override or add
	 * @return Uri this URI object
	 */
	public function extendQuery( $parameters ) {
		if ( !is_array( $parameters ) ) {
			$parameters = wfCgiToArray( $parameters );
		}

		$query = $this->getQuery();
		foreach ( $parameters as $key => $value ) {
			$query[$key] = $value;
		}

		$this->setQuery( $query );
		return $this;
	}

	/**
	 * Returns user and password portion of a URI.
	 * @return string|null "user:pass" if a password is set, otherwise the user (or null)
	 */
	public function getUserInfo() {
		$user = $this->getComponent( 'user' );
		$pass = $this->getComponent( 'pass' );
		return $pass !== null ? "$user:$pass" : $user;
	}

	/**
	 * Gets host and port portion of a URI.
	 * @return string|null "host:port" if a port is set, otherwise the host (or null)
	 */
	public function getHostPort() {
		$host = $this->getComponent( 'host' );
		$port = $this->getComponent( 'port' );
		return $port !== null ? "$host:$port" : $host;
	}

	/**
	 * Returns the userInfo and host and port portion of the URI.
	 * In most real-world URLs, this is simply the hostname, but it is more general.
	 * @return string|null
	 */
	public function getAuthority() {
		$userinfo = $this->getUserInfo();
		$hostinfo = $this->getHostPort();
		return $userinfo !== null ? "$userinfo@$hostinfo" : $hostinfo;
	}

	/**
	 * Returns everything after the authority section of the URI
	 * (path, then "?query" and "#fragment" when those are set).
	 * @return String|null null when path, query and fragment are all empty
	 */
	public function getRelativePath() {
		$path = $this->getComponent( 'path' );
		$query = $this->getComponent( 'query' );
		$fragment = $this->getComponent( 'fragment' );

		$retval = $path;
		if ( $query !== null ) {
			$retval .= "?$query";
		}
		if ( $fragment !== null ) {
			$retval .= "#$fragment";
		}
		return $retval;
	}

	/**
	 * Gets the entire URI string. May not be precisely the same as input due to order of query arguments.
	 * @return String the URI string
	 */
	public function toString() {
		return $this->getComponent( 'scheme' )
			. $this->getDelimiter()
			. $this->getAuthority()
			. $this->getRelativePath();
	}

	/**
	 * Gets the entire URI string. May not be precisely the same as input due to order of query arguments.
	 * @return String the URI string
	 */
	public function __toString() {
		return $this->toString();
	}
}