* Perform an HTTP request
*
* @param $method String: HTTP method. Usually GET/POST
- * @param $url String: full URL to act on
+ * @param $url String: full URL to act on. If protocol-relative, will be expanded to an http:// URL
* @param $options Array: options to pass to MWHttpRequest object.
* Possible keys for the array:
* - timeout Timeout length in seconds
* @return Mixed: (bool)false on failure or a string on success
*/
public static function request( $method, $url, $options = array() ) {
- $url = wfExpandUrl( $url );
wfDebug( "HTTP: $method: $url\n" );
$options['method'] = strtoupper( $method );
/**
* Simple wrapper for Http::request( 'GET' )
* @see Http::request()
+ *
+ * @return string
*/
public static function get( $url, $timeout = 'default', $options = array() ) {
$options['timeout'] = $timeout;
/**
* Simple wrapper for Http::request( 'POST' )
* @see Http::request()
+ *
+ * @param $url String: full URL to act on (passed through to Http::request())
+ * @param $options Array: options to pass on to Http::request()
+ * @return Mixed: (bool)false on failure or a string on success, as returned by Http::request()
*/
public static function post( $url, $options = array() ) {
return Http::request( 'POST', $url, $options );
}
/**
- * Checks that the given URI is a valid one
+ * Checks that the given URI looks like an absolute http:// or https:// URL
+ * with no whitespace in it. The protocols are hardcoded, because we only
+ * want protocols that both cURL and PHP support.
+ *
+ * @fixme this is wildly inaccurate and fails to actually check most stuff
*
* @param $uri Mixed: URI to check for validity
* @returns Boolean
*/
public static function isValidURI( $uri ) {
- return preg_match(
- '/(ftp|http|https):\/\/(\w+:{0,1}\w*@)?(\S+)(:[0-9]+)?(\/|\/([\w#!:.?+=&%@!\-\/]))?/',
- $uri,
- $matches
+ // preg_match() returns 1, 0 or false; cast so callers get the documented Boolean
+ return (bool)preg_match(
+ '/^https?:\/\/[^\/\s]\S*$/D',
+ $uri
);
}
}
* This wrapper class will call out to curl (if available) or fallback
* to regular PHP if necessary for handling internal HTTP requests.
*
- * Renamed from HttpRequest to MWHttpRequst to avoid conflict with
- * php's HTTP extension.
+ * Renamed from HttpRequest to MWHttpRequest to avoid conflict with
+ * PHP's HTTP extension.
*/
class MWHttpRequest {
const SUPPORTS_FILE_POSTS = false;
-
+
protected $content;
protected $timeout = 'default';
protected $headersOnly = null;
protected $maxRedirects = 5;
protected $followRedirects = false;
+ /**
+ * @var CookieJar
+ */
protected $cookieJar;
protected $headerList = array();
public $status;
/**
- * @param $url String: url to use
+ * @param $url String: url to use. If protocol-relative, will be expanded to an http:// URL
* @param $options Array: (optional) extra params to pass (see Http::request())
*/
function __construct( $url, $options = array() ) {
global $wgHTTPTimeout;
- $this->url = $url;
- $this->parsedUrl = parse_url( $url );
+ $this->url = wfExpandUrl( $url, PROTO_HTTP );
+ $this->parsedUrl = wfParseUrl( $this->url );
- if ( !Http::isValidURI( $this->url ) ) {
+ if ( !$this->parsedUrl || !Http::isValidURI( $this->url ) ) {
$this->status = Status::newFatal( 'http-invalid-url' );
} else {
$this->status = Status::newGood( 100 ); // continue
}
}
+ /**
+ * Report whether any kind of HTTP request can be made at all: either
+ * curl_init() is defined (cURL backend) or wfIniGetBool() reports that
+ * the 'allow_url_fopen' ini setting is on (fopen() backend).
+ *
+ * @return bool
+ */
+ public static function canMakeRequests() {
+ // cURL available: no need to look at the ini setting
+ if ( function_exists( 'curl_init' ) ) {
+ return true;
+ }
+ return (bool)wfIniGetBool( 'allow_url_fopen' );
+ }
+
/**
* Generate a new request object
- * @param $url String: url to use
+ * @param $url String: url to use
* @param $options Array: (optional) extra params to pass (see Http::request())
* @see MWHttpRequest::__construct
*/
}
/**
- * Set the callback
+ * Set a read callback to accept data read from the HTTP request.
+ * By default, data is appended to an internal buffer which can be
+ * retrieved through $req->getContent().
+ *
+ * To handle data as it comes in -- especially for large files that
+ * would not fit in memory -- you can instead set your own callback,
+ * in the form function($resource, $buffer) where the first parameter
+ * is the low-level resource being read (implementation specific),
+ * and the second parameter is the data buffer.
+ *
+ * You MUST return the number of bytes handled in the buffer; if fewer
+ * bytes are reported handled than were passed to you, the HTTP fetch
+ * will be aborted.
*
* @param $callback Callback
+ * @throws MWException if $callback is not callable
*/
public function setCallback( $callback ) {
+ if ( !is_callable( $callback ) ) {
+ throw new MWException( 'Invalid MwHttpRequest callback' );
+ }
$this->callback = $callback;
}
}
if ( is_object( $wgTitle ) && !isset( $this->reqHeaders['Referer'] ) ) {
- $this->setReferer( $wgTitle->getFullURL() );
+ $this->setReferer( wfExpandUrl( $wgTitle->getFullURL(), PROTO_CURRENT ) );
}
if ( !$this->noProxy ) {
/**
* Returns the final URL after all redirections.
*
- * @return String
- */
- public function getFinalUrl() {
- $location = $this->getResponseHeader( "Location" );
-
- if ( $location ) {
- return $location;
- }
-
- return $this->url;
- }
-
- /**
- * Returns true if the backend can follow redirects. Overridden by the
- * child classes.
- */
- public function canFollowRedirects() {
- return true;
- }
-}
-
-
-class Cookie {
- protected $name;
- protected $value;
- protected $expires;
- protected $path;
- protected $domain;
- protected $isSessionKey = true;
- // TO IMPLEMENT protected $secure
- // TO IMPLEMENT? protected $maxAge (add onto expires)
- // TO IMPLEMENT? protected $version
- // TO IMPLEMENT? protected $comment
-
- function __construct( $name, $value, $attr ) {
- $this->name = $name;
- $this->set( $value, $attr );
- }
-
- /**
- * Sets a cookie. Used before a request to set up any individual
- * cookies. Used internally after a request to parse the
- * Set-Cookie headers.
- *
- * @param $value String: the value of the cookie
- * @param $attr Array: possible key/values:
- * expires A date string
- * path The path this cookie is used on
- * domain Domain this cookie is used on
- */
- public function set( $value, $attr ) {
- $this->value = $value;
-
- if ( isset( $attr['expires'] ) ) {
- $this->isSessionKey = false;
- $this->expires = strtotime( $attr['expires'] );
- }
-
- if ( isset( $attr['path'] ) ) {
- $this->path = $attr['path'];
- } else {
- $this->path = "/";
- }
-
- if ( isset( $attr['domain'] ) ) {
- if ( self::validateCookieDomain( $attr['domain'] ) ) {
- $this->domain = $attr['domain'];
- }
- } else {
- throw new MWException( "You must specify a domain." );
- }
- }
-
- /**
- * Return the true if the cookie is valid is valid. Otherwise,
- * false. The uses a method similar to IE cookie security
- * described here:
- * http://kuza55.blogspot.com/2008/02/understanding-cookie-security.html
- * A better method might be to use a blacklist like
- * http://publicsuffix.org/
+ * Relative values of the "Location" header are invalid according to the RFC; however, they do occur in practice and modern browsers support them.
+ * This function loops backwards through all Location headers in order to build the proper absolute URI - Marooned at wikia-inc.com
*
- * @param $domain String: the domain to validate
- * @param $originDomain String: (optional) the domain the cookie originates from
- * @return Boolean
+ * @returns string
*/
- public static function validateCookieDomain( $domain, $originDomain = null ) {
- // Don't allow a trailing dot
- if ( substr( $domain, -1 ) == "." ) {
- return false;
- }
-
- $dc = explode( ".", $domain );
-
- // Only allow full, valid IP addresses
- if ( preg_match( '/^[0-9.]+$/', $domain ) ) {
- if ( count( $dc ) != 4 ) {
- return false;
- }
-
- if ( ip2long( $domain ) === false ) {
- return false;
- }
-
- if ( $originDomain == null || $originDomain == $domain ) {
- return true;
- }
-
- }
+ public function getFinalUrl() {
+ $headers = $this->getResponseHeaders();
- // Don't allow cookies for "co.uk" or "gov.uk", etc, but allow "supermarket.uk"
- if ( strrpos( $domain, "." ) - strlen( $domain ) == -3 ) {
- if ( ( count( $dc ) == 2 && strlen( $dc[0] ) <= 2 )
- || ( count( $dc ) == 3 && strlen( $dc[0] ) == "" && strlen( $dc[1] ) <= 2 ) ) {
- return false;
- }
- if ( ( count( $dc ) == 2 || ( count( $dc ) == 3 && $dc[0] == "" ) )
- && preg_match( '/(com|net|org|gov|edu)\...$/', $domain ) ) {
- return false;
- }
- }
+ //return full url (fix for incorrect but handled relative location)
+ if ( isset( $headers[ 'location' ] ) ) {
+ $locations = $headers[ 'location' ];
+ $domain = '';
+ $foundRelativeURI = false;
+ $countLocations = count($locations);
- if ( $originDomain != null ) {
- if ( substr( $domain, 0, 1 ) != "." && $domain != $originDomain ) {
- return false;
- }
+ // Walk from the last Location header backwards until one names a host
+ for ( $i = $countLocations - 1; $i >= 0; $i-- ) {
+ $url = parse_url( $locations[ $i ] );
- if ( substr( $domain, 0, 1 ) == "."
- && substr_compare( $originDomain, $domain, -strlen( $domain ),
- strlen( $domain ), TRUE ) != 0 ) {
- return false;
+ if ( isset($url[ 'host' ]) ) {
+ // parse_url() omits 'scheme' for protocol-relative locations (//host/path);
+ // keep the prefix protocol-relative instead of reading an undefined index
+ $domain = ( isset( $url[ 'scheme' ] ) ? $url[ 'scheme' ] . ':' : '' ) . '//' . $url[ 'host' ];
+ // keep an explicit port, otherwise the result points at a different origin
+ if ( isset( $url[ 'port' ] ) ) {
+ $domain .= ':' . $url[ 'port' ];
+ }
+ break; //found correct URI (with host)
+ } else {
+ $foundRelativeURI = true;
+ }
}
- }
-
- return true;
- }
-
- /**
- * Serialize the cookie jar into a format useful for HTTP Request headers.
- *
- * @param $path String: the path that will be used. Required.
- * @param $domain String: the domain that will be used. Required.
- * @return String
- */
- public function serializeToHttpRequest( $path, $domain ) {
- $ret = "";
-
- if ( $this->canServeDomain( $domain )
- && $this->canServePath( $path )
- && $this->isUnExpired() ) {
- $ret = $this->name . "=" . $this->value;
- }
-
- return $ret;
- }
-
- protected function canServeDomain( $domain ) {
- if ( $domain == $this->domain
- || ( strlen( $domain ) > strlen( $this->domain )
- && substr( $this->domain, 0, 1 ) == "."
- && substr_compare( $domain, $this->domain, -strlen( $this->domain ),
- strlen( $this->domain ), TRUE ) == 0 ) ) {
- return true;
- }
-
- return false;
- }
-
- protected function canServePath( $path ) {
- if ( $this->path && substr_compare( $this->path, $path, 0, strlen( $this->path ) ) == 0 ) {
- return true;
- }
-
- return false;
- }
-
- protected function isUnExpired() {
- if ( $this->isSessionKey || $this->expires > time() ) {
- return true;
- }
-
- return false;
- }
-}
-
-class CookieJar {
- private $cookie = array();
-
- /**
- * Set a cookie in the cookie jar. Make sure only one cookie per-name exists.
- * @see Cookie::set()
- */
- public function setCookie ( $name, $value, $attr ) {
- /* cookies: case insensitive, so this should work.
- * We'll still send the cookies back in the same case we got them, though.
- */
- $index = strtoupper( $name );
-
- if ( isset( $this->cookie[$index] ) ) {
- $this->cookie[$index]->set( $value, $attr );
- } else {
- $this->cookie[$index] = new Cookie( $name, $value, $attr );
- }
- }
- /**
- * @see Cookie::serializeToHttpRequest
- */
- public function serializeToHttpRequest( $path, $domain ) {
- $cookies = array();
-
- foreach ( $this->cookie as $c ) {
- $serialized = $c->serializeToHttpRequest( $path, $domain );
-
- if ( $serialized ) {
- $cookies[] = $serialized;
+ if ( $foundRelativeURI ) {
+ // NOTE(review): the last location is appended as-is, so a relative value
+ // without a leading "/" is not resolved against the previous path
+ if ( $domain ) {
+ return $domain . $locations[ $countLocations - 1 ];
+ } else {
+ // No Location header named a host: fall back to the requested URL's origin
+ $url = parse_url( $this->url );
+ if ( isset($url[ 'host' ]) ) {
+ $origin = ( isset( $url[ 'scheme' ] ) ? $url[ 'scheme' ] . ':' : '' ) . '//' . $url[ 'host' ];
+ if ( isset( $url[ 'port' ] ) ) {
+ $origin .= ':' . $url[ 'port' ];
+ }
+ return $origin . $locations[ $countLocations - 1 ];
+ }
+ }
+ } else {
+ return $locations[ $countLocations - 1 ];
}
}
- return implode( "; ", $cookies );
+ // No usable Location header: the final URL is the one that was requested
+ return $this->url;
}
/**
- * Parse the content of an Set-Cookie HTTP Response header.
- *
- * @param $cookie String
- * @param $domain String: cookie's domain
+ * Returns true if the backend can follow redirects. Overridden by the
+ * child classes.
*/
- public function parseCookieResponseHeader ( $cookie, $domain ) {
- $len = strlen( "Set-Cookie:" );
-
- if ( substr_compare( "Set-Cookie:", $cookie, 0, $len, TRUE ) === 0 ) {
- $cookie = substr( $cookie, $len );
- }
-
- $bit = array_map( 'trim', explode( ";", $cookie ) );
-
- if ( count( $bit ) >= 1 ) {
- list( $name, $value ) = explode( "=", array_shift( $bit ), 2 );
- $attr = array();
-
- foreach ( $bit as $piece ) {
- $parts = explode( "=", $piece );
- if ( count( $parts ) > 1 ) {
- $attr[strtolower( $parts[0] )] = $parts[1];
- } else {
- $attr[strtolower( $parts[0] )] = true;
- }
- }
-
- if ( !isset( $attr['domain'] ) ) {
- $attr['domain'] = $domain;
- } elseif ( !Cookie::validateCookieDomain( $attr['domain'], $domain ) ) {
- return null;
- }
-
- $this->setCookie( $name, $value, $attr );
- }
+ public function canFollowRedirects() {
+ // Base implementation always reports support
+ return true;
}
}
*/
class CurlHttpRequest extends MWHttpRequest {
const SUPPORTS_FILE_POSTS = true;
-
+
static $curlMessageMap = array(
6 => 'http-host-unreachable',
28 => 'http-timed-out'
if ( is_array( $this->postData ) ) {
$this->postData = wfArrayToCGI( $this->postData );
- }
-
- // At least on Centos 4.8 with PHP 5.1.6, using max_redirects to follow redirects
- // causes a segfault
- $manuallyRedirect = version_compare( phpversion(), '5.1.7', '<' );
+ }
- if ( $this->parsedUrl['scheme'] != 'http' ) {
+ if ( $this->parsedUrl['scheme'] != 'http' &&
+ $this->parsedUrl['scheme'] != 'https' ) {
$this->status->fatal( 'http-invalid-scheme', $this->parsedUrl['scheme'] );
}
$options['request_fulluri'] = true;
}
- if ( !$this->followRedirects || $manuallyRedirect ) {
+ if ( !$this->followRedirects ) {
$options['max_redirects'] = 0;
} else {
$options['max_redirects'] = $this->maxRedirects;
$options['content'] = $this->postData;
}
- $oldTimeout = false;
- if ( version_compare( '5.2.1', phpversion(), '>' ) ) {
- $oldTimeout = ini_set( 'default_socket_timeout', $this->timeout );
- } else {
- $options['timeout'] = $this->timeout;
- }
+ $options['timeout'] = $this->timeout;
$context = stream_context_create( array( 'http' => $options ) );
$reqCount = 0;
$url = $this->url;
+ $result = array();
+
do {
$reqCount++;
wfSuppressWarnings();
$this->headerList = $result['wrapper_data'];
$this->parseHeader();
- if ( !$manuallyRedirect || !$this->followRedirects ) {
+ if ( !$this->followRedirects ) {
break;
}
}
} while ( true );
- if ( $oldTimeout !== false ) {
- ini_set( 'default_socket_timeout', $oldTimeout );
- }
-
$this->setStatus();
if ( $fh === false ) {
return $this->status;
}
- if ( $this->status->isOK() ) {
+ // If everything went OK, or we received some error code,
+ // get the response body content.
+ if ( $this->status->isOK()
+ || (int)$this->respStatus >= 300) {
while ( !feof( $fh ) ) {
$buf = fread( $fh, 8192 );