From 620509093c62ea224e77395edeaac82e58879393 Mon Sep 17 00:00:00 2001 From: Antoine Musso Date: Wed, 6 Mar 2019 08:52:34 +0000 Subject: [PATCH] Revert "Convert MultiHttpClient to use Guzzle" That breaks on Wikimedia beta cluster (T217733): Warning: Invalid argument: option: 6 in vendor/guzzlehttp/guzzle/src/Handler/CurlFactory.php on line 56 Warning: Invalid argument: option: 6 in vendor/guzzlehttp/guzzle/src/Handler/CurlFactory.php on line 56 Warning: Invalid argument: function: not string, closure, or array in vendor/guzzlehttp/guzzle/src/Handler/CurlMultiHandler.php on line 108 Fatal error: Uncaught exception 'ConfigException' Failed to load configuration from etcd: cURL error 23: Failed writing header This reverts commit 1e048a08b565ae909e85465f8b09a27ed8480ce2. Bug: T202352 Bug: T217733 Change-Id: I2384355043896128d3f191941e8da00fdc62361e --- RELEASE-NOTES-1.33 | 2 - includes/libs/MultiHttpClient.php | 527 ++++++++++++------ .../phpunit/includes/MultiHttpClientTest.php | 166 ++++++ .../includes/libs/MultiHttpClientTest.php | 169 ------ 4 files changed, 525 insertions(+), 339 deletions(-) create mode 100644 tests/phpunit/includes/MultiHttpClientTest.php delete mode 100644 tests/phpunit/includes/libs/MultiHttpClientTest.php diff --git a/RELEASE-NOTES-1.33 b/RELEASE-NOTES-1.33 index 3fd9520c89..c2bb4cfb62 100644 --- a/RELEASE-NOTES-1.33 +++ b/RELEASE-NOTES-1.33 @@ -337,8 +337,6 @@ because of Phabricator reports. check block behaviour. * The api-feature-usage log channel now has log context. The text message is deprecated and will be removed in the future. -* The "stream" request option in MultiHttpClient has been deprecated. - Use the new "sink" option instead. === Other changes in 1.33 === * (T201747) Html::openElement() warns if given an element name with a space diff --git a/includes/libs/MultiHttpClient.php b/includes/libs/MultiHttpClient.php index a383390dd5..536177edeb 100644 --- a/includes/libs/MultiHttpClient.php +++ b/includes/libs/MultiHttpClient.php @@ -23,8 +23,7 @@ use Psr\Log\LoggerAwareInterface; use Psr\Log\LoggerInterface; use Psr\Log\NullLogger; -use Psr\Http\Message\ResponseInterface; -use GuzzleHttp\Client; +use MediaWiki\MediaWikiServices; /** * Class to handle multiple HTTP requests @@ -42,10 +41,7 @@ use GuzzleHttp\Client; * PUT requests, and a field/value array for POST request; * array bodies are encoded as multipart/form-data and strings * use application/x-www-form-urlencoded (headers sent automatically) - * - sink : resource to receive the HTTP response body (preferred over stream) - * @since 1.33 * - stream : resource to stream the HTTP response body to - * @deprecated since 1.33, use sink instead * - proxy : HTTP proxy to use * - flags : map of boolean flags which supports: * - relayResponseHeaders : write out header via header() @@ -54,62 +50,58 @@ use GuzzleHttp\Client; * @since 1.23 */ class MultiHttpClient implements LoggerAwareInterface { - /** @var float connection timeout in seconds, zero to wait indefinitely*/ + /** @var resource */ + protected $multiHandle = null; // curl_multi handle + /** @var string|null SSL certificates path */ + protected $caBundlePath; + /** @var float */ protected $connTimeout = 10; - /** @var float request timeout in seconds, zero to wait indefinitely*/ + /** @var float */ protected $reqTimeout = 300; + /** @var bool */ + protected $usePipelining = false; + /** @var int */ + protected $maxConnsPerHost = 50; /** @var string|null proxy */ protected $proxy; - /** @var int CURLMOPT_PIPELINING value, only effective if curl is available */ - protected $pipeliningMode = 0; - /** @var int CURLMOPT_MAXCONNECTS value, only effective if curl is available */ - protected $maxConnsPerHost = 50; /** @var string */ protected $userAgent = 'wikimedia/multi-http-client v1.0'; /** @var LoggerInterface */ protected $logger; - /** @var string|null SSL certificates path */ - protected $caBundlePath; + + // In PHP 7 due to https://bugs.php.net/bug.php?id=76480 the request/connect + // timeouts are periodically polled instead of being accurately respected. + // The select timeout is set to the minimum timeout multiplied by this factor. + const TIMEOUT_ACCURACY_FACTOR = 0.1; /** * @param array $options * - connTimeout : default connection timeout (seconds) * - reqTimeout : default request timeout (seconds) * - proxy : HTTP proxy to use - * - pipeliningMode : whether to use HTTP pipelining/multiplexing if possible (for all - * hosts). The exact behavior is dependent on curl version. + * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) * - maxConnsPerHost : maximum number of concurrent connections (per host) * - userAgent : The User-Agent header value to send * - logger : a \Psr\Log\LoggerInterface instance for debug logging * - caBundlePath : path to specific Certificate Authority bundle (if any) * @throws Exception - * - * usePipelining is an alias for pipelining mode, retained for backward compatibility. - * If both usePipelining and pipeliningMode are specified, pipeliningMode wins. */ public function __construct( array $options ) { if ( isset( $options['caBundlePath'] ) ) { $this->caBundlePath = $options['caBundlePath']; if ( !file_exists( $this->caBundlePath ) ) { - throw new Exception( "Cannot find CA bundle: {$this->caBundlePath}" ); + throw new Exception( "Cannot find CA bundle: " . $this->caBundlePath ); } } - - // Backward compatibility. Defers to newer option naming if both are specified. - if ( isset( $options['usePipelining'] ) ) { - $this->pipeliningMode = $options['usePipelining']; - } - static $opts = [ - 'connTimeout', 'reqTimeout', 'proxy', 'pipeliningMode', 'maxConnsPerHost', - 'userAgent', 'logger' + 'connTimeout', 'reqTimeout', 'usePipelining', 'maxConnsPerHost', + 'proxy', 'userAgent', 'logger' ]; foreach ( $opts as $key ) { if ( isset( $options[$key] ) ) { $this->$key = $options[$key]; } } - if ( $this->logger === null ) { $this->logger = new NullLogger; } @@ -122,20 +114,17 @@ class MultiHttpClient implements LoggerAwareInterface { * - code : HTTP response code or 0 if there was a serious error * - reason : HTTP response reason (empty if there was a serious error) * - headers :
- * - body : HTTP response body - * - error : Any error string + * - body : HTTP response body or resource (if "stream" was set) + * - error : Any error string * The map also stores integer-indexed copies of these values. This lets callers do: * @code - * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $http->run( $req ); + * list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $http->run( $req ); * @endcode * @param array $req HTTP request array * @param array $opts * - connTimeout : connection timeout per request (seconds) * - reqTimeout : post-connection timeout per request (seconds) - * - handler : optional custom handler - * See http://docs.guzzlephp.org/en/stable/handlers-and-middleware.html * @return array Response array for request - * @throws Exception */ public function run( array $req, array $opts = [] ) { return $this->runMulti( [ $req ], $opts )[0]['response']; @@ -151,7 +140,7 @@ class MultiHttpClient implements LoggerAwareInterface { * - code : HTTP response code or 0 if there was a serious error * - reason : HTTP response reason (empty if there was a serious error) * - headers :
- * - body : HTTP response body + * - body : HTTP response body or resource (if "stream" was set) * - error : Any error string * The map also stores integer-indexed copies of these values. This lets callers do: * @code @@ -165,20 +154,18 @@ class MultiHttpClient implements LoggerAwareInterface { * @param array $opts * - connTimeout : connection timeout per request (seconds) * - reqTimeout : post-connection timeout per request (seconds) - * - pipeliningMode : whether to use HTTP pipelining/multiplexing if possible (for all - * hosts). The exact behavior is dependent on curl version. + * - usePipelining : whether to use HTTP pipelining if possible * - maxConnsPerHost : maximum number of concurrent connections (per host) - * - handler : optional custom handler. - * See http://docs.guzzlephp.org/en/stable/handlers-and-middleware.html * @return array $reqs With response array populated for each * @throws Exception - * - * usePipelining is an alias for pipelining mode, retained for backward compatibility. - * If both usePipelining and pipeliningMode are specified, pipeliningMode wins. */ public function runMulti( array $reqs, array $opts = [] ) { $this->normalizeRequests( $reqs ); - return $this->runMultiGuzzle( $reqs, $opts ); + if ( $this->isCurlEnabled() ) { + return $this->runMultiCurl( $reqs, $opts ); + } else { + return $this->runMultiHttp( $reqs, $opts ); + } } /** @@ -197,178 +184,354 @@ class MultiHttpClient implements LoggerAwareInterface { * * @param array $reqs Map of HTTP request arrays * @param array $opts + * - connTimeout : connection timeout per request (seconds) + * - reqTimeout : post-connection timeout per request (seconds) + * - usePipelining : whether to use HTTP pipelining if possible + * - maxConnsPerHost : maximum number of concurrent connections (per host) * @return array $reqs With response array populated for each * @throws Exception */ - private function runMultiGuzzle( array $reqs, array $opts = [] ) { - $guzzleOptions = [ - 'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout, - 'connect_timeout' => $opts['connTimeout'] ?? $this->connTimeout, - 'allow_redirects' => [ - 'max' => 4, - ], - ]; - - if ( !is_null( $this->caBundlePath ) ) { - $guzzleOptions['verify'] = $this->caBundlePath; + private function runMultiCurl( array $reqs, array $opts = [] ) { + $chm = $this->getCurlMulti(); + + $selectTimeout = $this->getSelectTimeout( $opts ); + + // Add all of the required cURL handles... + $handles = []; + foreach ( $reqs as $index => &$req ) { + $handles[$index] = $this->getCurlHandle( $req, $opts ); + if ( count( $reqs ) > 1 ) { + // https://github.com/guzzle/guzzle/issues/349 + curl_setopt( $handles[$index], CURLOPT_FORBID_REUSE, true ); + } } + unset( $req ); // don't assign over this by accident - // Include curl-specific option section only if curl is available. - // Our defaults may differ from curl's defaults, depending on curl version. - if ( $this->isCurlEnabled() ) { - // Backward compatibility - $optsPipeliningMode = $opts['pipeliningMode'] ?? ( $opts['usePipelining'] ?? null ); + $indexes = array_keys( $reqs ); + if ( isset( $opts['usePipelining'] ) ) { + curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$opts['usePipelining'] ); + } + if ( isset( $opts['maxConnsPerHost'] ) ) { + // Keep these sockets around as they may be needed later in the request + curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$opts['maxConnsPerHost'] ); + } - // Per-request options override class-level options - $pipeliningMode = $optsPipeliningMode ?? $this->pipeliningMode; - $maxConnsPerHost = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost; + // @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS) + $batches = array_chunk( $indexes, $this->maxConnsPerHost ); + $infos = []; - $guzzleOptions['curl'][CURLMOPT_PIPELINING] = (int)$pipeliningMode; - $guzzleOptions['curl'][CURLMOPT_MAXCONNECTS] = (int)$maxConnsPerHost; + foreach ( $batches as $batch ) { + // Attach all cURL handles for this batch + foreach ( $batch as $index ) { + curl_multi_add_handle( $chm, $handles[$index] ); + } + // Execute the cURL handles concurrently... + $active = null; // handles still being processed + do { + // Do any available work... + do { + $mrc = curl_multi_exec( $chm, $active ); + $info = curl_multi_info_read( $chm ); + if ( $info !== false ) { + $infos[(int)$info['handle']] = $info; + } + } while ( $mrc == CURLM_CALL_MULTI_PERFORM ); + // Wait (if possible) for available work... + if ( $active > 0 && $mrc == CURLM_OK ) { + if ( curl_multi_select( $chm, $selectTimeout ) == -1 ) { + // PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html + usleep( 5000 ); // 5ms + } + } + } while ( $active > 0 && $mrc == CURLM_OK ); } - if ( isset( $opts['handler'] ) ) { - $guzzleOptions['handler'] = $opts['handler']; - } + // Remove all of the added cURL handles and check for errors... + foreach ( $reqs as $index => &$req ) { + $ch = $handles[$index]; + curl_multi_remove_handle( $chm, $ch ); + + if ( isset( $infos[(int)$ch] ) ) { + $info = $infos[(int)$ch]; + $errno = $info['result']; + if ( $errno !== 0 ) { + $req['response']['error'] = "(curl error: $errno)"; + if ( function_exists( 'curl_strerror' ) ) { + $req['response']['error'] .= " " . curl_strerror( $errno ); + } + $this->logger->warning( "Error fetching URL \"{$req['url']}\": " . + $req['response']['error'] ); + } + } else { + $req['response']['error'] = "(curl error: no status set)"; + } - $guzzleOptions['headers']['user-agent'] = $this->userAgent; + // For convenience with the list() operator + $req['response'][0] = $req['response']['code']; + $req['response'][1] = $req['response']['reason']; + $req['response'][2] = $req['response']['headers']; + $req['response'][3] = $req['response']['body']; + $req['response'][4] = $req['response']['error']; + curl_close( $ch ); + // Close any string wrapper file handles + if ( isset( $req['_closeHandle'] ) ) { + fclose( $req['_closeHandle'] ); + unset( $req['_closeHandle'] ); + } + } + unset( $req ); // don't assign over this by accident - $client = new Client( $guzzleOptions ); - $promises = []; - foreach ( $reqs as $index => $req ) { - $reqOptions = [ - 'proxy' => $req['proxy'] ?? $this->proxy, - ]; + // Restore the default settings + curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$this->usePipelining ); + curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost ); - if ( $req['method'] == 'POST' ) { - $reqOptions['form_params'] = $req['body']; + return $reqs; + } - // Suppress 'Expect: 100-continue' header, as some servers - // will reject it with a 417 and Curl won't auto retry - // with HTTP 1.0 fallback - $reqOptions['expect'] = false; - } + /** + * @param array &$req HTTP request map + * @param array $opts + * - connTimeout : default connection timeout + * - reqTimeout : default request timeout + * @return resource + * @throws Exception + */ + protected function getCurlHandle( array &$req, array $opts = [] ) { + $ch = curl_init(); + + curl_setopt( $ch, CURLOPT_CONNECTTIMEOUT_MS, + ( $opts['connTimeout'] ?? $this->connTimeout ) * 1000 ); + curl_setopt( $ch, CURLOPT_PROXY, $req['proxy'] ?? $this->proxy ); + curl_setopt( $ch, CURLOPT_TIMEOUT_MS, + ( $opts['reqTimeout'] ?? $this->reqTimeout ) * 1000 ); + curl_setopt( $ch, CURLOPT_FOLLOWLOCATION, 1 ); + curl_setopt( $ch, CURLOPT_MAXREDIRS, 4 ); + curl_setopt( $ch, CURLOPT_HEADER, 0 ); + if ( !is_null( $this->caBundlePath ) ) { + curl_setopt( $ch, CURLOPT_SSL_VERIFYPEER, true ); + curl_setopt( $ch, CURLOPT_CAINFO, $this->caBundlePath ); + } + curl_setopt( $ch, CURLOPT_RETURNTRANSFER, 1 ); - if ( isset( $req['headers']['user-agent'] ) ) { - $reqOptions['headers']['user-agent'] = $req['headers']['user-agent']; - } + $url = $req['url']; + $query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 ); + if ( $query != '' ) { + $url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query"; + } + curl_setopt( $ch, CURLOPT_URL, $url ); - // Backward compatibility for pre-Guzzle naming - if ( isset( $req['sink'] ) ) { - $reqOptions['sink'] = $req['sink']; - } elseif ( isset( $req['stream'] ) ) { - $reqOptions['sink'] = $req['stream']; - } + curl_setopt( $ch, CURLOPT_CUSTOMREQUEST, $req['method'] ); + if ( $req['method'] === 'HEAD' ) { + curl_setopt( $ch, CURLOPT_NOBODY, 1 ); + } - if ( !empty( $req['flags']['relayResponseHeaders'] ) ) { - $reqOptions['on_headers'] = function ( ResponseInterface $response ) { - foreach ( $response->getHeaders() as $name => $values ) { - foreach ( $values as $value ) { - header( $name . ': ' . $value . "\r\n" ); - } - } - }; + if ( $req['method'] === 'PUT' ) { + curl_setopt( $ch, CURLOPT_PUT, 1 ); + if ( is_resource( $req['body'] ) ) { + curl_setopt( $ch, CURLOPT_INFILE, $req['body'] ); + if ( isset( $req['headers']['content-length'] ) ) { + curl_setopt( $ch, CURLOPT_INFILESIZE, $req['headers']['content-length'] ); + } elseif ( isset( $req['headers']['transfer-encoding'] ) && + $req['headers']['transfer-encoding'] === 'chunks' + ) { + curl_setopt( $ch, CURLOPT_UPLOAD, true ); + } else { + throw new Exception( "Missing 'Content-Length' or 'Transfer-Encoding' header." ); + } + } elseif ( $req['body'] !== '' ) { + $fp = fopen( "php://temp", "wb+" ); + fwrite( $fp, $req['body'], strlen( $req['body'] ) ); + rewind( $fp ); + curl_setopt( $ch, CURLOPT_INFILE, $fp ); + curl_setopt( $ch, CURLOPT_INFILESIZE, strlen( $req['body'] ) ); + $req['_closeHandle'] = $fp; // remember to close this later + } else { + curl_setopt( $ch, CURLOPT_INFILESIZE, 0 ); } - - $url = $req['url']; - $query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 ); - if ( $query != '' ) { - $url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query"; + curl_setopt( $ch, CURLOPT_READFUNCTION, + function ( $ch, $fd, $length ) { + $data = fread( $fd, $length ); + $len = strlen( $data ); + return $data; + } + ); + } elseif ( $req['method'] === 'POST' ) { + curl_setopt( $ch, CURLOPT_POST, 1 ); + // Don't interpret POST parameters starting with '@' as file uploads, because this + // makes it impossible to POST plain values starting with '@' (and causes security + // issues potentially exposing the contents of local files). + curl_setopt( $ch, CURLOPT_SAFE_UPLOAD, true ); + curl_setopt( $ch, CURLOPT_POSTFIELDS, $req['body'] ); + } else { + if ( is_resource( $req['body'] ) || $req['body'] !== '' ) { + throw new Exception( "HTTP body specified for a non PUT/POST request." ); } - $promises[$index] = $client->requestAsync( $req['method'], $url, $reqOptions ); + $req['headers']['content-length'] = 0; } - $results = GuzzleHttp\Promise\settle( $promises )->wait(); + if ( !isset( $req['headers']['user-agent'] ) ) { + $req['headers']['user-agent'] = $this->userAgent; + } - foreach ( $results as $index => $result ) { - if ( $result['state'] === 'fulfilled' ) { - $this->guzzleHandleSuccess( $reqs[$index], $result['value'] ); - } elseif ( $result['state'] === 'rejected' ) { - $this->guzzleHandleFailure( $reqs[$index], $result['reason'] ); - } else { - // This should never happen, and exists only in case of changes to guzzle - throw new UnexpectedValueException( - "Unrecognized result state: {$result['state']}" ); + $headers = []; + foreach ( $req['headers'] as $name => $value ) { + if ( strpos( $name, ': ' ) ) { + throw new Exception( "Headers cannot have ':' in the name." ); } + $headers[] = $name . ': ' . trim( $value ); } + curl_setopt( $ch, CURLOPT_HTTPHEADER, $headers ); - foreach ( $reqs as &$req ) { - $req['response'][0] = $req['response']['code']; - $req['response'][1] = $req['response']['reason']; - $req['response'][2] = $req['response']['headers']; - $req['response'][3] = $req['response']['body']; - $req['response'][4] = $req['response']['error']; + curl_setopt( $ch, CURLOPT_HEADERFUNCTION, + function ( $ch, $header ) use ( &$req ) { + if ( !empty( $req['flags']['relayResponseHeaders'] ) && trim( $header ) !== '' ) { + header( $header ); + } + $length = strlen( $header ); + $matches = []; + if ( preg_match( "/^(HTTP\/1\.[01]) (\d{3}) (.*)/", $header, $matches ) ) { + $req['response']['code'] = (int)$matches[2]; + $req['response']['reason'] = trim( $matches[3] ); + return $length; + } + if ( strpos( $header, ":" ) === false ) { + return $length; + } + list( $name, $value ) = explode( ":", $header, 2 ); + $name = strtolower( $name ); + $value = trim( $value ); + if ( isset( $req['response']['headers'][$name] ) ) { + $req['response']['headers'][$name] .= ', ' . $value; + } else { + $req['response']['headers'][$name] = $value; + } + return $length; + } + ); + + if ( isset( $req['stream'] ) ) { + // Don't just use CURLOPT_FILE as that might give: + // curl_setopt(): cannot represent a stream of type Output as a STDIO FILE* + // The callback here handles both normal files and php://temp handles. + curl_setopt( $ch, CURLOPT_WRITEFUNCTION, + function ( $ch, $data ) use ( &$req ) { + return fwrite( $req['stream'], $data ); + } + ); + } else { + curl_setopt( $ch, CURLOPT_WRITEFUNCTION, + function ( $ch, $data ) use ( &$req ) { + $req['response']['body'] .= $data; + return strlen( $data ); + } + ); } - return $reqs; + return $ch; } /** - * Called for successful requests - * - * @param array $req the original request - * @param ResponseInterface $response + * @return resource + * @throws Exception */ - private function guzzleHandleSuccess( &$req, $response ) { - $req['response'] = [ - 'code' => $response->getStatusCode(), - 'reason' => $response->getReasonPhrase(), - 'headers' => $this->parseHeaders( $response->getHeaders() ), - 'body' => isset( $req['sink'] ) ? '' : $response->getBody()->getContents(), - 'error' => '', - ]; + protected function getCurlMulti() { + if ( !$this->multiHandle ) { + if ( !function_exists( 'curl_multi_init' ) ) { + throw new Exception( "PHP cURL function curl_multi_init missing. " . + "Check https://www.mediawiki.org/wiki/Manual:CURL" ); + } + $cmh = curl_multi_init(); + curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining ); + curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost ); + $this->multiHandle = $cmh; + } + return $this->multiHandle; } /** - * Called for failed requests + * Execute a set of HTTP(S) requests sequentially. * - * @param array $req the original request - * @param Exception $reason + * @see MultiHttpClient::runMulti() + * @todo Remove dependency on MediaWikiServices: use a separate HTTP client + * library or copy code from PhpHttpRequest + * @param array $reqs Map of HTTP request arrays + * @param array $opts + * - connTimeout : connection timeout per request (seconds) + * - reqTimeout : post-connection timeout per request (seconds) + * @return array $reqs With response array populated for each + * @throws Exception */ - private function guzzleHandleFailure( &$req, $reason ) { - $req['response'] = [ - 'code' => $reason->getCode(), - 'reason' => '', - 'headers' => [], - 'body' => '', - 'error' => $reason->getMessage(), + private function runMultiHttp( array $reqs, array $opts = [] ) { + $httpOptions = [ + 'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout, + 'connectTimeout' => $opts['connTimeout'] ?? $this->connTimeout, + 'logger' => $this->logger, + 'caInfo' => $this->caBundlePath, ]; + foreach ( $reqs as &$req ) { + $reqOptions = $httpOptions + [ + 'method' => $req['method'], + 'proxy' => $req['proxy'] ?? $this->proxy, + 'userAgent' => $req['headers']['user-agent'] ?? $this->userAgent, + 'postData' => $req['body'], + ]; - if ( - $reason instanceof GuzzleHttp\Exception\RequestException && - $reason->hasResponse() - ) { - $response = $reason->getResponse(); - if ( $response ) { - $req['response']['reason'] = $response->getReasonPhrase(); - $req['response']['headers'] = $this->parseHeaders( $response->getHeaders() ); - $req['response']['body'] = $response->getBody()->getContents(); + $url = $req['url']; + $query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 ); + if ( $query != '' ) { + $url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query"; } - } - $this->logger->warning( "Error fetching URL \"{$req['url']}\": " . - $req['response']['error'] ); - } + $httpRequest = MediaWikiServices::getInstance()->getHttpRequestFactory()->create( + $url, $reqOptions ); + $sv = $httpRequest->execute()->getStatusValue(); - /** - * Parses response headers. - * - * @param string[][] $guzzleHeaders - * @return array - */ - private function parseHeaders( $guzzleHeaders ) { - $headers = []; - foreach ( $guzzleHeaders as $name => $values ) { - $headers[strtolower( $name )] = implode( ', ', $values ); + $respHeaders = array_map( + function ( $v ) { + return implode( ', ', $v ); + }, + $httpRequest->getResponseHeaders() ); + + $req['response'] = [ + 'code' => $httpRequest->getStatus(), + 'reason' => '', + 'headers' => $respHeaders, + 'body' => $httpRequest->getContent(), + 'error' => '', + ]; + + if ( !$sv->isOk() ) { + $svErrors = $sv->getErrors(); + if ( isset( $svErrors[0] ) ) { + $req['response']['error'] = $svErrors[0]['message']; + + // param values vary per failure type (ex. unknown host vs unknown page) + if ( isset( $svErrors[0]['params'][0] ) ) { + if ( is_numeric( $svErrors[0]['params'][0] ) ) { + if ( isset( $svErrors[0]['params'][1] ) ) { + $req['response']['reason'] = $svErrors[0]['params'][1]; + } + } else { + $req['response']['reason'] = $svErrors[0]['params'][0]; + } + } + } + } + + $req['response'][0] = $req['response']['code']; + $req['response'][1] = $req['response']['reason']; + $req['response'][2] = $req['response']['headers']; + $req['response'][3] = $req['response']['body']; + $req['response'][4] = $req['response']['error']; } - return $headers; + + return $reqs; } /** * Normalize request information * * @param array $reqs the requests to normalize - * @throws Exception */ private function normalizeRequests( array &$reqs ) { foreach ( $reqs as &$req ) { @@ -409,6 +572,28 @@ class MultiHttpClient implements LoggerAwareInterface { } } + /** + * Get a suitable select timeout for the given options. + * + * @param array $opts + * @return float + */ + private function getSelectTimeout( $opts ) { + $connTimeout = $opts['connTimeout'] ?? $this->connTimeout; + $reqTimeout = $opts['reqTimeout'] ?? $this->reqTimeout; + $timeouts = array_filter( [ $connTimeout, $reqTimeout ] ); + if ( count( $timeouts ) === 0 ) { + return 1; + } + + $selectTimeout = min( $timeouts ) * self::TIMEOUT_ACCURACY_FACTOR; + // Minimum 10us for sanity + if ( $selectTimeout < 10e-6 ) { + $selectTimeout = 10e-6; + } + return $selectTimeout; + } + /** * Register a logger * @@ -417,4 +602,10 @@ class MultiHttpClient implements LoggerAwareInterface { public function setLogger( LoggerInterface $logger ) { $this->logger = $logger; } + + function __destruct() { + if ( $this->multiHandle ) { + curl_multi_close( $this->multiHandle ); + } + } } diff --git a/tests/phpunit/includes/MultiHttpClientTest.php b/tests/phpunit/includes/MultiHttpClientTest.php new file mode 100644 index 0000000000..1c7e62d092 --- /dev/null +++ b/tests/phpunit/includes/MultiHttpClientTest.php @@ -0,0 +1,166 @@ +getMockBuilder( MultiHttpClient::class ) + ->setConstructorArgs( [ [] ] ) + ->setMethods( [ 'isCurlEnabled' ] )->getMock(); + $client->method( 'isCurlEnabled' )->willReturn( false ); + $this->client = $client; + } + + private function getHttpRequest( $statusValue, $statusCode, $headers = [] ) { + $httpRequest = $this->getMockBuilder( PhpHttpRequest::class ) + ->setConstructorArgs( [ '', [] ] ) + ->getMock(); + $httpRequest->expects( $this->any() ) + ->method( 'execute' ) + ->willReturn( Status::wrap( $statusValue ) ); + $httpRequest->expects( $this->any() ) + ->method( 'getResponseHeaders' ) + ->willReturn( $headers ); + $httpRequest->expects( $this->any() ) + ->method( 'getStatus' ) + ->willReturn( $statusCode ); + return $httpRequest; + } + + private function mockHttpRequestFactory( $httpRequest ) { + $factory = $this->getMockBuilder( MediaWiki\Http\HttpRequestFactory::class ) + ->getMock(); + $factory->expects( $this->any() ) + ->method( 'create' ) + ->willReturn( $httpRequest ); + return $factory; + } + + /** + * Test call of a single url that should succeed + */ + public function testMultiHttpClientSingleSuccess() { + // Mock success + $httpRequest = $this->getHttpRequest( StatusValue::newGood( 200 ), 200 ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $this->client->run( [ + 'method' => 'GET', + 'url' => "http://example.test", + ] ); + + $this->assertEquals( 200, $rcode ); + } + + /** + * Test call of a single url that should not exist, and therefore fail + */ + public function testMultiHttpClientSingleFailure() { + // Mock an invalid tld + $httpRequest = $this->getHttpRequest( + StatusValue::newFatal( 'http-invalid-url', 'http://www.example.test' ), 0 ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $this->client->run( [ + 'method' => 'GET', + 'url' => "http://www.example.test", + ] ); + + $failure = $rcode < 200 || $rcode >= 400; + $this->assertTrue( $failure ); + } + + /** + * Test call of multiple urls that should all succeed + */ + public function testMultiHttpClientMultipleSuccess() { + // Mock success + $httpRequest = $this->getHttpRequest( StatusValue::newGood( 200 ), 200 ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + $reqs = [ + [ + 'method' => 'GET', + 'url' => 'http://example.test', + ], + [ + 'method' => 'GET', + 'url' => 'https://get.test', + ], + ]; + $responses = $this->client->runMulti( $reqs ); + foreach ( $responses as $response ) { + list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $response['response']; + $this->assertEquals( 200, $rcode ); + } + } + + /** + * Test call of multiple urls that should all fail + */ + public function testMultiHttpClientMultipleFailure() { + // Mock page not found + $httpRequest = $this->getHttpRequest( + StatusValue::newFatal( "http-bad-status", 404, 'Not Found' ), 404 ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + $reqs = [ + [ + 'method' => 'GET', + 'url' => 'http://example.test/12345', + ], + [ + 'method' => 'GET', + 'url' => 'http://example.test/67890' , + ] + ]; + $responses = $this->client->runMulti( $reqs ); + foreach ( $responses as $response ) { + list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $response['response']; + $failure = $rcode < 200 || $rcode >= 400; + $this->assertTrue( $failure ); + } + } + + /** + * Test of response header handling + */ + public function testMultiHttpClientHeaders() { + // Represenative headers for typical requests, per MWHttpRequest::getResponseHeaders() + $headers = [ + 'content-type' => [ + 'text/html; charset=utf-8', + ], + 'date' => [ + 'Wed, 18 Jul 2018 14:52:41 GMT', + ], + 'set-cookie' => [ + 'COUNTRY=NAe6; expires=Wed, 25-Jul-2018 14:52:41 GMT; path=/; domain=.example.test', + 'LAST_NEWS=1531925562; expires=Thu, 18-Jul-2019 14:52:41 GMT; path=/; domain=.example.test', + ] + ]; + + // Mock success with specific headers + $httpRequest = $this->getHttpRequest( StatusValue::newGood( 200 ), 200, $headers ); + $this->setService( 'HttpRequestFactory', $this->mockHttpRequestFactory( $httpRequest ) ); + + list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $this->client->run( [ + 'method' => 'GET', + 'url' => 'http://example.test', + ] ); + + $this->assertEquals( 200, $rcode ); + $this->assertEquals( count( $headers ), count( $rhdrs ) ); + foreach ( $headers as $name => $values ) { + $value = implode( ', ', $values ); + $this->assertArrayHasKey( $name, $rhdrs ); + $this->assertEquals( $value, $rhdrs[$name] ); + } + } +} diff --git a/tests/phpunit/includes/libs/MultiHttpClientTest.php b/tests/phpunit/includes/libs/MultiHttpClientTest.php deleted file mode 100644 index 8372f51ba0..0000000000 --- a/tests/phpunit/includes/libs/MultiHttpClientTest.php +++ /dev/null @@ -1,169 +0,0 @@ - 'GET', - 'url' => 'http://example.test', - ], - [ - 'method' => 'GET', - 'url' => 'https://get.test', - ], - [ - 'method' => 'POST', - 'url' => 'http://example.test', - 'body' => [ 'field' => 'value' ], - ], - ]; - - private $failureReqs = [ - [ - 'method' => 'GET', - 'url' => 'http://example.test', - ], - [ - 'method' => 'GET', - 'url' => 'http://example.test/12345', - ], - [ - 'method' => 'POST', - 'url' => 'http://example.test', - 'body' => [ 'field' => 'value' ], - ], - ]; - - private function makeHandler( array $rCodes ) { - $queue = []; - foreach ( $rCodes as $rCode ) { - $queue[] = new Response( $rCode ); - } - return HandlerStack::create( new MockHandler( $queue ) ); - } - - /** - * Test call of a single url that should succeed - */ - public function testSingleSuccess() { - $handler = $this->makeHandler( [ 200 ] ); - $client = new MultiHttpClient( [] ); - - list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $client->run( - $this->successReqs[0], - [ 'handler' => $handler ] ); - - $this->assertEquals( 200, $rcode ); - } - - /** - * Test call of a single url that should not exist, and therefore fail - */ - public function testSingleFailure() { - $handler = $this->makeHandler( [ 404 ] ); - $client = new MultiHttpClient( [] ); - - list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $client->run( - $this->failureReqs[0], - [ 'handler' => $handler ] ); - - $failure = $rcode < 200 || $rcode >= 400; - $this->assertTrue( $failure ); - } - - /** - * Test call of multiple urls that should all succeed - */ - public function testMultipleSuccess() { - $handler = $this->makeHandler( [ 200, 200, 200 ] ); - $client = new MultiHttpClient( [] ); - $responses = $client->runMulti( $this->successReqs, [ 'handler' => $handler ] ); - - foreach ( $responses as $response ) { - list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $response['response']; - $this->assertEquals( 200, $rcode ); - } - } - - /** - * Test call of multiple urls that should all fail - */ - public function testMultipleFailure() { - $handler = $this->makeHandler( [ 404, 404, 404 ] ); - $client = new MultiHttpClient( [] ); - $responses = $client->runMulti( $this->failureReqs, [ 'handler' => $handler ] ); - - foreach ( $responses as $response ) { - list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $response['response']; - $failure = $rcode < 200 || $rcode >= 400; - $this->assertTrue( $failure ); - } - } - - /** - * Test call of multiple urls, some of which should succeed and some of which should fail - */ - public function testMixedSuccessAndFailure() { - $responseCodes = [ 200, 200, 200, 404, 404, 404 ]; - $handler = $this->makeHandler( $responseCodes ); - $client = new MultiHttpClient( [] ); - - $responses = $client->runMulti( - array_merge( $this->successReqs, $this->failureReqs ), - [ 'handler' => $handler ] ); - - foreach ( $responses as $index => $response ) { - list( $rcode, $rdesc, /* $rhdrs */, $rbody, $rerr ) = $response['response']; - $this->assertEquals( $responseCodes[$index], $rcode ); - } - } - - /** - * Test of response header handling - */ - public function testHeaders() { - // Representative headers for typical requests, per MWHttpRequest::getResponseHeaders() - $headers = [ - 'content-type' => [ - 'text/html; charset=utf-8', - ], - 'date' => [ - 'Wed, 18 Jul 2018 14:52:41 GMT', - ], - 'set-cookie' => [ - 'COUNTRY=NAe6; expires=Wed, 25-Jul-2018 14:52:41 GMT; path=/; domain=.example.test', - 'LAST_NEWS=1531925562; expires=Thu, 18-Jul-2019 14:52:41 GMT; path=/; domain=.example.test', - ] - ]; - - $handler = HandlerStack::create( new MockHandler( [ - new Response( 200, $headers ), - ] ) ); - - $client = new MultiHttpClient( [] ); - - list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $client->run( [ - 'method' => 'GET', - 'url' => "http://example.test", - ], - [ 'handler' => $handler ] ); - $this->assertEquals( 200, $rcode ); - - $this->assertEquals( count( $headers ), count( $rhdrs ) ); - foreach ( $headers as $name => $values ) { - $value = implode( ', ', $values ); - $this->assertArrayHasKey( $name, $rhdrs ); - $this->assertEquals( $value, $rhdrs[$name] ); - } - } -} -- 2.20.1