From: Reedy Date: Tue, 10 Sep 2019 14:56:38 +0000 (+0000) Subject: Revert "Improve MultiHttpClient connection concurrency and reuse" X-Git-Tag: 1.34.0-rc.0~285^2 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=134e933e610b712089feab62736ddb2f88ea4f86;hp=46531d62852239f620f7b7c0af1e5747a9006228 Revert "Improve MultiHttpClient connection concurrency and reuse" This reverts commit 46531d62852239f620f7b7c0af1e5747a9006228. Bug: T232487 Change-Id: I8b1b829197f0f5758a85cb1547e13448d425aed2 --- diff --git a/includes/libs/http/MultiHttpClient.php b/includes/libs/http/MultiHttpClient.php index 9ea8c569d1..b195a085ef 100644 --- a/includes/libs/http/MultiHttpClient.php +++ b/includes/libs/http/MultiHttpClient.php @@ -50,8 +50,8 @@ use MediaWiki\MediaWikiServices; * @since 1.23 */ class MultiHttpClient implements LoggerAwareInterface { - /** @var resource curl_multi_init() handle */ - protected $cmh; + /** @var resource */ + protected $multiHandle = null; // curl_multi handle /** @var string|null SSL certificates path */ protected $caBundlePath; /** @var float */ @@ -122,10 +122,8 @@ class MultiHttpClient implements LoggerAwareInterface { * @endcode * @param array $req HTTP request array * @param array $opts - * - connTimeout : connection timeout per request (seconds) - * - reqTimeout : post-connection timeout per request (seconds) - * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) - * - maxConnsPerHost : maximum number of concurrent connections (per host) + * - connTimeout : connection timeout per request (seconds) + * - reqTimeout : post-connection timeout per request (seconds) * @return array Response array for request */ public function run( array $req, array $opts = [] ) { @@ -153,10 +151,10 @@ class MultiHttpClient implements LoggerAwareInterface { * method/URL entries will also be changed to use the corresponding string keys. * * @param array[] $reqs Map of HTTP request arrays - * @param array $opts Options + * @param array $opts * - connTimeout : connection timeout per request (seconds) * - reqTimeout : post-connection timeout per request (seconds) - * - usePipelining : whether to use HTTP pipelining if possible (for all hosts) + * - usePipelining : whether to use HTTP pipelining if possible * - maxConnsPerHost : maximum number of concurrent connections (per host) * @return array $reqs With response array populated for each * @throws Exception @@ -200,7 +198,7 @@ class MultiHttpClient implements LoggerAwareInterface { * @suppress PhanTypeInvalidDimOffset */ private function runMultiCurl( array $reqs, array $opts ) { - $chm = $this->getCurlMulti( $opts ); + $chm = $this->getCurlMulti(); $selectTimeout = $this->getSelectTimeout( $opts ); @@ -208,28 +206,49 @@ class MultiHttpClient implements LoggerAwareInterface { $handles = []; foreach ( $reqs as $index => &$req ) { $handles[$index] = $this->getCurlHandle( $req, $opts ); - curl_multi_add_handle( $chm, $handles[$index] ); + if ( count( $reqs ) > 1 ) { + // https://github.com/guzzle/guzzle/issues/349 + curl_setopt( $handles[$index], CURLOPT_FORBID_REUSE, true ); + } } unset( $req ); // don't assign over this by accident + $indexes = array_keys( $reqs ); + if ( isset( $opts['usePipelining'] ) ) { + curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$opts['usePipelining'] ); + } + if ( isset( $opts['maxConnsPerHost'] ) ) { + // Keep these sockets around as they may be needed later in the request + curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$opts['maxConnsPerHost'] ); + } + + // @TODO: use a per-host rolling handle window (e.g. CURLMOPT_MAX_HOST_CONNECTIONS) + $batches = array_chunk( $indexes, $this->maxConnsPerHost ); $infos = []; - // Execute the cURL handles concurrently... - $active = null; // handles still being processed - do { - // Do any available work... + + foreach ( $batches as $batch ) { + // Attach all cURL handles for this batch + foreach ( $batch as $index ) { + curl_multi_add_handle( $chm, $handles[$index] ); + } + // Execute the cURL handles concurrently... + $active = null; // handles still being processed do { - $mrc = curl_multi_exec( $chm, $active ); - $info = curl_multi_info_read( $chm ); - if ( $info !== false ) { - $infos[(int)$info['handle']] = $info; + // Do any available work... + do { + $mrc = curl_multi_exec( $chm, $active ); + $info = curl_multi_info_read( $chm ); + if ( $info !== false ) { + $infos[(int)$info['handle']] = $info; + } + } while ( $mrc == CURLM_CALL_MULTI_PERFORM ); + // Wait (if possible) for available work... + if ( $active > 0 && $mrc == CURLM_OK && curl_multi_select( $chm, $selectTimeout ) == -1 ) { + // PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html + usleep( 5000 ); // 5ms } - } while ( $mrc == CURLM_CALL_MULTI_PERFORM ); - // Wait (if possible) for available work... - if ( $active > 0 && $mrc == CURLM_OK && curl_multi_select( $chm, $selectTimeout ) == -1 ) { - // PHP bug 63411; https://curl.haxx.se/libcurl/c/curl_multi_fdset.html - usleep( 5000 ); // 5ms - } - } while ( $active > 0 && $mrc == CURLM_OK ); + } while ( $active > 0 && $mrc == CURLM_OK ); + } // Remove all of the added cURL handles and check for errors... foreach ( $reqs as $index => &$req ) { @@ -266,6 +285,10 @@ class MultiHttpClient implements LoggerAwareInterface { } unset( $req ); // don't assign over this by accident + // Restore the default settings + curl_multi_setopt( $chm, CURLMOPT_PIPELINING, (int)$this->usePipelining ); + curl_multi_setopt( $chm, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost ); + return $reqs; } @@ -401,31 +424,21 @@ class MultiHttpClient implements LoggerAwareInterface { } /** - * @param array $opts * @return resource * @throws Exception */ - protected function getCurlMulti( array $opts ) { - if ( !$this->cmh ) { + protected function getCurlMulti() { + if ( !$this->multiHandle ) { if ( !function_exists( 'curl_multi_init' ) ) { throw new Exception( "PHP cURL function curl_multi_init missing. " . "Check https://www.mediawiki.org/wiki/Manual:CURL" ); } $cmh = curl_multi_init(); - // Limit the size of the idle connection cache such that consecutive parallel - // request batches to the same host can avoid having to keep making connections + curl_multi_setopt( $cmh, CURLMOPT_PIPELINING, (int)$this->usePipelining ); curl_multi_setopt( $cmh, CURLMOPT_MAXCONNECTS, (int)$this->maxConnsPerHost ); - $this->cmh = $cmh; + $this->multiHandle = $cmh; } - - // Limit the number of in-flight requests for any given host - $maxHostConns = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost; - curl_multi_setopt( $this->cmh, CURLMOPT_MAX_HOST_CONNECTIONS, (int)$maxHostConns ); - // Configure when to multiplex multiple requests onto single TCP handles - $pipelining = $opts['usePipelining'] ?? $this->usePipelining; - curl_multi_setopt( $this->cmh, CURLMOPT_PIPELINING, (int)$pipelining ); - - return $this->cmh; + return $this->multiHandle; } /** @@ -585,8 +598,8 @@ class MultiHttpClient implements LoggerAwareInterface { } function __destruct() { - if ( $this->cmh ) { - curl_multi_close( $this->cmh ); + if ( $this->multiHandle ) { + curl_multi_close( $this->multiHandle ); } } }