5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 use Psr\Log\LoggerAwareInterface
;
24 use Psr\Log\LoggerInterface
;
25 use Psr\Log\NullLogger
;
26 use Psr\Http\Message\ResponseInterface
;
27 use GuzzleHttp\Client
;
/**
 * Class to handle multiple HTTP requests
 *
 * If curl is available, requests will be made concurrently.
 * Otherwise, they will be made serially.
 *
 * HTTP request maps are arrays that use the following format:
 *   - method   : GET/HEAD/PUT/POST/DELETE
 *   - url      : HTTP/HTTPS URL
 *   - query    : <query parameter field/value associative array> (uses RFC 3986)
 *   - headers  : <header name/value associative array>
 *   - body     : source to get the HTTP request body from;
 *                this can simply be a string (always), a resource for
 *                PUT requests, and a field/value array for POST request;
 *                array bodies are encoded as multipart/form-data and strings
 *                use application/x-www-form-urlencoded (headers sent automatically)
 *   - sink     : resource to receive the HTTP response body (preferred over stream)
 *   - stream   : resource to stream the HTTP response body to
 *                @deprecated since 1.33, use sink instead
 *   - proxy    : HTTP proxy to use
 *   - flags    : map of boolean flags which supports:
 *                  - relayResponseHeaders : write out header via header()
 * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'.
 *
 * NOTE(review): runMultiGuzzle() hands POST field/value arrays to Guzzle's
 * 'form_params' request option; confirm the multipart/form-data statement above
 * against that option's documented encoding.
 */
class MultiHttpClient implements LoggerAwareInterface {
	/** @var float connection timeout in seconds, zero to wait indefinitely */
	protected $connTimeout = 10;
	/** @var float request timeout in seconds, zero to wait indefinitely */
	protected $reqTimeout = 300;
	/** @var string|null proxy */
	protected $proxy;
	/** @var int CURLMOPT_PIPELINING value, only effective if curl is available */
	protected $pipeliningMode = 0;
	/** @var int CURLMOPT_MAXCONNECTS value, only effective if curl is available */
	protected $maxConnsPerHost = 50;
	/** @var string User-Agent header value sent with every request by default */
	protected $userAgent = 'wikimedia/multi-http-client v1.0';
	/** @var LoggerInterface */
	protected $logger;
	/** @var string|null SSL certificates path */
	protected $caBundlePath;
/**
 * @param array $options
 *   - connTimeout     : default connection timeout (seconds)
 *   - reqTimeout      : default request timeout (seconds)
 *   - proxy           : HTTP proxy to use
 *   - pipeliningMode  : whether to use HTTP pipelining/multiplexing if possible (for all
 *                       hosts). The exact behavior is dependent on curl version.
 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
 *   - userAgent       : The User-Agent header value to send
 *   - logger          : a \Psr\Log\LoggerInterface instance for debug logging
 *   - caBundlePath    : path to specific Certificate Authority bundle (if any)
 * @throws Exception If caBundlePath is given but no file exists at that path
 *
 * usePipelining is an alias for pipelining mode, retained for backward compatibility.
 * If both usePipelining and pipeliningMode are specified, pipeliningMode wins.
 */
public function __construct( array $options ) {
	$caBundle = $options['caBundlePath'] ?? null;
	if ( $caBundle !== null ) {
		$this->caBundlePath = $caBundle;
		if ( !file_exists( $this->caBundlePath ) ) {
			throw new Exception( "Cannot find CA bundle: {$this->caBundlePath}" );
		}
	}

	// The legacy alias is applied first, so an explicit 'pipeliningMode'
	// picked up by the loop below overrides it.
	$this->pipeliningMode = $options['usePipelining'] ?? $this->pipeliningMode;

	$settable = [
		'connTimeout', 'reqTimeout', 'proxy', 'pipeliningMode', 'maxConnsPerHost',
		'userAgent', 'logger'
	];
	foreach ( $settable as $name ) {
		if ( !isset( $options[$name] ) ) {
			continue;
		}
		$this->$name = $options[$name];
	}

	// Always have a logger so callers never need a null check.
	$this->logger = $this->logger ?? new NullLogger;
}
/**
 * Execute an HTTP(S) request
 *
 * This method returns a response map of:
 *   - code    : HTTP response code or 0 if there was a serious error
 *   - reason  : HTTP response reason (empty if there was a serious error)
 *   - headers : <header name/value associative array>
 *   - body    : HTTP response body
 *   - error   : Any error string
 * The map also stores integer-indexed copies of these values. This lets callers do:
 * @code
 *     list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $http->run( $req );
 * @endcode
 * @param array $req HTTP request array
 * @param array $opts
 *   - connTimeout : connection timeout per request (seconds)
 *   - reqTimeout  : post-connection timeout per request (seconds)
 *   - handler     : optional custom handler
 *                   See http://docs.guzzlephp.org/en/stable/handlers-and-middleware.html
 * @return array Response array for request
 * @throws Exception If the request lacks a 'method' or 'url'
 */
public function run( array $req, array $opts = [] ) {
	// Delegate to the batch API with a one-element batch and unwrap the result.
	$completed = $this->runMulti( [ $req ], $opts );

	return $completed[0]['response'];
}
/**
 * Execute a set of HTTP(S) requests.
 *
 * If curl is available, requests will be made concurrently.
 * Otherwise, they will be made serially.
 *
 * The maps are returned by this method with the 'response' field set to a map of:
 *   - code    : HTTP response code or 0 if there was a serious error
 *   - reason  : HTTP response reason (empty if there was a serious error)
 *   - headers : <header name/value associative array>
 *   - body    : HTTP response body
 *   - error   : Any error string
 * The map also stores integer-indexed copies of these values. This lets callers do:
 * @code
 *     list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $req['response'];
 * @endcode
 * All headers in the 'headers' field are normalized to use lower case names.
 * This is true for the request headers and the response headers. Integer-indexed
 * method/URL entries will also be changed to use the corresponding string keys.
 *
 * @param array $reqs Map of HTTP request arrays
 * @param array $opts
 *   - connTimeout     : connection timeout per request (seconds)
 *   - reqTimeout      : post-connection timeout per request (seconds)
 *   - pipeliningMode  : whether to use HTTP pipelining/multiplexing if possible (for all
 *                       hosts). The exact behavior is dependent on curl version.
 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
 *   - handler         : optional custom handler.
 *                       See http://docs.guzzlephp.org/en/stable/handlers-and-middleware.html
 * @return array $reqs With response array populated for each
 * @throws Exception If any request lacks a 'method' or 'url'
 *
 * usePipelining is an alias for pipelining mode, retained for backward compatibility.
 * If both usePipelining and pipeliningMode are specified, pipeliningMode wins.
 */
public function runMulti( array $reqs, array $opts = [] ) {
	// Normalization edits $reqs in place (it is taken by reference).
	$this->normalizeRequests( $reqs );

	$withResponses = $this->runMultiGuzzle( $reqs, $opts );

	return $withResponses;
}
/**
 * Determines if the curl extension is available
 *
 * @return bool true if curl is available, false otherwise.
 */
protected function isCurlEnabled() {
	$curlLoaded = extension_loaded( 'curl' );

	return $curlLoaded;
}
/**
 * Execute a set of HTTP(S) requests concurrently
 *
 * Builds one Guzzle client for the whole batch, issues every request
 * asynchronously, waits for all of them to settle, and then records each
 * outcome in the request's 'response' field (named keys plus integer-indexed
 * copies).
 *
 * @see MultiHttpClient::runMulti()
 *
 * @param array $reqs Map of HTTP request arrays
 * @param array $opts Per-call overrides: connTimeout, reqTimeout, pipeliningMode
 *   (or its alias usePipelining), maxConnsPerHost, handler
 * @return array $reqs With response array populated for each
 * @throws UnexpectedValueException If a settled promise reports an unknown state
 */
private function runMultiGuzzle( array $reqs, array $opts = [] ) {
	$guzzleOptions = [
		'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout,
		'connect_timeout' => $opts['connTimeout'] ?? $this->connTimeout,
		'allow_redirects' => [
			// NOTE(review): the contents of this array were not visible in the
			// reviewed excerpt; a cap of 4 redirects is assumed - confirm.
			'max' => 4,
		],
	];

	if ( $this->caBundlePath !== null ) {
		$guzzleOptions['verify'] = $this->caBundlePath;
	}

	// Include curl-specific option section only if curl is available.
	// Our defaults may differ from curl's defaults, depending on curl version.
	if ( $this->isCurlEnabled() ) {
		// Backward compatibility
		$optsPipeliningMode = $opts['pipeliningMode'] ?? ( $opts['usePipelining'] ?? null );

		// Per-request options override class-level options
		$pipeliningMode = $optsPipeliningMode ?? $this->pipeliningMode;
		$maxConnsPerHost = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;

		$guzzleOptions['curl'][CURLMOPT_PIPELINING] = (int)$pipeliningMode;
		$guzzleOptions['curl'][CURLMOPT_MAXCONNECTS] = (int)$maxConnsPerHost;
	}

	if ( isset( $opts['handler'] ) ) {
		$guzzleOptions['handler'] = $opts['handler'];
	}

	// Client-level default; a request-level 'user-agent' header takes precedence.
	$guzzleOptions['headers']['user-agent'] = $this->userAgent;

	$client = new Client( $guzzleOptions );

	$promises = [];
	foreach ( $reqs as $index => $req ) {
		$reqOptions = [
			'proxy' => $req['proxy'] ?? $this->proxy,
		];

		$headers = $req['headers'] ?? [];
		$body = $req['body'] ?? '';
		$isPost = $req['method'] === 'POST';

		if ( $isPost && is_array( $body ) ) {
			// Field/value map: let Guzzle encode it as form fields.
			$reqOptions['form_params'] = $body;
		} elseif ( !is_array( $body ) && $body !== '' ) {
			// String or resource body (e.g. PUT uploads, pre-encoded POST data).
			// Guzzle's 'form_params' only accepts arrays, so these go via 'body'.
			$reqOptions['body'] = $body;
			if ( $isPost && is_string( $body ) && !isset( $headers['content-type'] ) ) {
				// The request-map contract says string POST bodies are sent as
				// application/x-www-form-urlencoded with the header added automatically.
				$headers['content-type'] = 'application/x-www-form-urlencoded';
			}
		}

		if ( $isPost ) {
			// Suppress 'Expect: 100-continue' header, as some servers
			// will reject it with a 417 and Curl won't auto retry
			// with HTTP 1.0 fallback
			$reqOptions['expect'] = false;
		}

		// Forward every caller-supplied header, not only user-agent, so that
		// e.g. authorization or content-type headers are not silently dropped.
		if ( $headers ) {
			$reqOptions['headers'] = $headers;
		}

		// Backward compatibility for pre-Guzzle naming
		if ( isset( $req['sink'] ) ) {
			$reqOptions['sink'] = $req['sink'];
		} elseif ( isset( $req['stream'] ) ) {
			$reqOptions['sink'] = $req['stream'];
		}

		if ( !empty( $req['flags']['relayResponseHeaders'] ) ) {
			$reqOptions['on_headers'] = static function ( ResponseInterface $response ) {
				foreach ( $response->getHeaders() as $name => $values ) {
					// The first value replaces any header of the same name; further
					// values are appended so multi-valued headers (such as
					// Set-Cookie) are relayed in full instead of last-one-wins.
					$replace = true;
					foreach ( $values as $value ) {
						header( $name . ': ' . $value . "\r\n", $replace );
						$replace = false;
					}
				}
			};
		}

		$url = $req['url'];
		$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $query !== '' ) {
			$url .= strpos( $req['url'], '?' ) === false ? "?$query" : "&$query";
		}
		$promises[$index] = $client->requestAsync( $req['method'], $url, $reqOptions );
	}

	// settle() waits for every promise and never throws for individual failures.
	$results = GuzzleHttp\Promise\settle( $promises )->wait();

	foreach ( $results as $index => $result ) {
		if ( $result['state'] === 'fulfilled' ) {
			$this->guzzleHandleSuccess( $reqs[$index], $result['value'] );
		} elseif ( $result['state'] === 'rejected' ) {
			$this->guzzleHandleFailure( $reqs[$index], $result['reason'] );
		} else {
			// This should never happen, and exists only in case of changes to guzzle
			throw new UnexpectedValueException(
				"Unrecognized result state: {$result['state']}" );
		}
	}

	// Mirror the named response fields under integer keys so callers can list() them.
	foreach ( $reqs as &$req ) {
		$req['response'][0] = $req['response']['code'];
		$req['response'][1] = $req['response']['reason'];
		$req['response'][2] = $req['response']['headers'];
		$req['response'][3] = $req['response']['body'];
		$req['response'][4] = $req['response']['error'];
	}
	// Break the reference so the last element is not aliased by $req.
	unset( $req );

	return $reqs;
}
/**
 * Called for successful requests
 *
 * Stores the response map (code, reason, headers, body, error) on the request.
 * When the caller supplied a 'sink' (or its deprecated alias 'stream'), the
 * body was already written to that resource, so 'body' is left empty rather
 * than being read back into memory.
 *
 * @param array &$req the original request
 * @param ResponseInterface $response
 */
private function guzzleHandleSuccess( &$req, $response ) {
	// 'stream' is mapped onto Guzzle's sink when the request is sent, so it
	// must be treated the same way here.
	$bodyWasStreamed = isset( $req['sink'] ) || isset( $req['stream'] );

	$req['response'] = [
		'code' => $response->getStatusCode(),
		'reason' => $response->getReasonPhrase(),
		'headers' => $this->parseHeaders( $response->getHeaders() ),
		'body' => $bodyWasStreamed ? '' : $response->getBody()->getContents(),
		'error' => '',
	];
}
/**
 * Called for failed requests
 *
 * Records the failure on the request's 'response' field and logs a warning.
 * If the failure carries an HTTP response, its reason phrase, headers and body
 * are copied into the response map as well.
 *
 * @param array &$req the original request
 * @param Exception $reason
 */
private function guzzleHandleFailure( &$req, $reason ) {
	$failure = [
		'code' => $reason->getCode(),
		'reason' => '',
		'headers' => [],
		'body' => '',
		'error' => $reason->getMessage(),
	];

	$carriesResponse = $reason instanceof GuzzleHttp\Exception\RequestException
		&& $reason->hasResponse();
	$response = $carriesResponse ? $reason->getResponse() : null;

	if ( $response ) {
		$failure['reason'] = $response->getReasonPhrase();
		$failure['headers'] = $this->parseHeaders( $response->getHeaders() );
		$failure['body'] = $response->getBody()->getContents();
	}

	$req['response'] = $failure;

	$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
		$req['response']['error'] );
}
/**
 * Parses response headers.
 *
 * Lower-cases every header name and joins the values of each header into a
 * single string separated by ', '.
 *
 * @param string[][] $guzzleHeaders Map of header name to list of values
 * @return array Map of lower-case header name to joined value string
 */
private function parseHeaders( $guzzleHeaders ) {
	$lowerNames = array_map( 'strtolower', array_keys( $guzzleHeaders ) );
	$joinedValues = array_map(
		function ( $values ) {
			return implode( ', ', $values );
		},
		array_values( $guzzleHeaders )
	);

	// When two names differ only by case, the later entry wins.
	return array_combine( $lowerNames, $joinedValues );
}
/**
 * Normalize request information
 *
 * For every request: moves short-form integer keys (0, 1) to 'method' and
 * 'url', checks both are present, logs the request at debug level, defaults
 * 'query' and 'flags' to empty arrays, lower-cases header names, and gives
 * body-less requests an empty body with a zero content-length header.
 *
 * @param array &$reqs the requests to normalize
 * @throws Exception If a request has no 'method' or no 'url'
 */
private function normalizeRequests( array &$reqs ) {
	foreach ( $reqs as &$req ) {
		// NOTE(review): this initial response map was not visible in the reviewed
		// excerpt; it is reconstructed from the documented response format - confirm.
		$req['response'] = [
			'code'     => 0,
			'reason'   => '',
			'headers'  => [],
			'body'     => '',
			'error'    => ''
		];

		// Short-form entries: index 0 is the method, index 1 is the URL.
		foreach ( [ 0 => 'method', 1 => 'url' ] as $position => $field ) {
			if ( isset( $req[$position] ) ) {
				$req[$field] = $req[$position];
				unset( $req[$position] );
			}
		}

		if ( !isset( $req['method'] ) ) {
			throw new Exception( "Request has no 'method' field set." );
		}
		if ( !isset( $req['url'] ) ) {
			throw new Exception( "Request has no 'url' field set." );
		}

		$this->logger->debug( "{$req['method']}: {$req['url']}" );

		if ( !isset( $req['query'] ) ) {
			$req['query'] = [];
		}

		// Header names are matched case-insensitively, so store them lower-cased.
		$lowered = [];
		foreach ( $req['headers'] ?? [] as $name => $value ) {
			$lowered[strtolower( $name )] = $value;
		}
		$req['headers'] = $lowered;

		if ( !isset( $req['body'] ) ) {
			$req['body'] = '';
			$req['headers']['content-length'] = 0;
		}

		if ( !isset( $req['flags'] ) ) {
			$req['flags'] = [];
		}
	}
	unset( $req );
}
/**
 * Register a logger and use this for logging errors and debug output.
 *
 * @param LoggerInterface $logger Replaces the logger currently held by this client
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}