Merge "Convert MultiHttpClient to use Guzzle"
[lhc/web/wiklou.git] / includes / libs / MultiHttpClient.php
1 <?php
2 /**
3 * HTTP service client
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 use Psr\Log\LoggerAwareInterface;
24 use Psr\Log\LoggerInterface;
25 use Psr\Log\NullLogger;
26 use Psr\Http\Message\ResponseInterface;
27 use GuzzleHttp\Client;
28
/**
 * Class to handle multiple HTTP requests
 *
 * If curl is available, requests will be made concurrently.
 * Otherwise, they will be made serially.
 *
 * HTTP request maps are arrays that use the following format:
 *   - method   : GET/HEAD/PUT/POST/DELETE
 *   - url      : HTTP/HTTPS URL
 *   - query    : <query parameter field/value associative array> (uses RFC 3986)
 *   - headers  : <header name/value associative array>
 *   - body     : source to get the HTTP request body from;
 *                this can be a string, a resource, or a field/value array;
 *                array bodies are sent as application/x-www-form-urlencoded form
 *                fields, strings and resources are sent verbatim; string bodies of
 *                POST requests default to application/x-www-form-urlencoded unless
 *                a content-type header is given
 *   - sink     : resource to receive the HTTP response body (preferred over stream;
 *                available since 1.33)
 *   - stream   : resource to stream the HTTP response body to
 *                (deprecated since 1.33, use sink instead)
 *   - proxy    : HTTP proxy to use
 *   - flags    : map of boolean flags which supports:
 *                  - relayResponseHeaders : write out header via header()
 * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'.
 *
 * @since 1.23
 */
class MultiHttpClient implements LoggerAwareInterface {
	/** @var float connection timeout in seconds, zero to wait indefinitely */
	protected $connTimeout = 10;
	/** @var float request timeout in seconds, zero to wait indefinitely */
	protected $reqTimeout = 300;
	/** @var string|null proxy */
	protected $proxy;
	/** @var int CURLMOPT_PIPELINING value, only effective if curl is available */
	protected $pipeliningMode = 0;
	/** @var int CURLMOPT_MAXCONNECTS value, only effective if curl is available */
	protected $maxConnsPerHost = 50;
	/** @var string */
	protected $userAgent = 'wikimedia/multi-http-client v1.0';
	/** @var LoggerInterface */
	protected $logger;
	/** @var string|null SSL certificates path */
	protected $caBundlePath;

	/**
	 * @param array $options
	 *   - connTimeout     : default connection timeout (seconds)
	 *   - reqTimeout      : default request timeout (seconds)
	 *   - proxy           : HTTP proxy to use
	 *   - pipeliningMode  : whether to use HTTP pipelining/multiplexing if possible (for all
	 *                       hosts). The exact behavior is dependent on curl version.
	 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
	 *   - userAgent       : The User-Agent header value to send
	 *   - logger          : a \Psr\Log\LoggerInterface instance for debug logging
	 *   - caBundlePath    : path to specific Certificate Authority bundle (if any)
	 * @throws Exception If caBundlePath is given but does not exist
	 *
	 * usePipelining is an alias for pipelining mode, retained for backward compatibility.
	 * If both usePipelining and pipeliningMode are specified, pipeliningMode wins.
	 */
	public function __construct( array $options ) {
		if ( isset( $options['caBundlePath'] ) ) {
			$this->caBundlePath = $options['caBundlePath'];
			if ( !file_exists( $this->caBundlePath ) ) {
				throw new Exception( "Cannot find CA bundle: {$this->caBundlePath}" );
			}
		}

		// Backward compatibility. Applied first so that the loop below lets the
		// newer 'pipeliningMode' option override it when both are specified.
		if ( isset( $options['usePipelining'] ) ) {
			$this->pipeliningMode = $options['usePipelining'];
		}

		static $opts = [
			'connTimeout', 'reqTimeout', 'proxy', 'pipeliningMode', 'maxConnsPerHost',
			'userAgent', 'logger'
		];
		foreach ( $opts as $key ) {
			if ( isset( $options[$key] ) ) {
				$this->$key = $options[$key];
			}
		}

		if ( $this->logger === null ) {
			$this->logger = new NullLogger;
		}
	}

	/**
	 * Execute an HTTP(S) request
	 *
	 * This method returns a response map of:
	 *   - code    : HTTP response code or 0 if there was a serious error
	 *   - reason  : HTTP response reason (empty if there was a serious error)
	 *   - headers : <header name/value associative array>
	 *   - body    : HTTP response body
	 *   - error   : Any error string
	 * The map also stores integer-indexed copies of these values. This lets callers do:
	 * @code
	 *     list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $http->run( $req );
	 * @endcode
	 * @param array $req HTTP request array
	 * @param array $opts
	 *   - connTimeout : connection timeout per request (seconds)
	 *   - reqTimeout  : post-connection timeout per request (seconds)
	 *   - handler     : optional custom handler
	 *                   See http://docs.guzzlephp.org/en/stable/handlers-and-middleware.html
	 * @return array Response array for request
	 * @throws Exception
	 */
	public function run( array $req, array $opts = [] ) {
		return $this->runMulti( [ $req ], $opts )[0]['response'];
	}

	/**
	 * Execute a set of HTTP(S) requests.
	 *
	 * If curl is available, requests will be made concurrently.
	 * Otherwise, they will be made serially.
	 *
	 * The maps are returned by this method with the 'response' field set to a map of:
	 *   - code    : HTTP response code or 0 if there was a serious error
	 *   - reason  : HTTP response reason (empty if there was a serious error)
	 *   - headers : <header name/value associative array>
	 *   - body    : HTTP response body
	 *   - error   : Any error string
	 * The map also stores integer-indexed copies of these values. This lets callers do:
	 * @code
	 *     list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $req['response'];
	 * @endcode
	 * All headers in the 'headers' field are normalized to use lower case names.
	 * This is true for the request headers and the response headers. Integer-indexed
	 * method/URL entries will also be changed to use the corresponding string keys.
	 *
	 * @param array $reqs Map of HTTP request arrays
	 * @param array $opts
	 *   - connTimeout     : connection timeout per request (seconds)
	 *   - reqTimeout      : post-connection timeout per request (seconds)
	 *   - pipeliningMode  : whether to use HTTP pipelining/multiplexing if possible (for all
	 *                       hosts). The exact behavior is dependent on curl version.
	 *   - maxConnsPerHost : maximum number of concurrent connections (per host)
	 *   - handler         : optional custom handler.
	 *                       See http://docs.guzzlephp.org/en/stable/handlers-and-middleware.html
	 * @return array $reqs With response array populated for each
	 * @throws Exception
	 *
	 * usePipelining is an alias for pipelining mode, retained for backward compatibility.
	 * If both usePipelining and pipeliningMode are specified, pipeliningMode wins.
	 */
	public function runMulti( array $reqs, array $opts = [] ) {
		$this->normalizeRequests( $reqs );
		return $this->runMultiGuzzle( $reqs, $opts );
	}

	/**
	 * Determines if the curl extension is available
	 *
	 * @return bool true if curl is available, false otherwise.
	 */
	protected function isCurlEnabled() {
		return extension_loaded( 'curl' );
	}

	/**
	 * Execute a set of (already normalized) HTTP(S) requests concurrently
	 *
	 * @see MultiHttpClient::runMulti()
	 *
	 * @param array $reqs Map of HTTP request arrays
	 * @param array $opts
	 * @return array $reqs With response array populated for each
	 * @throws Exception
	 */
	private function runMultiGuzzle( array $reqs, array $opts = [] ) {
		$client = new Client( $this->getGuzzleClientOptions( $opts ) );

		// Queue everything first; the transfers are driven by wait() below
		$promises = [];
		foreach ( $reqs as $index => $req ) {
			$promises[$index] = $client->requestAsync(
				$req['method'],
				$this->getRequestUrl( $req ),
				$this->getGuzzleRequestOptions( $req )
			);
		}

		// settle() never rejects: each entry reports its own state instead
		$results = GuzzleHttp\Promise\settle( $promises )->wait();

		foreach ( $results as $index => $result ) {
			if ( $result['state'] === 'fulfilled' ) {
				$this->guzzleHandleSuccess( $reqs[$index], $result['value'] );
			} elseif ( $result['state'] === 'rejected' ) {
				$this->guzzleHandleFailure( $reqs[$index], $result['reason'] );
			} else {
				// This should never happen, and exists only in case of changes to guzzle
				throw new UnexpectedValueException(
					"Unrecognized result state: {$result['state']}" );
			}
		}

		// Add the integer-indexed aliases that allow list() destructuring
		foreach ( $reqs as &$req ) {
			$req['response'][0] = $req['response']['code'];
			$req['response'][1] = $req['response']['reason'];
			$req['response'][2] = $req['response']['headers'];
			$req['response'][3] = $req['response']['body'];
			$req['response'][4] = $req['response']['error'];
		}
		// Break the reference so later use of $req cannot alter the last request
		unset( $req );

		return $reqs;
	}

	/**
	 * Build the options used to construct the Guzzle client for one batch
	 *
	 * @param array $opts Per-call options as accepted by runMulti()
	 * @return array Guzzle client options
	 */
	private function getGuzzleClientOptions( array $opts ) {
		$guzzleOptions = [
			'timeout' => $opts['reqTimeout'] ?? $this->reqTimeout,
			'connect_timeout' => $opts['connTimeout'] ?? $this->connTimeout,
			'allow_redirects' => [
				'max' => 4,
			],
			// Default only; a per-request user-agent header takes precedence
			'headers' => [
				'user-agent' => $this->userAgent,
			],
		];

		if ( $this->caBundlePath !== null ) {
			$guzzleOptions['verify'] = $this->caBundlePath;
		}

		if ( isset( $opts['handler'] ) ) {
			$guzzleOptions['handler'] = $opts['handler'];
		} elseif ( $this->isCurlEnabled() && function_exists( 'curl_multi_init' ) ) {
			// Per-call options override class-level options; 'usePipelining' is
			// the backward compatible alias of 'pipeliningMode'.
			$pipeliningMode = $opts['pipeliningMode']
				?? $opts['usePipelining']
				?? $this->pipeliningMode;
			$maxConnsPerHost = $opts['maxConnsPerHost'] ?? $this->maxConnsPerHost;

			// CURLMOPT_* values are options of the curl *multi* handle. They must
			// not be passed through Guzzle's 'curl' request option, which sets
			// options on the individual easy handles, where the same integer
			// values mean something unrelated (CURLMOPT_PIPELINING shares its
			// value with CURLOPT_PORT). Hand them to the multi handler instead.
			// Our defaults may differ from curl's defaults, depending on curl version.
			// NOTE(review): relies on CurlMultiHandler supporting the 'options'
			// key; Guzzle versions without it ignore these settings - confirm
			// against the bundled Guzzle version.
			$guzzleOptions['handler'] = GuzzleHttp\HandlerStack::create(
				new GuzzleHttp\Handler\CurlMultiHandler( [
					'options' => [
						CURLMOPT_PIPELINING => (int)$pipeliningMode,
						CURLMOPT_MAXCONNECTS => (int)$maxConnsPerHost,
					],
				] )
			);
		}

		return $guzzleOptions;
	}

	/**
	 * Translate one normalized request map into Guzzle request options
	 *
	 * @param array $req Normalized HTTP request array
	 * @return array Guzzle request options
	 */
	private function getGuzzleRequestOptions( array $req ) {
		$reqOptions = [
			'proxy' => $req['proxy'] ?? $this->proxy,
		];

		// Forward every request header (names were lower-cased by normalizeRequests())
		if ( $req['headers'] ) {
			$reqOptions['headers'] = $req['headers'];
		}

		$body = $req['body'];
		if ( is_array( $body ) ) {
			// Field/value map: let Guzzle encode it as a urlencoded form
			$reqOptions['form_params'] = $body;
		} elseif ( $body !== '' ) {
			// Strings and resources are sent as-is, whatever the method
			$reqOptions['body'] = $body;
			if (
				$req['method'] === 'POST' &&
				is_string( $body ) &&
				!isset( $req['headers']['content-type'] )
			) {
				$reqOptions['headers']['content-type'] = 'application/x-www-form-urlencoded';
			}
		}

		if ( $req['method'] === 'POST' ) {
			// Suppress 'Expect: 100-continue' header, as some servers
			// will reject it with a 417 and Curl won't auto retry
			// with HTTP 1.0 fallback
			$reqOptions['expect'] = false;
		}

		// Backward compatibility for pre-Guzzle naming
		if ( isset( $req['sink'] ) ) {
			$reqOptions['sink'] = $req['sink'];
		} elseif ( isset( $req['stream'] ) ) {
			$reqOptions['sink'] = $req['stream'];
		}

		if ( !empty( $req['flags']['relayResponseHeaders'] ) ) {
			$reqOptions['on_headers'] = static function ( ResponseInterface $response ) {
				foreach ( $response->getHeaders() as $name => $values ) {
					// The first value replaces any header of that name already
					// queued; further values are appended so that multi-valued
					// headers (e.g. Set-Cookie) are not reduced to the last one.
					$replace = true;
					foreach ( $values as $value ) {
						header( $name . ': ' . $value, $replace );
						$replace = false;
					}
				}
			};
		}

		return $reqOptions;
	}

	/**
	 * Get the request URL with the 'query' parameters appended
	 *
	 * @param array $req Normalized HTTP request array
	 * @return string
	 */
	private function getRequestUrl( array $req ) {
		$url = $req['url'];
		$query = http_build_query( $req['query'], '', '&', PHP_QUERY_RFC3986 );
		if ( $query !== '' ) {
			// Extend an existing query string rather than starting a second one
			$url .= ( strpos( $url, '?' ) === false ? '?' : '&' ) . $query;
		}
		return $url;
	}

	/**
	 * Called for successful requests
	 *
	 * @param array &$req the original request; its 'response' field is populated
	 * @param ResponseInterface $response
	 */
	private function guzzleHandleSuccess( &$req, $response ) {
		// The body went to the caller's resource when 'sink' (or its deprecated
		// alias 'stream') was given, so it is not duplicated in the response map.
		$hasSink = isset( $req['sink'] ) || isset( $req['stream'] );

		$req['response'] = [
			'code' => $response->getStatusCode(),
			'reason' => $response->getReasonPhrase(),
			'headers' => $this->parseHeaders( $response->getHeaders() ),
			// The string cast reads the whole body, rewinding it first if possible
			'body' => $hasSink ? '' : (string)$response->getBody(),
			'error' => '',
		];
	}

	/**
	 * Called for failed requests
	 *
	 * @param array &$req the original request; its 'response' field is populated
	 * @param Exception $reason
	 */
	private function guzzleHandleFailure( &$req, $reason ) {
		$req['response'] = [
			'code' => $reason->getCode(),
			'reason' => '',
			'headers' => [],
			'body' => '',
			'error' => $reason->getMessage(),
		];

		// HTTP-level failures still carry the server's response; expose it
		if ( $reason instanceof GuzzleHttp\Exception\RequestException ) {
			$response = $reason->getResponse();
			if ( $response ) {
				$req['response']['reason'] = $response->getReasonPhrase();
				$req['response']['headers'] = $this->parseHeaders( $response->getHeaders() );
				$req['response']['body'] = (string)$response->getBody();
			}
		}

		$this->logger->warning( "Error fetching URL \"{$req['url']}\": " .
			$req['response']['error'] );
	}

	/**
	 * Parses response headers.
	 *
	 * Header names are lower-cased and multiple values of one header are
	 * joined with ', '.
	 *
	 * @param string[][] $guzzleHeaders
	 * @return array
	 */
	private function parseHeaders( $guzzleHeaders ) {
		$headers = [];
		foreach ( $guzzleHeaders as $name => $values ) {
			$headers[strtolower( $name )] = implode( ', ', $values );
		}
		return $headers;
	}

	/**
	 * Normalize request information
	 *
	 * Gives every request an empty 'response' map, converts the short-form
	 * integer keys to 'method'/'url', lower-cases header names and fills in
	 * defaults for 'query', 'body' and 'flags'.
	 *
	 * @param array &$reqs the requests to normalize
	 * @throws Exception If a request lacks a method or a URL
	 */
	private function normalizeRequests( array &$reqs ) {
		foreach ( $reqs as &$req ) {
			$req['response'] = [
				'code' => 0,
				'reason' => '',
				'headers' => [],
				'body' => '',
				'error' => ''
			];
			if ( isset( $req[0] ) ) {
				$req['method'] = $req[0]; // short-form
				unset( $req[0] );
			}
			if ( isset( $req[1] ) ) {
				$req['url'] = $req[1]; // short-form
				unset( $req[1] );
			}
			if ( !isset( $req['method'] ) ) {
				throw new Exception( "Request has no 'method' field set." );
			} elseif ( !isset( $req['url'] ) ) {
				throw new Exception( "Request has no 'url' field set." );
			}
			$this->logger->debug( "{$req['method']}: {$req['url']}" );
			$req['query'] = $req['query'] ?? [];
			$headers = []; // normalized headers
			if ( isset( $req['headers'] ) ) {
				foreach ( $req['headers'] as $name => $value ) {
					$headers[strtolower( $name )] = $value;
				}
			}
			$req['headers'] = $headers;
			if ( !isset( $req['body'] ) ) {
				$req['body'] = '';
				$req['headers']['content-length'] = 0;
			}
			$req['flags'] = $req['flags'] ?? [];
		}
		// Break the reference left behind by the by-reference loop
		unset( $req );
	}

	/**
	 * Register a logger
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}
}