6c913d2a442daf30871b378f524f3a3dce3639bf
[lhc/web/wiklou.git] / includes / sparql / SparqlClient.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 */
20
21 namespace MediaWiki\Sparql;
22
23 use Http;
24 use MediaWiki\Http\HttpRequestFactory;
25
26 /**
27 * Simple SPARQL client
28 *
29 * @author Stas Malyshev
30 */
class SparqlClient {

	/**
	 * Maximum length (in bytes) of a query that is still sent by GET.
	 * Longer queries are sent in the POST body instead.
	 */
	const MAX_GET_SIZE = 2048;

	/**
	 * User agent for HTTP requests.
	 * @var string
	 */
	private $userAgent;

	/**
	 * Query timeout (seconds)
	 * @var int
	 */
	private $timeout = 30;

	/**
	 * SPARQL endpoint URL
	 * @var string
	 */
	private $endpoint;

	/**
	 * Client options, merged into the options passed to the HTTP request.
	 * @var array
	 */
	private $options = [];

	/**
	 * Factory used to create the HTTP request for each query.
	 * @var HttpRequestFactory
	 */
	private $requestFactory;

	/**
	 * @param string $url SPARQL Endpoint
	 * @param HttpRequestFactory $requestFactory
	 */
	public function __construct( $url, HttpRequestFactory $requestFactory ) {
		$this->endpoint = $url;
		$this->requestFactory = $requestFactory;
		// Start from the default HTTP user agent and mark requests as coming from this client.
		$this->userAgent = Http::userAgent() . " SparqlClient";
	}

	/**
	 * Set query timeout (in seconds).
	 *
	 * Negative values are ignored and leave the current timeout unchanged.
	 *
	 * @param int $timeout
	 * @return $this
	 */
	public function setTimeout( $timeout ) {
		if ( $timeout >= 0 ) {
			$this->timeout = $timeout;
		}
		return $this;
	}

	/**
	 * Set client options.
	 *
	 * The given array replaces any previously set options. In query() these
	 * options override the default request method, and a non-empty 'userAgent'
	 * entry takes precedence over the client's own user agent.
	 *
	 * @param array $options
	 * @return $this
	 */
	public function setClientOptions( $options ) {
		$this->options = $options;
		return $this;
	}

	/**
	 * Get current user agent.
	 * @return string
	 */
	public function getUserAgent() {
		return $this->userAgent;
	}

	/**
	 * Set user agent string.
	 *
	 * Note it is not recommended to completely override user agent for
	 * most applications.
	 * @see appendUserAgent() for recommended way of specifying user agent.
	 *
	 * @param string $agent
	 */
	public function setUserAgent( $agent ) {
		$this->userAgent = $agent;
	}

	/**
	 * Append specific string to user agent.
	 *
	 * This is the recommended way of specifying the user agent
	 * for specific applications of the SparqlClient inside MediaWiki
	 * and extension code. The string is appended after a single space.
	 *
	 * @param string $agent
	 */
	public function appendUserAgent( $agent ) {
		$this->userAgent .= ' ' . $agent;
	}

	/**
	 * Query SPARQL endpoint
	 *
	 * Queries longer than MAX_GET_SIZE bytes are sent by POST, shorter ones
	 * use the configured method (GET by default).
	 *
	 * @param string $sparql query
	 * @param bool $rawData Whether to return only values or full data objects
	 *
	 * @return array List of results, one row per array element
	 *  Each row will contain fields indexed by variable name.
	 * @throws SparqlException If the endpoint is empty, the HTTP request fails,
	 *  or the response cannot be decoded as JSON.
	 */
	public function query( $sparql, $rawData = false ) {
		if ( empty( $this->endpoint ) ) {
			throw new SparqlException( 'Endpoint URL can not be empty' );
		}
		$queryData = [ "query" => $sparql, "format" => "json" ];
		// Client options take precedence over the default method.
		$options = array_merge( [ 'method' => 'GET' ], $this->options );

		if ( empty( $options['userAgent'] ) ) {
			$options['userAgent'] = $this->userAgent;
		}

		if ( $this->timeout >= 0 ) {
			// Blazegraph setting, see https://wiki.blazegraph.com/wiki/index.php/REST_API
			$queryData['maxQueryTimeMillis'] = $this->timeout * 1000;
			$options['timeout'] = $this->timeout;
		}

		if ( strlen( $sparql ) > self::MAX_GET_SIZE ) {
			// big requests go to POST: the query moves from the URL into the request body
			$options['method'] = 'POST';
			$options['postData'] = 'query=' . urlencode( $sparql );
			unset( $queryData['query'] );
		}

		$url = wfAppendQuery( $this->endpoint, $queryData );
		$request = $this->requestFactory->create( $url, $options, __METHOD__ );

		$status = $request->execute();

		if ( !$status->isOK() ) {
			throw new SparqlException( "HTTP error: {$status->getWikiText()}" );
		}
		$result = $request->getContent();
		\MediaWiki\suppressWarnings();
		$data = json_decode( $result, true );
		\MediaWiki\restoreWarnings();
		if ( $data === null || $data === false ) {
			// Quote only the first 1024 bytes of the response so the message stays bounded.
			throw new SparqlException( "HTTP request failed, response:\n" .
				substr( $result, 0, 1024 ) );
		}

		return $this->extractData( $data, $rawData );
	}

	/**
	 * Extract data from SPARQL response format.
	 * The response must be in format described in:
	 * https://www.w3.org/TR/sparql11-results-json/
	 *
	 * Variables listed in head.vars that are not bound in a row are
	 * returned as null for that row.
	 *
	 * @param array $data SPARQL result
	 * @param bool $rawData Whether to return only values or full data objects
	 *
	 * @return array List of results, one row per element.
	 */
	private function extractData( $data, $rawData = false ) {
		$result = [];
		if ( !$data || empty( $data['results'] ) ) {
			return $result;
		}

		// Tolerate responses lacking the variable list or the bindings list
		// instead of raising notices on the missing keys.
		$vars = isset( $data['head']['vars'] ) ? $data['head']['vars'] : [];
		$bindings = isset( $data['results']['bindings'] ) ? $data['results']['bindings'] : [];

		foreach ( $bindings as $row ) {
			$resrow = [];
			foreach ( $vars as $var ) {
				if ( !isset( $row[$var] ) ) {
					$resrow[$var] = null;
				} elseif ( $rawData ) {
					$resrow[$var] = $row[$var];
				} else {
					$resrow[$var] = $row[$var]['value'];
				}
			}
			$result[] = $resrow;
		}
		return $result;
	}

}