From: Stanislav Malyshev Date: Sat, 13 Jan 2018 01:00:28 +0000 (-0800) Subject: Add SPARQL client to core X-Git-Tag: 1.31.0-rc.0~617^2 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=7b27f210ba493283a635b1cc72edc173de05044f Add SPARQL client to core This will be used for deep category search implementation, also Wikibase one in repo/maintenance will be changed to use the same codebase. Bug: T185127 Change-Id: Ie8dd4a5aff55d90f02426f1430ed5214c7327bbc --- diff --git a/autoload.php b/autoload.php index 6fb2cc425b..7f1a47f497 100644 --- a/autoload.php +++ b/autoload.php @@ -938,6 +938,8 @@ $wgAutoloadLocalClasses = [ 'MediaWiki\\Shell\\Result' => __DIR__ . '/includes/shell/Result.php', 'MediaWiki\\Shell\\Shell' => __DIR__ . '/includes/shell/Shell.php', 'MediaWiki\\Site\\MediaWikiPageNameNormalizer' => __DIR__ . '/includes/site/MediaWikiPageNameNormalizer.php', + 'MediaWiki\\Sparql\\SparqlClient' => __DIR__ . '/includes/sparql/SparqlClient.php', + 'MediaWiki\\Sparql\\SparqlException' => __DIR__ . '/includes/sparql/SparqlException.php', 'MediaWiki\\Storage\\BlobAccessException' => __DIR__ . '/includes/Storage/BlobAccessException.php', 'MediaWiki\\Storage\\BlobStore' => __DIR__ . '/includes/Storage/BlobStore.php', 'MediaWiki\\Storage\\BlobStoreFactory' => __DIR__ . '/includes/Storage/BlobStoreFactory.php', diff --git a/includes/sparql/SparqlClient.php b/includes/sparql/SparqlClient.php new file mode 100644 index 0000000000..6c913d2a44 --- /dev/null +++ b/includes/sparql/SparqlClient.php @@ -0,0 +1,220 @@ +endpoint = $url; + $this->requestFactory = $requestFactory; + $this->userAgent = Http::userAgent() . 
" SparqlClient"; + } + + /** + * Set query timeout (in seconds). + * Negative values are ignored and leave the current timeout unchanged. + * @param int $timeout + * @return $this + */ + public function setTimeout( $timeout ) { + if ( $timeout >= 0 ) { + $this->timeout = $timeout; + } + return $this; + } + + /** + * Set client options, replacing any options set previously. + * @param array $options + * @return $this + */ + public function setClientOptions( $options ) { + $this->options = $options; + return $this; + } + + /** + * Get current user agent. + * @return string + */ + public function getUserAgent() { + return $this->userAgent; + } + + /** + * Set user agent string. + * + * Note: it is not recommended to completely override the user agent for + * most applications. + * @see appendUserAgent() for recommended way of specifying user agent. + * + * @param string $agent + */ + public function setUserAgent( $agent ) { + $this->userAgent = $agent; + } + + /** + * Append specific string to user agent. + * + * This is the recommended way of specifying the user agent + * for specific applications of the SparqlClient inside MediaWiki + * and extension code. + * + * @param string $agent String appended after a single space + */ + public function appendUserAgent( $agent ) { + $this->userAgent .= ' ' . $agent; + } + + /** + * Query SPARQL endpoint + * + * @param string $sparql query + * @param bool $rawData Whether to return only values or full data objects + * + * @return array List of results, one row per array element + * Each row will contain fields indexed by variable name. 
+	 * @throws SparqlException If the endpoint URL is empty, the HTTP request
+	 *  fails, or the response body cannot be decoded as JSON
+	 */
+	public function query( $sparql, $rawData = false ) {
+		if ( empty( $this->endpoint ) ) {
+			throw new SparqlException( 'Endpoint URL can not be empty' );
+		}
+		$queryData = [ "query" => $sparql, "format" => "json" ];
+		// Caller-supplied client options override the default GET method
+		$options = array_merge( [ 'method' => 'GET' ], $this->options );
+
+		if ( empty( $options['userAgent'] ) ) {
+			$options['userAgent'] = $this->userAgent;
+		}
+
+		if ( $this->timeout >= 0 ) {
+			// Blazegraph setting, see https://wiki.blazegraph.com/wiki/index.php/REST_API
+			$queryData['maxQueryTimeMillis'] = $this->timeout * 1000;
+			$options['timeout'] = $this->timeout;
+		}
+
+		if ( strlen( $sparql ) > self::MAX_GET_SIZE ) {
+			// big requests go to POST: the query travels in the request body,
+			// the remaining parameters stay in the URL
+			$options['method'] = 'POST';
+			$options['postData'] = 'query=' . urlencode( $sparql );
+			unset( $queryData['query'] );
+		}
+
+		$url = wfAppendQuery( $this->endpoint, $queryData );
+		$request = $this->requestFactory->create( $url, $options, __METHOD__ );
+
+		$status = $request->execute();
+
+		if ( !$status->isOK() ) {
+			throw new SparqlException( "HTTP error: {$status->getWikiText()}" );
+		}
+		$result = $request->getContent();
+		\MediaWiki\suppressWarnings();
+		$data = json_decode( $result, true );
+		\MediaWiki\restoreWarnings();
+		if ( $data === null || $data === false ) {
+			// Quote only the first 1024 bytes of the undecodable response.
+			// substr( $result, 1024 ) would instead drop those bytes and
+			// yield an empty excerpt for any response shorter than that.
+			throw new SparqlException( "HTTP request failed, response:\n" .
+				substr( $result, 0, 1024 ) );
+		}
+
+		return $this->extractData( $data, $rawData );
+	}
+
+	/**
+	 * Extract data from SPARQL response format.
+	 * The response must be in format described in:
+	 * https://www.w3.org/TR/sparql11-results-json/
+	 *
+	 * @param array $data SPARQL result
+	 * @param bool $rawData Whether to return only values or full data objects
+	 *
+	 * @return array List of results, one row per element.
+ */ + private function extractData( $data, $rawData = false ) { + $result = []; + if ( $data && !empty( $data['results'] ) ) { + // Variable names listed in the response head define the keys of every row + $vars = $data['head']['vars']; + $resrow = []; + foreach ( $data['results']['bindings'] as $row ) { + foreach ( $vars as $var ) { + // Variable has no binding in this row: report it as null + if ( !isset( $row[$var] ) ) { + $resrow[$var] = null; + continue; + } + // Raw mode keeps the whole binding array, otherwise only its 'value' entry + if ( $rawData ) { + $resrow[$var] = $row[$var]; + } else { + $resrow[$var] = $row[$var]['value']; + } + } + $result[] = $resrow; + } + } + return $result; + } + +} diff --git a/includes/sparql/SparqlException.php b/includes/sparql/SparqlException.php new file mode 100644 index 0000000000..d65521e424 --- /dev/null +++ b/includes/sparql/SparqlException.php @@ -0,0 +1,30 @@ +getMock( HttpRequestFactory::class ); + $requestFactory->method( 'create' )->willReturn( $request ); + return $requestFactory; + } + + /** + * Create an MWHttpRequest mock whose execute() returns a good Status + * and whose getContent() returns the given content. + * + * @param string $content Response body the mock should return + * @return MWHttpRequest Mock object + */ + private function getRequestMock( $content ) { + $request = $this->getMockBuilder( MWHttpRequest::class )->disableOriginalConstructor()->getMock(); + $request->method( 'execute' )->willReturn( \Status::newGood( 200 ) ); + $request->method( 'getContent' )->willReturn( $content ); + return $request; + } + + public function testQuery() { + $json = <<getRequestMock( $json ); + $client = new SparqlClient( 'http://acme.test/', $this->getRequestFactory( $request ) ); + + // values only + $result = $client->query( "TEST SPARQL" ); + $this->assertCount( 2, $result ); + $this->assertEquals( 'http://wikiba.se/ontology#Dump', $result[0]['x'] ); + $this->assertEquals( 'http://creativecommons.org/ns#license', $result[0]['y'] ); + $this->assertEquals( '0.1.0', $result[1]['z'] ); + $this->assertNull( $result[1]['y'] ); + // raw data format + $result = $client->query( "TEST SPARQL 2", true ); + $this->assertCount( 2, $result ); + $this->assertEquals( 'uri', $result[0]['x']['type'] ); + $this->assertEquals( 'http://wikiba.se/ontology#Dump', $result[0]['x']['value'] ); + $this->assertEquals( 'literal', $result[1]['z']['type'] ); + $this->assertEquals( 
'0.1.0', $result[1]['z']['value'] ); + $this->assertNull( $result[1]['y'] ); + } + + /** + * A fatal Status returned by the HTTP request must surface as a SparqlException. + * + * @expectedException \Mediawiki\Sparql\SparqlException + */ + public function testBadQuery() { + $request = $this->getMockBuilder( MWHttpRequest::class )->disableOriginalConstructor()->getMock(); + $client = new SparqlClient( 'http://acme.test/', $this->getRequestFactory( $request ) ); + + $request->method( 'execute' )->willReturn( \Status::newFatal( "Bad query" ) ); + $result = $client->query( "TEST SPARQL 3" ); + } + + /** + * Data provider for testOptions(). + * + * Each case is: query text, client options (or null), timeout (or null), + * substrings expected in the request URL, options expected in the request. + * + * @return array[] + */ + public function optionsProvider() { + return [ + 'defaults' => [ + 'TEST тест SPARQL 4 ', + null, + null, + [ + 'http://acme.test/', + 'query=TEST+%D1%82%D0%B5%D1%81%D1%82+SPARQL+4+', + 'format=json', + 'maxQueryTimeMillis=30000', + ], + [ + 'method' => 'GET', + 'userAgent' => Http::userAgent() ." SparqlClient", + 'timeout' => 30 + ] + ], + 'big query' => [ + str_repeat( 'ZZ', SparqlClient::MAX_GET_SIZE ), + null, + null, + [ + 'format=json', + 'maxQueryTimeMillis=30000', + ], + [ + 'method' => 'POST', + 'postData' => 'query=' . 
str_repeat( 'ZZ', SparqlClient::MAX_GET_SIZE ), + ] + ], + 'timeout 1s' => [ + 'TEST SPARQL 4', + null, + 1, + [ + 'maxQueryTimeMillis=1000', + ], + [ + 'timeout' => 1 + ] + ], + 'more options' => [ + 'TEST SPARQL 5', + [ + 'userAgent' => 'My Test', + 'randomOption' => 'duck', + ], + null, + [], + [ + 'userAgent' => 'My Test', + 'randomOption' => 'duck', + ] + ], + + ]; + } + + /** + * Check the URL and options that query() hands to the request factory. + * + * @dataProvider optionsProvider + * @param string $sparql Query text to send + * @param array|null $options Client options to set, or null to set none + * @param int|null $timeout Timeout to set, or null to leave it unset + * @param array $expectedUrl Substrings that must each appear in the request URL + * @param array $expectedOptions Key/value pairs that must be present in the request options + */ + public function testOptions( $sparql, $options, $timeout, $expectedUrl, $expectedOptions ) { + $requestFactory = $this->getMock( HttpRequestFactory::class ); + $client = new SparqlClient( 'http://acme.test/', $requestFactory ); + + $request = $this->getRequestMock( '{}' ); + + // The assertions run inside the factory callback, where the URL and options are visible + $requestFactory->method( 'create' )->willReturnCallback( + function ( $url, $options ) use ( $request, $expectedUrl, $expectedOptions ) { + foreach ( $expectedUrl as $eurl ) { + $this->assertContains( $eurl, $url ); + } + foreach ( $expectedOptions as $ekey => $evalue ) { + $this->assertArrayHasKey( $ekey, $options ); + $this->assertEquals( $options[$ekey], $evalue ); + } + return $request; + } + ); + + if ( !is_null( $options ) ) { + $client->setClientOptions( $options ); + } + if ( !is_null( $timeout ) ) { + $client->setTimeout( $timeout ); + } + + $result = $client->query( $sparql ); + } + +}