Add SPARQL client to core
author Stanislav Malyshev <smalyshev@gmail.com>
Sat, 13 Jan 2018 01:00:28 +0000 (17:00 -0800)
committer Stanislav Malyshev <smalyshev@gmail.com>
Tue, 6 Feb 2018 20:56:08 +0000 (12:56 -0800)
This will be used for the deep category search implementation;
the Wikibase client in repo/maintenance will also be changed
to use the same codebase.

Bug: T185127
Change-Id: Ie8dd4a5aff55d90f02426f1430ed5214c7327bbc

autoload.php
includes/sparql/SparqlClient.php [new file with mode: 0644]
includes/sparql/SparqlException.php [new file with mode: 0644]
tests/phpunit/includes/sparql/SparqlClientTest.php [new file with mode: 0644]

index 6fb2cc4..7f1a47f 100644 (file)
@@ -938,6 +938,8 @@ $wgAutoloadLocalClasses = [
        'MediaWiki\\Shell\\Result' => __DIR__ . '/includes/shell/Result.php',
        'MediaWiki\\Shell\\Shell' => __DIR__ . '/includes/shell/Shell.php',
        'MediaWiki\\Site\\MediaWikiPageNameNormalizer' => __DIR__ . '/includes/site/MediaWikiPageNameNormalizer.php',
        'MediaWiki\\Shell\\Result' => __DIR__ . '/includes/shell/Result.php',
        'MediaWiki\\Shell\\Shell' => __DIR__ . '/includes/shell/Shell.php',
        'MediaWiki\\Site\\MediaWikiPageNameNormalizer' => __DIR__ . '/includes/site/MediaWikiPageNameNormalizer.php',
+       'MediaWiki\\Sparql\\SparqlClient' => __DIR__ . '/includes/sparql/SparqlClient.php',
+       'MediaWiki\\Sparql\\SparqlException' => __DIR__ . '/includes/sparql/SparqlException.php',
        'MediaWiki\\Storage\\BlobAccessException' => __DIR__ . '/includes/Storage/BlobAccessException.php',
        'MediaWiki\\Storage\\BlobStore' => __DIR__ . '/includes/Storage/BlobStore.php',
        'MediaWiki\\Storage\\BlobStoreFactory' => __DIR__ . '/includes/Storage/BlobStoreFactory.php',
        'MediaWiki\\Storage\\BlobAccessException' => __DIR__ . '/includes/Storage/BlobAccessException.php',
        'MediaWiki\\Storage\\BlobStore' => __DIR__ . '/includes/Storage/BlobStore.php',
        'MediaWiki\\Storage\\BlobStoreFactory' => __DIR__ . '/includes/Storage/BlobStoreFactory.php',
diff --git a/includes/sparql/SparqlClient.php b/includes/sparql/SparqlClient.php
new file mode 100644 (file)
index 0000000..6c913d2
--- /dev/null
@@ -0,0 +1,220 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+namespace MediaWiki\Sparql;
+
+use Http;
+use MediaWiki\Http\HttpRequestFactory;
+
+/**
+ * Simple SPARQL client
+ *
+ * @author Stas Malyshev
+ */
+class SparqlClient {
+
+       /**
+        * Maximum length (in bytes, as measured by strlen) of a query that
+        * is still sent by GET; longer queries are sent by POST.
+        */
+       const MAX_GET_SIZE = 2048;
+
+       /**
+        * User agent for HTTP requests.
+        * @var string
+        */
+       private $userAgent;
+
+       /**
+        * Query timeout (seconds)
+        * @var int
+        */
+       private $timeout = 30;
+
+       /**
+        * SPARQL endpoint URL
+        * @var string
+        */
+       private $endpoint;
+
+       /**
+        * Client options, merged into the options passed to the request factory.
+        * @var array
+        */
+       private $options = [];
+
+       /**
+        * Factory used to create the HTTP request for each query.
+        * @var HttpRequestFactory
+        */
+       private $requestFactory;
+
+       /**
+        * @param string $url SPARQL Endpoint
+        * @param HttpRequestFactory $requestFactory
+        */
+       public function __construct( $url, HttpRequestFactory $requestFactory ) {
+               $this->endpoint = $url;
+               $this->requestFactory = $requestFactory;
+               $this->userAgent = Http::userAgent() . " SparqlClient";
+       }
+
+       /**
+        * Set query timeout (in seconds).
+        *
+        * Negative values are ignored and the previous timeout is kept.
+        *
+        * @param int $timeout
+        * @return $this
+        */
+       public function setTimeout( $timeout ) {
+               if ( $timeout >= 0 ) {
+                       $this->timeout = $timeout;
+               }
+               return $this;
+       }
+
+       /**
+        * Set client options.
+        *
+        * The given array replaces any previously set options. On query, these
+        * options override the default 'method' and may supply 'userAgent'.
+        *
+        * @param array $options
+        * @return $this
+        */
+       public function setClientOptions( $options ) {
+               $this->options = $options;
+               return $this;
+       }
+
+       /**
+        * Get current user agent.
+        * @return string
+        */
+       public function getUserAgent() {
+               return $this->userAgent;
+       }
+
+       /**
+        * Set user agent string.
+        *
+        * Note it is not recommended to completely override user agent for
+        * most applications.
+        * @see appendUserAgent() for recommended way of specifying user agent.
+        *
+        * @param string $agent
+        */
+       public function setUserAgent( $agent ) {
+               $this->userAgent = $agent;
+       }
+
+       /**
+        * Append specific string to user agent.
+        *
+        * This is the recommended way of specifying the user agent
+        * for specific applications of the SparqlClient inside MediaWiki
+        * and extension code.
+        *
+        * @param string $agent Appended after a single space
+        */
+       public function appendUserAgent( $agent ) {
+               $this->userAgent .= ' ' . $agent;
+       }
+
+       /**
+        * Query SPARQL endpoint
+        *
+        * @param string $sparql query
+        * @param bool $rawData If true, each field holds the full binding
+        *             object from the response; if false, only its 'value'.
+        *
+        * @return array List of results, one row per array element
+        *               Each row will contain fields indexed by variable name.
+        * @throws SparqlException If the endpoint is empty, the HTTP request
+        *         fails, or the response is not decodable JSON.
+        */
+       public function query( $sparql, $rawData = false ) {
+               if ( empty( $this->endpoint ) ) {
+                       throw new SparqlException( 'Endpoint URL can not be empty' );
+               }
+               $queryData = [ "query" => $sparql, "format" => "json" ];
+               // Client options take precedence over the default method.
+               $options = array_merge( [ 'method' => 'GET' ], $this->options );
+
+               if ( empty( $options['userAgent'] ) ) {
+                       $options['userAgent'] = $this->userAgent;
+               }
+
+               if ( $this->timeout >= 0 ) {
+                       // Blazegraph setting, see https://wiki.blazegraph.com/wiki/index.php/REST_API
+                       $queryData['maxQueryTimeMillis'] = $this->timeout * 1000;
+                       $options['timeout'] = $this->timeout;
+               }
+
+               if ( strlen( $sparql ) > self::MAX_GET_SIZE ) {
+                       // big requests go to POST: the query moves from the URL
+                       // into the request body, other parameters stay in the URL
+                       $options['method'] = 'POST';
+                       $options['postData'] = 'query=' . urlencode( $sparql );
+                       unset( $queryData['query'] );
+               }
+
+               $url = wfAppendQuery( $this->endpoint, $queryData );
+               $request = $this->requestFactory->create( $url, $options, __METHOD__ );
+
+               $status = $request->execute();
+
+               if ( !$status->isOK() ) {
+                       throw new SparqlException( "HTTP error: {$status->getWikiText()}" );
+               }
+               $result = $request->getContent();
+               \MediaWiki\suppressWarnings();
+               $data = json_decode( $result, true );
+               \MediaWiki\restoreWarnings();
+               if ( $data === null || $data === false ) {
+                       // Quote only the beginning of the response to keep the message short.
+                       throw new SparqlException( "HTTP request failed, response:\n" .
+                               substr( $result, 0, 1024 ) );
+               }
+
+               return $this->extractData( $data, $rawData );
+       }
+
+       /**
+        * Extract data from SPARQL response format.
+        * The response must be in format described in:
+        * https://www.w3.org/TR/sparql11-results-json/
+        *
+        * A response without any bindings yields an empty list. Variables
+        * listed in the head but not bound in a row are set to null.
+        *
+        * @param array $data SPARQL result
+        * @param bool  $rawData Whether to return only values or full data objects
+        *
+        * @return array List of results, one row per element.
+        */
+       private function extractData( $data, $rawData = false ) {
+               $result = [];
+               if ( !$data || empty( $data['results']['bindings'] ) ) {
+                       return $result;
+               }
+               // Tolerate a response without a variable list instead of raising a notice.
+               $vars = isset( $data['head']['vars'] ) ? $data['head']['vars'] : [];
+               foreach ( $data['results']['bindings'] as $row ) {
+                       // Start every row from scratch so nothing leaks between rows.
+                       $resrow = [];
+                       foreach ( $vars as $var ) {
+                               if ( !isset( $row[$var] ) ) {
+                                       $resrow[$var] = null;
+                                       continue;
+                               }
+                               if ( $rawData ) {
+                                       $resrow[$var] = $row[$var];
+                               } else {
+                                       $resrow[$var] = $row[$var]['value'];
+                               }
+                       }
+                       $result[] = $resrow;
+               }
+               return $result;
+       }
+
+}
diff --git a/includes/sparql/SparqlException.php b/includes/sparql/SparqlException.php
new file mode 100644 (file)
index 0000000..d65521e
--- /dev/null
@@ -0,0 +1,30 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+namespace MediaWiki\Sparql;
+
+use Exception;
+
+/**
+ * Exception thrown by SparqlClient when a query cannot be executed
+ * or its response cannot be decoded.
+ *
+ * @author Stas Malyshev
+ */
+class SparqlException extends Exception {
+}
diff --git a/tests/phpunit/includes/sparql/SparqlClientTest.php b/tests/phpunit/includes/sparql/SparqlClientTest.php
new file mode 100644 (file)
index 0000000..e07e425
--- /dev/null
@@ -0,0 +1,188 @@
+<?php
+namespace MediaWiki\Sparql;
+
+use Http;
+use MediaWiki\Http\HttpRequestFactory;
+use MWHttpRequest;
+use PHPUnit_Framework_TestCase;
+
+/**
+ * @covers \MediaWiki\Sparql\SparqlClient
+ */
+class SparqlClientTest extends PHPUnit_Framework_TestCase {
+
+       /**
+        * Build a request factory mock whose create() always returns the given request.
+        *
+        * @param MWHttpRequest $request
+        * @return HttpRequestFactory
+        */
+       private function getRequestFactory( $request ) {
+               $requestFactory = $this->getMock( HttpRequestFactory::class );
+               $requestFactory->method( 'create' )->willReturn( $request );
+               return $requestFactory;
+       }
+
+       /**
+        * Build a request mock that executes successfully and returns the given content.
+        *
+        * @param string $content Response body
+        * @return MWHttpRequest
+        */
+       private function getRequestMock( $content ) {
+               $request = $this->getMockBuilder( MWHttpRequest::class )->disableOriginalConstructor()->getMock();
+               $request->method( 'execute' )->willReturn( \Status::newGood( 200 ) );
+               $request->method( 'getContent' )->willReturn( $content );
+               return $request;
+       }
+
+       /**
+        * A well-formed response is parsed in both values-only and raw mode;
+        * variables not bound in a row come back as null.
+        */
+       public function testQuery() {
+               $json = <<<JSON
+{
+  "head" : {
+    "vars" : [ "x", "y", "z" ]
+  },
+  "results" : {
+    "bindings" : [ {
+      "x" : {
+        "type" : "uri",
+        "value" : "http://wikiba.se/ontology#Dump"
+      },
+      "y" : {
+        "type" : "uri",
+        "value" : "http://creativecommons.org/ns#license"
+      },
+      "z" : {
+        "type" : "uri",
+        "value" : "http://creativecommons.org/publicdomain/zero/1.0/"
+      }
+    }, {
+      "x" : {
+        "type" : "uri",
+        "value" : "http://wikiba.se/ontology#Dump"
+      },
+      "z" : {
+        "type" : "literal",
+        "value" : "0.1.0"
+      }
+    } ]
+  }
+}
+JSON;
+
+               $request = $this->getRequestMock( $json );
+               $client = new SparqlClient( 'http://acme.test/', $this->getRequestFactory( $request ) );
+
+               // values only
+               $result = $client->query( "TEST SPARQL" );
+               $this->assertCount( 2, $result );
+               $this->assertEquals( 'http://wikiba.se/ontology#Dump', $result[0]['x'] );
+               $this->assertEquals( 'http://creativecommons.org/ns#license', $result[0]['y'] );
+               $this->assertEquals( '0.1.0', $result[1]['z'] );
+               $this->assertNull( $result[1]['y'] );
+               // raw data format
+               $result = $client->query( "TEST SPARQL 2", true );
+               $this->assertCount( 2, $result );
+               $this->assertEquals( 'uri', $result[0]['x']['type'] );
+               $this->assertEquals( 'http://wikiba.se/ontology#Dump', $result[0]['x']['value'] );
+               $this->assertEquals( 'literal', $result[1]['z']['type'] );
+               $this->assertEquals( '0.1.0', $result[1]['z']['value'] );
+               $this->assertNull( $result[1]['y'] );
+       }
+
+       /**
+        * A failed HTTP status must surface as SparqlException.
+        *
+        * @expectedException \MediaWiki\Sparql\SparqlException
+        */
+       public function testBadQuery() {
+               $request = $this->getMockBuilder( MWHttpRequest::class )->disableOriginalConstructor()->getMock();
+               $request->method( 'execute' )->willReturn( \Status::newFatal( "Bad query" ) );
+
+               $client = new SparqlClient( 'http://acme.test/', $this->getRequestFactory( $request ) );
+               $client->query( "TEST SPARQL 3" );
+       }
+
+       /**
+        * Data sets for testOptions(): query, client options, timeout,
+        * substrings expected in the request URL, and expected request options.
+        *
+        * @return array
+        */
+       public function optionsProvider() {
+               return [
+                       'defaults' => [
+                               'TEST тест SPARQL 4 ',
+                               null,
+                               null,
+                               [
+                                       'http://acme.test/',
+                                       'query=TEST+%D1%82%D0%B5%D1%81%D1%82+SPARQL+4+',
+                                       'format=json',
+                                       'maxQueryTimeMillis=30000',
+                               ],
+                               [
+                                       'method' => 'GET',
+                                       'userAgent' => Http::userAgent() . " SparqlClient",
+                                       'timeout' => 30
+                               ]
+                       ],
+                       'big query' => [
+                               str_repeat( 'ZZ', SparqlClient::MAX_GET_SIZE ),
+                               null,
+                               null,
+                               [
+                                       'format=json',
+                                       'maxQueryTimeMillis=30000',
+                               ],
+                               [
+                                       'method' => 'POST',
+                                       'postData' => 'query=' . str_repeat( 'ZZ', SparqlClient::MAX_GET_SIZE ),
+                               ]
+                       ],
+                       'timeout 1s' => [
+                               'TEST SPARQL 4',
+                               null,
+                               1,
+                               [
+                                       'maxQueryTimeMillis=1000',
+                               ],
+                               [
+                                       'timeout' => 1
+                               ]
+                       ],
+                       'more options' => [
+                               'TEST SPARQL 5',
+                               [
+                                       'userAgent' => 'My Test',
+                                       'randomOption' => 'duck',
+                               ],
+                               null,
+                               [],
+                               [
+                                       'userAgent' => 'My Test',
+                                       'randomOption' => 'duck',
+                               ]
+                       ],
+               ];
+       }
+
+       /**
+        * The URL and options handed to the request factory must reflect the
+        * query, the client options and the timeout.
+        *
+        * @dataProvider optionsProvider
+        * @param string $sparql
+        * @param array|null $options
+        * @param int|null $timeout
+        * @param array $expectedUrl Substrings that must appear in the request URL
+        * @param array $expectedOptions Option key/value pairs that must be passed
+        */
+       public function testOptions( $sparql, $options, $timeout, $expectedUrl, $expectedOptions ) {
+               $requestFactory = $this->getMock( HttpRequestFactory::class );
+               $client = new SparqlClient( 'http://acme.test/', $requestFactory );
+
+               $request = $this->getRequestMock( '{}' );
+
+               // The assertions run inside create(), where the final URL and options are visible.
+               $requestFactory->method( 'create' )->willReturnCallback(
+                       function ( $url, $options ) use ( $request, $expectedUrl, $expectedOptions ) {
+                               foreach ( $expectedUrl as $eurl ) {
+                                       $this->assertContains( $eurl, $url );
+                               }
+                               foreach ( $expectedOptions as $ekey => $evalue ) {
+                                       $this->assertArrayHasKey( $ekey, $options );
+                                       $this->assertEquals( $evalue, $options[$ekey] );
+                               }
+                               return $request;
+                       }
+               );
+
+               if ( $options !== null ) {
+                       $client->setClientOptions( $options );
+               }
+               if ( $timeout !== null ) {
+                       $client->setTimeout( $timeout );
+               }
+
+               $client->query( $sparql );
+       }
+
+}