Create API to allow content handlers to handle structured data definitions
author Stanislav Malyshev <smalyshev@gmail.com>
Fri, 13 May 2016 00:10:52 +0000 (17:10 -0700)
committer Stanislav Malyshev <smalyshev@gmail.com>
Wed, 6 Jul 2016 20:41:20 +0000 (13:41 -0700)
Change-Id: Ia1738803c42f6114575587c1c838fec62b6f54aa
Bug: T89733

12 files changed:
autoload.php
includes/content/CodeContentHandler.php
includes/content/ContentHandler.php
includes/content/TextContentHandler.php
includes/content/WikitextContentHandler.php
includes/search/NullIndexField.php [new file with mode: 0644]
includes/search/SearchEngine.php
includes/search/SearchIndexField.php [new file with mode: 0644]
includes/search/SearchIndexFieldDefinition.php [new file with mode: 0644]
tests/phpunit/includes/content/TextContentHandlerTest.php
tests/phpunit/includes/search/SearchEngineTest.php
tests/phpunit/includes/search/SearchIndexFieldTest.php [new file with mode: 0644]

index 0211c6d..16d69d0 100644 (file)
@@ -956,6 +956,7 @@ $wgAutoloadLocalClasses = [
        'NukePage' => __DIR__ . '/maintenance/nukePage.php',
        'NullFileJournal' => __DIR__ . '/includes/filebackend/filejournal/FileJournal.php',
        'NullFileOp' => __DIR__ . '/includes/filebackend/FileOp.php',
+       'NullIndexField' => __DIR__ . '/includes/search/NullIndexField.php',
        'NullJob' => __DIR__ . '/includes/jobqueue/jobs/NullJob.php',
        'NullLockManager' => __DIR__ . '/includes/filebackend/lockmanager/LockManager.php',
        'NullRepo' => __DIR__ . '/includes/filerepo/NullRepo.php',
@@ -1209,6 +1210,8 @@ $wgAutoloadLocalClasses = [
        'SearchEngineFactory' => __DIR__ . '/includes/search/SearchEngineFactory.php',
        'SearchExactMatchRescorer' => __DIR__ . '/includes/search/SearchExactMatchRescorer.php',
        'SearchHighlighter' => __DIR__ . '/includes/search/SearchHighlighter.php',
+       'SearchIndexField' => __DIR__ . '/includes/search/SearchIndexField.php',
+       'SearchIndexFieldDefinition' => __DIR__ . '/includes/search/SearchIndexFieldDefinition.php',
        'SearchMssql' => __DIR__ . '/includes/search/SearchMssql.php',
        'SearchMySQL' => __DIR__ . '/includes/search/SearchMySQL.php',
        'SearchNearMatchResultSet' => __DIR__ . '/includes/search/SearchNearMatchResultSet.php',
index 694b633..2bbf6ca 100644 (file)
@@ -63,4 +63,12 @@ abstract class CodeContentHandler extends TextContentHandler {
        protected function getContentClass() {
                throw new MWException( 'Subclass must override' );
        }
+
+       /**
+        * Get fields definition for search index.
+        *
+        * This implementation contributes no fields: it always returns an empty array.
+        *
+        * @param SearchEngine $engine Search engine requesting the fields; not used here.
+        * @return array Always an empty array.
+        */
+       public function getFieldsForSearchIndex( SearchEngine $engine ) {
+               return [];
+       }
 }
index e225fb7..1ecd614 100644 (file)
@@ -1248,4 +1248,26 @@ abstract class ContentHandler {
 
                return $ok;
        }
+
+       /**
+        * Get fields definition for search index.
+        *
+        * The base implementation returns an empty list.
+        *
+        * @param SearchEngine $engine
+        * @return SearchIndexField[] List of fields this content handler can provide.
+        * @since 1.28
+        */
+       public function getFieldsForSearchIndex( SearchEngine $engine ) {
+               /* Default fields:
+                *
+                * namespace
+                * namespace_text
+                * redirect
+                * source_text
+                * suggest
+                * timestamp
+                * title
+                * text
+                * text_bytes
+                */
+               return [];
+       }
 }
index ad40cd9..748c810 100644 (file)
@@ -31,8 +31,7 @@
 class TextContentHandler extends ContentHandler {
 
        // @codingStandardsIgnoreStart bug 57585
-       public function __construct( $modelId = CONTENT_MODEL_TEXT,
-               $formats = [ CONTENT_FORMAT_TEXT ] ) {
+       public function __construct( $modelId = CONTENT_MODEL_TEXT, $formats = [ CONTENT_FORMAT_TEXT ] ) {
                parent::__construct( $modelId, $formats );
        }
        // @codingStandardsIgnoreEnd
@@ -41,7 +40,7 @@ class TextContentHandler extends ContentHandler {
         * Returns the content's text as-is.
         *
         * @param Content $content
-        * @param string $format The serialization format to check
+        * @param string  $format The serialization format to check
         *
         * @return mixed
         */
@@ -143,4 +142,10 @@ class TextContentHandler extends ContentHandler {
                return true;
        }
 
+       /**
+        * Get fields definition for search index.
+        *
+        * Provides a single keyword field named 'language', created through the
+        * given search engine.
+        *
+        * @param SearchEngine $engine Engine used to create the field definition.
+        * @return SearchIndexField[] Field definitions keyed by field name.
+        */
+       public function getFieldsForSearchIndex( SearchEngine $engine ) {
+               return [
+                       'language' => $engine->makeSearchFieldMapping(
+                               'language',
+                               SearchIndexField::INDEX_TYPE_KEYWORD
+                       ),
+               ];
+       }
 }
index 0701a0f..86f0d50 100644 (file)
@@ -108,4 +108,40 @@ class WikitextContentHandler extends TextContentHandler {
                return true;
        }
 
+       /**
+        * Get fields definition for search index.
+        *
+        * Each field is created through the given search engine; some fields
+        * additionally get one flag set right after creation.
+        *
+        * @param SearchEngine $engine Engine used to create the field definitions.
+        * @return SearchIndexField[] Field definitions keyed by field name.
+        */
+       public function getFieldsForSearchIndex( SearchEngine $engine ) {
+               // Field name => [ index type, flag to set or null for none ].
+               // The order here is the order in which the fields are created.
+               $definitions = [
+                       'category' => [ SearchIndexField::INDEX_TYPE_TEXT, SearchIndexField::FLAG_CASEFOLD ],
+                       'external_link' => [ SearchIndexField::INDEX_TYPE_KEYWORD, null ],
+                       'heading' => [ SearchIndexField::INDEX_TYPE_TEXT, SearchIndexField::FLAG_SCORING ],
+                       'auxiliary_text' => [ SearchIndexField::INDEX_TYPE_TEXT, null ],
+                       'opening_text' => [ SearchIndexField::INDEX_TYPE_TEXT, SearchIndexField::FLAG_SCORING ],
+                       'outgoing_link' => [ SearchIndexField::INDEX_TYPE_KEYWORD, null ],
+                       'template' => [ SearchIndexField::INDEX_TYPE_KEYWORD, SearchIndexField::FLAG_CASEFOLD ],
+                       // FIXME: this really belongs in separate file handler but files
+                       // do not have separate handler. Sadness.
+                       'file_text' => [ SearchIndexField::INDEX_TYPE_TEXT, null ],
+               ];
+
+               $fields = [];
+               foreach ( $definitions as $name => $spec ) {
+                       $fields[$name] = $engine->makeSearchFieldMapping( $name, $spec[0] );
+                       if ( $spec[1] !== null ) {
+                               $fields[$name]->setFlag( $spec[1] );
+                       }
+               }
+
+               return $fields;
+       }
+
 }
diff --git a/includes/search/NullIndexField.php b/includes/search/NullIndexField.php
new file mode 100644 (file)
index 0000000..933e0ad
--- /dev/null
@@ -0,0 +1,45 @@
+<?php
+
+/**
+ * Null index field - means search engine does not implement this field.
+ *
+ * Every operation is a no-op: the field maps to nothing, stores no flags and
+ * gives way to the other definition when merged.
+ */
+class NullIndexField implements SearchIndexField {
+
+       /**
+        * Get mapping for specific search engine
+        * @param SearchEngine $engine
+        * @return array|null Null means this field does not map to anything
+        */
+       public function getMapping( SearchEngine $engine ) {
+               return null;
+       }
+
+       /**
+        * Set global flag for this field.
+        *
+        * Flags are not stored by the null field; the call is accepted and ignored.
+        *
+        * @param int  $flag Bit flag to set/unset
+        * @param bool $unset True if flag should be unset, false by default
+        * @return $this
+        */
+       public function setFlag( $flag, $unset = false ) {
+               // Return the instance, as the interface documents, so that chained
+               // calls do not fail on a null return value.
+               return $this;
+       }
+
+       /**
+        * Check if flag is set.
+        * @param int $flag Bit flag to check
+        * @return int Always 0, since no flags are ever stored
+        */
+       public function checkFlag( $flag ) {
+               return 0;
+       }
+
+       /**
+        * Merge two field definitions if possible.
+        *
+        * The null field carries no information, so the other definition is
+        * returned unchanged.
+        *
+        * @param SearchIndexField $that
+        * @return SearchIndexField|false New definition or false if not mergeable.
+        */
+       public function merge( SearchIndexField $that ) {
+               return $that;
+       }
+}
index 0171ed9..9168d64 100644 (file)
@@ -655,6 +655,46 @@ abstract class SearchEngine {
                return null;
        }
 
+       /**
+        * Create a search field definition.
+        * Specific search engines should override this method to create search fields.
+        *
+        * This base implementation ignores both arguments and returns a
+        * NullIndexField.
+        *
+        * @param string $name
+        * @param int    $type
+        * @return SearchIndexField
+        * @since 1.28
+        */
+       public function makeSearchFieldMapping( $name, $type ) {
+               return new NullIndexField();
+       }
+
+       /**
+        * Collect the search index field definitions of every content model.
+        *
+        * Each content handler is asked for its fields. When two handlers define
+        * a field with the same name, the definitions are merged; a failed merge
+        * is an error. The 'SearchIndexFields' hook runs last and may change the
+        * collected list.
+        *
+        * @since 1.28
+        * @return SearchIndexField[] Index field definitions for all content handlers
+        * @throws InvalidArgumentException If two definitions of a field cannot be merged
+        */
+       public function getSearchIndexFields() {
+               $models = ContentHandler::getContentModels();
+               $fields = [];
+               foreach ( $models as $model ) {
+                       $handlerFields = ContentHandler::getForModelID( $model )->getFieldsForSearchIndex( $this );
+                       foreach ( $handlerFields as $fieldName => $fieldData ) {
+                               // First definition of this field name: take it as is.
+                               if ( empty( $fields[$fieldName] ) ) {
+                                       $fields[$fieldName] = $fieldData;
+                                       continue;
+                               }
+
+                               // TODO: do we allow some clashes with the same type or reject all of them?
+                               $merged = $fields[$fieldName]->merge( $fieldData );
+                               if ( !$merged ) {
+                                       throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
+                               }
+                               $fields[$fieldName] = $merged;
+                       }
+               }
+
+               // Hook to allow extensions to produce search mapping fields
+               Hooks::run( 'SearchIndexFields', [ &$fields, $this ] );
+
+               return $fields;
+       }
 }
 
 /**
diff --git a/includes/search/SearchIndexField.php b/includes/search/SearchIndexField.php
new file mode 100644 (file)
index 0000000..2ea255f
--- /dev/null
@@ -0,0 +1,63 @@
+<?php
+/**
+ * Definition of a mapping for the search index field.
+ *
+ * Declares the index type constants, the generic bit flags, and the
+ * operations each field definition has to provide.
+ * @since 1.28
+ */
+interface SearchIndexField {
+       /**
+        * Field types
+        */
+       const INDEX_TYPE_TEXT = 0;
+       const INDEX_TYPE_KEYWORD = 1;
+       const INDEX_TYPE_INTEGER = 2;
+       const INDEX_TYPE_NUMBER = 3;
+       const INDEX_TYPE_DATETIME = 4;
+       const INDEX_TYPE_NESTED = 5;
+       const INDEX_TYPE_BOOL = 6;
+       /**
+        * Generic field flags.
+        * Each flag is a distinct bit (1, 2, 4, 8), so several can be combined
+        * in a single integer.
+        */
+       /**
+        * This field is case-insensitive.
+        */
+       const FLAG_CASEFOLD = 1;
+       /**
+        * This field is for scoring only.
+        */
+       const FLAG_SCORING = 2;
+       /**
+        * This field does not need highlight handling.
+        */
+       const FLAG_NO_HIGHLIGHT = 4;
+       /**
+        * Do not index this field.
+        */
+       const FLAG_NO_INDEX = 8;
+       /**
+        * Get mapping for specific search engine
+        * @param SearchEngine $engine
+        * @return array|null Null means this field does not map to anything
+        */
+       public function getMapping( SearchEngine $engine );
+       /**
+        * Set global flag for this field.
+        *
+        * @param int  $flag Bit flag to set/unset
+        * @param bool $unset True if flag should be unset, false by default
+        * @return $this
+        */
+       public function setFlag( $flag, $unset = false );
+       /**
+        * Check if flag is set.
+        * @param int $flag Bit flag to check
+        * @return int 0 if unset, !=0 if set
+        */
+       public function checkFlag( $flag );
+       /**
+        * Merge two field definitions if possible.
+        *
+        * @param SearchIndexField $that
+        * @return SearchIndexField|false New definition or false if not mergeable.
+        */
+       public function merge( SearchIndexField $that );
+}
diff --git a/includes/search/SearchIndexFieldDefinition.php b/includes/search/SearchIndexFieldDefinition.php
new file mode 100644 (file)
index 0000000..3a86c82
--- /dev/null
@@ -0,0 +1,118 @@
+<?php
+
+/**
+ * Basic infrastructure of the field definition.
+ * Specific engines will need to override it at least for getMapping,
+ * but can reuse other parts.
+ *
+ * Holds the field name, index type, bit flags and subfields. getMapping()
+ * is not implemented here, which is why the class is abstract.
+ * @since 1.28
+ */
+abstract class SearchIndexFieldDefinition implements SearchIndexField {
+       /**
+        * Name of the field
+        *
+        * @var string
+        */
+       protected $name;
+       /**
+        * Type of the field, one of the SearchIndexField::INDEX_TYPE_* constants
+        *
+        * @var int
+        */
+       protected $type;
+       /**
+        * Bit flags for the field.
+        *
+        * @var int
+        */
+       protected $flags = 0;
+       /**
+        * Subfields
+        * @var SearchIndexFieldDefinition[]
+        */
+       protected $subfields = [];
+
+       /**
+        * SearchIndexFieldDefinition constructor.
+        * @param string $name Field name
+        * @param int    $type Index type
+        */
+       public function __construct( $name, $type ) {
+               $this->name = $name;
+               $this->type = $type;
+       }
+
+       /**
+        * Get field name
+        * @return string
+        */
+       public function getName() {
+               return $this->name;
+       }
+
+       /**
+        * Get index type
+        * @return int
+        */
+       public function getIndexType() {
+               return $this->type;
+       }
+
+       /**
+        * Set global flag for this field.
+        *
+        * The flag bits are OR-ed into the stored flags, or cleared from them
+        * when $unset is true.
+        *
+        * @param int  $flag Bit flag to set/unset
+        * @param bool $unset True if flag should be unset, false by default
+        * @return $this
+        */
+       public function setFlag( $flag, $unset = false ) {
+               if ( $unset ) {
+                       $this->flags &= ~$flag;
+               } else {
+                       $this->flags |= $flag;
+               }
+               return $this;
+       }
+
+       /**
+        * Check if flag is set.
+        * @param int $flag Bit flag (or combination of flags) to check
+        * @return int 0 if unset, !=0 if set
+        */
+       public function checkFlag( $flag ) {
+               return $this->flags & $flag;
+       }
+
+       /**
+        * Merge two field definitions if possible.
+        *
+        * Merging succeeds only when the other definition is also a
+        * SearchIndexFieldDefinition with the same type and the same flags, and
+        * the type is not INDEX_TYPE_NESTED. In that case the other definition
+        * is returned as is. Field names are not compared.
+        *
+        * @param SearchIndexField $that
+        * @return SearchIndexField|false New definition or false if not mergeable.
+        */
+       public function merge( SearchIndexField $that ) {
+               // TODO: which definitions may be compatible?
+               if ( ( $that instanceof self ) && $this->type === $that->type &&
+                    $this->flags === $that->flags && $this->type !== self::INDEX_TYPE_NESTED
+               ) {
+                       return $that;
+               }
+               return false;
+       }
+
+       /**
+        * Get subfields
+        * @return SearchIndexFieldDefinition[]
+        */
+       public function getSubfields() {
+               return $this->subfields;
+       }
+
+       /**
+        * Set subfields
+        * @param SearchIndexFieldDefinition[] $subfields
+        * @return $this
+        */
+       public function setSubfields( array $subfields ) {
+               $this->subfields = $subfields;
+               return $this;
+       }
+}
index 492fec6..e8681c7 100644 (file)
@@ -9,4 +9,45 @@ class TextContentHandlerTest extends MediaWikiLangTestCase {
                $this->assertTrue( $handler->supportsDirectEditing(), 'direct editing is supported' );
        }
 
+       /**
+        * The text handler must create its 'language' field through the search
+        * engine, and that field must yield the engine-provided mapping.
+        *
+        * The handler under test is a TextContentHandler, so the override in
+        * that class is the unit this test executes.
+        *
+        * @covers SearchEngine::makeSearchFieldMapping
+        * @covers TextContentHandler::getFieldsForSearchIndex
+        */
+       public function testFieldsForIndex() {
+               $handler = new TextContentHandler();
+
+               $mockEngine = $this->getMock( 'SearchEngine' );
+
+               $mockEngine->expects( $this->atLeastOnce() )
+                       ->method( 'makeSearchFieldMapping' )
+                       ->willReturnCallback( function ( $name, $type ) {
+                               $mockField =
+                                       $this->getMockBuilder( 'SearchIndexFieldDefinition' )
+                                               ->setConstructorArgs( [ $name, $type ] )
+                                               ->getMock();
+                               $mockField->expects( $this->atLeastOnce() )->method( 'getMapping' )->willReturn( [
+                                               'testData' => 'test',
+                                               'name' => $name,
+                                               'type' => $type,
+                                       ] );
+                               return $mockField;
+                       } );
+
+               /**
+                * @var $mockEngine SearchEngine
+                */
+               $fields = $handler->getFieldsForSearchIndex( $mockEngine );
+               $mappedFields = [];
+               foreach ( $fields as $name => $field ) {
+                       $this->assertInstanceOf( 'SearchIndexField', $field );
+                       /**
+                        * @var $field SearchIndexField
+                        */
+                       $mappedFields[$name] = $field->getMapping( $mockEngine );
+               }
+               $this->assertArrayHasKey( 'language', $mappedFields );
+               $this->assertEquals( 'test', $mappedFields['language']['testData'] );
+               $this->assertEquals( 'language', $mappedFields['language']['name'] );
+       }
+
 }
index 40a33d9..f084c64 100644 (file)
@@ -157,4 +157,49 @@ class SearchEngineTest extends MediaWikiLangTestCase {
                        "Title power search failed" );
        }
 
+       /**
+        * The collected field list must contain fields coming from content
+        * handlers ('language', 'category') as well as the field added through
+        * the SearchIndexFields hook ('testField').
+        *
+        * @covers SearchEngine::getSearchIndexFields
+        */
+       public function testSearchIndexFields() {
+               /**
+                * @var $mockEngine SearchEngine
+                */
+               $mockEngine = $this->getMock( 'SearchEngine', [ 'makeSearchFieldMapping' ] );
+
+               $mockFieldBuilder = function ( $name, $type ) {
+                       $mockField =
+                               $this->getMockBuilder( 'SearchIndexFieldDefinition' )->setConstructorArgs( [
+                                       $name,
+                                       $type
+                               ] )->getMock();
+                       $mockField->expects( $this->any() )->method( 'getMapping' )->willReturn( [
+                               'testData' => 'test',
+                               'name' => $name,
+                               'type' => $type,
+                       ] );
+                       return $mockField;
+               };
+
+               $mockEngine->expects( $this->atLeastOnce() )
+                       ->method( 'makeSearchFieldMapping' )
+                       ->willReturnCallback( $mockFieldBuilder );
+
+               // Not using mock since PHPUnit mocks do not work properly with references in params
+               $this->mergeMwGlobalArrayValue( 'wgHooks',
+                       [ 'SearchIndexFields' => [ [ $this, 'hookSearchIndexFields', $mockFieldBuilder ] ] ] );
+
+               $fields = $mockEngine->getSearchIndexFields();
+               $this->assertArrayHasKey( 'language', $fields );
+               $this->assertArrayHasKey( 'category', $fields );
+               $this->assertInstanceOf( 'SearchIndexField', $fields['testField'] );
+
+               $mapping = $fields['testField']->getMapping( $mockEngine );
+               $this->assertArrayHasKey( 'testData', $mapping );
+               $this->assertEquals( 'test', $mapping['testData'] );
+       }
+
+       /**
+        * Handler for the SearchIndexFields hook: adds one text field named
+        * 'testField' to the list.
+        *
+        * @param callable $mockFieldBuilder Factory called with ( name, type )
+        * @param array &$fields Field list to extend
+        * @param SearchEngine $engine Not used by this handler
+        * @return bool Always true
+        */
+       public function hookSearchIndexFields( $mockFieldBuilder, &$fields, SearchEngine $engine ) {
+               $name = 'testField';
+               $fields[$name] = $mockFieldBuilder( $name, SearchIndexField::INDEX_TYPE_TEXT );
+
+               return true;
+       }
 }
diff --git a/tests/phpunit/includes/search/SearchIndexFieldTest.php b/tests/phpunit/includes/search/SearchIndexFieldTest.php
new file mode 100644 (file)
index 0000000..ec046a7
--- /dev/null
@@ -0,0 +1,39 @@
+<?php
+
+/**
+ * Tests for merging of search index field definitions.
+ *
+ * @group Search
+ * @covers SearchIndexFieldDefinition
+ */
+class SearchIndexFieldTest extends MediaWikiTestCase {
+
+       /**
+        * Data provider for testMerge.
+        *
+        * @return array[] Rows of [ type 1, name 1, type 2, name 2, mergeable ]
+        */
+       public function getMergeCases() {
+               $text = SearchIndexField::INDEX_TYPE_TEXT;
+               $keyword = SearchIndexField::INDEX_TYPE_KEYWORD;
+               $nested = SearchIndexField::INDEX_TYPE_NESTED;
+
+               return [
+                       // Same type, same name
+                       [ $text, 'test', $text, 'test', true ],
+                       // Nested fields never merge
+                       [ $nested, 'test', $nested, 'test', false ],
+                       // Same type, different name
+                       [ $text, 'test', $text, 'test2', true ],
+                       // Different types
+                       [ $text, 'test', $keyword, 'test', false ],
+               ];
+       }
+
+       /**
+        * @dataProvider getMergeCases
+        */
+       public function testMerge( $t1, $n1, $t2, $n2, $result ) {
+               $first = $this->newField( $n1, $t1 );
+               $second = $this->newField( $n2, $t2 );
+
+               $merged = $first->merge( $second );
+               if ( $result ) {
+                       $this->assertNotFalse( $merged );
+               } else {
+                       $this->assertFalse( $merged );
+               }
+
+               // Once the flags differ, the definitions must not merge any more.
+               $first->setFlag( 0xFF );
+               $this->assertFalse( $first->merge( $second ) );
+       }
+
+       /**
+        * Build a field definition with only the abstract getMapping() mocked.
+        *
+        * @param string $name Field name
+        * @param int $type Index type
+        * @return SearchIndexFieldDefinition
+        */
+       private function newField( $name, $type ) {
+               return $this->getMockBuilder( 'SearchIndexFieldDefinition' )
+                       ->setMethods( [ 'getMapping' ] )
+                       ->setConstructorArgs( [ $name, $type ] )
+                       ->getMock();
+       }
+}