Extract ParserOutput search index data fields from WikiTextContentHandler
[lhc/web/wiklou.git] / includes / content / ContentHandler.php
index f3d6781..3a75f50 100644 (file)
@@ -1,4 +1,7 @@
 <?php
+
+use MediaWiki\Search\ParserOutputSearchDataExtractor;
+
 /**
  * Base class for content handling.
  *
@@ -641,7 +644,12 @@ abstract class ContentHandler {
         *
         * @since 1.21
         *
-        * @return array Always an empty array.
+        * @return array An array mapping action names (typically "view", "edit", "history" etc.) to
+        *  either the fully qualified class name of an Action class, a callable taking ( Page $page,
+        *  IContextSource $context = null ) as parameters and returning an Action object, or an actual
+        *  Action object. An empty array in this default implementation.
+        *
+        * @see Action::factory
         */
        public function getActionOverrides() {
                return [];
@@ -1243,4 +1251,117 @@ abstract class ContentHandler {
 
                return $ok;
        }
+
+       /**
+        * Get fields definition for search index
+        *
+        * Declares the category, external_link, outgoing_link and template fields;
+        * category and template additionally get FLAG_CASEFOLD set.
+        *
+        * @todo Expose title, redirect, namespace, text, source_text, text_bytes
+        *       field mappings here. (see T142670 and T143409)
+        *
+        * @param SearchEngine $engine Engine used to create each field mapping
+        * @return SearchIndexField[] List of fields this content handler can provide,
+        *  keyed by field name.
+        * @since 1.28
+        */
+       public function getFieldsForSearchIndex( SearchEngine $engine ) {
+               // Field name => [ index type, whether FLAG_CASEFOLD is set ].
+               // The order here is the order of the entries in the returned array.
+               $definitions = [
+                       'category' => [ SearchIndexField::INDEX_TYPE_TEXT, true ],
+                       'external_link' => [ SearchIndexField::INDEX_TYPE_KEYWORD, false ],
+                       'outgoing_link' => [ SearchIndexField::INDEX_TYPE_KEYWORD, false ],
+                       'template' => [ SearchIndexField::INDEX_TYPE_KEYWORD, true ],
+               ];
+
+               // Start from an explicit empty array rather than relying on PHP to
+               // create $fields implicitly on the first keyed write.
+               $fields = [];
+               foreach ( $definitions as $name => $definition ) {
+                       list( $type, $caseFold ) = $definition;
+
+                       $field = $engine->makeSearchFieldMapping( $name, $type );
+                       if ( $caseFold ) {
+                               $field->setFlag( SearchIndexField::FLAG_CASEFOLD );
+                       }
+                       $fields[$name] = $field;
+               }
+
+               return $fields;
+       }
+
+       /**
+        * Create a mapping for one field and store it in the given definitions array.
+        *
+        * The array is taken by reference, so the caller's copy is updated as well.
+        *
+        * @param SearchIndexField[] &$fields Field definitions; the new entry is stored under $name
+        * @param SearchEngine $engine Engine asked to build the mapping
+        * @param string $name Field name, also used as the key in $fields
+        * @param int $type Field type passed on to makeSearchFieldMapping()
+        * @return SearchIndexField[] $fields including the newly added entry
+        * @since 1.28
+        */
+       protected function addSearchField( &$fields, SearchEngine $engine, $name, $type ) {
+               $mapping = $engine->makeSearchFieldMapping( $name, $type );
+               $fields[$name] = $mapping;
+
+               return $fields;
+       }
+
+       /**
+        * Build the name => value map that the search engine indexes for a page.
+        *
+        * When the page has content, the map holds the categories, external links,
+        * outgoing links and templates taken from the parser output, plus the
+        * content's search text (stored as both 'text' and 'source_text') and its
+        * size ('text_bytes'). The SearchDataForIndex hook is run in every case and
+        * may modify the map.
+        *
+        * Overriding class should call parent function or take care of calling
+        * the SearchDataForIndex hook.
+        * @param WikiPage     $page Page to index
+        * @param ParserOutput $output
+        * @param SearchEngine $engine Search engine for which we are indexing
+        * @return array Map of name=>value for fields
+        * @since 1.28
+        */
+       public function getDataForSearchIndex( WikiPage $page, ParserOutput $output,
+                                              SearchEngine $engine ) {
+               $fieldData = [];
+               $content = $page->getContent();
+
+               if ( $content ) {
+                       $extractor = new ParserOutputSearchDataExtractor();
+
+                       // Fields derived from the parser output.
+                       $fieldData = [
+                               'category' => $extractor->getCategories( $output ),
+                               'external_link' => $extractor->getExternalLinks( $output ),
+                               'outgoing_link' => $extractor->getOutgoingLinks( $output ),
+                               'template' => $extractor->getTemplates( $output ),
+                       ];
+
+                       // Fields derived from the content itself; appended after the ones above.
+                       $indexText = $content->getTextForSearchIndex();
+                       $fieldData += [
+                               'text' => $indexText,
+                               'source_text' => $indexText,
+                               'text_bytes' => $content->getSize(),
+                       ];
+               }
+
+               Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] );
+
+               return $fieldData;
+       }
+
+       /**
+        * Produce page output suitable for indexing.
+        *
+        * The parser cache, when given, is consulted first. On a miss the page content
+        * is parsed with 'canonical' parser options and the result is saved to the cache.
+        *
+        * Specific content handlers may override it if they need different content handling.
+        *
+        * @param WikiPage $page
+        * @param ParserCache|null $cache Optional cache to read from and write to
+        * @return ParserOutput
+        * @throws RuntimeException If the page has no revision, or no content to parse
+        */
+       public function getParserOutputForIndexing( WikiPage $page, ParserCache $cache = null ) {
+               $parserOptions = $page->makeParserOptions( 'canonical' );
+
+               // NOTE(review): assumes getRevision() yields null when the page has no
+               // revision; fail with a clear message instead of calling getId() on null.
+               $revision = $page->getRevision();
+               if ( !$revision ) {
+                       throw new RuntimeException( 'Cannot produce parser output for a page without a revision' );
+               }
+               $revId = $revision->getId();
+
+               // Defined on every path, so the miss check never reads an unset variable.
+               $parserOutput = $cache ? $cache->get( $page, $parserOptions ) : null;
+               if ( !$parserOutput ) {
+                       // getContent() may return nothing, which getDataForSearchIndex() guards
+                       // against as well; report it explicitly here.
+                       $content = $page->getContent();
+                       if ( !$content ) {
+                               throw new RuntimeException( 'Cannot produce parser output for a page without content' );
+                       }
+
+                       $parserOutput = $content->getParserOutput( $page->getTitle(), $revId, $parserOptions );
+                       if ( $cache ) {
+                               $cache->save( $parserOutput, $page, $parserOptions );
+                       }
+               }
+
+               return $parserOutput;
+       }
+
 }