Merge "Extract ParserOutput search index data fields from WikiTextContentHandler"
author jenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Fri, 19 Aug 2016 18:40:17 +0000 (18:40 +0000)
committer Gerrit Code Review <gerrit@wikimedia.org>
Fri, 19 Aug 2016 18:40:17 +0000 (18:40 +0000)
1  2 
autoload.php
includes/content/ContentHandler.php
includes/content/WikitextContentHandler.php
tests/phpunit/includes/content/ContentHandlerTest.php

diff --combined autoload.php
@@@ -372,6 -372,7 +372,7 @@@ $wgAutoloadLocalClasses = 
        'DoubleRedirectsPage' => __DIR__ . '/includes/specials/SpecialDoubleRedirects.php',
        'DoubleReplacer' => __DIR__ . '/includes/libs/replacers/DoubleReplacer.php',
        'DummyLinker' => __DIR__ . '/includes/DummyLinker.php',
+       'DummySearchIndexFieldDefinition' => __DIR__ . '/includes/search/DummySearchIndexFieldDefinition.php',
        'DummyTermColorer' => __DIR__ . '/maintenance/term/MWTerm.php',
        'Dump7ZipOutput' => __DIR__ . '/includes/export/Dump7ZipOutput.php',
        'DumpBZip2Output' => __DIR__ . '/includes/export/DumpBZip2Output.php',
        'GitInfo' => __DIR__ . '/includes/GitInfo.php',
        'GlobalDependency' => __DIR__ . '/includes/cache/CacheDependency.php',
        'GlobalVarConfig' => __DIR__ . '/includes/config/GlobalVarConfig.php',
 +      'HHVMMakeRepo' => __DIR__ . '/maintenance/hhvm/makeRepo.php',
        'HTMLApiField' => __DIR__ . '/includes/htmlform/fields/HTMLApiField.php',
        'HTMLAutoCompleteSelectField' => __DIR__ . '/includes/htmlform/fields/HTMLAutoCompleteSelectField.php',
        'HTMLButtonField' => __DIR__ . '/includes/htmlform/fields/HTMLButtonField.php',
        'MediaWiki\\Logger\\NullSpi' => __DIR__ . '/includes/debug/logger/NullSpi.php',
        'MediaWiki\\Logger\\Spi' => __DIR__ . '/includes/debug/logger/Spi.php',
        'MediaWiki\\MediaWikiServices' => __DIR__ . '/includes/MediaWikiServices.php',
+       'MediaWiki\\Search\\ParserOutputSearchDataExtractor' => __DIR__ . '/includes/search/ParserOutputSearchDataExtractor.php',
        'MediaWiki\\Services\\CannotReplaceActiveServiceException' => __DIR__ . '/includes/Services/CannotReplaceActiveServiceException.php',
        'MediaWiki\\Services\\ContainerDisabledException' => __DIR__ . '/includes/Services/ContainerDisabledException.php',
        'MediaWiki\\Services\\DestructibleService' => __DIR__ . '/includes/Services/DestructibleService.php',
diff --combined includes/content/ContentHandler.php
@@@ -1,4 -1,7 +1,7 @@@
  <?php
+ use MediaWiki\Search\ParserOutputSearchDataExtractor;
  /**
   * Base class for content handling.
   *
@@@ -1156,19 -1159,62 +1159,19 @@@ abstract class ContentHandler 
         *
         * @param string $event Event name
         * @param array $args Parameters passed to hook functions
 -       * @param bool $warn Whether to log a warning.
 -       *                    Default to self::$enableDeprecationWarnings.
 -       *                    May be set to false for testing.
 +       * @param string|null $deprecatedVersion Emit a deprecation notice
 +       *   when the hook is run for the provided version
         *
         * @return bool True if no handler aborted the hook
 -       *
 -       * @see ContentHandler::$enableDeprecationWarnings
         */
        public static function runLegacyHooks( $event, $args = [],
 -              $warn = null
 +              $deprecatedVersion = null
        ) {
  
 -              if ( $warn === null ) {
 -                      $warn = self::$enableDeprecationWarnings;
 -              }
 -
                if ( !Hooks::isRegistered( $event ) ) {
                        return true; // nothing to do here
                }
  
 -              if ( $warn ) {
 -                      // Log information about which handlers are registered for the legacy hook,
 -                      // so we can find and fix them.
 -
 -                      $handlers = Hooks::getHandlers( $event );
 -                      $handlerInfo = [];
 -
 -                      MediaWiki\suppressWarnings();
 -
 -                      foreach ( $handlers as $handler ) {
 -                              if ( is_array( $handler ) ) {
 -                                      if ( is_object( $handler[0] ) ) {
 -                                              $info = get_class( $handler[0] );
 -                                      } else {
 -                                              $info = $handler[0];
 -                                      }
 -
 -                                      if ( isset( $handler[1] ) ) {
 -                                              $info .= '::' . $handler[1];
 -                                      }
 -                              } elseif ( is_object( $handler ) ) {
 -                                      $info = get_class( $handler[0] );
 -                                      $info .= '::on' . $event;
 -                              } else {
 -                                      $info = $handler;
 -                              }
 -
 -                              $handlerInfo[] = $info;
 -                      }
 -
 -                      MediaWiki\restoreWarnings();
 -
 -                      wfWarn( "Using obsolete hook $event via ContentHandler::runLegacyHooks()! Handlers: " .
 -                              implode( ', ', $handlerInfo ), 2 );
 -              }
 -
                // convert Content objects to text
                $contentObjects = [];
                $contentTexts = [];
                }
  
                // call the hook functions
 -              $ok = Hooks::run( $event, $args );
 +              $ok = Hooks::run( $event, $args, $deprecatedVersion );
  
                // see if the hook changed the text
                foreach ( $contentTexts as $k => $orig ) {
  
        /**
         * Get fields definition for search index
+        *
+        * @todo Expose title, redirect, namespace, text, source_text, text_bytes
+        *       field mappings here. (see T142670 and T143409)
+        *
         * @param SearchEngine $engine
         * @return SearchIndexField[] List of fields this content handler can provide.
         * @since 1.28
         */
        public function getFieldsForSearchIndex( SearchEngine $engine ) {
-               /* Default fields:
-               /*
-                * namespace
-                * namespace_text
-                * redirect
-                * source_text
-                * suggest
-                * timestamp
-                * title
-                * text
-                * text_bytes
-                */
-               return [];
+               $fields['category'] = $engine->makeSearchFieldMapping(
+                       'category',
+                       SearchIndexField::INDEX_TYPE_TEXT
+               );
+               $fields['category']->setFlag( SearchIndexField::FLAG_CASEFOLD );
+               $fields['external_link'] = $engine->makeSearchFieldMapping(
+                       'external_link',
+                       SearchIndexField::INDEX_TYPE_KEYWORD
+               );
+               $fields['outgoing_link'] = $engine->makeSearchFieldMapping(
+                       'outgoing_link',
+                       SearchIndexField::INDEX_TYPE_KEYWORD
+               );
+               $fields['template'] = $engine->makeSearchFieldMapping(
+                       'template',
+                       SearchIndexField::INDEX_TYPE_KEYWORD
+               );
+               $fields['template']->setFlag( SearchIndexField::FLAG_CASEFOLD );
+               return $fields;
        }
  
        /**
         */
        public function getDataForSearchIndex( WikiPage $page, ParserOutput $output,
                                               SearchEngine $engine ) {
-               $fields = [];
+               $fieldData = [];
                $content = $page->getContent();
                if ( $content ) {
+                       $searchDataExtractor = new ParserOutputSearchDataExtractor();
+                       $fieldData['category'] = $searchDataExtractor->getCategories( $output );
+                       $fieldData['external_link'] = $searchDataExtractor->getExternalLinks( $output );
+                       $fieldData['outgoing_link'] = $searchDataExtractor->getOutgoingLinks( $output );
+                       $fieldData['template'] = $searchDataExtractor->getTemplates( $output );
                        $text = $content->getTextForSearchIndex();
-                       $fields['text'] = $text;
-                       $fields['source_text'] = $text;
-                       $fields['text_bytes'] = $content->getSize();
+                       $fieldData['text'] = $text;
+                       $fieldData['source_text'] = $text;
+                       $fieldData['text_bytes'] = $content->getSize();
                }
-               Hooks::run( 'SearchDataForIndex', [ &$fields, $this, $page, $output, $engine ] );
-               return $fields;
+               Hooks::run( 'SearchDataForIndex', [ &$fieldData, $this, $page, $output, $engine ] );
+               return $fieldData;
        }
  
        /**
diff --combined includes/content/WikitextContentHandler.php
@@@ -111,13 -111,6 +111,6 @@@ class WikitextContentHandler extends Te
        public function getFieldsForSearchIndex( SearchEngine $engine ) {
                $fields = parent::getFieldsForSearchIndex( $engine );
  
-               $fields['category'] =
-                       $engine->makeSearchFieldMapping( 'category', SearchIndexField::INDEX_TYPE_TEXT );
-               $fields['category']->setFlag( SearchIndexField::FLAG_CASEFOLD );
-               $fields['external_link'] =
-                       $engine->makeSearchFieldMapping( 'external_link', SearchIndexField::INDEX_TYPE_KEYWORD );
                $fields['heading'] =
                        $engine->makeSearchFieldMapping( 'heading', SearchIndexField::INDEX_TYPE_TEXT );
                $fields['heading']->setFlag( SearchIndexField::FLAG_SCORING );
                $fields['opening_text']->setFlag( SearchIndexField::FLAG_SCORING |
                                                  SearchIndexField::FLAG_NO_HIGHLIGHT );
  
-               $fields['outgoing_link'] =
-                       $engine->makeSearchFieldMapping( 'outgoing_link', SearchIndexField::INDEX_TYPE_KEYWORD );
-               $fields['template'] =
-                       $engine->makeSearchFieldMapping( 'template', SearchIndexField::INDEX_TYPE_KEYWORD );
-               $fields['template']->setFlag( SearchIndexField::FLAG_CASEFOLD );
                // FIXME: this really belongs in separate file handler but files
                // do not have separate handler. Sadness.
                $fields['file_text'] =
        protected function getFileText( Title $title ) {
                $file = wfLocalFile( $title );
                if ( $file && $file->exists() ) {
 -                      return $file->getHandler()->getEntireText( $file );
 +                      $handler = $file->getHandler();
 +                      if ( !$handler ) {
 +                              return null;
 +                      }
 +                      return $handler->getEntireText( $file );
                }
  
                return null;
                $fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine );
  
                $structure = new WikiTextStructure( $parserOutput );
-               $fields['external_link'] = array_keys( $parserOutput->getExternalLinks() );
-               $fields['category'] = $structure->categories();
                $fields['heading'] = $structure->headings();
-               $fields['outgoing_link'] = $structure->outgoingLinks();
-               $fields['template'] = $structure->templates();
                // text fields
                $fields['opening_text'] = $structure->getOpeningText();
                $fields['text'] = $structure->getMainText(); // overwrites one from ContentHandler
diff --combined tests/phpunit/includes/content/ContentHandlerTest.php
@@@ -376,7 -376,8 +376,7 @@@ class ContentHandlerTest extends MediaW
                $content = new WikitextContent( 'test text' );
                $ok = ContentHandler::runLegacyHooks(
                        'testRunLegacyHooks',
 -                      [ 'foo', &$content, 'bar' ],
 -                      false
 +                      [ 'foo', &$content, 'bar' ]
                );
  
                $this->assertTrue( $ok, "runLegacyHooks should have returned true" );
                $this->assertInstanceOf( $handlerClass, $handler );
        }
  
+       public function testGetFieldsForSearchIndex() {
+               $searchEngine = $this->newSearchEngine();
+               $handler = ContentHandler::getForModelID( CONTENT_MODEL_WIKITEXT );
+               $fields = $handler->getFieldsForSearchIndex( $searchEngine );
+               $this->assertArrayHasKey( 'category', $fields );
+               $this->assertArrayHasKey( 'external_link', $fields );
+               $this->assertArrayHasKey( 'outgoing_link', $fields );
+               $this->assertArrayHasKey( 'template', $fields );
+       }
+       private function newSearchEngine() {
+               $searchEngine = $this->getMockBuilder( 'SearchEngine' )
+                       ->getMock();
+               $searchEngine->expects( $this->any() )
+                       ->method( 'makeSearchFieldMapping' )
+                       ->will( $this->returnCallback( function( $name, $type ) {
+                                       return new DummySearchIndexFieldDefinition( $name, $type );
+                       } ) );
+               return $searchEngine;
+       }
        /**
         * @covers ContentHandler::getDataForSearchIndex
         */
  
                $this->setTemporaryHook( 'SearchDataForIndex',
                        function ( &$fields, ContentHandler $handler, WikiPage $page, ParserOutput $output,
-                                  SearchEngine $engine ) {
+                                          SearchEngine $engine ) {
                                $fields['testDataField'] = 'test content';
                        } );