From 2a395370fc7f2fe5bff4fae8778e6b0385429f99 Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Tue, 13 Sep 2016 17:52:33 -0700 Subject: [PATCH] Create fields & data for image/file data indexing Bug: T145558 Change-Id: I23d4c8235d0e4150eefec31cea4b2cfdd32bf32a --- autoload.php | 1 + includes/content/FileContentHandler.php | 62 +++++++++++++++++++ includes/content/WikitextContentHandler.php | 44 +++++-------- .../content/FileContentHandlerTest.php | 50 +++++++++++++++ .../content/WikitextContentHandlerTest.php | 13 +++- 5 files changed, 138 insertions(+), 32 deletions(-) create mode 100644 includes/content/FileContentHandler.php create mode 100644 tests/phpunit/includes/content/FileContentHandlerTest.php diff --git a/autoload.php b/autoload.php index ec15c996b9..dfb0bf6b50 100644 --- a/autoload.php +++ b/autoload.php @@ -466,6 +466,7 @@ $wgAutoloadLocalClasses = [ 'FileBackendStoreShardListIterator' => __DIR__ . '/includes/libs/filebackend/FileBackendStore.php', 'FileBasedSiteLookup' => __DIR__ . '/includes/site/FileBasedSiteLookup.php', 'FileCacheBase' => __DIR__ . '/includes/cache/FileCacheBase.php', + 'FileContentHandler' => __DIR__ . '/includes/content/FileContentHandler.php', 'FileContentsHasher' => __DIR__ . '/includes/utils/FileContentsHasher.php', 'FileDeleteForm' => __DIR__ . '/includes/FileDeleteForm.php', 'FileDependency' => __DIR__ . '/includes/cache/CacheDependency.php', diff --git a/includes/content/FileContentHandler.php b/includes/content/FileContentHandler.php new file mode 100644 index 0000000000..26f119065d --- /dev/null +++ b/includes/content/FileContentHandler.php @@ -0,0 +1,62 @@ +makeSearchFieldMapping( 'file_media_type', SearchIndexField::INDEX_TYPE_KEYWORD ); + $fields['file_media_type']->setFlag( SearchIndexField::FLAG_CASEFOLD ); + $fields['file_mime'] = + $engine->makeSearchFieldMapping( 'file_mime', SearchIndexField::INDEX_TYPE_SHORT_TEXT ); + $fields['file_mime']->setFlag( SearchIndexField::FLAG_CASEFOLD ); + $fields['file_size'] = + $engine->makeSearchFieldMapping( 'file_size', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_width'] = + $engine->makeSearchFieldMapping( 'file_width', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_height'] = + $engine->makeSearchFieldMapping( 'file_height', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_bits'] = + $engine->makeSearchFieldMapping( 'file_bits', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_resolution'] = + $engine->makeSearchFieldMapping( 'file_resolution', SearchIndexField::INDEX_TYPE_INTEGER ); + $fields['file_text'] = + $engine->makeSearchFieldMapping( 'file_text', SearchIndexField::INDEX_TYPE_TEXT ); + return $fields; + } + + public function getDataForSearchIndex( WikiPage $page, ParserOutput $parserOutput, + SearchEngine $engine ) { + $fields = []; + + $title = $page->getTitle(); + if ( NS_FILE != $title->getNamespace() ) { + return []; + } + $file = wfLocalFile( $title ); + if ( !$file || !$file->exists() ) { + return []; + } + + $handler = $file->getHandler(); + if ( $handler ) { + $fields['file_text'] = $handler->getEntireText( $file ); + } + $fields['file_media_type'] = $file->getMediaType(); + $fields['file_mime'] = $file->getMimeType(); + $fields['file_size'] = $file->getSize(); + $fields['file_width'] = $file->getWidth(); + $fields['file_height'] = $file->getHeight(); + $fields['file_bits'] = $file->getBitDepth(); + $fields['file_resolution'] = + (int)floor( sqrt( $fields['file_width'] * $fields['file_height'] ) ); + + return $fields; + } + +} diff --git a/includes/content/WikitextContentHandler.php b/includes/content/WikitextContentHandler.php index 978ac44003..74b2f1aede 100644 --- a/includes/content/WikitextContentHandler.php +++ b/includes/content/WikitextContentHandler.php @@ -108,6 +108,14 @@ class WikitextContentHandler extends TextContentHandler { return true; } + /** + * Get file handler + * @return FileContentHandler + */ + protected function getFileHandler() { + return new FileContentHandler(); + } + public function getFieldsForSearchIndex( SearchEngine $engine ) { $fields = parent::getFieldsForSearchIndex( $engine ); @@ -122,34 +130,12 @@ class WikitextContentHandler extends TextContentHandler { $engine->makeSearchFieldMapping( 'opening_text', SearchIndexField::INDEX_TYPE_TEXT ); $fields['opening_text']->setFlag( SearchIndexField::FLAG_SCORING | SearchIndexField::FLAG_NO_HIGHLIGHT ); - - // FIXME: this really belongs in separate file handler but files - // do not have separate handler. Sadness. - $fields['file_text'] = - $engine->makeSearchFieldMapping( 'file_text', SearchIndexField::INDEX_TYPE_TEXT ); + // Until we have full first-class content handler for files, we invoke it explicitly here + $fields = array_merge( $fields, $this->getFileHandler()->getFieldsForSearchIndex( $engine ) ); return $fields; } - /** - * Extract text of the file - * TODO: probably should go to file handler? - * @param Title $title - * @return string|null - */ - protected function getFileText( Title $title ) { - $file = wfLocalFile( $title ); - if ( $file && $file->exists() ) { - $handler = $file->getHandler(); - if ( !$handler ) { - return null; - } - return $handler->getEntireText( $file ); - } - - return null; - } - public function getDataForSearchIndex( WikiPage $page, ParserOutput $parserOutput, SearchEngine $engine ) { $fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine ); @@ -162,12 +148,10 @@ class WikitextContentHandler extends TextContentHandler { $fields['auxiliary_text'] = $structure->getAuxiliaryText(); $fields['defaultsort'] = $structure->getDefaultSort(); - $title = $page->getTitle(); - if ( NS_FILE == $title->getNamespace() ) { - $fileText = $this->getFileText( $title ); - if ( $fileText ) { - $fields['file_text'] = $fileText; - } + // Until we have full first-class content handler for files, we invoke it explicitly here + if ( NS_FILE == $page->getTitle()->getNamespace() ) { + $fields = array_merge( $fields, + $this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine ) ); } return $fields; } diff --git a/tests/phpunit/includes/content/FileContentHandlerTest.php b/tests/phpunit/includes/content/FileContentHandlerTest.php new file mode 100644 index 0000000000..276a86ee5e --- /dev/null +++ b/tests/phpunit/includes/content/FileContentHandlerTest.php @@ -0,0 +1,50 @@ +handler = new FileContentHandler(); + } + + public function testIndexMapping() { + $mockEngine = $this->getMock( 'SearchEngine' ); + + $mockEngine->expects( $this->atLeastOnce() ) + ->method( 'makeSearchFieldMapping' ) + ->willReturnCallback( function ( $name, $type ) { + $mockField = + $this->getMockBuilder( 'SearchIndexFieldDefinition' ) + ->setMethods( [ 'getMapping' ] ) + ->setConstructorArgs( [ $name, $type ] ) + ->getMock(); + return $mockField; + } ); + + $map = $this->handler->getFieldsForSearchIndex( $mockEngine ); + $expect = [ + 'file_media_type' => 1, + 'file_mime' => 1, + 'file_size' => 1, + 'file_width' => 1, + 'file_height' => 1, + 'file_bits' => 1, + 'file_resolution' => 1, + 'file_text' => 1, + ]; + foreach ( $map as $name => $field ) { + $this->assertInstanceOf( 'SearchIndexField', $field ); + $this->assertEquals( $name, $field->getName() ); + unset( $expect[$name] ); + } + $this->assertEmpty( $expect ); + } +} diff --git a/tests/phpunit/includes/content/WikitextContentHandlerTest.php b/tests/phpunit/includes/content/WikitextContentHandlerTest.php index 9d4abe857d..ec97d76371 100644 --- a/tests/phpunit/includes/content/WikitextContentHandlerTest.php +++ b/tests/phpunit/includes/content/WikitextContentHandlerTest.php @@ -249,11 +249,20 @@ class WikitextContentHandlerTest extends MediaWikiLangTestCase { $title = Title::newFromText( 'Somefile.jpg', NS_FILE ); $page = new WikiPage( $title ); + $fileHandler = $this->getMockBuilder( FileContentHandler::class ) + ->disableOriginalConstructor() + ->setMethods( [ 'getDataForSearchIndex' ] ) + ->getMock(); + $handler = $this->getMockBuilder( WikitextContentHandler::class ) ->disableOriginalConstructor() - ->setMethods( [ 'getFileText' ] ) + ->setMethods( [ 'getFileHandler' ] ) ->getMock(); - $handler->method( 'getFileText' )->will( $this->returnValue( 'This is file content' ) ); + + $handler->method( 'getFileHandler' )->will( $this->returnValue( $fileHandler ) ); + $fileHandler->expects( $this->once() ) + ->method( 'getDataForSearchIndex' ) + ->will( $this->returnValue( [ 'file_text' => 'This is file content' ] ) ); $data = $handler->getDataForSearchIndex( $page, new ParserOutput(), $mockEngine ); $this->assertArrayHasKey( 'file_text', $data ); -- 2.20.1