Create fields & data for image/file data indexing
author Stanislav Malyshev <smalyshev@gmail.com>
Wed, 14 Sep 2016 00:52:33 +0000 (17:52 -0700)
committer Stanislav Malyshev <smalyshev@gmail.com>
Tue, 27 Sep 2016 06:42:06 +0000 (23:42 -0700)
Bug: T145558
Change-Id: I23d4c8235d0e4150eefec31cea4b2cfdd32bf32a

autoload.php
includes/content/FileContentHandler.php [new file with mode: 0644]
includes/content/WikitextContentHandler.php
tests/phpunit/includes/content/FileContentHandlerTest.php [new file with mode: 0644]
tests/phpunit/includes/content/WikitextContentHandlerTest.php

index ec15c99..dfb0bf6 100644 (file)
@@ -466,6 +466,7 @@ $wgAutoloadLocalClasses = [
        'FileBackendStoreShardListIterator' => __DIR__ . '/includes/libs/filebackend/FileBackendStore.php',
        'FileBasedSiteLookup' => __DIR__ . '/includes/site/FileBasedSiteLookup.php',
        'FileCacheBase' => __DIR__ . '/includes/cache/FileCacheBase.php',
        'FileBackendStoreShardListIterator' => __DIR__ . '/includes/libs/filebackend/FileBackendStore.php',
        'FileBasedSiteLookup' => __DIR__ . '/includes/site/FileBasedSiteLookup.php',
        'FileCacheBase' => __DIR__ . '/includes/cache/FileCacheBase.php',
+       'FileContentHandler' => __DIR__ . '/includes/content/FileContentHandler.php',
        'FileContentsHasher' => __DIR__ . '/includes/utils/FileContentsHasher.php',
        'FileDeleteForm' => __DIR__ . '/includes/FileDeleteForm.php',
        'FileDependency' => __DIR__ . '/includes/cache/CacheDependency.php',
        'FileContentsHasher' => __DIR__ . '/includes/utils/FileContentsHasher.php',
        'FileDeleteForm' => __DIR__ . '/includes/FileDeleteForm.php',
        'FileDependency' => __DIR__ . '/includes/cache/CacheDependency.php',
diff --git a/includes/content/FileContentHandler.php b/includes/content/FileContentHandler.php
new file mode 100644 (file)
index 0000000..26f1190
--- /dev/null
@@ -0,0 +1,62 @@
+<?php
+
+/**
+ * Content handler for File: files
+ * TODO: this handler is not used directly now,
+ * but is instead called manually by WikitextContentHandler.
+ * This should be fixed in the future.
+ */
+class FileContentHandler extends WikitextContentHandler {
+
+       /**
+        * Declare the search index fields that describe an uploaded file.
+        *
+        * @param SearchEngine $engine Engine used to create each field mapping
+        * @return array Field mappings keyed by field name, as produced by
+        *  $engine->makeSearchFieldMapping()
+        */
+       public function getFieldsForSearchIndex( SearchEngine $engine ) {
+               $fields = [];
+
+               // Media type and MIME type are both flagged with FLAG_CASEFOLD.
+               $caseFoldedTypes = [
+                       'file_media_type' => SearchIndexField::INDEX_TYPE_KEYWORD,
+                       'file_mime' => SearchIndexField::INDEX_TYPE_SHORT_TEXT,
+               ];
+               foreach ( $caseFoldedTypes as $name => $type ) {
+                       $fields[$name] = $engine->makeSearchFieldMapping( $name, $type );
+                       $fields[$name]->setFlag( SearchIndexField::FLAG_CASEFOLD );
+               }
+
+               // Plain integer properties of the file.
+               $integerNames = [
+                       'file_size',
+                       'file_width',
+                       'file_height',
+                       'file_bits',
+                       'file_resolution',
+               ];
+               foreach ( $integerNames as $name ) {
+                       $fields[$name] =
+                               $engine->makeSearchFieldMapping( $name, SearchIndexField::INDEX_TYPE_INTEGER );
+               }
+
+               $fields['file_text'] =
+                       $engine->makeSearchFieldMapping( 'file_text', SearchIndexField::INDEX_TYPE_TEXT );
+
+               return $fields;
+       }
+
+       /**
+        * Collect the file-specific values to index for a page.
+        *
+        * Returns an empty array when the page is not in the File namespace or
+        * when no existing local file matches its title. $parserOutput and
+        * $engine are accepted for signature compatibility but are not read here.
+        *
+        * @param WikiPage $page Page whose title identifies the file
+        * @param ParserOutput $parserOutput
+        * @param SearchEngine $engine
+        * @return array Values keyed by field name; 'file_text' is only present
+        *  when the file has a media handler
+        */
+       public function getDataForSearchIndex( WikiPage $page, ParserOutput $parserOutput,
+                                              SearchEngine $engine ) {
+               $title = $page->getTitle();
+               if ( NS_FILE != $title->getNamespace() ) {
+                       return [];
+               }
+
+               $file = wfLocalFile( $title );
+               if ( !( $file && $file->exists() ) ) {
+                       return [];
+               }
+
+               $data = [];
+
+               $mediaHandler = $file->getHandler();
+               if ( $mediaHandler ) {
+                       $data['file_text'] = $mediaHandler->getEntireText( $file );
+               }
+
+               $data['file_media_type'] = $file->getMediaType();
+               $data['file_mime'] = $file->getMimeType();
+               $data['file_size'] = $file->getSize();
+
+               $width = $file->getWidth();
+               $height = $file->getHeight();
+               $data['file_width'] = $width;
+               $data['file_height'] = $height;
+               $data['file_bits'] = $file->getBitDepth();
+
+               // Resolution is the square root of width times height, rounded down.
+               $data['file_resolution'] = (int)floor( sqrt( $width * $height ) );
+
+               return $data;
+       }
+
+}
index 978ac44..74b2f1a 100644 (file)
@@ -108,6 +108,14 @@ class WikitextContentHandler extends TextContentHandler {
                return true;
        }
 
                return true;
        }
 
+       /**
+        * Get the handler that supplies file-specific search fields and data.
+        *
+        * A new FileContentHandler is created on every call. Being a separate
+        * protected method, it can be overridden to substitute another handler
+        * (for example, a mock in tests).
+        *
+        * @return FileContentHandler
+        */
+       protected function getFileHandler() {
+               return new FileContentHandler();
+       }
+
        public function getFieldsForSearchIndex( SearchEngine $engine ) {
                $fields = parent::getFieldsForSearchIndex( $engine );
 
        public function getFieldsForSearchIndex( SearchEngine $engine ) {
                $fields = parent::getFieldsForSearchIndex( $engine );
 
@@ -122,34 +130,12 @@ class WikitextContentHandler extends TextContentHandler {
                        $engine->makeSearchFieldMapping( 'opening_text', SearchIndexField::INDEX_TYPE_TEXT );
                $fields['opening_text']->setFlag( SearchIndexField::FLAG_SCORING |
                                                  SearchIndexField::FLAG_NO_HIGHLIGHT );
                        $engine->makeSearchFieldMapping( 'opening_text', SearchIndexField::INDEX_TYPE_TEXT );
                $fields['opening_text']->setFlag( SearchIndexField::FLAG_SCORING |
                                                  SearchIndexField::FLAG_NO_HIGHLIGHT );
-
-               // FIXME: this really belongs in separate file handler but files
-               // do not have separate handler. Sadness.
-               $fields['file_text'] =
-                       $engine->makeSearchFieldMapping( 'file_text', SearchIndexField::INDEX_TYPE_TEXT );
+               // Until we have full first-class content handler for files, we invoke it explicitly here
+               $fields = array_merge( $fields, $this->getFileHandler()->getFieldsForSearchIndex( $engine ) );
 
                return $fields;
        }
 
 
                return $fields;
        }
 
-       /**
-        * Extract text of the file
-        * TODO: probably should go to file handler?
-        * @param Title $title
-        * @return string|null
-        */
-       protected function getFileText( Title $title ) {
-               $file = wfLocalFile( $title );
-               if ( $file && $file->exists() ) {
-                       $handler = $file->getHandler();
-                       if ( !$handler ) {
-                               return null;
-                       }
-                       return $handler->getEntireText( $file );
-               }
-
-               return null;
-       }
-
        public function getDataForSearchIndex( WikiPage $page, ParserOutput $parserOutput,
                                               SearchEngine $engine ) {
                $fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine );
        public function getDataForSearchIndex( WikiPage $page, ParserOutput $parserOutput,
                                               SearchEngine $engine ) {
                $fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine );
@@ -162,12 +148,10 @@ class WikitextContentHandler extends TextContentHandler {
                $fields['auxiliary_text'] = $structure->getAuxiliaryText();
                $fields['defaultsort'] = $structure->getDefaultSort();
 
                $fields['auxiliary_text'] = $structure->getAuxiliaryText();
                $fields['defaultsort'] = $structure->getDefaultSort();
 
-               $title = $page->getTitle();
-               if ( NS_FILE == $title->getNamespace() ) {
-                       $fileText = $this->getFileText( $title );
-                       if ( $fileText ) {
-                               $fields['file_text'] = $fileText;
-                       }
+               // Until we have full first-class content handler for files, we invoke it explicitly here
+               if ( NS_FILE == $page->getTitle()->getNamespace() ) {
+                       $fields = array_merge( $fields,
+                                       $this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine ) );
                }
                return $fields;
        }
                }
                return $fields;
        }
diff --git a/tests/phpunit/includes/content/FileContentHandlerTest.php b/tests/phpunit/includes/content/FileContentHandlerTest.php
new file mode 100644 (file)
index 0000000..276a86e
--- /dev/null
@@ -0,0 +1,50 @@
+<?php
+
+/**
+ * @group ContentHandler
+ */
+class FileContentHandlerTest extends MediaWikiLangTestCase {
+       /**
+        * Handler under test, recreated before every test.
+        *
+        * @var FileContentHandler
+        */
+       private $handler;
+
+       protected function setUp() {
+               parent::setUp();
+
+               $this->handler = new FileContentHandler();
+       }
+
+       /**
+        * Every expected file field must be present in the mapping, and each
+        * mapped field must report the name it is keyed under.
+        */
+       public function testIndexMapping() {
+               $mockEngine = $this->getMock( 'SearchEngine' );
+
+               // Build a field definition for whatever name/type the handler asks for.
+               $makeField = function ( $name, $type ) {
+                       return $this->getMockBuilder( 'SearchIndexFieldDefinition' )
+                               ->setMethods( [ 'getMapping' ] )
+                               ->setConstructorArgs( [ $name, $type ] )
+                               ->getMock();
+               };
+
+               $mockEngine->expects( $this->atLeastOnce() )
+                       ->method( 'makeSearchFieldMapping' )
+                       ->willReturnCallback( $makeField );
+
+               $map = $this->handler->getFieldsForSearchIndex( $mockEngine );
+
+               // Names still waiting to be seen; each one is crossed off as it is found.
+               $missing = array_fill_keys( [
+                       'file_media_type',
+                       'file_mime',
+                       'file_size',
+                       'file_width',
+                       'file_height',
+                       'file_bits',
+                       'file_resolution',
+                       'file_text',
+               ], 1 );
+
+               foreach ( $map as $name => $field ) {
+                       $this->assertInstanceOf( 'SearchIndexField', $field );
+                       $this->assertEquals( $name, $field->getName() );
+                       unset( $missing[$name] );
+               }
+
+               $this->assertEmpty( $missing );
+       }
+}
index 9d4abe8..ec97d76 100644 (file)
@@ -249,11 +249,20 @@ class WikitextContentHandlerTest extends MediaWikiLangTestCase {
                $title = Title::newFromText( 'Somefile.jpg', NS_FILE );
                $page = new WikiPage( $title );
 
                $title = Title::newFromText( 'Somefile.jpg', NS_FILE );
                $page = new WikiPage( $title );
 
+               $fileHandler = $this->getMockBuilder( FileContentHandler::class )
+                       ->disableOriginalConstructor()
+                       ->setMethods( [ 'getDataForSearchIndex' ] )
+                       ->getMock();
+
                $handler = $this->getMockBuilder( WikitextContentHandler::class )
                        ->disableOriginalConstructor()
                $handler = $this->getMockBuilder( WikitextContentHandler::class )
                        ->disableOriginalConstructor()
-                       ->setMethods( [ 'getFileText' ] )
+                       ->setMethods( [ 'getFileHandler' ] )
                        ->getMock();
                        ->getMock();
-               $handler->method( 'getFileText' )->will( $this->returnValue( 'This is file content' ) );
+
+               $handler->method( 'getFileHandler' )->will( $this->returnValue( $fileHandler ) );
+               $fileHandler->expects( $this->once() )
+                       ->method( 'getDataForSearchIndex' )
+                       ->will( $this->returnValue( [ 'file_text' => 'This is file content' ] ) );
 
                $data = $handler->getDataForSearchIndex( $page, new ParserOutput(), $mockEngine );
                $this->assertArrayHasKey( 'file_text', $data );
 
                $data = $handler->getDataForSearchIndex( $page, new ParserOutput(), $mockEngine );
                $this->assertArrayHasKey( 'file_text', $data );