Merge "Fix mime detection of easily-confused-with text/plain formats"
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Fri, 4 Jul 2014 07:43:42 +0000 (07:43 +0000)
committerGerrit Code Review <gerrit@wikimedia.org>
Fri, 4 Jul 2014 07:43:42 +0000 (07:43 +0000)
includes/MimeMagic.php
includes/mime.info
includes/mime.types
includes/upload/UploadBase.php
tests/phpunit/includes/MimeMagicTest.php [new file with mode: 0644]

index c2ac186..59f850c 100644 (file)
@@ -485,14 +485,6 @@ class MimeMagic {
         * by looking at the file extension. Typically, this method would be called on the
         * result of guessMimeType().
         *
-        * Currently, this method does the following:
-        *
-        * If $mime is "unknown/unknown" and isRecognizableExtension( $ext ) returns false,
-        * return the result of guessTypesForExtension($ext).
-        *
-        * If $mime is "application/x-opc+zip" and isMatchingExtension( $ext, $mime )
-        * gives true, return the result of guessTypesForExtension($ext).
-        *
         * @param string $mime The mime type, typically guessed from a file's content.
         * @param string $ext The file extension, as taken from the file name
         *
@@ -518,6 +510,12 @@ class MimeMagic {
                                        ".$ext is not a known OPC extension.\n" );
                                $mime = 'application/zip';
                        }
+               } elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
+                       // Textual types are sometimes not recognized properly.
+                       // If detected as text/plain, and has an extension which is textual
+                       // improve to the extension's type. For example, csv and json are often
+                       // misdetected as text/plain.
+                       $mime = $this->guessTypesForExtension( $ext );
                }
 
                if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
index 9295ffb..7a4de3b 100644 (file)
@@ -65,6 +65,9 @@ text/plain    [TEXT]
 text/html application/xhtml+xml        [TEXT]
 application/xml text/xml       [TEXT]
 text   [TEXT]
+application/json       [TEXT]
+text/csv       [TEXT]
+text/tab-separated-values      [TEXT]
 
 application/zip application/x-zip      [ARCHIVE]
 application/x-gzip     [ARCHIVE]
index c764b39..ae6ca4d 100644 (file)
@@ -35,6 +35,7 @@ application/x-gzip gz
 application/x-hdf hdf
 application/x-jar jar
 application/x-javascript js
+application/json json
 application/x-koan skp skd skt skm
 application/x-latex latex
 application/x-netcdf nc cdf
@@ -109,6 +110,7 @@ model/mesh msh mesh silo
 model/vrml wrl vrml
 text/calendar ics ifb
 text/css css
+text/csv csv
 text/html html htm
 text/plain txt
 text/richtext rtx
index d1de993..5defd45 100644 (file)
@@ -435,7 +435,7 @@ abstract class UploadBase {
                }
 
                $this->mFileProps = FSFile::getPropsFromPath( $this->mTempPath, $this->mFinalExtension );
-               $mime = $this->mFileProps['file-mime'];
+               $mime = $this->mFileProps['mime'];
 
                if ( $wgVerifyMimeType ) {
                        # XXX: Missing extension will be caught by validateName() via getTitle()
diff --git a/tests/phpunit/includes/MimeMagicTest.php b/tests/phpunit/includes/MimeMagicTest.php
new file mode 100644 (file)
index 0000000..9582fe7
--- /dev/null
@@ -0,0 +1,39 @@
+<?php
+class MimeMagicTest extends MediaWikiTestCase {
+
+       /** @var MimeMagic */
+       private $mimeMagic;
+
+       function setUp() {
+               $this->mimeMagic = MimeMagic::singleton();
+               parent::setUp();
+       }
+
+       /**
+        * @dataProvider providerImproveTypeFromExtension
+        * @param $ext String File extension (no leading dot)
+        * @param $oldMime String Initially detected mime
+        * @param $expectedMime String Mime type after taking extension into account
+        */
+       function testImproveTypeFromExtension( $ext, $oldMime, $expectedMime ) {
+               $actualMime = $this->mimeMagic->improveTypeFromExtension( $oldMime, $ext );
+               $this->assertEquals( $expectedMime, $actualMime );
+       }
+
+       function providerImproveTypeFromExtension() {
+               return array(
+                       array( 'gif', 'image/gif', 'image/gif' ),
+                       array( 'gif', 'unknown/unknown', 'unknown/unknown' ),
+                       array( 'wrl', 'unknown/unknown', 'model/vrml' ),
+                       array( 'txt', 'text/plain', 'text/plain' ),
+                       array( 'csv', 'text/plain', 'text/csv' ),
+                       array( 'tsv', 'text/plain', 'text/tab-separated-values' ),
+                       array( 'json', 'text/plain', 'application/json' ),
+                       array( 'foo', 'application/x-opc+zip', 'application/zip' ),
+                       array( 'docx', 'application/x-opc+zip', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ),
+                       array( 'djvu', 'image/x-djvu', 'image/vnd.djvu' ),
+                       array( 'wav', 'audio/wav', 'audio/wav' ),
+               );
+       }
+
+}