Move MimeMagic code to libs/mime/MimeAnalyzer.php
authorAaron Schulz <aschulz@wikimedia.org>
Thu, 22 Sep 2016 04:57:13 +0000 (21:57 -0700)
committerAaron Schulz <aschulz@wikimedia.org>
Sat, 15 Oct 2016 00:05:16 +0000 (00:05 +0000)
* The later resides in /libs with related files.
* Explose MimeAnalyzer as a service.
* Keep MimeMagic::singleton() as a b/c alias.
* MimeMagic::applyDefaultConfig() will bootstrap the service
  with all of the old config, extension hook handler, and
  detector command shell-out behavior.

Change-Id: Ie2695a52e7a3bcfda9f7fa83659a9ff31b372bc3

19 files changed:
autoload.php
includes/Defines.php
includes/MediaWikiServices.php
includes/MimeMagic.php
includes/ServiceWiring.php
includes/libs/IEContentAnalyzer.php [deleted file]
includes/libs/XmlTypeCheck.php [deleted file]
includes/libs/mime/IEContentAnalyzer.php [new file with mode: 0644]
includes/libs/mime/MimeAnalyzer.php [new file with mode: 0644]
includes/libs/mime/XmlTypeCheck.php [new file with mode: 0644]
includes/libs/mime/defines.php [new file with mode: 0644]
includes/libs/mime/mime.info [new file with mode: 0644]
includes/libs/mime/mime.types [new file with mode: 0644]
includes/mime.info [deleted file]
includes/mime.types [deleted file]
maintenance/dev/includes/router.php
tests/phpunit/includes/MediaWikiServicesTest.php
tests/phpunit/includes/MimeMagicTest.php [deleted file]
tests/phpunit/includes/libs/mime/MimeAnalyzerTest.php [new file with mode: 0644]

index 748d954..49a9bd4 100644 (file)
@@ -586,7 +586,7 @@ $wgAutoloadLocalClasses = [
        'IContextSource' => __DIR__ . '/includes/context/IContextSource.php',
        'IDBAccessObject' => __DIR__ . '/includes/dao/IDBAccessObject.php',
        'IDatabase' => __DIR__ . '/includes/libs/rdbms/database/IDatabase.php',
-       'IEContentAnalyzer' => __DIR__ . '/includes/libs/IEContentAnalyzer.php',
+       'IEContentAnalyzer' => __DIR__ . '/includes/libs/mime/IEContentAnalyzer.php',
        'IEUrlExtension' => __DIR__ . '/includes/libs/IEUrlExtension.php',
        'IExpiringStore' => __DIR__ . '/includes/libs/objectcache/IExpiringStore.php',
        'IJobSpecification' => __DIR__ . '/includes/jobqueue/JobSpecification.php',
@@ -943,6 +943,7 @@ $wgAutoloadLocalClasses = [
        'MessageSpecifier' => __DIR__ . '/includes/libs/MessageSpecifier.php',
        'MigrateFileRepoLayout' => __DIR__ . '/maintenance/migrateFileRepoLayout.php',
        'MigrateUserGroup' => __DIR__ . '/maintenance/migrateUserGroup.php',
+       'MimeAnalyzer' => __DIR__ . '/includes/libs/mime/MimeAnalyzer.php',
        'MimeMagic' => __DIR__ . '/includes/MimeMagic.php',
        'MinifyScript' => __DIR__ . '/maintenance/minify.php',
        'MostcategoriesPage' => __DIR__ . '/includes/specials/SpecialMostcategories.php',
@@ -1580,7 +1581,7 @@ $wgAutoloadLocalClasses = [
        'XmlDumpWriter' => __DIR__ . '/includes/export/XmlDumpWriter.php',
        'XmlJsCode' => __DIR__ . '/includes/Xml.php',
        'XmlSelect' => __DIR__ . '/includes/XmlSelect.php',
-       'XmlTypeCheck' => __DIR__ . '/includes/libs/XmlTypeCheck.php',
+       'XmlTypeCheck' => __DIR__ . '/includes/libs/mime/XmlTypeCheck.php',
        'ZhConverter' => __DIR__ . '/languages/classes/LanguageZh.php',
        'ZipDirectoryReader' => __DIR__ . '/includes/utils/ZipDirectoryReader.php',
        'ZipDirectoryReaderError' => __DIR__ . '/includes/utils/ZipDirectoryReader.php',
index 02930ea..0616898 100644 (file)
@@ -97,32 +97,7 @@ define( 'CACHE_MEMCACHED', 2 );  // MemCached, must specify servers in $wgMemCac
 define( 'CACHE_ACCEL', 3 );      // APC, XCache or WinCache
 /**@}*/
 
-/**@{
- * Media types.
- * This defines constants for the value returned by File::getMediaType()
- */
-// unknown format
-define( 'MEDIATYPE_UNKNOWN', 'UNKNOWN' );
-// some bitmap image or image source (like psd, etc). Can't scale up.
-define( 'MEDIATYPE_BITMAP', 'BITMAP' );
-// some vector drawing (SVG, WMF, PS, ...) or image source (oo-draw, etc). Can scale up.
-define( 'MEDIATYPE_DRAWING', 'DRAWING' );
-// simple audio file (ogg, mp3, wav, midi, whatever)
-define( 'MEDIATYPE_AUDIO', 'AUDIO' );
-// simple video file (ogg, mpg, etc;
-// no not include formats here that may contain executable sections or scripts!)
-define( 'MEDIATYPE_VIDEO', 'VIDEO' );
-// Scriptable Multimedia (flash, advanced video container formats, etc)
-define( 'MEDIATYPE_MULTIMEDIA', 'MULTIMEDIA' );
-// Office Documents, Spreadsheets (office formats possibly containing apples, scripts, etc)
-define( 'MEDIATYPE_OFFICE', 'OFFICE' );
-// Plain text (possibly containing program code or scripts)
-define( 'MEDIATYPE_TEXT', 'TEXT' );
-// binary executable
-define( 'MEDIATYPE_EXECUTABLE', 'EXECUTABLE' );
-// archive file (zip, tar, etc)
-define( 'MEDIATYPE_ARCHIVE', 'ARCHIVE' );
-/**@}*/
+require_once __DIR__ . '/libs/mime/defines.php';
 
 /**@{
  * Antivirus result codes, for use in $wgAntivirusSetup.
index f91bbae..7f94ced 100644 (file)
@@ -19,6 +19,7 @@ use MediaWiki\Services\SalvageableService;
 use MediaWiki\Services\ServiceContainer;
 use MediaWiki\Services\NoSuchServiceException;
 use MWException;
+use MimeAnalyzer;
 use ObjectCache;
 use ProxyLookup;
 use SearchEngine;
@@ -539,6 +540,14 @@ class MediaWikiServices extends ServiceContainer {
                return $this->getService( 'MediaHandlerFactory' );
        }
 
+       /**
+        * @since 1.28
+        * @return MimeAnalyzer
+        */
+       public function getMimeAnalyzer() {
+               return $this->getService( 'MimeAnalyzer' );
+       }
+
        /**
         * @since 1.28
         * @return ProxyLookup
index 54d58d2..c03bce7 100644 (file)
@@ -1,7 +1,5 @@
 <?php
 /**
- * Module defining helper functions for detecting and dealing with MIME types.
- *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
  *
  * @file
  */
+use MediaWiki\MediaWikiServices;
+use MediaWiki\Logger\LoggerFactory;
 
-/**
- * Defines a set of well known MIME types
- * This is used as a fallback to mime.types files.
- * An extensive list of well known MIME types is provided by
- * the file mime.types in the includes directory.
- *
- * This list concatenated with mime.types is used to create a MIME <-> ext
- * map. Each line contains a MIME type followed by a space separated list of
- * extensions. If multiple extensions for a single MIME type exist or if
- * multiple MIME types exist for a single extension then in most cases
- * MediaWiki assumes that the first extension following the MIME type is the
- * canonical extension, and the first time a MIME type appears for a certain
- * extension is considered the canonical MIME type.
- *
- * (Note that appending $wgMimeTypeFile to the end of MM_WELL_KNOWN_MIME_TYPES
- * sucks because you can't redefine canonical types. This could be fixed by
- * appending MM_WELL_KNOWN_MIME_TYPES behind $wgMimeTypeFile, but who knows
- * what will break? In practice this probably isn't a problem anyway -- Bryan)
- */
-define( 'MM_WELL_KNOWN_MIME_TYPES', <<<END_STRING
-application/ogg ogx ogg ogm ogv oga spx
-application/pdf pdf
-application/vnd.oasis.opendocument.chart odc
-application/vnd.oasis.opendocument.chart-template otc
-application/vnd.oasis.opendocument.database odb
-application/vnd.oasis.opendocument.formula odf
-application/vnd.oasis.opendocument.formula-template otf
-application/vnd.oasis.opendocument.graphics odg
-application/vnd.oasis.opendocument.graphics-template otg
-application/vnd.oasis.opendocument.image odi
-application/vnd.oasis.opendocument.image-template oti
-application/vnd.oasis.opendocument.presentation odp
-application/vnd.oasis.opendocument.presentation-template otp
-application/vnd.oasis.opendocument.spreadsheet ods
-application/vnd.oasis.opendocument.spreadsheet-template ots
-application/vnd.oasis.opendocument.text odt
-application/vnd.oasis.opendocument.text-master otm
-application/vnd.oasis.opendocument.text-template ott
-application/vnd.oasis.opendocument.text-web oth
-application/javascript js
-application/x-shockwave-flash swf
-audio/midi mid midi kar
-audio/mpeg mpga mpa mp2 mp3
-audio/x-aiff aif aiff aifc
-audio/x-wav wav
-audio/ogg oga spx ogg
-image/x-bmp bmp
-image/gif gif
-image/jpeg jpeg jpg jpe
-image/png png
-image/svg+xml svg
-image/svg svg
-image/tiff tiff tif
-image/vnd.djvu djvu
-image/x.djvu djvu
-image/x-djvu djvu
-image/x-portable-pixmap ppm
-image/x-xcf xcf
-text/plain txt
-text/html html htm
-video/ogg ogv ogm ogg
-video/mpeg mpg mpeg
-END_STRING
-);
-
-/**
- * Defines a set of well known MIME info entries
- * This is used as a fallback to mime.info files.
- * An extensive list of well known MIME types is provided by
- * the file mime.info in the includes directory.
- */
-define( 'MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
-application/pdf [OFFICE]
-application/vnd.oasis.opendocument.chart [OFFICE]
-application/vnd.oasis.opendocument.chart-template [OFFICE]
-application/vnd.oasis.opendocument.database [OFFICE]
-application/vnd.oasis.opendocument.formula [OFFICE]
-application/vnd.oasis.opendocument.formula-template [OFFICE]
-application/vnd.oasis.opendocument.graphics [OFFICE]
-application/vnd.oasis.opendocument.graphics-template [OFFICE]
-application/vnd.oasis.opendocument.image [OFFICE]
-application/vnd.oasis.opendocument.image-template [OFFICE]
-application/vnd.oasis.opendocument.presentation [OFFICE]
-application/vnd.oasis.opendocument.presentation-template [OFFICE]
-application/vnd.oasis.opendocument.spreadsheet [OFFICE]
-application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
-application/vnd.oasis.opendocument.text [OFFICE]
-application/vnd.oasis.opendocument.text-template [OFFICE]
-application/vnd.oasis.opendocument.text-master [OFFICE]
-application/vnd.oasis.opendocument.text-web [OFFICE]
-application/javascript text/javascript application/x-javascript [EXECUTABLE]
-application/x-shockwave-flash [MULTIMEDIA]
-audio/midi [AUDIO]
-audio/x-aiff [AUDIO]
-audio/x-wav [AUDIO]
-audio/mp3 audio/mpeg [AUDIO]
-application/ogg audio/ogg video/ogg [MULTIMEDIA]
-image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
-image/gif [BITMAP]
-image/jpeg [BITMAP]
-image/png [BITMAP]
-image/svg+xml [DRAWING]
-image/tiff [BITMAP]
-image/vnd.djvu [BITMAP]
-image/x-xcf [BITMAP]
-image/x-portable-pixmap [BITMAP]
-text/plain [TEXT]
-text/html [TEXT]
-video/ogg [VIDEO]
-video/mpeg [VIDEO]
-unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
-END_STRING
-);
-
-/**
- * Implements functions related to MIME types such as detection and mapping to
- * file extension.
- *
- * Instances of this class are stateless, there only needs to be one global instance
- * of MimeMagic. Please use MimeMagic::singleton() to get that instance.
- */
-class MimeMagic {
-       /**
-        * @var array Mapping of media types to arrays of MIME types.
-        * This is used by findMediaType and getMediaType, respectively
-        */
-       protected $mMediaTypes = null;
-
-       /** @var array Map of MIME type aliases
-        */
-       protected $mMimeTypeAliases = null;
-
-       /** @var array Map of MIME types to file extensions (as a space separated list)
-        */
-       protected $mMimeToExt = null;
-
-       /** @var array Map of file extensions types to MIME types (as a space separated list)
-        */
-       public $mExtToMime = null;
-
-       /** @var IEContentAnalyzer
-        */
-       protected $mIEAnalyzer;
-
-       /** @var string Extra MIME types, set for example by media handling extensions
-        */
-       private $mExtraTypes = '';
-
-       /** @var string Extra MIME info, set for example by media handling extensions
-        */
-       private $mExtraInfo = '';
-
-       /** @var Config */
-       private $mConfig;
-
-       /** @var MimeMagic The singleton instance
-        */
-       private static $instance = null;
-
-       /** Initializes the MimeMagic object. This is called by MimeMagic::singleton().
-        *
-        * This constructor parses the mime.types and mime.info files and build internal mappings.
-        *
-        * @todo Make this constructor private once everything uses the singleton instance
-        * @param Config $config
-        */
-       function __construct( Config $config = null ) {
-               if ( !$config ) {
-                       wfDebug( __METHOD__ . ' called with no Config instance passed to it' );
-                       $config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
-               }
-               $this->mConfig = $config;
-
-               /**
-                *   --- load mime.types ---
-                */
-
-               global $IP;
-
-               # Allow media handling extensions adding MIME-types and MIME-info
-               Hooks::run( 'MimeMagicInit', [ $this ] );
-
-               $types = MM_WELL_KNOWN_MIME_TYPES;
-
-               $mimeTypeFile = $this->mConfig->get( 'MimeTypeFile' );
-               if ( $mimeTypeFile == 'includes/mime.types' ) {
-                       $mimeTypeFile = "$IP/$mimeTypeFile";
-               }
-
-               if ( $mimeTypeFile ) {
-                       if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
-                               wfDebug( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
-                               $types .= "\n";
-                               $types .= file_get_contents( $mimeTypeFile );
-                       } else {
-                               wfDebug( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
-                       }
-               } else {
-                       wfDebug( __METHOD__ . ": no mime types file defined, using built-ins only.\n" );
-               }
-
-               $types .= "\n" . $this->mExtraTypes;
-
-               $types = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $types );
-               $types = str_replace( "\t", " ", $types );
-
-               $this->mMimeToExt = [];
-               $this->mExtToMime = [];
-
-               $lines = explode( "\n", $types );
-               foreach ( $lines as $s ) {
-                       $s = trim( $s );
-                       if ( empty( $s ) ) {
-                               continue;
-                       }
-                       if ( strpos( $s, '#' ) === 0 ) {
-                               continue;
-                       }
-
-                       $s = strtolower( $s );
-                       $i = strpos( $s, ' ' );
-
-                       if ( $i === false ) {
-                               continue;
-                       }
-
-                       $mime = substr( $s, 0, $i );
-                       $ext = trim( substr( $s, $i + 1 ) );
-
-                       if ( empty( $ext ) ) {
-                               continue;
-                       }
-
-                       if ( !empty( $this->mMimeToExt[$mime] ) ) {
-                               $this->mMimeToExt[$mime] .= ' ' . $ext;
-                       } else {
-                               $this->mMimeToExt[$mime] = $ext;
-                       }
-
-                       $extensions = explode( ' ', $ext );
-
-                       foreach ( $extensions as $e ) {
-                               $e = trim( $e );
-                               if ( empty( $e ) ) {
-                                       continue;
-                               }
-
-                               if ( !empty( $this->mExtToMime[$e] ) ) {
-                                       $this->mExtToMime[$e] .= ' ' . $mime;
-                               } else {
-                                       $this->mExtToMime[$e] = $mime;
-                               }
-                       }
-               }
-
-               /**
-                *   --- load mime.info ---
-                */
-
-               $mimeInfoFile = $this->mConfig->get( 'MimeInfoFile' );
-               if ( $mimeInfoFile == 'includes/mime.info' ) {
-                       $mimeInfoFile = "$IP/$mimeInfoFile";
-               }
-
-               $info = MM_WELL_KNOWN_MIME_INFO;
-
-               if ( $mimeInfoFile ) {
-                       if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
-                               wfDebug( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
-                               $info .= "\n";
-                               $info .= file_get_contents( $mimeInfoFile );
-                       } else {
-                               wfDebug( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
-                       }
-               } else {
-                       wfDebug( __METHOD__ . ": no mime info file defined, using built-ins only.\n" );
-               }
-
-               $info .= "\n" . $this->mExtraInfo;
-
-               $info = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $info );
-               $info = str_replace( "\t", " ", $info );
-
-               $this->mMimeTypeAliases = [];
-               $this->mMediaTypes = [];
-
-               $lines = explode( "\n", $info );
-               foreach ( $lines as $s ) {
-                       $s = trim( $s );
-                       if ( empty( $s ) ) {
-                               continue;
-                       }
-                       if ( strpos( $s, '#' ) === 0 ) {
-                               continue;
-                       }
-
-                       $s = strtolower( $s );
-                       $i = strpos( $s, ' ' );
-
-                       if ( $i === false ) {
-                               continue;
-                       }
-
-                       # print "processing MIME INFO line $s<br>";
-
-                       $match = [];
-                       if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
-                               $s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
-                               $mtype = trim( strtoupper( $match[1] ) );
-                       } else {
-                               $mtype = MEDIATYPE_UNKNOWN;
-                       }
-
-                       $m = explode( ' ', $s );
-
-                       if ( !isset( $this->mMediaTypes[$mtype] ) ) {
-                               $this->mMediaTypes[$mtype] = [];
-                       }
-
-                       foreach ( $m as $mime ) {
-                               $mime = trim( $mime );
-                               if ( empty( $mime ) ) {
-                                       continue;
-                               }
-
-                               $this->mMediaTypes[$mtype][] = $mime;
-                       }
-
-                       if ( count( $m ) > 1 ) {
-                               $main = $m[0];
-                               $mCount = count( $m );
-                               for ( $i = 1; $i < $mCount; $i += 1 ) {
-                                       $mime = $m[$i];
-                                       $this->mMimeTypeAliases[$mime] = $main;
-                               }
-                       }
-               }
-       }
-
+class MimeMagic extends MimeAnalyzer {
        /**
         * Get an instance of this class
         * @return MimeMagic
+        * @deprecated since 1.28
         */
        public static function singleton() {
-               if ( self::$instance === null ) {
-                       self::$instance = new MimeMagic(
-                               ConfigFactory::getDefaultInstance()->makeConfig( 'main' )
-                       );
-               }
-               return self::$instance;
-       }
-
-       /**
-        * Adds to the list mapping MIME to file extensions.
-        * As an extension author, you are encouraged to submit patches to
-        * MediaWiki's core to add new MIME types to mime.types.
-        * @param string $types
-        */
-       public function addExtraTypes( $types ) {
-               $this->mExtraTypes .= "\n" . $types;
-       }
-
-       /**
-        * Adds to the list mapping MIME to media type.
-        * As an extension author, you are encouraged to submit patches to
-        * MediaWiki's core to add new MIME info to mime.info.
-        * @param string $info
-        */
-       public function addExtraInfo( $info ) {
-               $this->mExtraInfo .= "\n" . $info;
-       }
-
-       /**
-        * Returns a list of file extensions for a given MIME type as a space
-        * separated string or null if the MIME type was unrecognized. Resolves
-        * MIME type aliases.
-        *
-        * @param string $mime
-        * @return string|null
-        */
-       public function getExtensionsForType( $mime ) {
-               $mime = strtolower( $mime );
-
-               // Check the mime-to-ext map
-               if ( isset( $this->mMimeToExt[$mime] ) ) {
-                       return $this->mMimeToExt[$mime];
-               }
-
-               // Resolve the MIME type to the canonical type
-               if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
-                       $mime = $this->mMimeTypeAliases[$mime];
-                       if ( isset( $this->mMimeToExt[$mime] ) ) {
-                               return $this->mMimeToExt[$mime];
-                       }
-               }
-
-               return null;
+               return MediaWikiServices::getInstance()->getMIMEAnalyzer();
        }
 
        /**
-        * Returns a list of MIME types for a given file extension as a space
-        * separated string or null if the extension was unrecognized.
-        *
-        * @param string $ext
-        * @return string|null
-        */
-       public function getTypesForExtension( $ext ) {
-               $ext = strtolower( $ext );
-
-               $r = isset( $this->mExtToMime[$ext] ) ? $this->mExtToMime[$ext] : null;
-               return $r;
-       }
-
-       /**
-        * Returns a single MIME type for a given file extension or null if unknown.
-        * This is always the first type from the list returned by getTypesForExtension($ext).
-        *
-        * @param string $ext
-        * @return string|null
-        */
-       public function guessTypesForExtension( $ext ) {
-               $m = $this->getTypesForExtension( $ext );
-               if ( is_null( $m ) ) {
-                       return null;
-               }
-
-               // TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
-               $m = trim( $m );
-               $m = preg_replace( '/\s.*$/', '', $m );
-
-               return $m;
-       }
-
-       /**
-        * Tests if the extension matches the given MIME type. Returns true if a
-        * match was found, null if the MIME type is unknown, and false if the
-        * MIME type is known but no matches where found.
-        *
-        * @param string $extension
-        * @param string $mime
-        * @return bool|null
-        */
-       public function isMatchingExtension( $extension, $mime ) {
-               $ext = $this->getExtensionsForType( $mime );
-
-               if ( !$ext ) {
-                       return null; // Unknown MIME type
-               }
-
-               $ext = explode( ' ', $ext );
-
-               $extension = strtolower( $extension );
-               return in_array( $extension, $ext );
-       }
-
-       /**
-        * Returns true if the MIME type is known to represent an image format
-        * supported by the PHP GD library.
-        *
-        * @param string $mime
-        *
-        * @return bool
-        */
-       public function isPHPImageType( $mime ) {
-               // As defined by imagegetsize and image_type_to_mime
-               static $types = [
-                       'image/gif', 'image/jpeg', 'image/png',
-                       'image/x-bmp', 'image/xbm', 'image/tiff',
-                       'image/jp2', 'image/jpeg2000', 'image/iff',
-                       'image/xbm', 'image/x-xbitmap',
-                       'image/vnd.wap.wbmp', 'image/vnd.xiff',
-                       'image/x-photoshop',
-                       'application/x-shockwave-flash',
-               ];
-
-               return in_array( $mime, $types );
-       }
-
-       /**
-        * Returns true if the extension represents a type which can
-        * be reliably detected from its content. Use this to determine
-        * whether strict content checks should be applied to reject
-        * invalid uploads; if we can't identify the type we won't
-        * be able to say if it's invalid.
-        *
-        * @todo Be more accurate when using fancy MIME detector plugins;
-        *       right now this is the bare minimum getimagesize() list.
-        * @param string $extension
-        * @return bool
-        */
-       function isRecognizableExtension( $extension ) {
-               static $types = [
-                       // Types recognized by getimagesize()
-                       'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
-                       'bmp', 'tiff', 'tif', 'jpc', 'jp2',
-                       'jpx', 'jb2', 'swc', 'iff', 'wbmp',
-                       'xbm',
-
-                       // Formats we recognize magic numbers for
-                       'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
-                       'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
-                       'webp',
-
-                       // XML formats we sure hope we recognize reliably
-                       'svg',
-               ];
-               return in_array( strtolower( $extension ), $types );
-       }
-
-       /**
-        * Improves a MIME type using the file extension. Some file formats are very generic,
-        * so their MIME type is not very meaningful. A more useful MIME type can be derived
-        * by looking at the file extension. Typically, this method would be called on the
-        * result of guessMimeType().
-        *
-        * @param string $mime The MIME type, typically guessed from a file's content.
-        * @param string $ext The file extension, as taken from the file name
-        *
-        * @return string The MIME type
-        */
-       public function improveTypeFromExtension( $mime, $ext ) {
-               if ( $mime === 'unknown/unknown' ) {
-                       if ( $this->isRecognizableExtension( $ext ) ) {
-                               wfDebug( __METHOD__ . ': refusing to guess mime type for .' .
-                                       "$ext file, we should have recognized it\n" );
-                       } else {
-                               // Not something we can detect, so simply
-                               // trust the file extension
-                               $mime = $this->guessTypesForExtension( $ext );
-                       }
-               } elseif ( $mime === 'application/x-opc+zip' ) {
-                       if ( $this->isMatchingExtension( $ext, $mime ) ) {
-                               // A known file extension for an OPC file,
-                               // find the proper MIME type for that file extension
-                               $mime = $this->guessTypesForExtension( $ext );
-                       } else {
-                               wfDebug( __METHOD__ . ": refusing to guess better type for $mime file, " .
-                                       ".$ext is not a known OPC extension.\n" );
-                               $mime = 'application/zip';
-                       }
-               } elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
-                       // Textual types are sometimes not recognized properly.
-                       // If detected as text/plain, and has an extension which is textual
-                       // improve to the extension's type. For example, csv and json are often
-                       // misdetected as text/plain.
-                       $mime = $this->guessTypesForExtension( $ext );
-               }
-
-               # Media handling extensions can improve the MIME detected
-               Hooks::run( 'MimeMagicImproveFromExtension', [ $this, $ext, &$mime ] );
-
-               if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
-                       $mime = $this->mMimeTypeAliases[$mime];
-               }
-
-               wfDebug( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
-               return $mime;
-       }
-
-       /**
-        * MIME type detection. This uses detectMimeType to detect the MIME type
-        * of the file, but applies additional checks to determine some well known
-        * file formats that may be missed or misinterpreted by the default MIME
-        * detection (namely XML based formats like XHTML or SVG, as well as ZIP
-        * based formats like OPC/ODF files).
-        *
-        * @param string $file The file to check
-        * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
-        *   Set it to false to ignore the extension. DEPRECATED! Set to false, use
-        *   improveTypeFromExtension($mime, $ext) later to improve MIME type.
-        *
-        * @return string The MIME type of $file
-        */
-       public function guessMimeType( $file, $ext = true ) {
-               if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
-                       wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
-                               "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
-               }
-
-               $mime = $this->doGuessMimeType( $file, $ext );
-
-               if ( !$mime ) {
-                       wfDebug( __METHOD__ . ": internal type detection failed for $file (.$ext)...\n" );
-                       $mime = $this->detectMimeType( $file, $ext );
-               }
-
-               if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
-                       $mime = $this->mMimeTypeAliases[$mime];
-               }
-
-               wfDebug( __METHOD__ . ": guessed mime type of $file: $mime\n" );
-               return $mime;
-       }
-
-       /**
-        * Guess the MIME type from the file contents.
-        *
-        * @todo Remove $ext param
-        *
-        * @param string $file
-        * @param mixed $ext
-        * @return bool|string
-        * @throws MWException
+        * @param array $params
+        * @param Config $mainConfig
+        * @return array
         */
-       private function doGuessMimeType( $file, $ext ) {
-               // Read a chunk of the file
-               MediaWiki\suppressWarnings();
-               $f = fopen( $file, 'rb' );
-               MediaWiki\restoreWarnings();
-
-               if ( !$f ) {
-                       return 'unknown/unknown';
-               }
-
-               $fsize = filesize( $file );
-               if ( $fsize === false ) {
-                       return 'unknown/unknown';
-               }
-
-               $head = fread( $f, 1024 );
-               $tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
-               if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
-                       throw new MWException(
-                               "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
-               }
-               $tail = $tailLength ? fread( $f, $tailLength ) : '';
-               fclose( $f );
-
-               wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" );
-
-               // Hardcode a few magic number checks...
-               $headers = [
-                       // Multimedia...
-                       'MThd'             => 'audio/midi',
-                       'OggS'             => 'application/ogg',
-
-                       // Image formats...
-                       // Note that WMF may have a bare header, no magic number.
-                       "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
-                       "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
-                       '%PDF'             => 'application/pdf',
-                       'gimp xcf'         => 'image/x-xcf',
-
-                       // Some forbidden fruit...
-                       'MZ'               => 'application/octet-stream', // DOS/Windows executable
-                       "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
-                       "\x7fELF"          => 'application/octet-stream', // ELF binary
-               ];
-
-               foreach ( $headers as $magic => $candidate ) {
-                       if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
-                               wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
-                               return $candidate;
-                       }
-               }
-
-               /* Look for WebM and Matroska files */
-               if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
-                       $doctype = strpos( $head, "\x42\x82" );
-                       if ( $doctype ) {
-                               // Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers)
-                               $data = substr( $head, $doctype + 3, 8 );
-                               if ( strncmp( $data, "matroska", 8 ) == 0 ) {
-                                       wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" );
-                                       return "video/x-matroska";
-                               } elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
-                                       wfDebug( __METHOD__ . ": recognized file as video/webm\n" );
-                                       return "video/webm";
-                               }
-                       }
-                       wfDebug( __METHOD__ . ": unknown EBML file\n" );
-                       return "unknown/unknown";
-               }
-
-               /* Look for WebP */
-               if ( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0 ) {
-                       wfDebug( __METHOD__ . ": recognized file as image/webp\n" );
-                       return "image/webp";
-               }
-
-               /**
-                * Look for PHP.  Check for this before HTML/XML...  Warning: this is a
-                * heuristic, and won't match a file with a lot of non-PHP before.  It
-                * will also match text files which could be PHP. :)
-                *
-                * @todo FIXME: For this reason, the check is probably useless -- an attacker
-                * could almost certainly just pad the file with a lot of nonsense to
-                * circumvent the check in any case where it would be a security
-                * problem.  On the other hand, it causes harmful false positives (bug
-                * 16583).  The heuristic has been cut down to exclude three-character
-                * strings like "<? ", but should it be axed completely?
-                */
-               if ( ( strpos( $head, '<?php' ) !== false ) ||
-                       ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
-                       ( strpos( $head, "<\x00?\x00 " ) !== false ) ||
-                       ( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
-                       ( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
-                       ( strpos( $head, "<\x00?\x00=" ) !== false ) ) {
-
-                       wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
-                       return 'application/x-php';
-               }
-
-               /**
-                * look for XML formats (XHTML and SVG)
-                */
-               $xml = new XmlTypeCheck( $file );
-               if ( $xml->wellFormed ) {
-                       $xmlMimeTypes = $this->mConfig->get( 'XMLMimeTypes' );
-                       if ( isset( $xmlMimeTypes[$xml->getRootElement()] ) ) {
-                               return $xmlMimeTypes[$xml->getRootElement()];
-                       } else {
-                               return 'application/xml';
-                       }
-               }
-
-               /**
-                * look for shell scripts
-                */
-               $script_type = null;
-
-               # detect by shebang
-               if ( substr( $head, 0, 2 ) == "#!" ) {
-                       $script_type = "ASCII";
-               } elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
-                       $script_type = "UTF-8";
-               } elseif ( substr( $head, 0, 7 ) == "\xfe\xff\x00#\x00!" ) {
-                       $script_type = "UTF-16BE";
-               } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) {
-                       $script_type = "UTF-16LE";
-               }
-
-               if ( $script_type ) {
-                       if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
-                               // Quick and dirty fold down to ASCII!
-                               $pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
-                               $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
-                               $head = '';
-                               foreach ( $chars as $codepoint ) {
-                                       if ( $codepoint < 128 ) {
-                                               $head .= chr( $codepoint );
-                                       } else {
-                                               $head .= '?';
+       public static function applyDefaultParameters( array $params, Config $mainConfig ) {
+               $logger = LoggerFactory::getInstance( 'Mime' );
+               $params += [
+                       'typeFile' => $mainConfig->get( 'MimeTypeFile' ),
+                       'infoFile' => $mainConfig->get( 'MimeInfoFile' ),
+                       'xmlTypes' => $mainConfig->get( 'XMLMimeTypes' ),
+                       'guessCallback' =>
+                               function ( $mimeAnalyzer, &$head, &$tail, $file, &$mime ) use ( $logger ) {
+                                       // Also test DjVu
+                                       $deja = new DjVuImage( $file );
+                                       if ( $deja->isValid() ) {
+                                               $logger->info( __METHOD__ . ": detected $file as image/vnd.djvu\n" );
+                                               $mime = 'image/vnd.djvu';
+
+                                               return;
                                        }
-                               }
-                       }
-
-                       $match = [];
-
-                       if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
-                               $mime = "application/x-{$match[2]}";
-                               wfDebug( __METHOD__ . ": shell script recognized as $mime\n" );
-                               return $mime;
-                       }
-               }
-
-               // Check for ZIP variants (before getimagesize)
-               if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
-                       wfDebug( __METHOD__ . ": ZIP header present in $file\n" );
-                       return $this->detectZipType( $head, $tail, $ext );
-               }
-
-               MediaWiki\suppressWarnings();
-               $gis = getimagesize( $file );
-               MediaWiki\restoreWarnings();
-
-               if ( $gis && isset( $gis['mime'] ) ) {
-                       $mime = $gis['mime'];
-                       wfDebug( __METHOD__ . ": getimagesize detected $file as $mime\n" );
-                       return $mime;
-               }
-
-               // Also test DjVu
-               $deja = new DjVuImage( $file );
-               if ( $deja->isValid() ) {
-                       wfDebug( __METHOD__ . ": detected $file as image/vnd.djvu\n" );
-                       return 'image/vnd.djvu';
-               }
-
-               # Media handling extensions can guess the MIME by content
-               # It's intentionally here so that if core is wrong about a type (false positive),
-               # people will hopefully nag and submit patches :)
-               $mime = false;
-               # Some strings by reference for performance - assuming well-behaved hooks
-               Hooks::run(
-                       'MimeMagicGuessFromContent',
-                       [ $this, &$head, &$tail, $file, &$mime ]
-               );
-
-               return $mime;
-       }
-
-       /**
-        * Detect application-specific file type of a given ZIP file from its
-        * header data.  Currently works for OpenDocument and OpenXML types...
-        * If can't tell, returns 'application/zip'.
-        *
-        * @param string $header Some reasonably-sized chunk of file header
-        * @param string|null $tail The tail of the file
-        * @param string|bool $ext The file extension, or true to extract it from the filename.
-        *   Set it to false (default) to ignore the extension. DEPRECATED! Set to false,
-        *   use improveTypeFromExtension($mime, $ext) later to improve MIME type.
-        *
-        * @return string
-        */
-       function detectZipType( $header, $tail = null, $ext = false ) {
-               if ( $ext ) { # TODO: remove $ext param
-                       wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
-                               "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
-               }
-
-               $mime = 'application/zip';
-               $opendocTypes = [
-                       'chart-template',
-                       'chart',
-                       'formula-template',
-                       'formula',
-                       'graphics-template',
-                       'graphics',
-                       'image-template',
-                       'image',
-                       'presentation-template',
-                       'presentation',
-                       'spreadsheet-template',
-                       'spreadsheet',
-                       'text-template',
-                       'text-master',
-                       'text-web',
-                       'text' ];
-
-               // http://lists.oasis-open.org/archives/office/200505/msg00006.html
-               $types = '(?:' . implode( '|', $opendocTypes ) . ')';
-               $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
-
-               $openxmlRegex = "/^\[Content_Types\].xml/";
-
-               if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
-                       $mime = $matches[1];
-                       wfDebug( __METHOD__ . ": detected $mime from ZIP archive\n" );
-               } elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
-                       $mime = "application/x-opc+zip";
-                       # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
-                       if ( $ext !== true && $ext !== false ) {
-                               // These MIME's are stored in the database, where we don't really want
-                               // x-opc+zip, because we use it only for internal purposes
-                               if ( $this->isMatchingExtension( $ext, $mime ) ) {
-                                       /* A known file extension for an OPC file,
-                                        * find the proper mime type for that file extension
-                                        */
-                                       $mime = $this->guessTypesForExtension( $ext );
-                               } else {
-                                       $mime = "application/zip";
-                               }
-                       }
-                       wfDebug( __METHOD__ . ": detected an Open Packaging Conventions archive: $mime\n" );
-               } elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
-                               ( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
-                               preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
-                       if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
-                               $mime = "application/msword";
-                       }
-                       switch ( substr( $header, 512, 6 ) ) {
-                               case "\xEC\xA5\xC1\x00\x0E\x00":
-                               case "\xEC\xA5\xC1\x00\x1C\x00":
-                               case "\xEC\xA5\xC1\x00\x43\x00":
-                                       $mime = "application/vnd.ms-powerpoint";
-                                       break;
-                               case "\xFD\xFF\xFF\xFF\x10\x00":
-                               case "\xFD\xFF\xFF\xFF\x1F\x00":
-                               case "\xFD\xFF\xFF\xFF\x22\x00":
-                               case "\xFD\xFF\xFF\xFF\x23\x00":
-                               case "\xFD\xFF\xFF\xFF\x28\x00":
-                               case "\xFD\xFF\xFF\xFF\x29\x00":
-                               case "\xFD\xFF\xFF\xFF\x10\x02":
-                               case "\xFD\xFF\xFF\xFF\x1F\x02":
-                               case "\xFD\xFF\xFF\xFF\x22\x02":
-                               case "\xFD\xFF\xFF\xFF\x23\x02":
-                               case "\xFD\xFF\xFF\xFF\x28\x02":
-                               case "\xFD\xFF\xFF\xFF\x29\x02":
-                                       $mime = "application/vnd.msexcel";
-                                       break;
-                       }
-
-                       wfDebug( __METHOD__ . ": detected a MS Office document with OPC trailer\n" );
-               } else {
-                       wfDebug( __METHOD__ . ": unable to identify type of ZIP archive\n" );
-               }
-               return $mime;
-       }
-
-       /**
-        * Internal MIME type detection. Detection is done using an external
-        * program, if $wgMimeDetectorCommand is set. Otherwise, the fileinfo
-        * extension is tried if it is available. If detection fails and $ext
-        * is not false, the MIME type is guessed from the file extension,
-        * using guessTypesForExtension.
-        *
-        * If the MIME type is still unknown, getimagesize is used to detect the
-        * MIME type if the file is an image. If no MIME type can be determined,
-        * this function returns 'unknown/unknown'.
-        *
-        * @param string $file The file to check
-        * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
-        *   Set it to false to ignore the extension. DEPRECATED! Set to false, use
-        *   improveTypeFromExtension($mime, $ext) later to improve MIME type.
-        *
-        * @return string The MIME type of $file
-        */
-       private function detectMimeType( $file, $ext = true ) {
-               /** @todo Make $ext default to false. Or better, remove it. */
-               if ( $ext ) {
-                       wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. "
-                               . "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
-               }
-
-               $mimeDetectorCommand = $this->mConfig->get( 'MimeDetectorCommand' );
-               $m = null;
-               if ( $mimeDetectorCommand ) {
-                       $args = wfEscapeShellArg( $file );
-                       $m = wfShellExec( "$mimeDetectorCommand $args" );
-               } elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {
-                       $mime_magic_resource = finfo_open( FILEINFO_MIME );
-
-                       if ( $mime_magic_resource ) {
-                               $m = finfo_file( $mime_magic_resource, $file );
-                               finfo_close( $mime_magic_resource );
-                       } else {
-                               wfDebug( __METHOD__ . ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
-                       }
-               } else {
-                       wfDebug( __METHOD__ . ": no magic mime detector found!\n" );
-               }
-
-               if ( $m ) {
-                       # normalize
-                       $m = preg_replace( '![;, ].*$!', '', $m ); # strip charset, etc
-                       $m = trim( $m );
-                       $m = strtolower( $m );
-
-                       if ( strpos( $m, 'unknown' ) !== false ) {
-                               $m = null;
-                       } else {
-                               wfDebug( __METHOD__ . ": magic mime type of $file: $m\n" );
-                               return $m;
-                       }
-               }
-
-               // If desired, look at extension as a fallback.
-               if ( $ext === true ) {
-                       $i = strrpos( $file, '.' );
-                       $ext = strtolower( $i ? substr( $file, $i + 1 ) : '' );
-               }
-               if ( $ext ) {
-                       if ( $this->isRecognizableExtension( $ext ) ) {
-                               wfDebug( __METHOD__ . ": refusing to guess mime type for .$ext file, "
-                                       . "we should have recognized it\n" );
-                       } else {
-                               $m = $this->guessTypesForExtension( $ext );
-                               if ( $m ) {
-                                       wfDebug( __METHOD__ . ": extension mime type of $file: $m\n" );
-                                       return $m;
-                               }
-                       }
-               }
-
-               // Unknown type
-               wfDebug( __METHOD__ . ": failed to guess mime type for $file!\n" );
-               return 'unknown/unknown';
-       }
-
-       /**
-        * Determine the media type code for a file, using its MIME type, name and
-        * possibly its contents.
-        *
-        * This function relies on the findMediaType(), mapping extensions and MIME
-        * types to media types.
-        *
-        * @todo analyse file if need be
-        * @todo look at multiple extension, separately and together.
-        *
-        * @param string $path Full path to the image file, in case we have to look at the contents
-        *        (if null, only the MIME type is used to determine the media type code).
-        * @param string $mime MIME type. If null it will be guessed using guessMimeType.
-        *
-        * @return string A value to be used with the MEDIATYPE_xxx constants.
-        */
-       function getMediaType( $path = null, $mime = null ) {
-               if ( !$mime && !$path ) {
-                       return MEDIATYPE_UNKNOWN;
-               }
-
-               // If MIME type is unknown, guess it
-               if ( !$mime ) {
-                       $mime = $this->guessMimeType( $path, false );
-               }
-
-               // Special code for ogg - detect if it's video (theora),
-               // else label it as sound.
-               if ( $mime == 'application/ogg' && file_exists( $path ) ) {
-
-                       // Read a chunk of the file
-                       $f = fopen( $path, "rt" );
-                       if ( !$f ) {
-                               return MEDIATYPE_UNKNOWN;
-                       }
-                       $head = fread( $f, 256 );
-                       fclose( $f );
-
-                       $head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );
-
-                       // This is an UGLY HACK, file should be parsed correctly
-                       if ( strpos( $head, 'theora' ) !== false ) {
-                               return MEDIATYPE_VIDEO;
-                       } elseif ( strpos( $head, 'vorbis' ) !== false ) {
-                               return MEDIATYPE_AUDIO;
-                       } elseif ( strpos( $head, 'flac' ) !== false ) {
-                               return MEDIATYPE_AUDIO;
-                       } elseif ( strpos( $head, 'speex' ) !== false ) {
-                               return MEDIATYPE_AUDIO;
-                       } else {
-                               return MEDIATYPE_MULTIMEDIA;
-                       }
-               }
-
-               $type = null;
-               // Check for entry for full MIME type
-               if ( $mime ) {
-                       $type = $this->findMediaType( $mime );
-                       if ( $type !== MEDIATYPE_UNKNOWN ) {
-                               return $type;
-                       }
-               }
-
-               // Check for entry for file extension
-               if ( $path ) {
-                       $i = strrpos( $path, '.' );
-                       $e = strtolower( $i ? substr( $path, $i + 1 ) : '' );
-
-                       // TODO: look at multi-extension if this fails, parse from full path
-                       $type = $this->findMediaType( '.' . $e );
-                       if ( $type !== MEDIATYPE_UNKNOWN ) {
-                               return $type;
-                       }
-               }
-
-               // Check major MIME type
-               if ( $mime ) {
-                       $i = strpos( $mime, '/' );
-                       if ( $i !== false ) {
-                               $major = substr( $mime, 0, $i );
-                               $type = $this->findMediaType( $major );
-                               if ( $type !== MEDIATYPE_UNKNOWN ) {
-                                       return $type;
-                               }
-                       }
-               }
+                                       // Some strings by reference for performance - assuming well-behaved hooks
+                                       Hooks::run(
+                                               'MimeMagicGuessFromContent',
+                                               [ $mimeAnalyzer, &$head, &$tail, $file, &$mime ]
+                                       );
+                               },
+                       'extCallback' => function ( $mimeAnalyzer, $ext, &$mime ) {
+                               // Media handling extensions can improve the MIME detected
+                               Hooks::run( 'MimeMagicImproveFromExtension', [ $mimeAnalyzer, $ext, &$mime ] );
+                       },
+                       'initCallback' => function ( $mimeAnalyzer ) {
+                               // Allow media handling extensions adding MIME-types and MIME-info
+                               Hooks::run( 'MimeMagicInit', [ $mimeAnalyzer ] );
+                       },
+                       'logger' => $logger
+               ];
 
-               if ( !$type ) {
-                       $type = MEDIATYPE_UNKNOWN;
+               if ( $params['infoFile'] === 'includes/mime.info' ) {
+                       $params['infoFile'] = __DIR__ . "/libs/mime/mime.info";
                }
 
-               return $type;
-       }
-
-       /**
-        * Returns a media code matching the given MIME type or file extension.
-        * File extensions are represented by a string starting with a dot (.) to
-        * distinguish them from MIME types.
-        *
-        * This function relies on the mapping defined by $this->mMediaTypes
-        * @access private
-        * @param string $extMime
-        * @return int|string
-        */
-       function findMediaType( $extMime ) {
-               if ( strpos( $extMime, '.' ) === 0 ) {
-                       // If it's an extension, look up the MIME types
-                       $m = $this->getTypesForExtension( substr( $extMime, 1 ) );
-                       if ( !$m ) {
-                               return MEDIATYPE_UNKNOWN;
-                       }
-
-                       $m = explode( ' ', $m );
-               } else {
-                       // Normalize MIME type
-                       if ( isset( $this->mMimeTypeAliases[$extMime] ) ) {
-                               $extMime = $this->mMimeTypeAliases[$extMime];
-                       }
-
-                       $m = [ $extMime ];
+               if ( $params['typeFile'] === 'includes/mime.types' ) {
+                       $params['typeFile'] = __DIR__ . "/libs/mime/mime.types";
                }
 
-               foreach ( $m as $mime ) {
-                       foreach ( $this->mMediaTypes as $type => $codes ) {
-                               if ( in_array( $mime, $codes, true ) ) {
-                                       return $type;
-                               }
-                       }
+               $detectorCmd = $mainConfig->get( 'MimeDetectorCommand' );
+               if ( $detectorCmd ) {
+                       $params['detectCallback'] = function ( $file ) use ( $detectorCmd ) {
+                               return wfShellExec( "$detectorCmd " . wfEscapeShellArg( $file ) );
+                       };
                }
 
-               return MEDIATYPE_UNKNOWN;
-       }
-
-       /**
-        * Get the MIME types that various versions of Internet Explorer would
-        * detect from a chunk of the content.
-        *
-        * @param string $fileName The file name (unused at present)
-        * @param string $chunk The first 256 bytes of the file
-        * @param string $proposed The MIME type proposed by the server
-        * @return array
-        */
-       public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
-               $ca = $this->getIEContentAnalyzer();
-               return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
-       }
-
-       /**
-        * Get a cached instance of IEContentAnalyzer
-        *
-        * @return IEContentAnalyzer
-        */
-       protected function getIEContentAnalyzer() {
-               if ( is_null( $this->mIEAnalyzer ) ) {
-                       $this->mIEAnalyzer = new IEContentAnalyzer;
-               }
-               return $this->mIEAnalyzer;
+               return $params;
        }
 }
index 42b75f0..49183e5 100644 (file)
@@ -190,6 +190,15 @@ return [
                );
        },
 
+       'MimeAnalyzer' => function( MediaWikiServices $services ) {
+               return new MimeMagic(
+                       MimeMagic::applyDefaultParameters(
+                               [],
+                               $services->getMainConfig()
+                       )
+               );
+       },
+
        'ProxyLookup' => function( MediaWikiServices $services ) {
                $mainConfig = $services->getMainConfig();
                return new ProxyLookup(
diff --git a/includes/libs/IEContentAnalyzer.php b/includes/libs/IEContentAnalyzer.php
deleted file mode 100644 (file)
index 0d1e527..0000000
+++ /dev/null
@@ -1,851 +0,0 @@
-<?php
-/**
- * Simulation of Microsoft Internet Explorer's MIME type detection algorithm.
- *
- * @file
- * @todo Define the exact license of this file.
- */
-
-/**
- * This class simulates Microsoft Internet Explorer's terribly broken and
- * insecure MIME type detection algorithm. It can be used to check web uploads
- * with an apparently safe type, to see if IE will reinterpret them to produce
- * something dangerous.
- *
- * It is full of bugs and strange design choices should not under any
- * circumstances be used to determine a MIME type to present to a user or
- * client. (Apple Safari developers, this means you too.)
- *
- * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
- * attempted to ensure that this code works in exactly the same way as Internet
- * Explorer, it does not share any source code, or creative choices such as
- * variable names, thus I (Tim Starling) claim copyright on it.
- *
- * It may be redistributed without restriction. To aid reuse, this class does
- * not depend on any MediaWiki module.
- */
-class IEContentAnalyzer {
-       /**
-        * Relevant data taken from the type table in IE 5
-        */
-       protected $baseTypeTable = [
-               'ambiguous' /*1*/ => [
-                       'text/plain',
-                       'application/octet-stream',
-                       'application/x-netcdf', // [sic]
-               ],
-               'text' /*3*/ => [
-                       'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
-                       'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
-               ],
-               'binary' /*4*/ => [
-                       'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
-                       'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
-                       'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
-                       'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
-                       'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
-                       'application/x-msdownload'
-               ],
-               'html' /*5*/ => [ 'text/html' ],
-       ];
-
-       /**
-        * Changes to the type table in later versions of IE
-        */
-       protected $addedTypes = [
-               'ie07' => [
-                       'text' => [ 'text/xml', 'application/xml' ]
-               ],
-       ];
-
-       /**
-        * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
-        * typical Windows installation.
-        *
-        * Used for extension to MIME type mapping if detection fails.
-        */
-       protected $registry = [
-               '.323' => 'text/h323',
-               '.3g2' => 'video/3gpp2',
-               '.3gp' => 'video/3gpp',
-               '.3gp2' => 'video/3gpp2',
-               '.3gpp' => 'video/3gpp',
-               '.aac' => 'audio/aac',
-               '.ac3' => 'audio/ac3',
-               '.accda' => 'application/msaccess',
-               '.accdb' => 'application/msaccess',
-               '.accdc' => 'application/msaccess',
-               '.accde' => 'application/msaccess',
-               '.accdr' => 'application/msaccess',
-               '.accdt' => 'application/msaccess',
-               '.ade' => 'application/msaccess',
-               '.adp' => 'application/msaccess',
-               '.adts' => 'audio/aac',
-               '.ai' => 'application/postscript',
-               '.aif' => 'audio/aiff',
-               '.aifc' => 'audio/aiff',
-               '.aiff' => 'audio/aiff',
-               '.amc' => 'application/x-mpeg',
-               '.application' => 'application/x-ms-application',
-               '.asf' => 'video/x-ms-asf',
-               '.asx' => 'video/x-ms-asf',
-               '.au' => 'audio/basic',
-               '.avi' => 'video/avi',
-               '.bmp' => 'image/bmp',
-               '.caf' => 'audio/x-caf',
-               '.cat' => 'application/vnd.ms-pki.seccat',
-               '.cbo' => 'application/sha',
-               '.cdda' => 'audio/aiff',
-               '.cer' => 'application/x-x509-ca-cert',
-               '.conf' => 'text/plain',
-               '.crl' => 'application/pkix-crl',
-               '.crt' => 'application/x-x509-ca-cert',
-               '.css' => 'text/css',
-               '.csv' => 'application/vnd.ms-excel',
-               '.der' => 'application/x-x509-ca-cert',
-               '.dib' => 'image/bmp',
-               '.dif' => 'video/x-dv',
-               '.dll' => 'application/x-msdownload',
-               '.doc' => 'application/msword',
-               '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
-               '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
-               '.dot' => 'application/msword',
-               '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
-               '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
-               '.dv' => 'video/x-dv',
-               '.dwfx' => 'model/vnd.dwfx+xps',
-               '.edn' => 'application/vnd.adobe.edn',
-               '.eml' => 'message/rfc822',
-               '.eps' => 'application/postscript',
-               '.etd' => 'application/x-ebx',
-               '.exe' => 'application/x-msdownload',
-               '.fdf' => 'application/vnd.fdf',
-               '.fif' => 'application/fractals',
-               '.gif' => 'image/gif',
-               '.gsm' => 'audio/x-gsm',
-               '.hqx' => 'application/mac-binhex40',
-               '.hta' => 'application/hta',
-               '.htc' => 'text/x-component',
-               '.htm' => 'text/html',
-               '.html' => 'text/html',
-               '.htt' => 'text/webviewhtml',
-               '.hxa' => 'application/xml',
-               '.hxc' => 'application/xml',
-               '.hxd' => 'application/octet-stream',
-               '.hxe' => 'application/xml',
-               '.hxf' => 'application/xml',
-               '.hxh' => 'application/octet-stream',
-               '.hxi' => 'application/octet-stream',
-               '.hxk' => 'application/xml',
-               '.hxq' => 'application/octet-stream',
-               '.hxr' => 'application/octet-stream',
-               '.hxs' => 'application/octet-stream',
-               '.hxt' => 'application/xml',
-               '.hxv' => 'application/xml',
-               '.hxw' => 'application/octet-stream',
-               '.ico' => 'image/x-icon',
-               '.iii' => 'application/x-iphone',
-               '.ins' => 'application/x-internet-signup',
-               '.iqy' => 'text/x-ms-iqy',
-               '.isp' => 'application/x-internet-signup',
-               '.jfif' => 'image/jpeg',
-               '.jnlp' => 'application/x-java-jnlp-file',
-               '.jpe' => 'image/jpeg',
-               '.jpeg' => 'image/jpeg',
-               '.jpg' => 'image/jpeg',
-               '.jtx' => 'application/x-jtx+xps',
-               '.latex' => 'application/x-latex',
-               '.log' => 'text/plain',
-               '.m1v' => 'video/mpeg',
-               '.m2v' => 'video/mpeg',
-               '.m3u' => 'audio/x-mpegurl',
-               '.mac' => 'image/x-macpaint',
-               '.man' => 'application/x-troff-man',
-               '.mda' => 'application/msaccess',
-               '.mdb' => 'application/msaccess',
-               '.mde' => 'application/msaccess',
-               '.mfp' => 'application/x-shockwave-flash',
-               '.mht' => 'message/rfc822',
-               '.mhtml' => 'message/rfc822',
-               '.mid' => 'audio/mid',
-               '.midi' => 'audio/mid',
-               '.mod' => 'video/mpeg',
-               '.mov' => 'video/quicktime',
-               '.mp2' => 'video/mpeg',
-               '.mp2v' => 'video/mpeg',
-               '.mp3' => 'audio/mpeg',
-               '.mp4' => 'video/mp4',
-               '.mpa' => 'video/mpeg',
-               '.mpe' => 'video/mpeg',
-               '.mpeg' => 'video/mpeg',
-               '.mpf' => 'application/vnd.ms-mediapackage',
-               '.mpg' => 'video/mpeg',
-               '.mpv2' => 'video/mpeg',
-               '.mqv' => 'video/quicktime',
-               '.NMW' => 'application/nmwb',
-               '.nws' => 'message/rfc822',
-               '.odc' => 'text/x-ms-odc',
-               '.ols' => 'application/vnd.ms-publisher',
-               '.p10' => 'application/pkcs10',
-               '.p12' => 'application/x-pkcs12',
-               '.p7b' => 'application/x-pkcs7-certificates',
-               '.p7c' => 'application/pkcs7-mime',
-               '.p7m' => 'application/pkcs7-mime',
-               '.p7r' => 'application/x-pkcs7-certreqresp',
-               '.p7s' => 'application/pkcs7-signature',
-               '.pct' => 'image/pict',
-               '.pdf' => 'application/pdf',
-               '.pdx' => 'application/vnd.adobe.pdx',
-               '.pfx' => 'application/x-pkcs12',
-               '.pic' => 'image/pict',
-               '.pict' => 'image/pict',
-               '.pinstall' => 'application/x-picasa-detect',
-               '.pko' => 'application/vnd.ms-pki.pko',
-               '.png' => 'image/png',
-               '.pnt' => 'image/x-macpaint',
-               '.pntg' => 'image/x-macpaint',
-               '.pot' => 'application/vnd.ms-powerpoint',
-               '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
-               '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
-               '.ppa' => 'application/vnd.ms-powerpoint',
-               '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
-               '.pps' => 'application/vnd.ms-powerpoint',
-               '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
-               '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
-               '.ppt' => 'application/vnd.ms-powerpoint',
-               '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
-               '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
-               '.prf' => 'application/pics-rules',
-               '.ps' => 'application/postscript',
-               '.pub' => 'application/vnd.ms-publisher',
-               '.pwz' => 'application/vnd.ms-powerpoint',
-               '.py' => 'text/plain',
-               '.pyw' => 'text/plain',
-               '.qht' => 'text/x-html-insertion',
-               '.qhtm' => 'text/x-html-insertion',
-               '.qt' => 'video/quicktime',
-               '.qti' => 'image/x-quicktime',
-               '.qtif' => 'image/x-quicktime',
-               '.qtl' => 'application/x-quicktimeplayer',
-               '.rat' => 'application/rat-file',
-               '.rmf' => 'application/vnd.adobe.rmf',
-               '.rmi' => 'audio/mid',
-               '.rqy' => 'text/x-ms-rqy',
-               '.rtf' => 'application/msword',
-               '.sct' => 'text/scriptlet',
-               '.sd2' => 'audio/x-sd2',
-               '.sdp' => 'application/sdp',
-               '.shtml' => 'text/html',
-               '.sit' => 'application/x-stuffit',
-               '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
-               '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
-               '.slk' => 'application/vnd.ms-excel',
-               '.snd' => 'audio/basic',
-               '.so' => 'application/x-apachemodule',
-               '.sol' => 'text/plain',
-               '.sor' => 'text/plain',
-               '.spc' => 'application/x-pkcs7-certificates',
-               '.spl' => 'application/futuresplash',
-               '.sst' => 'application/vnd.ms-pki.certstore',
-               '.stl' => 'application/vnd.ms-pki.stl',
-               '.swf' => 'application/x-shockwave-flash',
-               '.thmx' => 'application/vnd.ms-officetheme',
-               '.tif' => 'image/tiff',
-               '.tiff' => 'image/tiff',
-               '.txt' => 'text/plain',
-               '.uls' => 'text/iuls',
-               '.vcf' => 'text/x-vcard',
-               '.vdx' => 'application/vnd.ms-visio.viewer',
-               '.vsd' => 'application/vnd.ms-visio.viewer',
-               '.vss' => 'application/vnd.ms-visio.viewer',
-               '.vst' => 'application/vnd.ms-visio.viewer',
-               '.vsx' => 'application/vnd.ms-visio.viewer',
-               '.vtx' => 'application/vnd.ms-visio.viewer',
-               '.wav' => 'audio/wav',
-               '.wax' => 'audio/x-ms-wax',
-               '.wbk' => 'application/msword',
-               '.wdp' => 'image/vnd.ms-photo',
-               '.wiz' => 'application/msword',
-               '.wm' => 'video/x-ms-wm',
-               '.wma' => 'audio/x-ms-wma',
-               '.wmd' => 'application/x-ms-wmd',
-               '.wmv' => 'video/x-ms-wmv',
-               '.wmx' => 'video/x-ms-wmx',
-               '.wmz' => 'application/x-ms-wmz',
-               '.wpl' => 'application/vnd.ms-wpl',
-               '.wsc' => 'text/scriptlet',
-               '.wvx' => 'video/x-ms-wvx',
-               '.xaml' => 'application/xaml+xml',
-               '.xbap' => 'application/x-ms-xbap',
-               '.xdp' => 'application/vnd.adobe.xdp+xml',
-               '.xfdf' => 'application/vnd.adobe.xfdf',
-               '.xht' => 'application/xhtml+xml',
-               '.xhtml' => 'application/xhtml+xml',
-               '.xla' => 'application/vnd.ms-excel',
-               '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
-               '.xlk' => 'application/vnd.ms-excel',
-               '.xll' => 'application/vnd.ms-excel',
-               '.xlm' => 'application/vnd.ms-excel',
-               '.xls' => 'application/vnd.ms-excel',
-               '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
-               '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
-               '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
-               '.xlt' => 'application/vnd.ms-excel',
-               '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
-               '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
-               '.xlw' => 'application/vnd.ms-excel',
-               '.xml' => 'text/xml',
-               '.xps' => 'application/vnd.ms-xpsdocument',
-               '.xsl' => 'text/xml',
-       ];
-
-       /**
-        * IE versions which have been analysed to bring you this class, and for
-        * which some substantive difference exists. These will appear as keys
-        * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
-        */
-       protected $versions = [ 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' ];
-
-       /**
-        * Type table with versions expanded
-        */
-       protected $typeTable = [];
-
-       /** constructor */
-       function __construct() {
-               // Construct versioned type arrays from the base type array plus additions
-               $types = $this->baseTypeTable;
-               foreach ( $this->versions as $version ) {
-                       if ( isset( $this->addedTypes[$version] ) ) {
-                               foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {
-                                       $types[$format] = array_merge( $types[$format], $addedTypes );
-                               }
-                       }
-                       $this->typeTable[$version] = $types;
-               }
-       }
-
-       /**
-        * Get the MIME types from getMimesFromData(), but convert the result from IE's
-        * idiosyncratic private types into something other apps will understand.
-        *
-        * @param string $fileName the file name (unused at present)
-        * @param string $chunk the first 256 bytes of the file
-        * @param string $proposed the MIME type proposed by the server
-        *
-        * @return Array: map of IE version to detected MIME type
-        */
-       public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
-               $types = $this->getMimesFromData( $fileName, $chunk, $proposed );
-               $types = array_map( [ $this, 'translateMimeType' ], $types );
-               return $types;
-       }
-
-       /**
-        * Translate a MIME type from IE's idiosyncratic private types into
-        * more commonly understood type strings
-        * @param $type
-        * @return string
-        */
-       public function translateMimeType( $type ) {
-               static $table = [
-                       'image/pjpeg' => 'image/jpeg',
-                       'image/x-png' => 'image/png',
-                       'image/x-wmf' => 'application/x-msmetafile',
-                       'image/bmp' => 'image/x-bmp',
-                       'application/x-zip-compressed' => 'application/zip',
-                       'application/x-compressed' => 'application/x-compress',
-                       'application/x-gzip-compressed' => 'application/x-gzip',
-                       'audio/mid' => 'audio/midi',
-               ];
-               if ( isset( $table[$type] ) ) {
-                       $type = $table[$type];
-               }
-               return $type;
-       }
-
-       /**
-        * Get the untranslated MIME types for all known versions
-        *
-        * @param string $fileName the file name (unused at present)
-        * @param string $chunk the first 256 bytes of the file
-        * @param string $proposed the MIME type proposed by the server
-        *
-        * @return Array: map of IE version to detected MIME type
-        */
-       public function getMimesFromData( $fileName, $chunk, $proposed ) {
-               $types = [];
-               foreach ( $this->versions as $version ) {
-                       $types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );
-               }
-               return $types;
-       }
-
-       /**
-        * Get the MIME type for a given named version
-        * @param $version
-        * @param $fileName
-        * @param $chunk
-        * @param $proposed
-        * @return bool|string
-        */
-       protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
-               // Strip text after a semicolon
-               $semiPos = strpos( $proposed, ';' );
-               if ( $semiPos !== false ) {
-                       $proposed = substr( $proposed, 0, $semiPos );
-               }
-
-               $proposedFormat = $this->getDataFormat( $version, $proposed );
-               if ( $proposedFormat == 'unknown'
-                       && $proposed != 'multipart/mixed'
-                       && $proposed != 'multipart/x-mixed-replace' )
-               {
-                       return $proposed;
-               }
-               if ( strval( $chunk ) === '' ) {
-                       return $proposed;
-               }
-
-               // Truncate chunk at 255 bytes
-               $chunk = substr( $chunk, 0, 255 );
-
-               // IE does the Check*Headers() calls last, and instead does the following image
-               // type checks by directly looking for the magic numbers. What I do here should
-               // have the same effect since the magic number checks are identical in both cases.
-               $result = $this->sampleData( $version, $chunk );
-               $sampleFound = $result['found'];
-               $counters = $result['counters'];
-               $binaryType = $this->checkBinaryHeaders( $version, $chunk );
-               $textType = $this->checkTextHeaders( $version, $chunk );
-
-               if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
-                       return 'text/html';
-               }
-               if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
-                       return 'image/gif';
-               }
-               if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
-                       && $binaryType == 'image/pjpeg' )
-               {
-                       return $proposed;
-               }
-               // PNG check added in IE 7
-               if ( $version >= 'ie07'
-                       && ( $proposed == 'image/x-png' || $proposed == 'image/png' )
-                       && $binaryType == 'image/x-png' )
-               {
-                       return $proposed;
-               }
-
-               // CDF was removed in IE 7 so it won't be in $sampleFound for later versions
-               if ( isset( $sampleFound['cdf'] ) ) {
-                       return 'application/x-cdf';
-               }
-
-               // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
-               // previous versions
-               if ( isset( $sampleFound['rss'] ) ) {
-                       return 'application/rss+xml';
-               }
-               if ( isset( $sampleFound['rdf-tag'] )
-                       && isset( $sampleFound['rdf-url'] )
-                       && isset( $sampleFound['rdf-purl'] ) )
-               {
-                       return 'application/rss+xml';
-               }
-               if ( isset( $sampleFound['atom'] ) ) {
-                       return 'application/atom+xml';
-               }
-
-               if ( isset( $sampleFound['xml'] ) ) {
-                       // TODO: I'm not sure under what circumstances this flag is enabled
-                       if ( strpos( $version, 'strict' ) !== false ) {
-                               if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
-                                       return 'text/xml';
-                               }
-                       } else {
-                               return 'text/xml';
-                       }
-               }
-               if ( isset( $sampleFound['html'] ) ) {
-                       // TODO: I'm not sure under what circumstances this flag is enabled
-                       if ( strpos( $version, 'nohtml' ) !== false ) {
-                               if ( $proposed == 'text/plain' ) {
-                                       return 'text/html';
-                               }
-                       } else {
-                               return 'text/html';
-                       }
-               }
-               if ( isset( $sampleFound['xbm'] ) ) {
-                       return 'image/x-bitmap';
-               }
-               if ( isset( $sampleFound['binhex'] ) ) {
-                       return 'application/macbinhex40';
-               }
-               if ( isset( $sampleFound['scriptlet'] ) ) {
-                       if ( strpos( $version, 'strict' ) !== false ) {
-                               if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
-                                       return 'text/scriptlet';
-                               }
-                       } else {
-                               return 'text/scriptlet';
-                       }
-               }
-
-               // Freaky heuristics to determine if the data is text or binary
-               // The heuristic is of course broken for non-ASCII text
-               if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
-                       < ( $counters['ctrl'] + $counters['high'] ) * 16 )
-               {
-                       $kindOfBinary = true;
-                       $type = $binaryType ? $binaryType : $textType;
-                       if ( $type === false ) {
-                               $type = 'application/octet-stream';
-                       }
-               } else {
-                       $kindOfBinary = false;
-                       $type = $textType ? $textType : $binaryType;
-                       if ( $type === false ) {
-                               $type = 'text/plain';
-                       }
-               }
-
-               // Check if the output format is ambiguous
-               // This generally means that detection failed, real types aren't ambiguous
-               $detectedFormat = $this->getDataFormat( $version, $type );
-               if ( $detectedFormat != 'ambiguous' ) {
-                       return $type;
-               }
-
-               if ( $proposedFormat != 'ambiguous' ) {
-                       // FormatAgreesWithData()
-                       if ( $proposedFormat == 'text' && !$kindOfBinary ) {
-                               return $proposed;
-                       }
-                       if ( $proposedFormat == 'binary' && $kindOfBinary ) {
-                               return $proposed;
-                       }
-                       if ( $proposedFormat == 'html' ) {
-                               return $proposed;
-                       }
-               }
-
-               // Find a MIME type by searching the registry for the file extension.
-               $dotPos = strrpos( $fileName, '.' );
-               if ( $dotPos === false ) {
-                       return $type;
-               }
-               $ext = substr( $fileName, $dotPos );
-               if ( isset( $this->registry[$ext] ) ) {
-                       return $this->registry[$ext];
-               }
-
-               // TODO: If the extension has an application registered to it, IE will return
-               // application/octet-stream. We'll skip that, so we could erroneously
-               // return text/plain or application/x-netcdf where application/octet-stream
-               // would be correct.
-
-               return $type;
-       }
-
-       /**
-        * Check for text headers at the start of the chunk
-        * Confirmed same in 5 and 7.
-        * @param $version
-        * @param $chunk
-        * @return bool|string
-        */
-       private function checkTextHeaders( $version, $chunk ) {
-               $chunk2 = substr( $chunk, 0, 2 );
-               $chunk4 = substr( $chunk, 0, 4 );
-               $chunk5 = substr( $chunk, 0, 5 );
-               if ( $chunk4 == '%PDF' ) {
-                       return 'application/pdf';
-               }
-               if ( $chunk2 == '%!' ) {
-                       return 'application/postscript';
-               }
-               if ( $chunk5 == '{\\rtf' ) {
-                       return 'text/richtext';
-               }
-               if ( $chunk5 == 'begin' ) {
-                       return 'application/base64';
-               }
-               return false;
-       }
-
-       /**
-        * Check for binary headers at the start of the chunk
-        * Confirmed same in 5 and 7.
-        * @param $version
-        * @param $chunk
-        * @return bool|string
-        */
-       private function checkBinaryHeaders( $version, $chunk ) {
-               $chunk2 = substr( $chunk, 0, 2 );
-               $chunk3 = substr( $chunk, 0, 3 );
-               $chunk4 = substr( $chunk, 0, 4 );
-               $chunk5 = substr( $chunk, 0, 5 );
-               $chunk5uc = strtoupper( $chunk5 );
-               $chunk8 = substr( $chunk, 0, 8 );
-               if ( $chunk5uc == 'GIF87' || $chunk5uc == 'GIF89' ) {
-                       return 'image/gif';
-               }
-               if ( $chunk2 == "\xff\xd8" ) {
-                       return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
-               }
-
-               if ( $chunk2 == 'BM'
-                       && substr( $chunk, 6, 2 ) == "\000\000"
-                       && substr( $chunk, 8, 2 ) == "\000\000" )
-               {
-                       return 'image/bmp'; // another non-standard MIME
-               }
-               if ( $chunk4 == 'RIFF'
-                       && substr( $chunk, 8, 4 ) == 'WAVE' )
-               {
-                       return 'audio/wav';
-               }
-               // These were integer literals in IE
-               // Perhaps the author was not sure what the target endianness was
-               if ( $chunk4 == ".sd\000"
-                       || $chunk4 == ".snd"
-                       || $chunk4 == "\000ds."
-                       || $chunk4 == "dns." )
-               {
-                       return 'audio/basic';
-               }
-               if ( $chunk3 == "MM\000" ) {
-                       return 'image/tiff';
-               }
-               if ( $chunk2 == 'MZ' ) {
-                       return 'application/x-msdownload';
-               }
-               if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {
-                       return 'image/x-png'; // [sic]
-               }
-               if ( strlen( $chunk ) >= 5 ) {
-                       $byte2 = ord( $chunk[2] );
-                       $byte4 = ord( $chunk[4] );
-                       if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) {
-                               return 'image/x-jg';
-                       }
-               }
-               // More endian confusion?
-               if ( $chunk4 == 'MROF' ) {
-                       return 'audio/x-aiff';
-               }
-               $chunk4_8 = substr( $chunk, 8, 4 );
-               if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) {
-                       return 'audio/x-aiff';
-               }
-               if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) {
-                       return 'video/avi';
-               }
-               if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) {
-                       return 'video/mpeg';
-               }
-               if ( $chunk4 == "\001\000\000\000"
-                       && substr( $chunk, 40, 4 ) == ' EMF' )
-               {
-                       return 'image/x-emf';
-               }
-               if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {
-                       return 'image/x-wmf';
-               }
-               if ( $chunk4 == "\xca\xfe\xba\xbe" ) {
-                       return 'application/java';
-               }
-               if ( $chunk2 == 'PK' ) {
-                       return 'application/x-zip-compressed';
-               }
-               if ( $chunk2 == "\x1f\x9d" ) {
-                       return 'application/x-compressed';
-               }
-               if ( $chunk2 == "\x1f\x8b" ) {
-                       return 'application/x-gzip-compressed';
-               }
-               // Skip redundant check for ZIP
-               if ( $chunk5 == "MThd\000" ) {
-                       return 'audio/mid';
-               }
-               if ( $chunk4 == '%PDF' ) {
-                       return 'application/pdf';
-               }
-               return false;
-       }
-
-       /**
-        * Do heuristic checks on the bulk of the data sample.
-        * Search for HTML tags.
-        * @param $version
-        * @param $chunk
-        * @return array
-        */
-       protected function sampleData( $version, $chunk ) {
-               $found = [];
-               $counters = [
-                       'ctrl' => 0,
-                       'high' => 0,
-                       'low' => 0,
-                       'lf' => 0,
-                       'cr' => 0,
-                       'ff' => 0
-               ];
-               $htmlTags = [
-                       'html',
-                       'head',
-                       'title',
-                       'body',
-                       'script',
-                       'a href',
-                       'pre',
-                       'img',
-                       'plaintext',
-                       'table'
-               ];
-               $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
-               $rdfPurl = 'http://purl.org/rss/1.0/';
-               $xbmMagic1 = '#define';
-               $xbmMagic2 = '_width';
-               $xbmMagic3 = '_bits';
-               $binhexMagic = 'converted with BinHex';
-               $chunkLength = strlen( $chunk );
-
-               for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
-                       $curChar = $chunk[$offset];
-                       if ( $curChar == "\x0a" ) {
-                               $counters['lf']++;
-                               continue;
-                       } elseif ( $curChar == "\x0d" ) {
-                               $counters['cr']++;
-                               continue;
-                       } elseif ( $curChar == "\x0c" ) {
-                               $counters['ff']++;
-                               continue;
-                       } elseif ( $curChar == "\t" ) {
-                               $counters['low']++;
-                               continue;
-                       } elseif ( ord( $curChar ) < 32 ) {
-                               $counters['ctrl']++;
-                               continue;
-                       } elseif ( ord( $curChar ) >= 128 ) {
-                               $counters['high']++;
-                               continue;
-                       }
-
-                       $counters['low']++;
-                       if ( $curChar == '<' ) {
-                               // XML
-                               $remainder = substr( $chunk, $offset + 1 );
-                               if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
-                                       $nextChar = substr( $chunk, $offset + 5, 1 );
-                                       if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
-                                               $found['xml'] = true;
-                                       }
-                               }
-                               // Scriptlet (JSP)
-                               if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
-                                       $found['scriptlet'] = true;
-                                       break;
-                               }
-                               // HTML
-                               foreach ( $htmlTags as $tag ) {
-                                       if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
-                                               $found['html'] = true;
-                                       }
-                               }
-                               // Skip broken check for additional tags (HR etc.)
-
-                               // CHANNEL replaced by RSS, RDF and FEED in IE 7
-                               if ( $version < 'ie07' ) {
-                                       if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
-                                               $found['cdf'] = true;
-                                       }
-                               } else {
-                                       // RSS
-                                       if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
-                                               $found['rss'] = true;
-                                               break; // return from SampleData
-                                       }
-                                       if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
-                                               $found['rdf-tag'] = true;
-                                               // no break
-                                       }
-                                       if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
-                                               $found['atom'] = true;
-                                               break;
-                                       }
-                               }
-                               continue;
-                       }
-                       // Skip broken check for -->
-
-                       // RSS URL checks
-                       // For some reason both URLs must appear before it is recognised
-                       $remainder = substr( $chunk, $offset );
-                       if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
-                               $found['rdf-url'] = true;
-                               if ( isset( $found['rdf-tag'] )
-                                       && isset( $found['rdf-purl'] ) ) // [sic]
-                               {
-                                       break;
-                               }
-                               continue;
-                       }
-
-                       if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
-                               if ( isset( $found['rdf-tag'] )
-                                       && isset( $found['rdf-url'] ) ) // [sic]
-                               {
-                                       break;
-                               }
-                               continue;
-                       }
-
-                       // XBM checks
-                       if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
-                               $found['xbm1'] = true;
-                               continue;
-                       }
-                       if ( $curChar == '_' ) {
-                               if ( isset( $found['xbm2'] ) ) {
-                                       if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
-                                               $found['xbm'] = true;
-                                               break;
-                                       }
-                               } elseif ( isset( $found['xbm1'] ) ) {
-                                       if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
-                                               $found['xbm2'] = true;
-                                       }
-                               }
-                       }
-
-                       // BinHex
-                       if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
-                               $found['binhex'] = true;
-                       }
-               }
-               return [ 'found' => $found, 'counters' => $counters ];
-       }
-
-       /**
-        * @param $version
-        * @param $type
-        * @return int|string
-        */
-       protected function getDataFormat( $version, $type ) {
-               $types = $this->typeTable[$version];
-               if ( $type == '(null)' || strval( $type ) === '' ) {
-                       return 'ambiguous';
-               }
-               foreach ( $types as $format => $list ) {
-                       if ( in_array( $type, $list ) ) {
-                               return $format;
-                       }
-               }
-               return 'unknown';
-       }
-}
diff --git a/includes/libs/XmlTypeCheck.php b/includes/libs/XmlTypeCheck.php
deleted file mode 100644 (file)
index f057140..0000000
+++ /dev/null
@@ -1,347 +0,0 @@
-<?php
-/**
- * XML syntax and type checker.
- *
- * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us
- * more control over the expansion of XML entities. When passed to the
- * callback, entities will be fully expanded, but may report the XML is
- * invalid if expanding the entities are likely to cause a DoS.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-class XmlTypeCheck {
-       /**
-        * Will be set to true or false to indicate whether the file is
-        * well-formed XML. Note that this doesn't check schema validity.
-        */
-       public $wellFormed = null;
-
-       /**
-        * Will be set to true if the optional element filter returned
-        * a match at some point.
-        */
-       public $filterMatch = false;
-
-       /**
-        * Will contain the type of filter hit if the optional element filter returned
-        * a match at some point.
-        * @var mixed
-        */
-       public $filterMatchType = false;
-
-       /**
-        * Name of the document's root element, including any namespace
-        * as an expanded URL.
-        */
-       public $rootElement = '';
-
-       /**
-        * A stack of strings containing the data of each xml element as it's processed. Append
-        * data to the top string of the stack, then pop off the string and process it when the
-        * element is closed.
-        */
-       protected $elementData = [];
-
-       /**
-        * A stack of element names and attributes, as we process them.
-        */
-       protected $elementDataContext = [];
-
-       /**
-        * Current depth of the data stack.
-        */
-       protected $stackDepth = 0;
-
-       /**
-        * Additional parsing options
-        */
-       private $parserOptions = [
-               'processing_instruction_handler' => '',
-       ];
-
-       /**
-        * @param string $input a filename or string containing the XML element
-        * @param callable $filterCallback (optional)
-        *        Function to call to do additional custom validity checks from the
-        *        SAX element handler event. This gives you access to the element
-        *        namespace, name, attributes, and text contents.
-        *        Filter should return 'true' to toggle on $this->filterMatch
-        * @param bool $isFile (optional) indicates if the first parameter is a
-        *        filename (default, true) or if it is a string (false)
-        * @param array $options list of additional parsing options:
-        *        processing_instruction_handler: Callback for xml_set_processing_instruction_handler
-        */
-       function __construct( $input, $filterCallback = null, $isFile = true, $options = [] ) {
-               $this->filterCallback = $filterCallback;
-               $this->parserOptions = array_merge( $this->parserOptions, $options );
-               $this->validateFromInput( $input, $isFile );
-       }
-
-       /**
-        * Alternative constructor: from filename
-        *
-        * @param string $fname the filename of an XML document
-        * @param callable $filterCallback (optional)
-        *        Function to call to do additional custom validity checks from the
-        *        SAX element handler event. This gives you access to the element
-        *        namespace, name, and attributes, but not to text contents.
-        *        Filter should return 'true' to toggle on $this->filterMatch
-        * @return XmlTypeCheck
-        */
-       public static function newFromFilename( $fname, $filterCallback = null ) {
-               return new self( $fname, $filterCallback, true );
-       }
-
-       /**
-        * Alternative constructor: from string
-        *
-        * @param string $string a string containing an XML element
-        * @param callable $filterCallback (optional)
-        *        Function to call to do additional custom validity checks from the
-        *        SAX element handler event. This gives you access to the element
-        *        namespace, name, and attributes, but not to text contents.
-        *        Filter should return 'true' to toggle on $this->filterMatch
-        * @return XmlTypeCheck
-        */
-       public static function newFromString( $string, $filterCallback = null ) {
-               return new self( $string, $filterCallback, false );
-       }
-
-       /**
-        * Get the root element. Simple accessor to $rootElement
-        *
-        * @return string
-        */
-       public function getRootElement() {
-               return $this->rootElement;
-       }
-
-       /**
-        * @param string $fname the filename
-        */
-       private function validateFromInput( $xml, $isFile ) {
-               $reader = new XMLReader();
-               if ( $isFile ) {
-                       $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
-               } else {
-                       $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
-               }
-               if ( $s !== true ) {
-                       // Couldn't open the XML
-                       $this->wellFormed = false;
-               } else {
-                       $oldDisable = libxml_disable_entity_loader( true );
-                       $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
-                       try {
-                               $this->validate( $reader );
-                       } catch ( Exception $e ) {
-                               // Calling this malformed, because we didn't parse the whole
-                               // thing. Maybe just an external entity refernce.
-                               $this->wellFormed = false;
-                               $reader->close();
-                               libxml_disable_entity_loader( $oldDisable );
-                               throw $e;
-                       }
-                       $reader->close();
-                       libxml_disable_entity_loader( $oldDisable );
-               }
-       }
-
-       private function readNext( XMLReader $reader ) {
-               set_error_handler( [ $this, 'XmlErrorHandler' ] );
-               $ret = $reader->read();
-               restore_error_handler();
-               return $ret;
-       }
-
-       public function XmlErrorHandler( $errno, $errstr ) {
-               $this->wellFormed = false;
-       }
-
-       private function validate( $reader ) {
-
-               // First, move through anything that isn't an element, and
-               // handle any processing instructions with the callback
-               do {
-                       if ( !$this->readNext( $reader ) ) {
-                               // Hit the end of the document before any elements
-                               $this->wellFormed = false;
-                               return;
-                       }
-                       if ( $reader->nodeType === XMLReader::PI ) {
-                               $this->processingInstructionHandler( $reader->name, $reader->value );
-                       }
-               } while ( $reader->nodeType != XMLReader::ELEMENT );
-
-               // Process the rest of the document
-               do {
-                       switch ( $reader->nodeType ) {
-                               case XMLReader::ELEMENT:
-                                       $name = $this->expandNS(
-                                               $reader->name,
-                                               $reader->namespaceURI
-                                       );
-                                       if ( $this->rootElement === '' ) {
-                                               $this->rootElement = $name;
-                                       }
-                                       $empty = $reader->isEmptyElement;
-                                       $attrs = $this->getAttributesArray( $reader );
-                                       $this->elementOpen( $name, $attrs );
-                                       if ( $empty ) {
-                                               $this->elementClose();
-                                       }
-                                       break;
-
-                               case XMLReader::END_ELEMENT:
-                                       $this->elementClose();
-                                       break;
-
-                               case XMLReader::WHITESPACE:
-                               case XMLReader::SIGNIFICANT_WHITESPACE:
-                               case XMLReader::CDATA:
-                               case XMLReader::TEXT:
-                                       $this->elementData( $reader->value );
-                                       break;
-
-                               case XMLReader::ENTITY_REF:
-                                       // Unexpanded entity (maybe external?),
-                                       // don't send to the filter (xml_parse didn't)
-                                       break;
-
-                               case XMLReader::COMMENT:
-                                       // Don't send to the filter (xml_parse didn't)
-                                       break;
-
-                               case XMLReader::PI:
-                                       // Processing instructions can happen after the header too
-                                       $this->processingInstructionHandler(
-                                               $reader->name,
-                                               $reader->value
-                                       );
-                                       break;
-                               default:
-                                       // One of DOC, DOC_TYPE, ENTITY, END_ENTITY,
-                                       // NOTATION, or XML_DECLARATION
-                                       // xml_parse didn't send these to the filter, so we won't.
-                       }
-
-               } while ( $this->readNext( $reader ) );
-
-               if ( $this->stackDepth !== 0 ) {
-                       $this->wellFormed = false;
-               } elseif ( $this->wellFormed === null ) {
-                       $this->wellFormed = true;
-               }
-
-       }
-
-       /**
-        * Get all of the attributes for an XMLReader's current node
-        * @param $r XMLReader
-        * @return array of attributes
-        */
-       private function getAttributesArray( XMLReader $r ) {
-               $attrs = [];
-               while ( $r->moveToNextAttribute() ) {
-                       if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
-                               // XMLReader treats xmlns attributes as normal
-                               // attributes, while xml_parse doesn't
-                               continue;
-                       }
-                       $name = $this->expandNS( $r->name, $r->namespaceURI );
-                       $attrs[$name] = $r->value;
-               }
-               return $attrs;
-       }
-
-       /**
-        * @param $name element or attribute name, maybe with a full or short prefix
-        * @param $namespaceURI the namespaceURI
-        * @return string the name prefixed with namespaceURI
-        */
-       private function expandNS( $name, $namespaceURI ) {
-               if ( $namespaceURI ) {
-                       $parts = explode( ':', $name );
-                       $localname = array_pop( $parts );
-                       return "$namespaceURI:$localname";
-               }
-               return $name;
-       }
-
-       /**
-        * @param $name
-        * @param $attribs
-        */
-       private function elementOpen( $name, $attribs ) {
-               $this->elementDataContext[] = [ $name, $attribs ];
-               $this->elementData[] = '';
-               $this->stackDepth++;
-       }
-
-       /**
-        */
-       private function elementClose() {
-               list( $name, $attribs ) = array_pop( $this->elementDataContext );
-               $data = array_pop( $this->elementData );
-               $this->stackDepth--;
-               $callbackReturn = false;
-
-               if ( is_callable( $this->filterCallback ) ) {
-                       $callbackReturn = call_user_func(
-                               $this->filterCallback,
-                               $name,
-                               $attribs,
-                               $data
-                       );
-               }
-               if ( $callbackReturn ) {
-                       // Filter hit!
-                       $this->filterMatch = true;
-                       $this->filterMatchType = $callbackReturn;
-               }
-       }
-
-       /**
-        * @param $data
-        */
-       private function elementData( $data ) {
-               // Collect any data here, and we'll run the callback in elementClose
-               $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
-       }
-
-       /**
-        * @param $target
-        * @param $data
-        */
-       private function processingInstructionHandler( $target, $data ) {
-               $callbackReturn = false;
-               if ( $this->parserOptions['processing_instruction_handler'] ) {
-                       $callbackReturn = call_user_func(
-                               $this->parserOptions['processing_instruction_handler'],
-                               $target,
-                               $data
-                       );
-               }
-               if ( $callbackReturn ) {
-                       // Filter hit!
-                       $this->filterMatch = true;
-                       $this->filterMatchType = $callbackReturn;
-               }
-       }
-}
diff --git a/includes/libs/mime/IEContentAnalyzer.php b/includes/libs/mime/IEContentAnalyzer.php
new file mode 100644 (file)
index 0000000..0d1e527
--- /dev/null
@@ -0,0 +1,851 @@
+<?php
+/**
+ * Simulation of Microsoft Internet Explorer's MIME type detection algorithm.
+ *
+ * @file
+ * @todo Define the exact license of this file.
+ */
+
+/**
+ * This class simulates Microsoft Internet Explorer's terribly broken and
+ * insecure MIME type detection algorithm. It can be used to check web uploads
+ * with an apparently safe type, to see if IE will reinterpret them to produce
+ * something dangerous.
+ *
+ * It is full of bugs and strange design choices should not under any
+ * circumstances be used to determine a MIME type to present to a user or
+ * client. (Apple Safari developers, this means you too.)
+ *
+ * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have
+ * attempted to ensure that this code works in exactly the same way as Internet
+ * Explorer, it does not share any source code, or creative choices such as
+ * variable names, thus I (Tim Starling) claim copyright on it.
+ *
+ * It may be redistributed without restriction. To aid reuse, this class does
+ * not depend on any MediaWiki module.
+ */
+class IEContentAnalyzer {
+       /**
+        * Relevant data taken from the type table in IE 5
+        */
+       protected $baseTypeTable = [
+               'ambiguous' /*1*/ => [
+                       'text/plain',
+                       'application/octet-stream',
+                       'application/x-netcdf', // [sic]
+               ],
+               'text' /*3*/ => [
+                       'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
+                       'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
+               ],
+               'binary' /*4*/ => [
+                       'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
+                       'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp',
+                       'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi',
+                       'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
+                       'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
+                       'application/x-msdownload'
+               ],
+               'html' /*5*/ => [ 'text/html' ],
+       ];
+
+       /**
+        * Changes to the type table in later versions of IE
+        */
+       protected $addedTypes = [
+               'ie07' => [
+                       'text' => [ 'text/xml', 'application/xml' ]
+               ],
+       ];
+
+       /**
+        * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
+        * typical Windows installation.
+        *
+        * Used for extension to MIME type mapping if detection fails.
+        */
+       protected $registry = [
+               '.323' => 'text/h323',
+               '.3g2' => 'video/3gpp2',
+               '.3gp' => 'video/3gpp',
+               '.3gp2' => 'video/3gpp2',
+               '.3gpp' => 'video/3gpp',
+               '.aac' => 'audio/aac',
+               '.ac3' => 'audio/ac3',
+               '.accda' => 'application/msaccess',
+               '.accdb' => 'application/msaccess',
+               '.accdc' => 'application/msaccess',
+               '.accde' => 'application/msaccess',
+               '.accdr' => 'application/msaccess',
+               '.accdt' => 'application/msaccess',
+               '.ade' => 'application/msaccess',
+               '.adp' => 'application/msaccess',
+               '.adts' => 'audio/aac',
+               '.ai' => 'application/postscript',
+               '.aif' => 'audio/aiff',
+               '.aifc' => 'audio/aiff',
+               '.aiff' => 'audio/aiff',
+               '.amc' => 'application/x-mpeg',
+               '.application' => 'application/x-ms-application',
+               '.asf' => 'video/x-ms-asf',
+               '.asx' => 'video/x-ms-asf',
+               '.au' => 'audio/basic',
+               '.avi' => 'video/avi',
+               '.bmp' => 'image/bmp',
+               '.caf' => 'audio/x-caf',
+               '.cat' => 'application/vnd.ms-pki.seccat',
+               '.cbo' => 'application/sha',
+               '.cdda' => 'audio/aiff',
+               '.cer' => 'application/x-x509-ca-cert',
+               '.conf' => 'text/plain',
+               '.crl' => 'application/pkix-crl',
+               '.crt' => 'application/x-x509-ca-cert',
+               '.css' => 'text/css',
+               '.csv' => 'application/vnd.ms-excel',
+               '.der' => 'application/x-x509-ca-cert',
+               '.dib' => 'image/bmp',
+               '.dif' => 'video/x-dv',
+               '.dll' => 'application/x-msdownload',
+               '.doc' => 'application/msword',
+               '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
+               '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+               '.dot' => 'application/msword',
+               '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
+               '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
+               '.dv' => 'video/x-dv',
+               '.dwfx' => 'model/vnd.dwfx+xps',
+               '.edn' => 'application/vnd.adobe.edn',
+               '.eml' => 'message/rfc822',
+               '.eps' => 'application/postscript',
+               '.etd' => 'application/x-ebx',
+               '.exe' => 'application/x-msdownload',
+               '.fdf' => 'application/vnd.fdf',
+               '.fif' => 'application/fractals',
+               '.gif' => 'image/gif',
+               '.gsm' => 'audio/x-gsm',
+               '.hqx' => 'application/mac-binhex40',
+               '.hta' => 'application/hta',
+               '.htc' => 'text/x-component',
+               '.htm' => 'text/html',
+               '.html' => 'text/html',
+               '.htt' => 'text/webviewhtml',
+               '.hxa' => 'application/xml',
+               '.hxc' => 'application/xml',
+               '.hxd' => 'application/octet-stream',
+               '.hxe' => 'application/xml',
+               '.hxf' => 'application/xml',
+               '.hxh' => 'application/octet-stream',
+               '.hxi' => 'application/octet-stream',
+               '.hxk' => 'application/xml',
+               '.hxq' => 'application/octet-stream',
+               '.hxr' => 'application/octet-stream',
+               '.hxs' => 'application/octet-stream',
+               '.hxt' => 'application/xml',
+               '.hxv' => 'application/xml',
+               '.hxw' => 'application/octet-stream',
+               '.ico' => 'image/x-icon',
+               '.iii' => 'application/x-iphone',
+               '.ins' => 'application/x-internet-signup',
+               '.iqy' => 'text/x-ms-iqy',
+               '.isp' => 'application/x-internet-signup',
+               '.jfif' => 'image/jpeg',
+               '.jnlp' => 'application/x-java-jnlp-file',
+               '.jpe' => 'image/jpeg',
+               '.jpeg' => 'image/jpeg',
+               '.jpg' => 'image/jpeg',
+               '.jtx' => 'application/x-jtx+xps',
+               '.latex' => 'application/x-latex',
+               '.log' => 'text/plain',
+               '.m1v' => 'video/mpeg',
+               '.m2v' => 'video/mpeg',
+               '.m3u' => 'audio/x-mpegurl',
+               '.mac' => 'image/x-macpaint',
+               '.man' => 'application/x-troff-man',
+               '.mda' => 'application/msaccess',
+               '.mdb' => 'application/msaccess',
+               '.mde' => 'application/msaccess',
+               '.mfp' => 'application/x-shockwave-flash',
+               '.mht' => 'message/rfc822',
+               '.mhtml' => 'message/rfc822',
+               '.mid' => 'audio/mid',
+               '.midi' => 'audio/mid',
+               '.mod' => 'video/mpeg',
+               '.mov' => 'video/quicktime',
+               '.mp2' => 'video/mpeg',
+               '.mp2v' => 'video/mpeg',
+               '.mp3' => 'audio/mpeg',
+               '.mp4' => 'video/mp4',
+               '.mpa' => 'video/mpeg',
+               '.mpe' => 'video/mpeg',
+               '.mpeg' => 'video/mpeg',
+               '.mpf' => 'application/vnd.ms-mediapackage',
+               '.mpg' => 'video/mpeg',
+               '.mpv2' => 'video/mpeg',
+               '.mqv' => 'video/quicktime',
+               '.NMW' => 'application/nmwb',
+               '.nws' => 'message/rfc822',
+               '.odc' => 'text/x-ms-odc',
+               '.ols' => 'application/vnd.ms-publisher',
+               '.p10' => 'application/pkcs10',
+               '.p12' => 'application/x-pkcs12',
+               '.p7b' => 'application/x-pkcs7-certificates',
+               '.p7c' => 'application/pkcs7-mime',
+               '.p7m' => 'application/pkcs7-mime',
+               '.p7r' => 'application/x-pkcs7-certreqresp',
+               '.p7s' => 'application/pkcs7-signature',
+               '.pct' => 'image/pict',
+               '.pdf' => 'application/pdf',
+               '.pdx' => 'application/vnd.adobe.pdx',
+               '.pfx' => 'application/x-pkcs12',
+               '.pic' => 'image/pict',
+               '.pict' => 'image/pict',
+               '.pinstall' => 'application/x-picasa-detect',
+               '.pko' => 'application/vnd.ms-pki.pko',
+               '.png' => 'image/png',
+               '.pnt' => 'image/x-macpaint',
+               '.pntg' => 'image/x-macpaint',
+               '.pot' => 'application/vnd.ms-powerpoint',
+               '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
+               '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
+               '.ppa' => 'application/vnd.ms-powerpoint',
+               '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
+               '.pps' => 'application/vnd.ms-powerpoint',
+               '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
+               '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
+               '.ppt' => 'application/vnd.ms-powerpoint',
+               '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
+               '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+               '.prf' => 'application/pics-rules',
+               '.ps' => 'application/postscript',
+               '.pub' => 'application/vnd.ms-publisher',
+               '.pwz' => 'application/vnd.ms-powerpoint',
+               '.py' => 'text/plain',
+               '.pyw' => 'text/plain',
+               '.qht' => 'text/x-html-insertion',
+               '.qhtm' => 'text/x-html-insertion',
+               '.qt' => 'video/quicktime',
+               '.qti' => 'image/x-quicktime',
+               '.qtif' => 'image/x-quicktime',
+               '.qtl' => 'application/x-quicktimeplayer',
+               '.rat' => 'application/rat-file',
+               '.rmf' => 'application/vnd.adobe.rmf',
+               '.rmi' => 'audio/mid',
+               '.rqy' => 'text/x-ms-rqy',
+               '.rtf' => 'application/msword',
+               '.sct' => 'text/scriptlet',
+               '.sd2' => 'audio/x-sd2',
+               '.sdp' => 'application/sdp',
+               '.shtml' => 'text/html',
+               '.sit' => 'application/x-stuffit',
+               '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
+               '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
+               '.slk' => 'application/vnd.ms-excel',
+               '.snd' => 'audio/basic',
+               '.so' => 'application/x-apachemodule',
+               '.sol' => 'text/plain',
+               '.sor' => 'text/plain',
+               '.spc' => 'application/x-pkcs7-certificates',
+               '.spl' => 'application/futuresplash',
+               '.sst' => 'application/vnd.ms-pki.certstore',
+               '.stl' => 'application/vnd.ms-pki.stl',
+               '.swf' => 'application/x-shockwave-flash',
+               '.thmx' => 'application/vnd.ms-officetheme',
+               '.tif' => 'image/tiff',
+               '.tiff' => 'image/tiff',
+               '.txt' => 'text/plain',
+               '.uls' => 'text/iuls',
+               '.vcf' => 'text/x-vcard',
+               '.vdx' => 'application/vnd.ms-visio.viewer',
+               '.vsd' => 'application/vnd.ms-visio.viewer',
+               '.vss' => 'application/vnd.ms-visio.viewer',
+               '.vst' => 'application/vnd.ms-visio.viewer',
+               '.vsx' => 'application/vnd.ms-visio.viewer',
+               '.vtx' => 'application/vnd.ms-visio.viewer',
+               '.wav' => 'audio/wav',
+               '.wax' => 'audio/x-ms-wax',
+               '.wbk' => 'application/msword',
+               '.wdp' => 'image/vnd.ms-photo',
+               '.wiz' => 'application/msword',
+               '.wm' => 'video/x-ms-wm',
+               '.wma' => 'audio/x-ms-wma',
+               '.wmd' => 'application/x-ms-wmd',
+               '.wmv' => 'video/x-ms-wmv',
+               '.wmx' => 'video/x-ms-wmx',
+               '.wmz' => 'application/x-ms-wmz',
+               '.wpl' => 'application/vnd.ms-wpl',
+               '.wsc' => 'text/scriptlet',
+               '.wvx' => 'video/x-ms-wvx',
+               '.xaml' => 'application/xaml+xml',
+               '.xbap' => 'application/x-ms-xbap',
+               '.xdp' => 'application/vnd.adobe.xdp+xml',
+               '.xfdf' => 'application/vnd.adobe.xfdf',
+               '.xht' => 'application/xhtml+xml',
+               '.xhtml' => 'application/xhtml+xml',
+               '.xla' => 'application/vnd.ms-excel',
+               '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
+               '.xlk' => 'application/vnd.ms-excel',
+               '.xll' => 'application/vnd.ms-excel',
+               '.xlm' => 'application/vnd.ms-excel',
+               '.xls' => 'application/vnd.ms-excel',
+               '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
+               '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
+               '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+               '.xlt' => 'application/vnd.ms-excel',
+               '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
+               '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
+               '.xlw' => 'application/vnd.ms-excel',
+               '.xml' => 'text/xml',
+               '.xps' => 'application/vnd.ms-xpsdocument',
+               '.xsl' => 'text/xml',
+       ];
+
+       /**
+        * IE versions which have been analysed to bring you this class, and for
+        * which some substantive difference exists. These will appear as keys
+        * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
+        */
+       protected $versions = [ 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' ];
+
+       /**
+        * Type table with versions expanded
+        */
+       protected $typeTable = [];
+
+       /** constructor */
+       function __construct() {
+               // Construct versioned type arrays from the base type array plus additions
+               $types = $this->baseTypeTable;
+               foreach ( $this->versions as $version ) {
+                       if ( isset( $this->addedTypes[$version] ) ) {
+                               foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {
+                                       $types[$format] = array_merge( $types[$format], $addedTypes );
+                               }
+                       }
+                       $this->typeTable[$version] = $types;
+               }
+       }
+
+       /**
+        * Get the MIME types from getMimesFromData(), but convert the result from IE's
+        * idiosyncratic private types into something other apps will understand.
+        *
+        * @param string $fileName the file name (unused at present)
+        * @param string $chunk the first 256 bytes of the file
+        * @param string $proposed the MIME type proposed by the server
+        *
+        * @return Array: map of IE version to detected MIME type
+        */
+       public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
+               $types = $this->getMimesFromData( $fileName, $chunk, $proposed );
+               $types = array_map( [ $this, 'translateMimeType' ], $types );
+               return $types;
+       }
+
+       /**
+        * Translate a MIME type from IE's idiosyncratic private types into
+        * more commonly understood type strings
+        * @param $type
+        * @return string
+        */
+       public function translateMimeType( $type ) {
+               static $table = [
+                       'image/pjpeg' => 'image/jpeg',
+                       'image/x-png' => 'image/png',
+                       'image/x-wmf' => 'application/x-msmetafile',
+                       'image/bmp' => 'image/x-bmp',
+                       'application/x-zip-compressed' => 'application/zip',
+                       'application/x-compressed' => 'application/x-compress',
+                       'application/x-gzip-compressed' => 'application/x-gzip',
+                       'audio/mid' => 'audio/midi',
+               ];
+               if ( isset( $table[$type] ) ) {
+                       $type = $table[$type];
+               }
+               return $type;
+       }
+
+       /**
+        * Get the untranslated MIME types for all known versions
+        *
+        * @param string $fileName the file name (unused at present)
+        * @param string $chunk the first 256 bytes of the file
+        * @param string $proposed the MIME type proposed by the server
+        *
+        * @return Array: map of IE version to detected MIME type
+        */
+       public function getMimesFromData( $fileName, $chunk, $proposed ) {
+               $types = [];
+               foreach ( $this->versions as $version ) {
+                       $types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );
+               }
+               return $types;
+       }
+
+       /**
+        * Get the MIME type for a given named version
+        * @param $version
+        * @param $fileName
+        * @param $chunk
+        * @param $proposed
+        * @return bool|string
+        */
+       protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
+               // Strip text after a semicolon
+               $semiPos = strpos( $proposed, ';' );
+               if ( $semiPos !== false ) {
+                       $proposed = substr( $proposed, 0, $semiPos );
+               }
+
+               $proposedFormat = $this->getDataFormat( $version, $proposed );
+               if ( $proposedFormat == 'unknown'
+                       && $proposed != 'multipart/mixed'
+                       && $proposed != 'multipart/x-mixed-replace' )
+               {
+                       return $proposed;
+               }
+               if ( strval( $chunk ) === '' ) {
+                       return $proposed;
+               }
+
+               // Truncate chunk at 255 bytes
+               $chunk = substr( $chunk, 0, 255 );
+
+               // IE does the Check*Headers() calls last, and instead does the following image
+               // type checks by directly looking for the magic numbers. What I do here should
+               // have the same effect since the magic number checks are identical in both cases.
+               $result = $this->sampleData( $version, $chunk );
+               $sampleFound = $result['found'];
+               $counters = $result['counters'];
+               $binaryType = $this->checkBinaryHeaders( $version, $chunk );
+               $textType = $this->checkTextHeaders( $version, $chunk );
+
+               if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
+                       return 'text/html';
+               }
+               if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
+                       return 'image/gif';
+               }
+               if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
+                       && $binaryType == 'image/pjpeg' )
+               {
+                       return $proposed;
+               }
+               // PNG check added in IE 7
+               if ( $version >= 'ie07'
+                       && ( $proposed == 'image/x-png' || $proposed == 'image/png' )
+                       && $binaryType == 'image/x-png' )
+               {
+                       return $proposed;
+               }
+
+               // CDF was removed in IE 7 so it won't be in $sampleFound for later versions
+               if ( isset( $sampleFound['cdf'] ) ) {
+                       return 'application/x-cdf';
+               }
+
+               // RSS and Atom were added in IE 7 so they won't be in $sampleFound for
+               // previous versions
+               if ( isset( $sampleFound['rss'] ) ) {
+                       return 'application/rss+xml';
+               }
+               if ( isset( $sampleFound['rdf-tag'] )
+                       && isset( $sampleFound['rdf-url'] )
+                       && isset( $sampleFound['rdf-purl'] ) )
+               {
+                       return 'application/rss+xml';
+               }
+               if ( isset( $sampleFound['atom'] ) ) {
+                       return 'application/atom+xml';
+               }
+
+               if ( isset( $sampleFound['xml'] ) ) {
+                       // TODO: I'm not sure under what circumstances this flag is enabled
+                       if ( strpos( $version, 'strict' ) !== false ) {
+                               if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
+                                       return 'text/xml';
+                               }
+                       } else {
+                               return 'text/xml';
+                       }
+               }
+               if ( isset( $sampleFound['html'] ) ) {
+                       // TODO: I'm not sure under what circumstances this flag is enabled
+                       if ( strpos( $version, 'nohtml' ) !== false ) {
+                               if ( $proposed == 'text/plain' ) {
+                                       return 'text/html';
+                               }
+                       } else {
+                               return 'text/html';
+                       }
+               }
+               if ( isset( $sampleFound['xbm'] ) ) {
+                       return 'image/x-bitmap';
+               }
+               if ( isset( $sampleFound['binhex'] ) ) {
+                       return 'application/macbinhex40';
+               }
+               if ( isset( $sampleFound['scriptlet'] ) ) {
+                       if ( strpos( $version, 'strict' ) !== false ) {
+                               if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
+                                       return 'text/scriptlet';
+                               }
+                       } else {
+                               return 'text/scriptlet';
+                       }
+               }
+
+               // Freaky heuristics to determine if the data is text or binary
+               // The heuristic is of course broken for non-ASCII text
+               if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] )
+                       < ( $counters['ctrl'] + $counters['high'] ) * 16 )
+               {
+                       $kindOfBinary = true;
+                       $type = $binaryType ? $binaryType : $textType;
+                       if ( $type === false ) {
+                               $type = 'application/octet-stream';
+                       }
+               } else {
+                       $kindOfBinary = false;
+                       $type = $textType ? $textType : $binaryType;
+                       if ( $type === false ) {
+                               $type = 'text/plain';
+                       }
+               }
+
+               // Check if the output format is ambiguous
+               // This generally means that detection failed, real types aren't ambiguous
+               $detectedFormat = $this->getDataFormat( $version, $type );
+               if ( $detectedFormat != 'ambiguous' ) {
+                       return $type;
+               }
+
+               if ( $proposedFormat != 'ambiguous' ) {
+                       // FormatAgreesWithData()
+                       if ( $proposedFormat == 'text' && !$kindOfBinary ) {
+                               return $proposed;
+                       }
+                       if ( $proposedFormat == 'binary' && $kindOfBinary ) {
+                               return $proposed;
+                       }
+                       if ( $proposedFormat == 'html' ) {
+                               return $proposed;
+                       }
+               }
+
+               // Find a MIME type by searching the registry for the file extension.
+               $dotPos = strrpos( $fileName, '.' );
+               if ( $dotPos === false ) {
+                       return $type;
+               }
+               $ext = substr( $fileName, $dotPos );
+               if ( isset( $this->registry[$ext] ) ) {
+                       return $this->registry[$ext];
+               }
+
+               // TODO: If the extension has an application registered to it, IE will return
+               // application/octet-stream. We'll skip that, so we could erroneously
+               // return text/plain or application/x-netcdf where application/octet-stream
+               // would be correct.
+
+               return $type;
+       }
+
+       /**
+        * Check for text headers at the start of the chunk
+        * Confirmed same in 5 and 7.
+        * @param $version
+        * @param $chunk
+        * @return bool|string
+        */
+       private function checkTextHeaders( $version, $chunk ) {
+               $chunk2 = substr( $chunk, 0, 2 );
+               $chunk4 = substr( $chunk, 0, 4 );
+               $chunk5 = substr( $chunk, 0, 5 );
+               if ( $chunk4 == '%PDF' ) {
+                       return 'application/pdf';
+               }
+               if ( $chunk2 == '%!' ) {
+                       return 'application/postscript';
+               }
+               if ( $chunk5 == '{\\rtf' ) {
+                       return 'text/richtext';
+               }
+               if ( $chunk5 == 'begin' ) {
+                       return 'application/base64';
+               }
+               return false;
+       }
+
+       /**
+        * Check for binary headers at the start of the chunk
+        * Confirmed same in 5 and 7.
+        * @param $version
+        * @param $chunk
+        * @return bool|string
+        */
+       private function checkBinaryHeaders( $version, $chunk ) {
+               $chunk2 = substr( $chunk, 0, 2 );
+               $chunk3 = substr( $chunk, 0, 3 );
+               $chunk4 = substr( $chunk, 0, 4 );
+               $chunk5 = substr( $chunk, 0, 5 );
+               $chunk5uc = strtoupper( $chunk5 );
+               $chunk8 = substr( $chunk, 0, 8 );
+               if ( $chunk5uc == 'GIF87' || $chunk5uc == 'GIF89' ) {
+                       return 'image/gif';
+               }
+               if ( $chunk2 == "\xff\xd8" ) {
+                       return 'image/pjpeg'; // actually plain JPEG but this is what IE returns
+               }
+
+               if ( $chunk2 == 'BM'
+                       && substr( $chunk, 6, 2 ) == "\000\000"
+                       && substr( $chunk, 8, 2 ) == "\000\000" )
+               {
+                       return 'image/bmp'; // another non-standard MIME
+               }
+               if ( $chunk4 == 'RIFF'
+                       && substr( $chunk, 8, 4 ) == 'WAVE' )
+               {
+                       return 'audio/wav';
+               }
+               // These were integer literals in IE
+               // Perhaps the author was not sure what the target endianness was
+               if ( $chunk4 == ".sd\000"
+                       || $chunk4 == ".snd"
+                       || $chunk4 == "\000ds."
+                       || $chunk4 == "dns." )
+               {
+                       return 'audio/basic';
+               }
+               if ( $chunk3 == "MM\000" ) {
+                       return 'image/tiff';
+               }
+               if ( $chunk2 == 'MZ' ) {
+                       return 'application/x-msdownload';
+               }
+               if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {
+                       return 'image/x-png'; // [sic]
+               }
+               if ( strlen( $chunk ) >= 5 ) {
+                       $byte2 = ord( $chunk[2] );
+                       $byte4 = ord( $chunk[4] );
+                       if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) {
+                               return 'image/x-jg';
+                       }
+               }
+               // More endian confusion?
+               if ( $chunk4 == 'MROF' ) {
+                       return 'audio/x-aiff';
+               }
+               $chunk4_8 = substr( $chunk, 8, 4 );
+               if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) {
+                       return 'audio/x-aiff';
+               }
+               if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) {
+                       return 'video/avi';
+               }
+               if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) {
+                       return 'video/mpeg';
+               }
+               if ( $chunk4 == "\001\000\000\000"
+                       && substr( $chunk, 40, 4 ) == ' EMF' )
+               {
+                       return 'image/x-emf';
+               }
+               if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {
+                       return 'image/x-wmf';
+               }
+               if ( $chunk4 == "\xca\xfe\xba\xbe" ) {
+                       return 'application/java';
+               }
+               if ( $chunk2 == 'PK' ) {
+                       return 'application/x-zip-compressed';
+               }
+               if ( $chunk2 == "\x1f\x9d" ) {
+                       return 'application/x-compressed';
+               }
+               if ( $chunk2 == "\x1f\x8b" ) {
+                       return 'application/x-gzip-compressed';
+               }
+               // Skip redundant check for ZIP
+               if ( $chunk5 == "MThd\000" ) {
+                       return 'audio/mid';
+               }
+               if ( $chunk4 == '%PDF' ) {
+                       return 'application/pdf';
+               }
+               return false;
+       }
+
+       /**
+        * Do heuristic checks on the bulk of the data sample.
+        * Search for HTML tags.
+        * @param $version
+        * @param $chunk
+        * @return array
+        */
+       protected function sampleData( $version, $chunk ) {
+               $found = [];
+               $counters = [
+                       'ctrl' => 0,
+                       'high' => 0,
+                       'low' => 0,
+                       'lf' => 0,
+                       'cr' => 0,
+                       'ff' => 0
+               ];
+               $htmlTags = [
+                       'html',
+                       'head',
+                       'title',
+                       'body',
+                       'script',
+                       'a href',
+                       'pre',
+                       'img',
+                       'plaintext',
+                       'table'
+               ];
+               $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+               $rdfPurl = 'http://purl.org/rss/1.0/';
+               $xbmMagic1 = '#define';
+               $xbmMagic2 = '_width';
+               $xbmMagic3 = '_bits';
+               $binhexMagic = 'converted with BinHex';
+               $chunkLength = strlen( $chunk );
+
+               for ( $offset = 0; $offset < $chunkLength; $offset++ ) {
+                       $curChar = $chunk[$offset];
+                       if ( $curChar == "\x0a" ) {
+                               $counters['lf']++;
+                               continue;
+                       } elseif ( $curChar == "\x0d" ) {
+                               $counters['cr']++;
+                               continue;
+                       } elseif ( $curChar == "\x0c" ) {
+                               $counters['ff']++;
+                               continue;
+                       } elseif ( $curChar == "\t" ) {
+                               $counters['low']++;
+                               continue;
+                       } elseif ( ord( $curChar ) < 32 ) {
+                               $counters['ctrl']++;
+                               continue;
+                       } elseif ( ord( $curChar ) >= 128 ) {
+                               $counters['high']++;
+                               continue;
+                       }
+
+                       $counters['low']++;
+                       if ( $curChar == '<' ) {
+                               // XML
+                               $remainder = substr( $chunk, $offset + 1 );
+                               if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
+                                       $nextChar = substr( $chunk, $offset + 5, 1 );
+                                       if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
+                                               $found['xml'] = true;
+                                       }
+                               }
+                               // Scriptlet (JSP)
+                               if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
+                                       $found['scriptlet'] = true;
+                                       break;
+                               }
+                               // HTML
+                               foreach ( $htmlTags as $tag ) {
+                                       if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
+                                               $found['html'] = true;
+                                       }
+                               }
+                               // Skip broken check for additional tags (HR etc.)
+
+                               // CHANNEL replaced by RSS, RDF and FEED in IE 7
+                               if ( $version < 'ie07' ) {
+                                       if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
+                                               $found['cdf'] = true;
+                                       }
+                               } else {
+                                       // RSS
+                                       if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
+                                               $found['rss'] = true;
+                                               break; // return from SampleData
+                                       }
+                                       if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
+                                               $found['rdf-tag'] = true;
+                                               // no break
+                                       }
+                                       if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
+                                               $found['atom'] = true;
+                                               break;
+                                       }
+                               }
+                               continue;
+                       }
+                       // Skip broken check for -->
+
+                       // RSS URL checks
+                       // For some reason both URLs must appear before it is recognised
+                       $remainder = substr( $chunk, $offset );
+                       if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
+                               $found['rdf-url'] = true;
+                               if ( isset( $found['rdf-tag'] )
+                                       && isset( $found['rdf-purl'] ) ) // [sic]
+                               {
+                                       break;
+                               }
+                               continue;
+                       }
+
+                       if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
+                               if ( isset( $found['rdf-tag'] )
+                                       && isset( $found['rdf-url'] ) ) // [sic]
+                               {
+                                       break;
+                               }
+                               continue;
+                       }
+
+                       // XBM checks
+                       if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
+                               $found['xbm1'] = true;
+                               continue;
+                       }
+                       if ( $curChar == '_' ) {
+                               if ( isset( $found['xbm2'] ) ) {
+                                       if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
+                                               $found['xbm'] = true;
+                                               break;
+                                       }
+                               } elseif ( isset( $found['xbm1'] ) ) {
+                                       if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
+                                               $found['xbm2'] = true;
+                                       }
+                               }
+                       }
+
+                       // BinHex
+                       if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
+                               $found['binhex'] = true;
+                       }
+               }
+               return [ 'found' => $found, 'counters' => $counters ];
+       }
+
+       /**
+        * @param $version
+        * @param $type
+        * @return int|string
+        */
+       protected function getDataFormat( $version, $type ) {
+               $types = $this->typeTable[$version];
+               if ( $type == '(null)' || strval( $type ) === '' ) {
+                       return 'ambiguous';
+               }
+               foreach ( $types as $format => $list ) {
+                       if ( in_array( $type, $list ) ) {
+                               return $format;
+                       }
+               }
+               return 'unknown';
+       }
+}
diff --git a/includes/libs/mime/MimeAnalyzer.php b/includes/libs/mime/MimeAnalyzer.php
new file mode 100644 (file)
index 0000000..5f4d7c9
--- /dev/null
@@ -0,0 +1,1166 @@
+<?php
+/**
+ * Module defining helper functions for detecting and dealing with MIME types.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+use Psr\Log\LoggerAwareInterface;
+use Psr\Log\LoggerInterface;
+
+/**
+ * Implements functions related to MIME types such as detection and mapping to file extension
+ *
+ * @since 1.28
+ */
+class MimeAnalyzer implements LoggerAwareInterface {
+       /** @var string */
+       protected $typeFile;
+       /** @var string */
+       protected $infoFile;
+       /** @var string */
+       protected $xmlTypes;
+       /** @var callable */
+       protected $initCallback;
+       /** @var callable */
+       protected $detectCallback;
+       /** @var callable */
+       protected $guessCallback;
+       /** @var callable */
+       protected $extCallback;
+       /** @var array Mapping of media types to arrays of MIME types */
+       protected $mediaTypes = null;
+       /** @var array Map of MIME type aliases */
+       protected $mimeTypeAliases = null;
+       /** @var array Map of MIME types to file extensions (as a space separated list) */
+       protected $mimetoExt = null;
+
+       /** @var array Map of file extensions types to MIME types (as a space separated list) */
+       public $mExtToMime = null; // legacy name; field accessed by hooks
+
+       /** @var IEContentAnalyzer */
+       protected $IEAnalyzer;
+
+       /** @var string Extra MIME types, set for example by media handling extensions */
+       private $extraTypes = '';
+       /** @var string Extra MIME info, set for example by media handling extensions */
+       private $extraInfo = '';
+
+       /** @var LoggerInterface */
+       private $logger;
+
+       /**
+        * Defines a set of well known MIME types
+        * This is used as a fallback to mime.types files.
+        * An extensive list of well known MIME types is provided by
+        * the file mime.types in the includes directory.
+        *
+        * This list concatenated with mime.types is used to create a MIME <-> ext
+        * map. Each line contains a MIME type followed by a space separated list of
+        * extensions. If multiple extensions for a single MIME type exist or if
+        * multiple MIME types exist for a single extension then in most cases
+        * MediaWiki assumes that the first extension following the MIME type is the
+        * canonical extension, and the first time a MIME type appears for a certain
+        * extension is considered the canonical MIME type.
+        *
+        * (Note that appending the type file list to the end of self::$wellKnownTypes
+        * sucks because you can't redefine canonical types. This could be fixed by
+        * appending self::$wellKnownTypes behind type file list, but who knows
+        * what will break? In practice this probably isn't a problem anyway -- Bryan)
+        */
+       protected static $wellKnownTypes = <<<EOT
+application/ogg ogx ogg ogm ogv oga spx
+application/pdf pdf
+application/vnd.oasis.opendocument.chart odc
+application/vnd.oasis.opendocument.chart-template otc
+application/vnd.oasis.opendocument.database odb
+application/vnd.oasis.opendocument.formula odf
+application/vnd.oasis.opendocument.formula-template otf
+application/vnd.oasis.opendocument.graphics odg
+application/vnd.oasis.opendocument.graphics-template otg
+application/vnd.oasis.opendocument.image odi
+application/vnd.oasis.opendocument.image-template oti
+application/vnd.oasis.opendocument.presentation odp
+application/vnd.oasis.opendocument.presentation-template otp
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.spreadsheet-template ots
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.text-master otm
+application/vnd.oasis.opendocument.text-template ott
+application/vnd.oasis.opendocument.text-web oth
+application/javascript js
+application/x-shockwave-flash swf
+audio/midi mid midi kar
+audio/mpeg mpga mpa mp2 mp3
+audio/x-aiff aif aiff aifc
+audio/x-wav wav
+audio/ogg oga spx ogg
+image/x-bmp bmp
+image/gif gif
+image/jpeg jpeg jpg jpe
+image/png png
+image/svg+xml svg
+image/svg svg
+image/tiff tiff tif
+image/vnd.djvu djvu
+image/x.djvu djvu
+image/x-djvu djvu
+image/x-portable-pixmap ppm
+image/x-xcf xcf
+text/plain txt
+text/html html htm
+video/ogg ogv ogm ogg
+video/mpeg mpg mpeg
+EOT;
+
+       /**
+        * Defines a set of well known MIME info entries
+        * This is used as a fallback to mime.info files.
+        * An extensive list of well known MIME types is provided by
+        * the file mime.info in the includes directory.
+        */
+       protected static $wellKnownInfo = <<<EOT
+application/pdf [OFFICE]
+application/vnd.oasis.opendocument.chart [OFFICE]
+application/vnd.oasis.opendocument.chart-template [OFFICE]
+application/vnd.oasis.opendocument.database [OFFICE]
+application/vnd.oasis.opendocument.formula [OFFICE]
+application/vnd.oasis.opendocument.formula-template [OFFICE]
+application/vnd.oasis.opendocument.graphics [OFFICE]
+application/vnd.oasis.opendocument.graphics-template [OFFICE]
+application/vnd.oasis.opendocument.image [OFFICE]
+application/vnd.oasis.opendocument.image-template [OFFICE]
+application/vnd.oasis.opendocument.presentation [OFFICE]
+application/vnd.oasis.opendocument.presentation-template [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
+application/vnd.oasis.opendocument.text [OFFICE]
+application/vnd.oasis.opendocument.text-template [OFFICE]
+application/vnd.oasis.opendocument.text-master [OFFICE]
+application/vnd.oasis.opendocument.text-web [OFFICE]
+application/javascript text/javascript application/x-javascript [EXECUTABLE]
+application/x-shockwave-flash [MULTIMEDIA]
+audio/midi [AUDIO]
+audio/x-aiff [AUDIO]
+audio/x-wav [AUDIO]
+audio/mp3 audio/mpeg [AUDIO]
+application/ogg audio/ogg video/ogg [MULTIMEDIA]
+image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
+image/gif [BITMAP]
+image/jpeg [BITMAP]
+image/png [BITMAP]
+image/svg+xml [DRAWING]
+image/tiff [BITMAP]
+image/vnd.djvu [BITMAP]
+image/x-xcf [BITMAP]
+image/x-portable-pixmap [BITMAP]
+text/plain [TEXT]
+text/html [TEXT]
+video/ogg [VIDEO]
+video/mpeg [VIDEO]
+unknown/unknown application/octet-stream application/x-empty [UNKNOWN]
+EOT;
+
+       /**
+        * @param array $params Configuration map, includes:
+        *   - typeFile: path to file with the list of known MIME types
+        *   - infoFile: path to file with the MIME type info
+        *   - xmlTypes: map of root element names to XML MIME types
+        *   - initCallback: initialization callback that is passed this object [optional]
+        *   - detectCallback: alternative to finfo that returns the mime type for a file.
+        *      For example, the callback can return the output of "file -bi". [optional]
+        *   - guessCallback: callback to improve the guessed MIME type using the file data.
+        *      This is intended for fixing mistakes in fileinfo or "detectCallback". [optional]
+        *   - extCallback: callback to improve the guessed MIME type using the extension. [optional]
+        *   - logger: PSR-3 logger [optional]
+        * @note Constructing these instances is expensive due to file reads.
+        *  A service or singleton pattern should be used to avoid creating instances again and again.
+        */
+       public function __construct( array $params ) {
+               $this->typeFile = $params['typeFile'];
+               $this->infoFile = $params['infoFile'];
+               $this->xmlTypes = $params['xmlTypes'];
+               $this->initCallback = isset( $params['initCallback'] )
+                       ? $params['initCallback']
+                       : null;
+               $this->detectCallback = isset( $params['detectCallback'] )
+                       ? $params['detectCallback']
+                       : null;
+               $this->guessCallback = isset( $params['guessCallback'] )
+                       ? $params['guessCallback']
+                       : null;
+               $this->extCallback = isset( $params['extCallback'] )
+                       ? $params['extCallback']
+                       : null;
+               $this->logger = isset( $params['logger'] )
+                       ? $params['logger']
+                       : new \Psr\Log\NullLogger();
+
+               $this->loadFiles();
+       }
+
+       protected function loadFiles() {
+               /**
+                *   --- load mime.types ---
+                */
+
+               # Allow media handling extensions adding MIME-types and MIME-info
+               if ( $this->initCallback ) {
+                       call_user_func( $this->initCallback, $this );
+               }
+
+               $types = self::$wellKnownTypes;
+
+               $mimeTypeFile = $this->typeFile;
+               if ( $mimeTypeFile ) {
+                       if ( is_file( $mimeTypeFile ) && is_readable( $mimeTypeFile ) ) {
+                               $this->logger->info( __METHOD__ . ": loading mime types from $mimeTypeFile\n" );
+                               $types .= "\n";
+                               $types .= file_get_contents( $mimeTypeFile );
+                       } else {
+                               $this->logger->info( __METHOD__ . ": can't load mime types from $mimeTypeFile\n" );
+                       }
+               } else {
+                       $this->logger->info( __METHOD__ .
+                               ": no mime types file defined, using built-ins only.\n" );
+               }
+
+               $types .= "\n" . $this->extraTypes;
+
+               $types = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $types );
+               $types = str_replace( "\t", " ", $types );
+
+               $this->mimetoExt = [];
+               $this->mExtToMime = [];
+
+               $lines = explode( "\n", $types );
+               foreach ( $lines as $s ) {
+                       $s = trim( $s );
+                       if ( empty( $s ) ) {
+                               continue;
+                       }
+                       if ( strpos( $s, '#' ) === 0 ) {
+                               continue;
+                       }
+
+                       $s = strtolower( $s );
+                       $i = strpos( $s, ' ' );
+
+                       if ( $i === false ) {
+                               continue;
+                       }
+
+                       $mime = substr( $s, 0, $i );
+                       $ext = trim( substr( $s, $i + 1 ) );
+
+                       if ( empty( $ext ) ) {
+                               continue;
+                       }
+
+                       if ( !empty( $this->mimetoExt[$mime] ) ) {
+                               $this->mimetoExt[$mime] .= ' ' . $ext;
+                       } else {
+                               $this->mimetoExt[$mime] = $ext;
+                       }
+
+                       $extensions = explode( ' ', $ext );
+
+                       foreach ( $extensions as $e ) {
+                               $e = trim( $e );
+                               if ( empty( $e ) ) {
+                                       continue;
+                               }
+
+                               if ( !empty( $this->mExtToMime[$e] ) ) {
+                                       $this->mExtToMime[$e] .= ' ' . $mime;
+                               } else {
+                                       $this->mExtToMime[$e] = $mime;
+                               }
+                       }
+               }
+
+               /**
+                *   --- load mime.info ---
+                */
+
+               $mimeInfoFile = $this->infoFile;
+
+               $info = self::$wellKnownInfo;
+
+               if ( $mimeInfoFile ) {
+                       if ( is_file( $mimeInfoFile ) && is_readable( $mimeInfoFile ) ) {
+                               $this->logger->info( __METHOD__ . ": loading mime info from $mimeInfoFile\n" );
+                               $info .= "\n";
+                               $info .= file_get_contents( $mimeInfoFile );
+                       } else {
+                               $this->logger->info( __METHOD__ . ": can't load mime info from $mimeInfoFile\n" );
+                       }
+               } else {
+                       $this->logger->info( __METHOD__ .
+                               ": no mime info file defined, using built-ins only.\n" );
+               }
+
+               $info .= "\n" . $this->extraInfo;
+
+               $info = str_replace( [ "\r\n", "\n\r", "\n\n", "\r\r", "\r" ], "\n", $info );
+               $info = str_replace( "\t", " ", $info );
+
+               $this->mimeTypeAliases = [];
+               $this->mediaTypes = [];
+
+               $lines = explode( "\n", $info );
+               foreach ( $lines as $s ) {
+                       $s = trim( $s );
+                       if ( empty( $s ) ) {
+                               continue;
+                       }
+                       if ( strpos( $s, '#' ) === 0 ) {
+                               continue;
+                       }
+
+                       $s = strtolower( $s );
+                       $i = strpos( $s, ' ' );
+
+                       if ( $i === false ) {
+                               continue;
+                       }
+
+                       # print "processing MIME INFO line $s<br>";
+
+                       $match = [];
+                       if ( preg_match( '!\[\s*(\w+)\s*\]!', $s, $match ) ) {
+                               $s = preg_replace( '!\[\s*(\w+)\s*\]!', '', $s );
+                               $mtype = trim( strtoupper( $match[1] ) );
+                       } else {
+                               $mtype = MEDIATYPE_UNKNOWN;
+                       }
+
+                       $m = explode( ' ', $s );
+
+                       if ( !isset( $this->mediaTypes[$mtype] ) ) {
+                               $this->mediaTypes[$mtype] = [];
+                       }
+
+                       foreach ( $m as $mime ) {
+                               $mime = trim( $mime );
+                               if ( empty( $mime ) ) {
+                                       continue;
+                               }
+
+                               $this->mediaTypes[$mtype][] = $mime;
+                       }
+
+                       if ( count( $m ) > 1 ) {
+                               $main = $m[0];
+                               $mCount = count( $m );
+                               for ( $i = 1; $i < $mCount; $i += 1 ) {
+                                       $mime = $m[$i];
+                                       $this->mimeTypeAliases[$mime] = $main;
+                               }
+                       }
+               }
+       }
+
+       public function setLogger( LoggerInterface $logger ) {
+               $this->logger = $logger;
+       }
+
+       /**
+        * Adds to the list mapping MIME to file extensions.
+        * As an extension author, you are encouraged to submit patches to
+        * MediaWiki's core to add new MIME types to mime.types.
+        * @param string $types
+        */
+       public function addExtraTypes( $types ) {
+               $this->extraTypes .= "\n" . $types;
+       }
+
+       /**
+        * Adds to the list mapping MIME to media type.
+        * As an extension author, you are encouraged to submit patches to
+        * MediaWiki's core to add new MIME info to mime.info.
+        * @param string $info
+        */
+       public function addExtraInfo( $info ) {
+               $this->extraInfo .= "\n" . $info;
+       }
+
+       /**
+        * Returns a list of file extensions for a given MIME type as a space
+        * separated string or null if the MIME type was unrecognized. Resolves
+        * MIME type aliases.
+        *
+        * @param string $mime
+        * @return string|null
+        */
+       public function getExtensionsForType( $mime ) {
+               $mime = strtolower( $mime );
+
+               // Check the mime-to-ext map
+               if ( isset( $this->mimetoExt[$mime] ) ) {
+                       return $this->mimetoExt[$mime];
+               }
+
+               // Resolve the MIME type to the canonical type
+               if ( isset( $this->mimeTypeAliases[$mime] ) ) {
+                       $mime = $this->mimeTypeAliases[$mime];
+                       if ( isset( $this->mimetoExt[$mime] ) ) {
+                               return $this->mimetoExt[$mime];
+                       }
+               }
+
+               return null;
+       }
+
+       /**
+        * Returns a list of MIME types for a given file extension as a space
+        * separated string or null if the extension was unrecognized.
+        *
+        * @param string $ext
+        * @return string|null
+        */
+       public function getTypesForExtension( $ext ) {
+               $ext = strtolower( $ext );
+
+               $r = isset( $this->mExtToMime[$ext] ) ? $this->mExtToMime[$ext] : null;
+               return $r;
+       }
+
+       /**
+        * Returns a single MIME type for a given file extension or null if unknown.
+        * This is always the first type from the list returned by getTypesForExtension($ext).
+        *
+        * @param string $ext
+        * @return string|null
+        */
+       public function guessTypesForExtension( $ext ) {
+               $m = $this->getTypesForExtension( $ext );
+               if ( is_null( $m ) ) {
+                       return null;
+               }
+
+               // TODO: Check if this is needed; strtok( $m, ' ' ) should be sufficient
+               $m = trim( $m );
+               $m = preg_replace( '/\s.*$/', '', $m );
+
+               return $m;
+       }
+
+       /**
+        * Tests if the extension matches the given MIME type. Returns true if a
+        * match was found, null if the MIME type is unknown, and false if the
+        * MIME type is known but no matches where found.
+        *
+        * @param string $extension
+        * @param string $mime
+        * @return bool|null
+        */
+       public function isMatchingExtension( $extension, $mime ) {
+               $ext = $this->getExtensionsForType( $mime );
+
+               if ( !$ext ) {
+                       return null; // Unknown MIME type
+               }
+
+               $ext = explode( ' ', $ext );
+
+               $extension = strtolower( $extension );
+               return in_array( $extension, $ext );
+       }
+
+       /**
+        * Returns true if the MIME type is known to represent an image format
+        * supported by the PHP GD library.
+        *
+        * @param string $mime
+        *
+        * @return bool
+        */
+       public function isPHPImageType( $mime ) {
+               // As defined by imagegetsize and image_type_to_mime
+               static $types = [
+                       'image/gif', 'image/jpeg', 'image/png',
+                       'image/x-bmp', 'image/xbm', 'image/tiff',
+                       'image/jp2', 'image/jpeg2000', 'image/iff',
+                       'image/xbm', 'image/x-xbitmap',
+                       'image/vnd.wap.wbmp', 'image/vnd.xiff',
+                       'image/x-photoshop',
+                       'application/x-shockwave-flash',
+               ];
+
+               return in_array( $mime, $types );
+       }
+
+       /**
+        * Returns true if the extension represents a type which can
+        * be reliably detected from its content. Use this to determine
+        * whether strict content checks should be applied to reject
+        * invalid uploads; if we can't identify the type we won't
+        * be able to say if it's invalid.
+        *
+        * @todo Be more accurate when using fancy MIME detector plugins;
+        *       right now this is the bare minimum getimagesize() list.
+        * @param string $extension
+        * @return bool
+        */
+       function isRecognizableExtension( $extension ) {
+               static $types = [
+                       // Types recognized by getimagesize()
+                       'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
+                       'bmp', 'tiff', 'tif', 'jpc', 'jp2',
+                       'jpx', 'jb2', 'swc', 'iff', 'wbmp',
+                       'xbm',
+
+                       // Formats we recognize magic numbers for
+                       'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
+                       'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
+                       'webp',
+
+                       // XML formats we sure hope we recognize reliably
+                       'svg',
+               ];
+               return in_array( strtolower( $extension ), $types );
+       }
+
+       /**
+        * Improves a MIME type using the file extension. Some file formats are very generic,
+        * so their MIME type is not very meaningful. A more useful MIME type can be derived
+        * by looking at the file extension. Typically, this method would be called on the
+        * result of guessMimeType().
+        *
+        * @param string $mime The MIME type, typically guessed from a file's content.
+        * @param string $ext The file extension, as taken from the file name
+        *
+        * @return string The MIME type
+        */
+       public function improveTypeFromExtension( $mime, $ext ) {
+               if ( $mime === 'unknown/unknown' ) {
+                       if ( $this->isRecognizableExtension( $ext ) ) {
+                               $this->logger->info( __METHOD__ . ': refusing to guess mime type for .' .
+                                       "$ext file, we should have recognized it\n" );
+                       } else {
+                               // Not something we can detect, so simply
+                               // trust the file extension
+                               $mime = $this->guessTypesForExtension( $ext );
+                       }
+               } elseif ( $mime === 'application/x-opc+zip' ) {
+                       if ( $this->isMatchingExtension( $ext, $mime ) ) {
+                               // A known file extension for an OPC file,
+                               // find the proper MIME type for that file extension
+                               $mime = $this->guessTypesForExtension( $ext );
+                       } else {
+                               $this->logger->info( __METHOD__ .
+                                       ": refusing to guess better type for $mime file, " .
+                                       ".$ext is not a known OPC extension.\n" );
+                               $mime = 'application/zip';
+                       }
+               } elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
+                       // Textual types are sometimes not recognized properly.
+                       // If detected as text/plain, and has an extension which is textual
+                       // improve to the extension's type. For example, csv and json are often
+                       // misdetected as text/plain.
+                       $mime = $this->guessTypesForExtension( $ext );
+               }
+
+               # Media handling extensions can improve the MIME detected
+               $callback = $this->extCallback;
+               if ( $callback ) {
+                       $callback( $this, $ext, $mime /* by reference */ );
+               }
+
+               if ( isset( $this->mimeTypeAliases[$mime] ) ) {
+                       $mime = $this->mimeTypeAliases[$mime];
+               }
+
+               $this->logger->info( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
+               return $mime;
+       }
+
+       /**
+        * MIME type detection. This uses detectMimeType to detect the MIME type
+        * of the file, but applies additional checks to determine some well known
+        * file formats that may be missed or misinterpreted by the default MIME
+        * detection (namely XML based formats like XHTML or SVG, as well as ZIP
+        * based formats like OPC/ODF files).
+        *
+        * @param string $file The file to check
+        * @param string|bool $ext The file extension, or true (default) to extract
+        *   it from the filename. Set it to false to ignore the extension. DEPRECATED!
+        *   Set to false, use improveTypeFromExtension($mime, $ext) later to improve MIME type.
+        *
+        * @return string The MIME type of $file
+        */
+       public function guessMimeType( $file, $ext = true ) {
+               if ( $ext ) { // TODO: make $ext default to false. Or better, remove it.
+                       $this->logger->info( __METHOD__ .
+                               ": WARNING: use of the \$ext parameter is deprecated. " .
+                               "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
+               }
+
+               $mime = $this->doGuessMimeType( $file, $ext );
+
+               if ( !$mime ) {
+                       $this->logger->info( __METHOD__ .
+                               ": internal type detection failed for $file (.$ext)...\n" );
+                       $mime = $this->detectMimeType( $file, $ext );
+               }
+
+               if ( isset( $this->mimeTypeAliases[$mime] ) ) {
+                       $mime = $this->mimeTypeAliases[$mime];
+               }
+
+               $this->logger->info( __METHOD__ . ": guessed mime type of $file: $mime\n" );
+               return $mime;
+       }
+
+       /**
+        * Guess the MIME type from the file contents.
+        *
+        * @todo Remove $ext param
+        *
+        * @param string $file
+        * @param mixed $ext
+        * @return bool|string
+        * @throws UnexpectedValueException
+        */
+       private function doGuessMimeType( $file, $ext ) {
+               // Read a chunk of the file
+               MediaWiki\suppressWarnings();
+               $f = fopen( $file, 'rb' );
+               MediaWiki\restoreWarnings();
+
+               if ( !$f ) {
+                       return 'unknown/unknown';
+               }
+
+               $fsize = filesize( $file );
+               if ( $fsize === false ) {
+                       return 'unknown/unknown';
+               }
+
+               $head = fread( $f, 1024 );
+               $tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
+               if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
+                       throw new UnexpectedValueException(
+                               "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
+               }
+               $tail = $tailLength ? fread( $f, $tailLength ) : '';
+               fclose( $f );
+
+               $this->logger->info( __METHOD__ .
+                       ": analyzing head and tail of $file for magic numbers.\n" );
+
+               // Hardcode a few magic number checks...
+               $headers = [
+                       // Multimedia...
+                       'MThd'             => 'audio/midi',
+                       'OggS'             => 'application/ogg',
+
+                       // Image formats...
+                       // Note that WMF may have a bare header, no magic number.
+                       "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
+                       "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
+                       '%PDF'             => 'application/pdf',
+                       'gimp xcf'         => 'image/x-xcf',
+
+                       // Some forbidden fruit...
+                       'MZ'               => 'application/octet-stream', // DOS/Windows executable
+                       "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
+                       "\x7fELF"          => 'application/octet-stream', // ELF binary
+               ];
+
+               foreach ( $headers as $magic => $candidate ) {
+                       if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
+                               $this->logger->info( __METHOD__ .
+                                       ": magic header in $file recognized as $candidate\n" );
+                               return $candidate;
+                       }
+               }
+
+               /* Look for WebM and Matroska files */
+               if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
+                       $doctype = strpos( $head, "\x42\x82" );
+                       if ( $doctype ) {
+                               // Next byte is datasize, then data (sizes larger than 1 byte are stupid muxers)
+                               $data = substr( $head, $doctype + 3, 8 );
+                               if ( strncmp( $data, "matroska", 8 ) == 0 ) {
+                                       $this->logger->info( __METHOD__ . ": recognized file as video/x-matroska\n" );
+                                       return "video/x-matroska";
+                               } elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
+                                       $this->logger->info( __METHOD__ . ": recognized file as video/webm\n" );
+                                       return "video/webm";
+                               }
+                       }
+                       $this->logger->info( __METHOD__ . ": unknown EBML file\n" );
+                       return "unknown/unknown";
+               }
+
+               /* Look for WebP */
+               if ( strncmp( $head, "RIFF", 4 ) == 0 &&
+                       strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0
+               ) {
+                       $this->logger->info( __METHOD__ . ": recognized file as image/webp\n" );
+                       return "image/webp";
+               }
+
+               /**
+                * Look for PHP.  Check for this before HTML/XML...  Warning: this is a
+                * heuristic, and won't match a file with a lot of non-PHP before.  It
+                * will also match text files which could be PHP. :)
+                *
+                * @todo FIXME: For this reason, the check is probably useless -- an attacker
+                * could almost certainly just pad the file with a lot of nonsense to
+                * circumvent the check in any case where it would be a security
+                * problem.  On the other hand, it causes harmful false positives (bug
+                * 16583).  The heuristic has been cut down to exclude three-character
+                * strings like "<? ", but should it be axed completely?
+                */
+               if ( ( strpos( $head, '<?php' ) !== false ) ||
+                       ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
+                       ( strpos( $head, "<\x00?\x00 " ) !== false ) ||
+                       ( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
+                       ( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
+                       ( strpos( $head, "<\x00?\x00=" ) !== false ) ) {
+
+                       $this->logger->info( __METHOD__ . ": recognized $file as application/x-php\n" );
+                       return 'application/x-php';
+               }
+
+               /**
+                * look for XML formats (XHTML and SVG)
+                */
+               $xml = new XmlTypeCheck( $file );
+               if ( $xml->wellFormed ) {
+                       $xmlTypes = $this->xmlTypes;
+                       if ( isset( $xmlTypes[$xml->getRootElement()] ) ) {
+                               return $xmlTypes[$xml->getRootElement()];
+                       } else {
+                               return 'application/xml';
+                       }
+               }
+
+               /**
+                * look for shell scripts
+                */
+               $script_type = null;
+
+               # detect by shebang
+               if ( substr( $head, 0, 2 ) == "#!" ) {
+                       $script_type = "ASCII";
+               } elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
+                       $script_type = "UTF-8";
+               } elseif ( substr( $head, 0, 7 ) == "\xfe\xff\x00#\x00!" ) {
+                       $script_type = "UTF-16BE";
+               } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) {
+                       $script_type = "UTF-16LE";
+               }
+
+               if ( $script_type ) {
+                       if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
+                               // Quick and dirty fold down to ASCII!
+                               $pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
+                               $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
+                               $head = '';
+                               foreach ( $chars as $codepoint ) {
+                                       if ( $codepoint < 128 ) {
+                                               $head .= chr( $codepoint );
+                                       } else {
+                                               $head .= '?';
+                                       }
+                               }
+                       }
+
+                       $match = [];
+
+                       if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
+                               $mime = "application/x-{$match[2]}";
+                               $this->logger->info( __METHOD__ . ": shell script recognized as $mime\n" );
+                               return $mime;
+                       }
+               }
+
+               // Check for ZIP variants (before getimagesize)
+               if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
+                       $this->logger->info( __METHOD__ . ": ZIP header present in $file\n" );
+                       return $this->detectZipType( $head, $tail, $ext );
+               }
+
+               MediaWiki\suppressWarnings();
+               $gis = getimagesize( $file );
+               MediaWiki\restoreWarnings();
+
+               if ( $gis && isset( $gis['mime'] ) ) {
+                       $mime = $gis['mime'];
+                       $this->logger->info( __METHOD__ . ": getimagesize detected $file as $mime\n" );
+                       return $mime;
+               }
+
+               # Media handling extensions can guess the MIME by content
+               # It's intentionally here so that if core is wrong about a type (false positive),
+               # people will hopefully nag and submit patches :)
+               $mime = false;
+               # Some strings by reference for performance - assuming well-behaved hooks
+               $callback = $this->guessCallback;
+               if ( $callback ) {
+                       $callback( $this, $head, $tail, $file, $mime /* by reference */ );
+               };
+
+               return $mime;
+       }
+
+       /**
+        * Detect application-specific file type of a given ZIP file from its
+        * header data.  Currently works for OpenDocument and OpenXML types...
+        * If can't tell, returns 'application/zip'.
+        *
+        * @param string $header Some reasonably-sized chunk of file header
+        * @param string|null $tail The tail of the file
+        * @param string|bool $ext The file extension, or true to extract it from the filename.
+        *   Set it to false (default) to ignore the extension. DEPRECATED! Set to false,
+        *   use improveTypeFromExtension($mime, $ext) later to improve MIME type.
+        *
+        * @return string
+        */
+       function detectZipType( $header, $tail = null, $ext = false ) {
+               if ( $ext ) { # TODO: remove $ext param
+                       $this->logger->info( __METHOD__ .
+                               ": WARNING: use of the \$ext parameter is deprecated. " .
+                               "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
+               }
+
+               $mime = 'application/zip';
+               $opendocTypes = [
+                       'chart-template',
+                       'chart',
+                       'formula-template',
+                       'formula',
+                       'graphics-template',
+                       'graphics',
+                       'image-template',
+                       'image',
+                       'presentation-template',
+                       'presentation',
+                       'spreadsheet-template',
+                       'spreadsheet',
+                       'text-template',
+                       'text-master',
+                       'text-web',
+                       'text' ];
+
+               // http://lists.oasis-open.org/archives/office/200505/msg00006.html
+               $types = '(?:' . implode( '|', $opendocTypes ) . ')';
+               $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
+
+               $openxmlRegex = "/^\[Content_Types\].xml/";
+
+               if ( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
+                       $mime = $matches[1];
+                       $this->logger->info( __METHOD__ . ": detected $mime from ZIP archive\n" );
+               } elseif ( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
+                       $mime = "application/x-opc+zip";
+                       # TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere
+                       if ( $ext !== true && $ext !== false ) {
+                               /** This is the mode used by getPropsFromPath
+                                * These MIME's are stored in the database, where we don't really want
+                                * x-opc+zip, because we use it only for internal purposes
+                                */
+                               if ( $this->isMatchingExtension( $ext, $mime ) ) {
+                                       /* A known file extension for an OPC file,
+                                        * find the proper mime type for that file extension
+                                        */
+                                       $mime = $this->guessTypesForExtension( $ext );
+                               } else {
+                                       $mime = "application/zip";
+                               }
+                       }
+                       $this->logger->info( __METHOD__ .
+                               ": detected an Open Packaging Conventions archive: $mime\n" );
+               } elseif ( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" &&
+                       ( $headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
+                       preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
+                       if ( substr( $header, 512, 4 ) == "\xEC\xA5\xC1\x00" ) {
+                               $mime = "application/msword";
+                       }
+                       switch ( substr( $header, 512, 6 ) ) {
+                               case "\xEC\xA5\xC1\x00\x0E\x00":
+                               case "\xEC\xA5\xC1\x00\x1C\x00":
+                               case "\xEC\xA5\xC1\x00\x43\x00":
+                                       $mime = "application/vnd.ms-powerpoint";
+                                       break;
+                               case "\xFD\xFF\xFF\xFF\x10\x00":
+                               case "\xFD\xFF\xFF\xFF\x1F\x00":
+                               case "\xFD\xFF\xFF\xFF\x22\x00":
+                               case "\xFD\xFF\xFF\xFF\x23\x00":
+                               case "\xFD\xFF\xFF\xFF\x28\x00":
+                               case "\xFD\xFF\xFF\xFF\x29\x00":
+                               case "\xFD\xFF\xFF\xFF\x10\x02":
+                               case "\xFD\xFF\xFF\xFF\x1F\x02":
+                               case "\xFD\xFF\xFF\xFF\x22\x02":
+                               case "\xFD\xFF\xFF\xFF\x23\x02":
+                               case "\xFD\xFF\xFF\xFF\x28\x02":
+                               case "\xFD\xFF\xFF\xFF\x29\x02":
+                                       $mime = "application/vnd.msexcel";
+                                       break;
+                       }
+
+                       $this->logger->info( __METHOD__ .
+                               ": detected a MS Office document with OPC trailer\n" );
+               } else {
+                       $this->logger->info( __METHOD__ . ": unable to identify type of ZIP archive\n" );
+               }
+               return $mime;
+       }
+
+       /**
+        * Internal MIME type detection. Detection is done using the fileinfo
+        * extension if it is available. It can be overriden by callback, which could
+        * use an external program, for example. If detection fails and $ext is not false,
+        * the MIME type is guessed from the file extension, using guessTypesForExtension.
+        *
+        * If the MIME type is still unknown, getimagesize is used to detect the
+        * MIME type if the file is an image. If no MIME type can be determined,
+        * this function returns 'unknown/unknown'.
+        *
+        * @param string $file The file to check
+        * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
+        *   Set it to false to ignore the extension. DEPRECATED! Set to false, use
+        *   improveTypeFromExtension($mime, $ext) later to improve MIME type.
+        *
+        * @return string The MIME type of $file
+        */
+       private function detectMimeType( $file, $ext = true ) {
+               /** @todo Make $ext default to false. Or better, remove it. */
+               if ( $ext ) {
+                       $this->logger->info( __METHOD__ .
+                               ": WARNING: use of the \$ext parameter is deprecated. "
+                               . "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
+               }
+
+               $callback = $this->detectCallback;
+               $m = null;
+               if ( $callback ) {
+                       $m = $callback( $file );
+               } elseif ( function_exists( "finfo_open" ) && function_exists( "finfo_file" ) ) {
+                       $mime_magic_resource = finfo_open( FILEINFO_MIME );
+
+                       if ( $mime_magic_resource ) {
+                               $m = finfo_file( $mime_magic_resource, $file );
+                               finfo_close( $mime_magic_resource );
+                       } else {
+                               $this->logger->info( __METHOD__ .
+                                       ": finfo_open failed on " . FILEINFO_MIME . "!\n" );
+                       }
+               } else {
+                       $this->logger->info( __METHOD__ . ": no magic mime detector found!\n" );
+               }
+
+               if ( $m ) {
+                       # normalize
+                       $m = preg_replace( '![;, ].*$!', '', $m ); # strip charset, etc
+                       $m = trim( $m );
+                       $m = strtolower( $m );
+
+                       if ( strpos( $m, 'unknown' ) !== false ) {
+                               $m = null;
+                       } else {
+                               $this->logger->info( __METHOD__ . ": magic mime type of $file: $m\n" );
+                               return $m;
+                       }
+               }
+
+               // If desired, look at extension as a fallback.
+               if ( $ext === true ) {
+                       $i = strrpos( $file, '.' );
+                       $ext = strtolower( $i ? substr( $file, $i + 1 ) : '' );
+               }
+               if ( $ext ) {
+                       if ( $this->isRecognizableExtension( $ext ) ) {
+                               $this->logger->info( __METHOD__ . ": refusing to guess mime type for .$ext file, "
+                                       . "we should have recognized it\n" );
+                       } else {
+                               $m = $this->guessTypesForExtension( $ext );
+                               if ( $m ) {
+                                       $this->logger->info( __METHOD__ . ": extension mime type of $file: $m\n" );
+                                       return $m;
+                               }
+                       }
+               }
+
+               // Unknown type
+               $this->logger->info( __METHOD__ . ": failed to guess mime type for $file!\n" );
+               return 'unknown/unknown';
+       }
+
+       /**
+        * Determine the media type code for a file, using its MIME type, name and
+        * possibly its contents.
+        *
+        * This function relies on the findMediaType(), mapping extensions and MIME
+        * types to media types.
+        *
+        * @todo analyse file if need be
+        * @todo look at multiple extension, separately and together.
+        *
+        * @param string $path Full path to the image file, in case we have to look at the contents
+        *        (if null, only the MIME type is used to determine the media type code).
+        * @param string $mime MIME type. If null it will be guessed using guessMimeType.
+        *
+        * @return string A value to be used with the MEDIATYPE_xxx constants.
+        */
+       function getMediaType( $path = null, $mime = null ) {
+               if ( !$mime && !$path ) {
+                       return MEDIATYPE_UNKNOWN;
+               }
+
+               // If MIME type is unknown, guess it
+               if ( !$mime ) {
+                       $mime = $this->guessMimeType( $path, false );
+               }
+
+               // Special code for ogg - detect if it's video (theora),
+               // else label it as sound.
+               if ( $mime == 'application/ogg' && file_exists( $path ) ) {
+
+                       // Read a chunk of the file
+                       $f = fopen( $path, "rt" );
+                       if ( !$f ) {
+                               return MEDIATYPE_UNKNOWN;
+                       }
+                       $head = fread( $f, 256 );
+                       fclose( $f );
+
+                       $head = str_replace( 'ffmpeg2theora', '', strtolower( $head ) );
+
+                       // This is an UGLY HACK, file should be parsed correctly
+                       if ( strpos( $head, 'theora' ) !== false ) {
+                               return MEDIATYPE_VIDEO;
+                       } elseif ( strpos( $head, 'vorbis' ) !== false ) {
+                               return MEDIATYPE_AUDIO;
+                       } elseif ( strpos( $head, 'flac' ) !== false ) {
+                               return MEDIATYPE_AUDIO;
+                       } elseif ( strpos( $head, 'speex' ) !== false ) {
+                               return MEDIATYPE_AUDIO;
+                       } else {
+                               return MEDIATYPE_MULTIMEDIA;
+                       }
+               }
+
+               $type = null;
+               // Check for entry for full MIME type
+               if ( $mime ) {
+                       $type = $this->findMediaType( $mime );
+                       if ( $type !== MEDIATYPE_UNKNOWN ) {
+                               return $type;
+                       }
+               }
+
+               // Check for entry for file extension
+               if ( $path ) {
+                       $i = strrpos( $path, '.' );
+                       $e = strtolower( $i ? substr( $path, $i + 1 ) : '' );
+
+                       // TODO: look at multi-extension if this fails, parse from full path
+                       $type = $this->findMediaType( '.' . $e );
+                       if ( $type !== MEDIATYPE_UNKNOWN ) {
+                               return $type;
+                       }
+               }
+
+               // Check major MIME type
+               if ( $mime ) {
+                       $i = strpos( $mime, '/' );
+                       if ( $i !== false ) {
+                               $major = substr( $mime, 0, $i );
+                               $type = $this->findMediaType( $major );
+                               if ( $type !== MEDIATYPE_UNKNOWN ) {
+                                       return $type;
+                               }
+                       }
+               }
+
+               if ( !$type ) {
+                       $type = MEDIATYPE_UNKNOWN;
+               }
+
+               return $type;
+       }
+
+       /**
+        * Returns a media code matching the given MIME type or file extension.
+        * File extensions are represented by a string starting with a dot (.) to
+        * distinguish them from MIME types.
+        *
+        * This function relies on the mapping defined by $this->mMediaTypes
+        * @access private
+        * @param string $extMime
+        * @return int|string
+        */
+       function findMediaType( $extMime ) {
+               if ( strpos( $extMime, '.' ) === 0 ) {
+                       // If it's an extension, look up the MIME types
+                       $m = $this->getTypesForExtension( substr( $extMime, 1 ) );
+                       if ( !$m ) {
+                               return MEDIATYPE_UNKNOWN;
+                       }
+
+                       $m = explode( ' ', $m );
+               } else {
+                       // Normalize MIME type
+                       if ( isset( $this->mimeTypeAliases[$extMime] ) ) {
+                               $extMime = $this->mimeTypeAliases[$extMime];
+                       }
+
+                       $m = [ $extMime ];
+               }
+
+               foreach ( $m as $mime ) {
+                       foreach ( $this->mediaTypes as $type => $codes ) {
+                               if ( in_array( $mime, $codes, true ) ) {
+                                       return $type;
+                               }
+                       }
+               }
+
+               return MEDIATYPE_UNKNOWN;
+       }
+
+       /**
+        * Get the MIME types that various versions of Internet Explorer would
+        * detect from a chunk of the content.
+        *
+        * @param string $fileName The file name (unused at present)
+        * @param string $chunk The first 256 bytes of the file
+        * @param string $proposed The MIME type proposed by the server
+        * @return array
+        */
+       public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
+               $ca = $this->getIEContentAnalyzer();
+               return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
+       }
+
+       /**
+        * Get a cached instance of IEContentAnalyzer
+        *
+        * @return IEContentAnalyzer
+        */
+       protected function getIEContentAnalyzer() {
+               if ( is_null( $this->IEAnalyzer ) ) {
+                       $this->IEAnalyzer = new IEContentAnalyzer;
+               }
+               return $this->IEAnalyzer;
+       }
+}
diff --git a/includes/libs/mime/XmlTypeCheck.php b/includes/libs/mime/XmlTypeCheck.php
new file mode 100644 (file)
index 0000000..f057140
--- /dev/null
@@ -0,0 +1,347 @@
+<?php
+/**
+ * XML syntax and type checker.
+ *
+ * Since 1.24.2, it uses XMLReader instead of xml_parse, which gives us
+ * more control over the expansion of XML entities. When passed to the
+ * callback, entities will be fully expanded, but may report the XML is
+ * invalid if expanding the entities are likely to cause a DoS.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+class XmlTypeCheck {
+       /**
+        * Will be set to true or false to indicate whether the file is
+        * well-formed XML. Note that this doesn't check schema validity.
+        */
+       public $wellFormed = null;
+
+       /**
+        * Will be set to true if the optional element filter returned
+        * a match at some point.
+        */
+       public $filterMatch = false;
+
+       /**
+        * Will contain the type of filter hit if the optional element filter returned
+        * a match at some point.
+        * @var mixed
+        */
+       public $filterMatchType = false;
+
+       /**
+        * Name of the document's root element, including any namespace
+        * as an expanded URL.
+        */
+       public $rootElement = '';
+
+       /**
+        * A stack of strings containing the data of each xml element as it's processed. Append
+        * data to the top string of the stack, then pop off the string and process it when the
+        * element is closed.
+        */
+       protected $elementData = [];
+
+       /**
+        * A stack of element names and attributes, as we process them.
+        */
+       protected $elementDataContext = [];
+
+       /**
+        * Current depth of the data stack.
+        */
+       protected $stackDepth = 0;
+
+       /**
+        * Additional parsing options
+        */
+       private $parserOptions = [
+               'processing_instruction_handler' => '',
+       ];
+
+       /**
+        * @param string $input a filename or string containing the XML element
+        * @param callable $filterCallback (optional)
+        *        Function to call to do additional custom validity checks from the
+        *        SAX element handler event. This gives you access to the element
+        *        namespace, name, attributes, and text contents.
+        *        Filter should return 'true' to toggle on $this->filterMatch
+        * @param bool $isFile (optional) indicates if the first parameter is a
+        *        filename (default, true) or if it is a string (false)
+        * @param array $options list of additional parsing options:
+        *        processing_instruction_handler: Callback for xml_set_processing_instruction_handler
+        */
+       function __construct( $input, $filterCallback = null, $isFile = true, $options = [] ) {
+               $this->filterCallback = $filterCallback;
+               $this->parserOptions = array_merge( $this->parserOptions, $options );
+               $this->validateFromInput( $input, $isFile );
+       }
+
+       /**
+        * Alternative constructor: from filename
+        *
+        * @param string $fname the filename of an XML document
+        * @param callable $filterCallback (optional)
+        *        Function to call to do additional custom validity checks from the
+        *        SAX element handler event. This gives you access to the element
+        *        namespace, name, and attributes, but not to text contents.
+        *        Filter should return 'true' to toggle on $this->filterMatch
+        * @return XmlTypeCheck
+        */
+       public static function newFromFilename( $fname, $filterCallback = null ) {
+               return new self( $fname, $filterCallback, true );
+       }
+
+       /**
+        * Alternative constructor: from string
+        *
+        * @param string $string a string containing an XML element
+        * @param callable $filterCallback (optional)
+        *        Function to call to do additional custom validity checks from the
+        *        SAX element handler event. This gives you access to the element
+        *        namespace, name, and attributes, but not to text contents.
+        *        Filter should return 'true' to toggle on $this->filterMatch
+        * @return XmlTypeCheck
+        */
+       public static function newFromString( $string, $filterCallback = null ) {
+               return new self( $string, $filterCallback, false );
+       }
+
+       /**
+        * Get the root element. Simple accessor to $rootElement
+        *
+        * @return string
+        */
+       public function getRootElement() {
+               return $this->rootElement;
+       }
+
+       /**
+        * @param string $fname the filename
+        */
+       private function validateFromInput( $xml, $isFile ) {
+               $reader = new XMLReader();
+               if ( $isFile ) {
+                       $s = $reader->open( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+               } else {
+                       $s = $reader->XML( $xml, null, LIBXML_NOERROR | LIBXML_NOWARNING );
+               }
+               if ( $s !== true ) {
+                       // Couldn't open the XML
+                       $this->wellFormed = false;
+               } else {
+                       $oldDisable = libxml_disable_entity_loader( true );
+                       $reader->setParserProperty( XMLReader::SUBST_ENTITIES, true );
+                       try {
+                               $this->validate( $reader );
+                       } catch ( Exception $e ) {
+                               // Calling this malformed, because we didn't parse the whole
+                               // thing. Maybe just an external entity refernce.
+                               $this->wellFormed = false;
+                               $reader->close();
+                               libxml_disable_entity_loader( $oldDisable );
+                               throw $e;
+                       }
+                       $reader->close();
+                       libxml_disable_entity_loader( $oldDisable );
+               }
+       }
+
+       private function readNext( XMLReader $reader ) {
+               set_error_handler( [ $this, 'XmlErrorHandler' ] );
+               $ret = $reader->read();
+               restore_error_handler();
+               return $ret;
+       }
+
+       public function XmlErrorHandler( $errno, $errstr ) {
+               $this->wellFormed = false;
+       }
+
+       private function validate( $reader ) {
+
+               // First, move through anything that isn't an element, and
+               // handle any processing instructions with the callback
+               do {
+                       if ( !$this->readNext( $reader ) ) {
+                               // Hit the end of the document before any elements
+                               $this->wellFormed = false;
+                               return;
+                       }
+                       if ( $reader->nodeType === XMLReader::PI ) {
+                               $this->processingInstructionHandler( $reader->name, $reader->value );
+                       }
+               } while ( $reader->nodeType != XMLReader::ELEMENT );
+
+               // Process the rest of the document
+               do {
+                       switch ( $reader->nodeType ) {
+                               case XMLReader::ELEMENT:
+                                       $name = $this->expandNS(
+                                               $reader->name,
+                                               $reader->namespaceURI
+                                       );
+                                       if ( $this->rootElement === '' ) {
+                                               $this->rootElement = $name;
+                                       }
+                                       $empty = $reader->isEmptyElement;
+                                       $attrs = $this->getAttributesArray( $reader );
+                                       $this->elementOpen( $name, $attrs );
+                                       if ( $empty ) {
+                                               $this->elementClose();
+                                       }
+                                       break;
+
+                               case XMLReader::END_ELEMENT:
+                                       $this->elementClose();
+                                       break;
+
+                               case XMLReader::WHITESPACE:
+                               case XMLReader::SIGNIFICANT_WHITESPACE:
+                               case XMLReader::CDATA:
+                               case XMLReader::TEXT:
+                                       $this->elementData( $reader->value );
+                                       break;
+
+                               case XMLReader::ENTITY_REF:
+                                       // Unexpanded entity (maybe external?),
+                                       // don't send to the filter (xml_parse didn't)
+                                       break;
+
+                               case XMLReader::COMMENT:
+                                       // Don't send to the filter (xml_parse didn't)
+                                       break;
+
+                               case XMLReader::PI:
+                                       // Processing instructions can happen after the header too
+                                       $this->processingInstructionHandler(
+                                               $reader->name,
+                                               $reader->value
+                                       );
+                                       break;
+                               default:
+                                       // One of DOC, DOC_TYPE, ENTITY, END_ENTITY,
+                                       // NOTATION, or XML_DECLARATION
+                                       // xml_parse didn't send these to the filter, so we won't.
+                       }
+
+               } while ( $this->readNext( $reader ) );
+
+               if ( $this->stackDepth !== 0 ) {
+                       $this->wellFormed = false;
+               } elseif ( $this->wellFormed === null ) {
+                       $this->wellFormed = true;
+               }
+
+       }
+
+       /**
+        * Get all of the attributes for an XMLReader's current node
+        * @param $r XMLReader
+        * @return array of attributes
+        */
+       private function getAttributesArray( XMLReader $r ) {
+               $attrs = [];
+               while ( $r->moveToNextAttribute() ) {
+                       if ( $r->namespaceURI === 'http://www.w3.org/2000/xmlns/' ) {
+                               // XMLReader treats xmlns attributes as normal
+                               // attributes, while xml_parse doesn't
+                               continue;
+                       }
+                       $name = $this->expandNS( $r->name, $r->namespaceURI );
+                       $attrs[$name] = $r->value;
+               }
+               return $attrs;
+       }
+
+       /**
+        * @param $name element or attribute name, maybe with a full or short prefix
+        * @param $namespaceURI the namespaceURI
+        * @return string the name prefixed with namespaceURI
+        */
+       private function expandNS( $name, $namespaceURI ) {
+               if ( $namespaceURI ) {
+                       $parts = explode( ':', $name );
+                       $localname = array_pop( $parts );
+                       return "$namespaceURI:$localname";
+               }
+               return $name;
+       }
+
+       /**
+        * @param $name
+        * @param $attribs
+        */
+       private function elementOpen( $name, $attribs ) {
+               $this->elementDataContext[] = [ $name, $attribs ];
+               $this->elementData[] = '';
+               $this->stackDepth++;
+       }
+
+       /**
+        */
+       private function elementClose() {
+               list( $name, $attribs ) = array_pop( $this->elementDataContext );
+               $data = array_pop( $this->elementData );
+               $this->stackDepth--;
+               $callbackReturn = false;
+
+               if ( is_callable( $this->filterCallback ) ) {
+                       $callbackReturn = call_user_func(
+                               $this->filterCallback,
+                               $name,
+                               $attribs,
+                               $data
+                       );
+               }
+               if ( $callbackReturn ) {
+                       // Filter hit!
+                       $this->filterMatch = true;
+                       $this->filterMatchType = $callbackReturn;
+               }
+       }
+
+       /**
+        * @param $data
+        */
+       private function elementData( $data ) {
+               // Collect any data here, and we'll run the callback in elementClose
+               $this->elementData[ $this->stackDepth - 1 ] .= trim( $data );
+       }
+
+       /**
+        * @param $target
+        * @param $data
+        */
+       private function processingInstructionHandler( $target, $data ) {
+               $callbackReturn = false;
+               if ( $this->parserOptions['processing_instruction_handler'] ) {
+                       $callbackReturn = call_user_func(
+                               $this->parserOptions['processing_instruction_handler'],
+                               $target,
+                               $data
+                       );
+               }
+               if ( $callbackReturn ) {
+                       // Filter hit!
+                       $this->filterMatch = true;
+                       $this->filterMatchType = $callbackReturn;
+               }
+       }
+}
diff --git a/includes/libs/mime/defines.php b/includes/libs/mime/defines.php
new file mode 100644 (file)
index 0000000..ae0b5f8
--- /dev/null
@@ -0,0 +1,46 @@
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**@{
+ * Media types.
+ * This defines constants for the value returned by File::getMediaType()
+ */
+// unknown format
+define( 'MEDIATYPE_UNKNOWN', 'UNKNOWN' );
+// some bitmap image or image source (like psd, etc). Can't scale up.
+define( 'MEDIATYPE_BITMAP', 'BITMAP' );
+// some vector drawing (SVG, WMF, PS, ...) or image source (oo-draw, etc). Can scale up.
+define( 'MEDIATYPE_DRAWING', 'DRAWING' );
+// simple audio file (ogg, mp3, wav, midi, whatever)
+define( 'MEDIATYPE_AUDIO', 'AUDIO' );
+// simple video file (ogg, mpg, etc;
+// no not include formats here that may contain executable sections or scripts!)
+define( 'MEDIATYPE_VIDEO', 'VIDEO' );
+// Scriptable Multimedia (flash, advanced video container formats, etc)
+define( 'MEDIATYPE_MULTIMEDIA', 'MULTIMEDIA' );
+// Office Documents, Spreadsheets (office formats possibly containing apples, scripts, etc)
+define( 'MEDIATYPE_OFFICE', 'OFFICE' );
+// Plain text (possibly containing program code or scripts)
+define( 'MEDIATYPE_TEXT', 'TEXT' );
+// binary executable
+define( 'MEDIATYPE_EXECUTABLE', 'EXECUTABLE' );
+// archive file (zip, tar, etc)
+define( 'MEDIATYPE_ARCHIVE', 'ARCHIVE' );
+/**@}*/
diff --git a/includes/libs/mime/mime.info b/includes/libs/mime/mime.info
new file mode 100644 (file)
index 0000000..b04d3c6
--- /dev/null
@@ -0,0 +1,119 @@
+# MIME type info file.
+# the first MIME type in each line is the "main" MIME type,
+# the others are aliases for this type
+# the media type is given in upper case and square brackets,
+# like [BITMAP], and must indicate a media type as defined by
+# the MEDIATYPE_xxx constants in Defines.php
+
+
+image/gif      [BITMAP]
+image/png image/x-png  [BITMAP]
+image/ief      [BITMAP]
+image/jpeg image/pjpeg [BITMAP]
+image/jp2      [BITMAP]
+image/xbm      [BITMAP]
+image/tiff     [BITMAP]
+image/x-icon image/x-ico image/vnd.microsoft.icon      [BITMAP]
+image/x-rgb    [BITMAP]
+image/x-portable-pixmap                [BITMAP]
+image/x-portable-graymap image/x-portable-greymap      [BITMAP]
+image/x-bmp image/x-ms-bmp image/bmp application/x-bmp application/bmp [BITMAP]
+image/x-photoshop image/psd image/x-psd image/photoshop image/vnd.adobe.photoshop      [BITMAP]
+image/vnd.djvu image/x.djvu image/x-djvu [BITMAP]
+image/webp     [BITMAP]
+
+image/svg+xml application/svg+xml application/svg image/svg    [DRAWING]
+application/postscript [DRAWING]
+application/x-latex    [DRAWING]
+application/x-tex      [DRAWING]
+application/x-dia-diagram [DRAWING]
+
+
+audio/mpeg audio/mp3 audio/mpeg3       [AUDIO]
+audio/mp4                              [AUDIO]
+audio/wav audio/x-wav audio/wave       [AUDIO]
+audio/midi audio/mid   [AUDIO]
+audio/basic            [AUDIO]
+audio/ogg              [AUDIO]
+audio/x-aiff           [AUDIO]
+audio/x-pn-realaudio   [AUDIO]
+audio/x-realaudio      [AUDIO]
+audio/webm             [AUDIO]
+audio/x-matroska       [AUDIO]
+audio/x-flac           [AUDIO]
+audio/flac             [AUDIO]
+
+video/mpeg application/mpeg    [VIDEO]
+video/ogg                      [VIDEO]
+video/x-sgi-video              [VIDEO]
+video/x-flv                    [VIDEO]
+video/webm                     [VIDEO]
+video/x-matroska               [VIDEO]
+video/mp4                      [VIDEO]
+
+application/ogg application/x-ogg audio/ogg audio/x-ogg video/ogg video/x-ogg          [MULTIMEDIA]
+
+application/x-shockwave-flash  [MULTIMEDIA]
+audio/x-pn-realaudio-plugin    [MULTIMEDIA]
+model/iges     [MULTIMEDIA]
+model/mesh     [MULTIMEDIA]
+model/vrml     [MULTIMEDIA]
+video/quicktime        [MULTIMEDIA]
+video/x-msvideo        [MULTIMEDIA]
+
+text/plain     [TEXT]
+text/html application/xhtml+xml        [TEXT]
+application/xml text/xml       [TEXT]
+text   [TEXT]
+application/json       [TEXT]
+text/csv       [TEXT]
+text/tab-separated-values      [TEXT]
+
+application/zip application/x-zip      [ARCHIVE]
+application/x-gzip     [ARCHIVE]
+application/x-bzip     [ARCHIVE]
+application/x-bzip2    [ARCHIVE]
+application/x-tar      [ARCHIVE]
+application/x-stuffit  [ARCHIVE]
+application/x-opc+zip  [ARCHIVE]
+application/x-7z-compressed [ARCHIVE]
+
+application/javascript text/javascript application/x-javascript application/x-ecmascript text/ecmascript       [EXECUTABLE]
+application/x-bash     [EXECUTABLE]
+application/x-sh       [EXECUTABLE]
+application/x-csh      [EXECUTABLE]
+application/x-tcsh     [EXECUTABLE]
+application/x-tcl      [EXECUTABLE]
+application/x-perl     [EXECUTABLE]
+application/x-python   [EXECUTABLE]
+
+application/pdf application/acrobat    [OFFICE]
+application/msword             [OFFICE]
+application/vnd.ms-excel       [OFFICE]
+application/vnd.ms-powerpoint  [OFFICE]
+application/x-director         [OFFICE]
+text/rtf                       [OFFICE]
+
+application/vnd.openxmlformats-officedocument.wordprocessingml.document        [OFFICE]
+application/vnd.openxmlformats-officedocument.wordprocessingml.template                [OFFICE]
+application/vnd.ms-word.document.macroEnabled.12                               [OFFICE]
+application/vnd.ms-word.template.macroEnabled.12                               [OFFICE]
+application/vnd.openxmlformats-officedocument.presentationml.template          [OFFICE]
+application/vnd.openxmlformats-officedocument.presentationml.slideshow         [OFFICE]
+application/vnd.openxmlformats-officedocument.presentationml.presentation      [OFFICE]
+application/vnd.ms-powerpoint.addin.macroEnabled.12                            [OFFICE]
+application/vnd.ms-powerpoint.presentation.macroEnabled.12                     [OFFICE]
+application/vnd.ms-powerpoint.presentation.macroEnabled.12                     [OFFICE]
+application/vnd.ms-powerpoint.slideshow.macroEnabled.12                                [OFFICE]
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet              [OFFICE]
+application/vnd.openxmlformats-officedocument.spreadsheetml.template           [OFFICE]
+application/vnd.ms-excel.sheet.macroEnabled.12                                 [OFFICE]
+application/vnd.ms-excel.template.macroEnabled.12                              [OFFICE]
+application/vnd.ms-excel.addin.macroEnabled.12                                 [OFFICE]
+application/vnd.ms-excel.sheet.binary.macroEnabled.12                          [OFFICE]
+application/acad application/x-acad application/autocad_dwg image/x-dwg application/dwg application/x-dwg application/x-autocad image/vnd.dwg drawing/dwg [DRAWING]
+chemical/x-mdl-molfile     [DRAWING]
+chemical/x-mdl-sdfile      [DRAWING]
+chemical/x-mdl-rxnfile     [DRAWING]
+chemical/x-mdl-rdfile      [DRAWING]
+chemical/x-mdl-rgfile      [DRAWING]
diff --git a/includes/libs/mime/mime.types b/includes/libs/mime/mime.types
new file mode 100644 (file)
index 0000000..b4f515a
--- /dev/null
@@ -0,0 +1,188 @@
+application/acad dwg
+application/andrew-inset ez
+application/mac-binhex40 hqx
+application/mac-compactpro cpt
+application/mathml+xml mathml
+application/msword doc dot
+application/octet-stream bin dms lha lzh exe class so dll
+application/oda oda
+application/ogg ogx ogg ogm ogv oga spx opus
+application/pdf pdf
+application/postscript ai eps ps
+application/rdf+xml rdf
+application/smil smi smil
+application/srgs gram
+application/srgs+xml grxml
+application/vnd.mif mif
+application/vnd.ms-excel xls xlt xla
+application/vnd.ms-powerpoint ppt pot pps ppa
+application/vnd.wap.wbxml wbxml
+application/vnd.wap.wmlc wmlc
+application/vnd.wap.wmlscriptc wmlsc
+application/voicexml+xml vxml
+application/x-7z-compressed 7z
+application/x-bcpio bcpio
+application/x-bzip bz
+application/x-bzip2 bz2
+application/x-cdlink vcd
+application/x-chess-pgn pgn
+application/x-cpio cpio
+application/x-csh csh
+application/x-dia-diagram dia
+application/x-director dcr dir dxr
+application/x-dvi dvi
+application/x-futuresplash spl
+application/x-gtar gtar tar
+application/x-gzip gz
+application/x-hdf hdf
+application/x-jar jar
+application/javascript js
+application/json json
+application/x-koan skp skd skt skm
+application/x-latex latex
+application/x-netcdf nc cdf
+application/x-sh sh
+application/x-shar shar
+application/x-shockwave-flash swf
+application/x-stuffit sit
+application/x-sv4cpio sv4cpio
+application/x-sv4crc sv4crc
+application/x-tar tar
+application/x-tcl tcl
+application/x-tex tex
+application/x-texinfo texinfo texi
+application/x-troff t tr roff
+application/x-troff-man man
+application/x-troff-me me
+application/x-troff-ms ms
+application/x-ustar ustar
+application/x-wais-source src
+application/x-xpinstall xpi
+application/xhtml+xml xhtml xht
+application/xslt+xml xslt
+application/xml xml xsl xsd kml
+application/xml-dtd dtd
+application/zip zip jar xpi sxc stc sxd std sxi sti sxm stm sxw stw
+application/x-rar rar
+application/font-woff woff
+application/font-woff2 woff2
+application/vnd.ms-fontobject eot
+application/x-font-ttf ttf
+audio/basic au snd
+audio/midi mid midi kar
+audio/mpeg mpga mp2 mp3
+audio/ogg oga ogg spx opus
+video/webm webm
+audio/webm webm
+audio/x-aiff aif aiff aifc
+audio/x-matroska mka mkv
+audio/x-mpegurl m3u
+audio/x-ogg oga ogg spx opus
+audio/x-pn-realaudio ram rm
+audio/x-pn-realaudio-plugin rpm
+audio/x-realaudio ra
+audio/x-wav wav
+audio/wav wav
+audio/x-flac flac
+audio/flac flac
+chemical/x-pdb pdb
+chemical/x-xyz xyz
+image/bmp bmp
+image/cgm cgm
+image/gif gif
+image/ief ief
+image/jp2 j2k jp2 jpg2
+image/jpeg jpeg jpg jpe
+image/png png apng
+image/svg+xml svg
+image/tiff tiff tif
+image/vnd.djvu djvu djv
+image/vnd.microsoft.icon ico
+image/vnd.wap.wbmp wbmp
+image/webp webp
+image/x-cmu-raster ras
+image/x-icon ico
+image/x-ms-bmp bmp
+image/x-portable-anymap pnm
+image/x-portable-bitmap pbm
+image/x-portable-graymap pgm
+image/x-portable-pixmap ppm
+image/x-rgb rgb
+image/x-photoshop psd
+image/x-xbitmap xbm
+image/x-xpixmap xpm
+image/x-xwindowdump xwd
+model/iges igs iges
+model/mesh msh mesh silo
+model/vrml wrl vrml
+text/calendar ics ifb
+text/css css
+text/csv csv
+text/html html htm
+text/plain txt
+text/richtext rtx
+text/rtf rtf
+text/sgml sgml sgm
+text/tab-separated-values tsv
+text/vnd.wap.wml wml
+text/vnd.wap.wmlscript wmls
+text/xml xml xsl xslt rss rdf
+text/x-component htc
+text/x-setext etx
+text/x-sawfish jl
+video/mpeg mpeg mpg mpe
+video/mp4 mp4 m4a m4p m4b m4r m4v
+audio/mp4 m4a
+video/ogg ogv ogm ogg
+video/quicktime qt mov
+video/vnd.mpegurl mxu
+video/x-flv flv
+video/x-matroska mkv mka
+video/x-msvideo avi
+video/x-ogg ogv ogm ogg
+video/x-sgi-movie movie
+x-conference/x-cooltalk ice
+application/vnd.oasis.opendocument.chart odc
+application/vnd.oasis.opendocument.chart-template otc
+application/vnd.oasis.opendocument.database odb
+application/vnd.oasis.opendocument.formula odf
+application/vnd.oasis.opendocument.formula-template otf
+application/vnd.oasis.opendocument.graphics odg
+application/vnd.oasis.opendocument.graphics-template otg
+application/vnd.oasis.opendocument.image odi
+application/vnd.oasis.opendocument.image-template oti
+application/vnd.oasis.opendocument.presentation odp
+application/vnd.oasis.opendocument.presentation-template otp
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.spreadsheet-template ots
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.text-master odm
+application/vnd.oasis.opendocument.text-template ott
+application/vnd.oasis.opendocument.text-web oth
+application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
+application/vnd.openxmlformats-officedocument.wordprocessingml.template dotx
+application/vnd.ms-word.document.macroEnabled.12 docm
+application/vnd.ms-word.template.macroEnabled.12 dotm
+application/vnd.openxmlformats-officedocument.presentationml.template potx
+application/vnd.openxmlformats-officedocument.presentationml.slideshow ppsx
+application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
+application/vnd.ms-powerpoint.addin.macroEnabled.12 ppam
+application/vnd.ms-powerpoint.presentation.macroEnabled.12 pptm
+application/vnd.ms-powerpoint.presentation.macroEnabled.12 potm
+application/vnd.ms-powerpoint.slideshow.macroEnabled.12 ppsm
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
+application/vnd.openxmlformats-officedocument.spreadsheetml.template xltx
+application/vnd.ms-excel.sheet.macroEnabled.12 xlsm
+application/vnd.ms-excel.template.macroEnabled.12 xltm
+application/vnd.ms-excel.addin.macroEnabled.12 xlam
+application/vnd.ms-excel.sheet.binary.macroEnabled.12 xlsb
+model/vnd.dwfx+xps dwfx
+application/vnd.ms-xpsdocument xps
+application/x-opc+zip docx dotx docm dotm potx ppsx pptx ppam pptm potm ppsm xlsx xltx xlsm xltm xlam xlsb dwfx xps
+chemical/x-mdl-molfile mol
+chemical/x-mdl-sdfile sdf
+chemical/x-mdl-rxnfile rxn
+chemical/x-mdl-rdfile rd
+chemical/x-mdl-rgfile rg
+application/x-amf amf
+application/sla stl
diff --git a/includes/mime.info b/includes/mime.info
deleted file mode 100644 (file)
index b04d3c6..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-# MIME type info file.
-# the first MIME type in each line is the "main" MIME type,
-# the others are aliases for this type
-# the media type is given in upper case and square brackets,
-# like [BITMAP], and must indicate a media type as defined by
-# the MEDIATYPE_xxx constants in Defines.php
-
-
-image/gif      [BITMAP]
-image/png image/x-png  [BITMAP]
-image/ief      [BITMAP]
-image/jpeg image/pjpeg [BITMAP]
-image/jp2      [BITMAP]
-image/xbm      [BITMAP]
-image/tiff     [BITMAP]
-image/x-icon image/x-ico image/vnd.microsoft.icon      [BITMAP]
-image/x-rgb    [BITMAP]
-image/x-portable-pixmap                [BITMAP]
-image/x-portable-graymap image/x-portable-greymap      [BITMAP]
-image/x-bmp image/x-ms-bmp image/bmp application/x-bmp application/bmp [BITMAP]
-image/x-photoshop image/psd image/x-psd image/photoshop image/vnd.adobe.photoshop      [BITMAP]
-image/vnd.djvu image/x.djvu image/x-djvu [BITMAP]
-image/webp     [BITMAP]
-
-image/svg+xml application/svg+xml application/svg image/svg    [DRAWING]
-application/postscript [DRAWING]
-application/x-latex    [DRAWING]
-application/x-tex      [DRAWING]
-application/x-dia-diagram [DRAWING]
-
-
-audio/mpeg audio/mp3 audio/mpeg3       [AUDIO]
-audio/mp4                              [AUDIO]
-audio/wav audio/x-wav audio/wave       [AUDIO]
-audio/midi audio/mid   [AUDIO]
-audio/basic            [AUDIO]
-audio/ogg              [AUDIO]
-audio/x-aiff           [AUDIO]
-audio/x-pn-realaudio   [AUDIO]
-audio/x-realaudio      [AUDIO]
-audio/webm             [AUDIO]
-audio/x-matroska       [AUDIO]
-audio/x-flac           [AUDIO]
-audio/flac             [AUDIO]
-
-video/mpeg application/mpeg    [VIDEO]
-video/ogg                      [VIDEO]
-video/x-sgi-video              [VIDEO]
-video/x-flv                    [VIDEO]
-video/webm                     [VIDEO]
-video/x-matroska               [VIDEO]
-video/mp4                      [VIDEO]
-
-application/ogg application/x-ogg audio/ogg audio/x-ogg video/ogg video/x-ogg          [MULTIMEDIA]
-
-application/x-shockwave-flash  [MULTIMEDIA]
-audio/x-pn-realaudio-plugin    [MULTIMEDIA]
-model/iges     [MULTIMEDIA]
-model/mesh     [MULTIMEDIA]
-model/vrml     [MULTIMEDIA]
-video/quicktime        [MULTIMEDIA]
-video/x-msvideo        [MULTIMEDIA]
-
-text/plain     [TEXT]
-text/html application/xhtml+xml        [TEXT]
-application/xml text/xml       [TEXT]
-text   [TEXT]
-application/json       [TEXT]
-text/csv       [TEXT]
-text/tab-separated-values      [TEXT]
-
-application/zip application/x-zip      [ARCHIVE]
-application/x-gzip     [ARCHIVE]
-application/x-bzip     [ARCHIVE]
-application/x-bzip2    [ARCHIVE]
-application/x-tar      [ARCHIVE]
-application/x-stuffit  [ARCHIVE]
-application/x-opc+zip  [ARCHIVE]
-application/x-7z-compressed [ARCHIVE]
-
-application/javascript text/javascript application/x-javascript application/x-ecmascript text/ecmascript       [EXECUTABLE]
-application/x-bash     [EXECUTABLE]
-application/x-sh       [EXECUTABLE]
-application/x-csh      [EXECUTABLE]
-application/x-tcsh     [EXECUTABLE]
-application/x-tcl      [EXECUTABLE]
-application/x-perl     [EXECUTABLE]
-application/x-python   [EXECUTABLE]
-
-application/pdf application/acrobat    [OFFICE]
-application/msword             [OFFICE]
-application/vnd.ms-excel       [OFFICE]
-application/vnd.ms-powerpoint  [OFFICE]
-application/x-director         [OFFICE]
-text/rtf                       [OFFICE]
-
-application/vnd.openxmlformats-officedocument.wordprocessingml.document        [OFFICE]
-application/vnd.openxmlformats-officedocument.wordprocessingml.template                [OFFICE]
-application/vnd.ms-word.document.macroEnabled.12                               [OFFICE]
-application/vnd.ms-word.template.macroEnabled.12                               [OFFICE]
-application/vnd.openxmlformats-officedocument.presentationml.template          [OFFICE]
-application/vnd.openxmlformats-officedocument.presentationml.slideshow         [OFFICE]
-application/vnd.openxmlformats-officedocument.presentationml.presentation      [OFFICE]
-application/vnd.ms-powerpoint.addin.macroEnabled.12                            [OFFICE]
-application/vnd.ms-powerpoint.presentation.macroEnabled.12                     [OFFICE]
-application/vnd.ms-powerpoint.presentation.macroEnabled.12                     [OFFICE]
-application/vnd.ms-powerpoint.slideshow.macroEnabled.12                                [OFFICE]
-application/vnd.openxmlformats-officedocument.spreadsheetml.sheet              [OFFICE]
-application/vnd.openxmlformats-officedocument.spreadsheetml.template           [OFFICE]
-application/vnd.ms-excel.sheet.macroEnabled.12                                 [OFFICE]
-application/vnd.ms-excel.template.macroEnabled.12                              [OFFICE]
-application/vnd.ms-excel.addin.macroEnabled.12                                 [OFFICE]
-application/vnd.ms-excel.sheet.binary.macroEnabled.12                          [OFFICE]
-application/acad application/x-acad application/autocad_dwg image/x-dwg application/dwg application/x-dwg application/x-autocad image/vnd.dwg drawing/dwg [DRAWING]
-chemical/x-mdl-molfile     [DRAWING]
-chemical/x-mdl-sdfile      [DRAWING]
-chemical/x-mdl-rxnfile     [DRAWING]
-chemical/x-mdl-rdfile      [DRAWING]
-chemical/x-mdl-rgfile      [DRAWING]
diff --git a/includes/mime.types b/includes/mime.types
deleted file mode 100644 (file)
index b4f515a..0000000
+++ /dev/null
@@ -1,188 +0,0 @@
-application/acad dwg
-application/andrew-inset ez
-application/mac-binhex40 hqx
-application/mac-compactpro cpt
-application/mathml+xml mathml
-application/msword doc dot
-application/octet-stream bin dms lha lzh exe class so dll
-application/oda oda
-application/ogg ogx ogg ogm ogv oga spx opus
-application/pdf pdf
-application/postscript ai eps ps
-application/rdf+xml rdf
-application/smil smi smil
-application/srgs gram
-application/srgs+xml grxml
-application/vnd.mif mif
-application/vnd.ms-excel xls xlt xla
-application/vnd.ms-powerpoint ppt pot pps ppa
-application/vnd.wap.wbxml wbxml
-application/vnd.wap.wmlc wmlc
-application/vnd.wap.wmlscriptc wmlsc
-application/voicexml+xml vxml
-application/x-7z-compressed 7z
-application/x-bcpio bcpio
-application/x-bzip bz
-application/x-bzip2 bz2
-application/x-cdlink vcd
-application/x-chess-pgn pgn
-application/x-cpio cpio
-application/x-csh csh
-application/x-dia-diagram dia
-application/x-director dcr dir dxr
-application/x-dvi dvi
-application/x-futuresplash spl
-application/x-gtar gtar tar
-application/x-gzip gz
-application/x-hdf hdf
-application/x-jar jar
-application/javascript js
-application/json json
-application/x-koan skp skd skt skm
-application/x-latex latex
-application/x-netcdf nc cdf
-application/x-sh sh
-application/x-shar shar
-application/x-shockwave-flash swf
-application/x-stuffit sit
-application/x-sv4cpio sv4cpio
-application/x-sv4crc sv4crc
-application/x-tar tar
-application/x-tcl tcl
-application/x-tex tex
-application/x-texinfo texinfo texi
-application/x-troff t tr roff
-application/x-troff-man man
-application/x-troff-me me
-application/x-troff-ms ms
-application/x-ustar ustar
-application/x-wais-source src
-application/x-xpinstall xpi
-application/xhtml+xml xhtml xht
-application/xslt+xml xslt
-application/xml xml xsl xsd kml
-application/xml-dtd dtd
-application/zip zip jar xpi sxc stc sxd std sxi sti sxm stm sxw stw
-application/x-rar rar
-application/font-woff woff
-application/font-woff2 woff2
-application/vnd.ms-fontobject eot
-application/x-font-ttf ttf
-audio/basic au snd
-audio/midi mid midi kar
-audio/mpeg mpga mp2 mp3
-audio/ogg oga ogg spx opus
-video/webm webm
-audio/webm webm
-audio/x-aiff aif aiff aifc
-audio/x-matroska mka mkv
-audio/x-mpegurl m3u
-audio/x-ogg oga ogg spx opus
-audio/x-pn-realaudio ram rm
-audio/x-pn-realaudio-plugin rpm
-audio/x-realaudio ra
-audio/x-wav wav
-audio/wav wav
-audio/x-flac flac
-audio/flac flac
-chemical/x-pdb pdb
-chemical/x-xyz xyz
-image/bmp bmp
-image/cgm cgm
-image/gif gif
-image/ief ief
-image/jp2 j2k jp2 jpg2
-image/jpeg jpeg jpg jpe
-image/png png apng
-image/svg+xml svg
-image/tiff tiff tif
-image/vnd.djvu djvu djv
-image/vnd.microsoft.icon ico
-image/vnd.wap.wbmp wbmp
-image/webp webp
-image/x-cmu-raster ras
-image/x-icon ico
-image/x-ms-bmp bmp
-image/x-portable-anymap pnm
-image/x-portable-bitmap pbm
-image/x-portable-graymap pgm
-image/x-portable-pixmap ppm
-image/x-rgb rgb
-image/x-photoshop psd
-image/x-xbitmap xbm
-image/x-xpixmap xpm
-image/x-xwindowdump xwd
-model/iges igs iges
-model/mesh msh mesh silo
-model/vrml wrl vrml
-text/calendar ics ifb
-text/css css
-text/csv csv
-text/html html htm
-text/plain txt
-text/richtext rtx
-text/rtf rtf
-text/sgml sgml sgm
-text/tab-separated-values tsv
-text/vnd.wap.wml wml
-text/vnd.wap.wmlscript wmls
-text/xml xml xsl xslt rss rdf
-text/x-component htc
-text/x-setext etx
-text/x-sawfish jl
-video/mpeg mpeg mpg mpe
-video/mp4 mp4 m4a m4p m4b m4r m4v
-audio/mp4 m4a
-video/ogg ogv ogm ogg
-video/quicktime qt mov
-video/vnd.mpegurl mxu
-video/x-flv flv
-video/x-matroska mkv mka
-video/x-msvideo avi
-video/x-ogg ogv ogm ogg
-video/x-sgi-movie movie
-x-conference/x-cooltalk ice
-application/vnd.oasis.opendocument.chart odc
-application/vnd.oasis.opendocument.chart-template otc
-application/vnd.oasis.opendocument.database odb
-application/vnd.oasis.opendocument.formula odf
-application/vnd.oasis.opendocument.formula-template otf
-application/vnd.oasis.opendocument.graphics odg
-application/vnd.oasis.opendocument.graphics-template otg
-application/vnd.oasis.opendocument.image odi
-application/vnd.oasis.opendocument.image-template oti
-application/vnd.oasis.opendocument.presentation odp
-application/vnd.oasis.opendocument.presentation-template otp
-application/vnd.oasis.opendocument.spreadsheet ods
-application/vnd.oasis.opendocument.spreadsheet-template ots
-application/vnd.oasis.opendocument.text odt
-application/vnd.oasis.opendocument.text-master odm
-application/vnd.oasis.opendocument.text-template ott
-application/vnd.oasis.opendocument.text-web oth
-application/vnd.openxmlformats-officedocument.wordprocessingml.document docx
-application/vnd.openxmlformats-officedocument.wordprocessingml.template dotx
-application/vnd.ms-word.document.macroEnabled.12 docm
-application/vnd.ms-word.template.macroEnabled.12 dotm
-application/vnd.openxmlformats-officedocument.presentationml.template potx
-application/vnd.openxmlformats-officedocument.presentationml.slideshow ppsx
-application/vnd.openxmlformats-officedocument.presentationml.presentation pptx
-application/vnd.ms-powerpoint.addin.macroEnabled.12 ppam
-application/vnd.ms-powerpoint.presentation.macroEnabled.12 pptm
-application/vnd.ms-powerpoint.presentation.macroEnabled.12 potm
-application/vnd.ms-powerpoint.slideshow.macroEnabled.12 ppsm
-application/vnd.openxmlformats-officedocument.spreadsheetml.sheet xlsx
-application/vnd.openxmlformats-officedocument.spreadsheetml.template xltx
-application/vnd.ms-excel.sheet.macroEnabled.12 xlsm
-application/vnd.ms-excel.template.macroEnabled.12 xltm
-application/vnd.ms-excel.addin.macroEnabled.12 xlam
-application/vnd.ms-excel.sheet.binary.macroEnabled.12 xlsb
-model/vnd.dwfx+xps dwfx
-application/vnd.ms-xpsdocument xps
-application/x-opc+zip docx dotx docm dotm potx ppsx pptx ppam pptm potm ppsm xlsx xltx xlsm xltm xlam xlsb dwfx xps
-chemical/x-mdl-molfile mol
-chemical/x-mdl-sdfile sdf
-chemical/x-mdl-rxnfile rxn
-chemical/x-mdl-rdfile rd
-chemical/x-mdl-rgfile rg
-application/x-amf amf
-application/sla stl
index 97c8954..cdef7e0 100644 (file)
@@ -63,7 +63,8 @@ if ( $ext == 'php' || $ext == 'php5' ) {
        return true;
 }
 $mime = false;
-$lines = explode( "\n", file_get_contents( "includes/mime.types" ) );
+// Borrow mime type file from MimeAnalyzer
+$lines = explode( "\n", file_get_contents( "includes/libs/mime/mime.types" ) );
 foreach ( $lines as $line ) {
        $exts = explode( " ", $line );
        $mime = array_shift( $exts );
index f054c0e..0ff903f 100644 (file)
@@ -319,6 +319,7 @@ class MediaWikiServicesTest extends MediaWikiTestCase {
                        'LinkRenderer' => [ 'LinkRenderer', LinkRenderer::class ],
                        'LinkRendererFactory' => [ 'LinkRendererFactory', LinkRendererFactory::class ],
                        '_MediaWikiTitleCodec' => [ '_MediaWikiTitleCodec', MediaWikiTitleCodec::class ],
+                       'MimeAnalyzer' => [ 'MimeAnalyzer', MimeAnalyzer::class ],
                        'TitleFormatter' => [ 'TitleFormatter', TitleFormatter::class ],
                        'TitleParser' => [ 'TitleParser', TitleParser::class ],
                        'ProxyLookup' => [ 'ProxyLookup', ProxyLookup::class ],
diff --git a/tests/phpunit/includes/MimeMagicTest.php b/tests/phpunit/includes/MimeMagicTest.php
deleted file mode 100644 (file)
index e00cf0c..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-<?php
-class MimeMagicTest extends PHPUnit_Framework_TestCase {
-
-       /** @var MimeMagic */
-       private $mimeMagic;
-
-       function setUp() {
-               $this->mimeMagic = MimeMagic::singleton();
-               parent::setUp();
-       }
-
-       /**
-        * @dataProvider providerImproveTypeFromExtension
-        * @param string $ext File extension (no leading dot)
-        * @param string $oldMime Initially detected MIME
-        * @param string $expectedMime MIME type after taking extension into account
-        */
-       function testImproveTypeFromExtension( $ext, $oldMime, $expectedMime ) {
-               $actualMime = $this->mimeMagic->improveTypeFromExtension( $oldMime, $ext );
-               $this->assertEquals( $expectedMime, $actualMime );
-       }
-
-       function providerImproveTypeFromExtension() {
-               return [
-                       [ 'gif', 'image/gif', 'image/gif' ],
-                       [ 'gif', 'unknown/unknown', 'unknown/unknown' ],
-                       [ 'wrl', 'unknown/unknown', 'model/vrml' ],
-                       [ 'txt', 'text/plain', 'text/plain' ],
-                       [ 'csv', 'text/plain', 'text/csv' ],
-                       [ 'tsv', 'text/plain', 'text/tab-separated-values' ],
-                       [ 'js', 'text/javascript', 'application/javascript' ],
-                       [ 'js', 'application/x-javascript', 'application/javascript' ],
-                       [ 'json', 'text/plain', 'application/json' ],
-                       [ 'foo', 'application/x-opc+zip', 'application/zip' ],
-                       [ 'docx', 'application/x-opc+zip',
-                               'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ],
-                       [ 'djvu', 'image/x-djvu', 'image/vnd.djvu' ],
-                       [ 'wav', 'audio/wav', 'audio/wav' ],
-               ];
-       }
-
-       /**
-        * Test to make sure that encoder=ffmpeg2theora doesn't trigger
-        * MEDIATYPE_VIDEO (bug 63584)
-        */
-       function testOggRecognize() {
-               $oggFile = __DIR__ . '/../data/media/say-test.ogg';
-               $actualType = $this->mimeMagic->getMediaType( $oggFile, 'application/ogg' );
-               $this->assertEquals( $actualType, MEDIATYPE_AUDIO );
-       }
-}
diff --git a/tests/phpunit/includes/libs/mime/MimeAnalyzerTest.php b/tests/phpunit/includes/libs/mime/MimeAnalyzerTest.php
new file mode 100644 (file)
index 0000000..85927a3
--- /dev/null
@@ -0,0 +1,62 @@
+<?php
+class MimeMagicTest extends PHPUnit_Framework_TestCase {
+       /** @var MimeAnalyzer */
+       private $mimeAnalyzer;
+
+       function setUp() {
+               global $IP;
+
+               $this->mimeAnalyzer = new MimeAnalyzer( [
+                       'infoFile' => $IP . "/includes/libs/mime/mime.info",
+                       'typeFile' => $IP . "/includes/libs/mime/mime.types",
+                       'xmlTypes' => [
+                               'http://www.w3.org/2000/svg:svg' => 'image/svg+xml',
+                               'svg' => 'image/svg+xml',
+                               'http://www.lysator.liu.se/~alla/dia/:diagram' => 'application/x-dia-diagram',
+                               'http://www.w3.org/1999/xhtml:html' => 'text/html', // application/xhtml+xml?
+                               'html' => 'text/html', // application/xhtml+xml?
+                       ]
+               ] );
+               parent::setUp();
+       }
+
+       /**
+        * @dataProvider providerImproveTypeFromExtension
+        * @param string $ext File extension (no leading dot)
+        * @param string $oldMime Initially detected MIME
+        * @param string $expectedMime MIME type after taking extension into account
+        */
+       function testImproveTypeFromExtension( $ext, $oldMime, $expectedMime ) {
+               $actualMime = $this->mimeAnalyzer->improveTypeFromExtension( $oldMime, $ext );
+               $this->assertEquals( $expectedMime, $actualMime );
+       }
+
+       function providerImproveTypeFromExtension() {
+               return [
+                       [ 'gif', 'image/gif', 'image/gif' ],
+                       [ 'gif', 'unknown/unknown', 'unknown/unknown' ],
+                       [ 'wrl', 'unknown/unknown', 'model/vrml' ],
+                       [ 'txt', 'text/plain', 'text/plain' ],
+                       [ 'csv', 'text/plain', 'text/csv' ],
+                       [ 'tsv', 'text/plain', 'text/tab-separated-values' ],
+                       [ 'js', 'text/javascript', 'application/javascript' ],
+                       [ 'js', 'application/x-javascript', 'application/javascript' ],
+                       [ 'json', 'text/plain', 'application/json' ],
+                       [ 'foo', 'application/x-opc+zip', 'application/zip' ],
+                       [ 'docx', 'application/x-opc+zip',
+                               'application/vnd.openxmlformats-officedocument.wordprocessingml.document' ],
+                       [ 'djvu', 'image/x-djvu', 'image/vnd.djvu' ],
+                       [ 'wav', 'audio/wav', 'audio/wav' ],
+               ];
+       }
+
+       /**
+        * Test to make sure that encoder=ffmpeg2theora doesn't trigger
+        * MEDIATYPE_VIDEO (bug 63584)
+        */
+       function testOggRecognize() {
+               $oggFile = __DIR__ . '/../../../data/media/say-test.ogg';
+               $actualType = $this->mimeAnalyzer->getMediaType( $oggFile, 'application/ogg' );
+               $this->assertEquals( $actualType, MEDIATYPE_AUDIO );
+       }
+}