- if ( self::$instance === null ) {
- self::$instance = new MimeMagic(
- ConfigFactory::getDefaultInstance()->makeConfig( 'main' )
- );
- }
- return self::$instance;
- }
-
- /**
-  * Registers additional entries for the MIME type to file extension list.
-  *
-  * The given text is appended, on a new line, to whatever extra types were
-  * registered before. Extension authors are encouraged to submit patches to
-  * MediaWiki's core adding new MIME types to mime.types instead.
-  *
-  * @param string $types Text to append to the extra types
-  */
- public function addExtraTypes( $types ) {
-     $this->mExtraTypes = $this->mExtraTypes . "\n" . $types;
- }
-
- /**
-  * Registers additional entries for the MIME type to media type list.
-  *
-  * The given text is appended, on a new line, to whatever extra info was
-  * registered before. Extension authors are encouraged to submit patches to
-  * MediaWiki's core adding new MIME info to mime.info instead.
-  *
-  * @param string $info Text to append to the extra info
-  */
- public function addExtraInfo( $info ) {
-     $this->mExtraInfo = $this->mExtraInfo . "\n" . $info;
- }
-
- /**
-  * Looks up the file extensions registered for a MIME type.
-  *
-  * The type is lowercased first. A direct entry in the MIME-to-extension
-  * map wins; otherwise the type is resolved through the alias table and
-  * the canonical type is looked up instead.
-  *
-  * @param string $mime
-  * @return string|null Space separated list of extensions, or null if the
-  *  MIME type was unrecognized
-  */
- public function getExtensionsForType( $mime ) {
-     $type = strtolower( $mime );
-
-     // Only fall back to the canonical name when there is no direct entry
-     if ( !isset( $this->mMimeToExt[$type] ) && isset( $this->mMimeTypeAliases[$type] ) ) {
-         $type = $this->mMimeTypeAliases[$type];
-     }
-
-     return isset( $this->mMimeToExt[$type] ) ? $this->mMimeToExt[$type] : null;
- }
-
- /**
-  * Looks up the MIME types registered for a file extension.
-  *
-  * The extension is lowercased before the lookup.
-  *
-  * @param string $ext
-  * @return string|null Space separated list of MIME types, or null if the
-  *  extension was unrecognized
-  */
- public function getTypesForExtension( $ext ) {
-     $key = strtolower( $ext );
-
-     if ( isset( $this->mExtToMime[$key] ) ) {
-         return $this->mExtToMime[$key];
-     }
-
-     return null;
- }
-
- /**
-  * Picks a single MIME type for a file extension.
-  *
-  * The result is the first entry of the list returned by
-  * getTypesForExtension( $ext ).
-  *
-  * @param string $ext
-  * @return string|null The MIME type, or null if the extension is unknown
-  */
- public function guessTypesForExtension( $ext ) {
-     $types = $this->getTypesForExtension( $ext );
-     if ( $types === null ) {
-         return null;
-     }
-
-     // Keep only what precedes the first whitespace of the trimmed list.
-     // TODO: Check if this is needed; strtok( $types, ' ' ) should be sufficient
-     return preg_replace( '/\s.*$/', '', trim( $types ) );
- }
-
- /**
-  * Tests if the extension matches the given MIME type.
-  *
-  * The extensions registered for the MIME type are obtained from
-  * getExtensionsForType(), and the given extension is lowercased before it
-  * is compared against them.
-  *
-  * @param string $extension
-  * @param string $mime
-  * @return bool|null True if a match was found, null if the MIME type is
-  *  unknown, and false if the MIME type is known but no matches were found.
-  */
- public function isMatchingExtension( $extension, $mime ) {
-     $ext = $this->getExtensionsForType( $mime );
-
-     if ( !$ext ) {
-         return null; // Unknown MIME type
-     }
-
-     // Both sides are strings, so compare strictly: a loose comparison would
-     // treat numeric-looking strings such as "1e1" and "10" as equal.
-     return in_array( strtolower( $extension ), explode( ' ', $ext ), true );
- }
-
- /**
-  * Returns true if the MIME type is known to represent an image format
-  * supported by the PHP GD library.
-  *
-  * The comparison is exact: $mime is not lowercased or otherwise
-  * normalized, and only strings can match.
-  *
-  * @param string $mime
-  *
-  * @return bool
-  */
- public function isPHPImageType( $mime ) {
-     // As defined by getimagesize and image_type_to_mime
-     static $types = [
-         'image/gif', 'image/jpeg', 'image/png',
-         'image/x-bmp', 'image/xbm', 'image/tiff',
-         'image/jp2', 'image/jpeg2000', 'image/iff',
-         'image/x-xbitmap',
-         'image/vnd.wap.wbmp', 'image/vnd.xiff',
-         'image/x-photoshop',
-         'application/x-shockwave-flash',
-     ];
-
-     // Strict check so that non-string input (e.g. 0) can never match
-     return in_array( $mime, $types, true );
- }
-
- /**
-  * Returns true if the extension represents a type which can
-  * be reliably detected from its content. Use this to determine
-  * whether strict content checks should be applied to reject
-  * invalid uploads; if we can't identify the type we won't
-  * be able to say if it's invalid.
-  *
-  * The extension is lowercased before it is looked up.
-  *
-  * @todo Be more accurate when using fancy MIME detector plugins;
-  *   right now this is the bare minimum getimagesize() list.
-  * @param string $extension
-  * @return bool
-  */
- public function isRecognizableExtension( $extension ) {
-     static $types = [
-         // Types recognized by getimagesize()
-         'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd',
-         'bmp', 'tiff', 'tif', 'jpc', 'jp2',
-         'jpx', 'jb2', 'swc', 'iff', 'wbmp',
-         'xbm',
-
-         // Formats we recognize magic numbers for
-         'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
-         'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
-         'webp',
-
-         // XML formats we sure hope we recognize reliably
-         'svg',
-     ];
-
-     return in_array( strtolower( $extension ), $types, true );
- }
-
- /**
- * Improves a MIME type using the file extension. Some file formats are very generic,
- * so their MIME type is not very meaningful. A more useful MIME type can be derived
- * by looking at the file extension. Typically, this method would be called on the
- * result of guessMimeType().
- *
- * Three input types are refined: 'unknown/unknown', 'application/x-opc+zip', and
- * 'text/plain' when the extension maps to a textual media type. Any other type is
- * passed on unchanged. In all cases the MimeMagicImproveFromExtension hook is run
- * afterwards and the result is resolved through the MIME type alias table.
- *
- * @param string $mime The MIME type, typically guessed from a file's content.
- * @param string $ext The file extension, as taken from the file name
- *
- * @return string|null The MIME type. This is null when the type is taken from the
- *  extension and guessTypesForExtension() does not know that extension, unless the
- *  hook replaces the value.
- */
- public function improveTypeFromExtension( $mime, $ext ) {
- if ( $mime === 'unknown/unknown' ) {
- if ( $this->isRecognizableExtension( $ext ) ) {
- // Content detection should have identified this extension, so the
- // type is deliberately left as 'unknown/unknown'.
- wfDebug( __METHOD__ . ': refusing to guess mime type for .' .
- "$ext file, we should have recognized it\n" );
- } else {
- // Not something we can detect, so simply
- // trust the file extension
- // (guessTypesForExtension() returns null for an unknown extension)
- $mime = $this->guessTypesForExtension( $ext );
- }
- } elseif ( $mime === 'application/x-opc+zip' ) {
- if ( $this->isMatchingExtension( $ext, $mime ) ) {
- // A known file extension for an OPC file,
- // find the proper MIME type for that file extension
- $mime = $this->guessTypesForExtension( $ext );
- } else {
- // Extension is not registered for OPC: fall back to plain ZIP
- wfDebug( __METHOD__ . ": refusing to guess better type for $mime file, " .
- ".$ext is not a known OPC extension.\n" );
- $mime = 'application/zip';
- }
- } elseif ( $mime === 'text/plain' && $this->findMediaType( ".$ext" ) === MEDIATYPE_TEXT ) {
- // Textual types are sometimes not recognized properly.
- // If detected as text/plain, and has an extension which is textual
- // improve to the extension's type. For example, csv and json are often
- // misdetected as text/plain.
- $mime = $this->guessTypesForExtension( $ext );
- }
-
- # Media handling extensions can improve the MIME detected
- # ($mime is passed by reference, so a hook handler may replace it)
- Hooks::run( 'MimeMagicImproveFromExtension', [ $this, $ext, &$mime ] );
-
- // Resolve aliases last so that a type set by a hook is canonicalized too
- if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
- $mime = $this->mMimeTypeAliases[$mime];
- }
-
- wfDebug( __METHOD__ . ": improved mime type for .$ext: $mime\n" );
- return $mime;
- }
-
- /**
-  * MIME type detection. The internal content-based guess (doGuessMimeType)
-  * is tried first, which applies additional checks to determine some well
-  * known file formats that may be missed or misinterpreted by the default
-  * MIME detection (namely XML based formats like XHTML or SVG, as well as
-  * ZIP based formats like OPC/ODF files). If that yields nothing,
-  * detectMimeType is used instead. The result is finally resolved through
-  * the MIME type alias table.
-  *
-  * @param string $file The file to check
-  * @param string|bool $ext The file extension, or true (default) to extract it from the filename.
-  *   Set it to false to ignore the extension. DEPRECATED! Set to false, use
-  *   improveTypeFromExtension($mime, $ext) later to improve MIME type.
-  *
-  * @return string The MIME type of $file
-  */
- public function guessMimeType( $file, $ext = true ) {
-     // TODO: make $ext default to false. Or better, remove it.
-     if ( $ext ) {
-         wfDebug( __METHOD__ . ": WARNING: use of the \$ext parameter is deprecated. " .
-             "Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
-     }
-
-     $mime = $this->doGuessMimeType( $file, $ext );
-     if ( !$mime ) {
-         // Nothing was recognized internally; defer to detectMimeType()
-         wfDebug( __METHOD__ . ": internal type detection failed for $file (.$ext)...\n" );
-         $mime = $this->detectMimeType( $file, $ext );
-     }
-
-     // Report the canonical name when the detected type is a known alias
-     $mime = isset( $this->mMimeTypeAliases[$mime] ) ? $this->mMimeTypeAliases[$mime] : $mime;
-
-     wfDebug( __METHOD__ . ": guessed mime type of $file: $mime\n" );
-     return $mime;
- }
-
- /**
- * Guess the MIME type from the file contents.
- *
- * @todo Remove $ext param
- *
- * @param string $file
- * @param mixed $ext
- * @return bool|string
- * @throws MWException
- */
- private function doGuessMimeType( $file, $ext ) {
- // Read a chunk of the file
- MediaWiki\suppressWarnings();
- $f = fopen( $file, 'rb' );
- MediaWiki\restoreWarnings();
-
- if ( !$f ) {
- return 'unknown/unknown';
- }
-
- $fsize = filesize( $file );
- if ( $fsize === false ) {
- return 'unknown/unknown';
- }
-
- $head = fread( $f, 1024 );
- $tailLength = min( 65558, $fsize ); // 65558 = maximum size of a zip EOCDR
- if ( fseek( $f, -1 * $tailLength, SEEK_END ) === -1 ) {
- throw new MWException(
- "Seeking $tailLength bytes from EOF failed in " . __METHOD__ );
- }
- $tail = fread( $f, $tailLength );
- fclose( $f );
-
- wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" );
-
- // Hardcode a few magic number checks...
- $headers = [
- // Multimedia...
- 'MThd' => 'audio/midi',
- 'OggS' => 'application/ogg',
-
- // Image formats...
- // Note that WMF may have a bare header, no magic number.
- "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
- "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
- '%PDF' => 'application/pdf',
- 'gimp xcf' => 'image/x-xcf',
-
- // Some forbidden fruit...
- 'MZ' => 'application/octet-stream', // DOS/Windows executable
- "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
- "\x7fELF" => 'application/octet-stream', // ELF binary
- ];
-
- foreach ( $headers as $magic => $candidate ) {
- if ( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
- wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
- return $candidate;
- }
- }
-
- /* Look for WebM and Matroska files */
- if ( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
- $doctype = strpos( $head, "\x42\x82" );
- if ( $doctype ) {
- // Next byte is datasize, then data (sizes larger than 1 byte are very stupid muxers)
- $data = substr( $head, $doctype + 3, 8 );
- if ( strncmp( $data, "matroska", 8 ) == 0 ) {
- wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" );
- return "video/x-matroska";
- } elseif ( strncmp( $data, "webm", 4 ) == 0 ) {
- wfDebug( __METHOD__ . ": recognized file as video/webm\n" );
- return "video/webm";
- }
- }
- wfDebug( __METHOD__ . ": unknown EBML file\n" );
- return "unknown/unknown";
- }
-
- /* Look for WebP */
- if ( strncmp( $head, "RIFF", 4 ) == 0 && strncmp( substr( $head, 8, 7 ), "WEBPVP8", 7 ) == 0 ) {
- wfDebug( __METHOD__ . ": recognized file as image/webp\n" );
- return "image/webp";
- }
-
- /**
- * Look for PHP. Check for this before HTML/XML... Warning: this is a
- * heuristic, and won't match a file with a lot of non-PHP before. It
- * will also match text files which could be PHP. :)
- *
- * @todo FIXME: For this reason, the check is probably useless -- an attacker
- * could almost certainly just pad the file with a lot of nonsense to
- * circumvent the check in any case where it would be a security
- * problem. On the other hand, it causes harmful false positives (bug
- * 16583). The heuristic has been cut down to exclude three-character
- * strings like "<? ", but should it be axed completely?
- */
- if ( ( strpos( $head, '<?php' ) !== false ) ||
- ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
- ( strpos( $head, "<\x00?\x00 " ) !== false ) ||
- ( strpos( $head, "<\x00?\x00\n" ) !== false ) ||
- ( strpos( $head, "<\x00?\x00\t" ) !== false ) ||
- ( strpos( $head, "<\x00?\x00=" ) !== false ) ) {
-
- wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
- return 'application/x-php';
- }
-
- /**
- * look for XML formats (XHTML and SVG)
- */
- $xml = new XmlTypeCheck( $file );
- if ( $xml->wellFormed ) {
- $xmlMimeTypes = $this->mConfig->get( 'XMLMimeTypes' );
- if ( isset( $xmlMimeTypes[$xml->getRootElement()] ) ) {
- return $xmlMimeTypes[$xml->getRootElement()];
- } else {
- return 'application/xml';
- }
- }
-
- /**
- * look for shell scripts
- */
- $script_type = null;
-
- # detect by shebang
- if ( substr( $head, 0, 2 ) == "#!" ) {
- $script_type = "ASCII";
- } elseif ( substr( $head, 0, 5 ) == "\xef\xbb\xbf#!" ) {
- $script_type = "UTF-8";
- } elseif ( substr( $head, 0, 7 ) == "\xfe\xff\x00#\x00!" ) {
- $script_type = "UTF-16BE";
- } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) {
- $script_type = "UTF-16LE";
- }
-
- if ( $script_type ) {
- if ( $script_type !== "UTF-8" && $script_type !== "ASCII" ) {
- // Quick and dirty fold down to ASCII!
- $pack = [ 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ];
- $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
- $head = '';
- foreach ( $chars as $codepoint ) {
- if ( $codepoint < 128 ) {
- $head .= chr( $codepoint );
- } else {
- $head .= '?';
- }
- }
- }
-
- $match = [];
-
- if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) {
- $mime = "application/x-{$match[2]}";
- wfDebug( __METHOD__ . ": shell script recognized as $mime\n" );
- return $mime;
- }
- }
-
- // Check for ZIP variants (before getimagesize)
- if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
- wfDebug( __METHOD__ . ": ZIP header present in $file\n" );
- return $this->detectZipType( $head, $tail, $ext );
- }
-
- MediaWiki\suppressWarnings();
- $gis = getimagesize( $file );
- MediaWiki\restoreWarnings();
-
- if ( $gis && isset( $gis['mime'] ) ) {
- $mime = $gis['mime'];
- wfDebug( __METHOD__ . ": getimagesize detected $file as $mime\n" );
- return $mime;
- }
-
- // Also test DjVu
- $deja = new DjVuImage( $file );
- if ( $deja->isValid() ) {
- wfDebug( __METHOD__ . ": detected $file as image/vnd.djvu\n" );
- return 'image/vnd.djvu';
- }
-
- # Media handling extensions can guess the MIME by content
- # It's intentionally here so that if core is wrong about a type (false positive),
- # people will hopefully nag and submit patches :)
- $mime = false;
- # Some strings by reference for performance - assuming well-behaved hooks
- Hooks::run(
- 'MimeMagicGuessFromContent',
- [ $this, &$head, &$tail, $file, &$mime ]