X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FMimeMagic.php;h=39c82c9dc4f7bf78f94346acef263e2bf6d27494;hb=4a89127e92100bafb63cf47e8e2053103b3bf610;hp=eb44e9fa9147b52fc3a9511457bacaad1e794382;hpb=5e5d1f684bc10738610ca5475d5cb2d1267fe0e6;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/MimeMagic.php b/includes/MimeMagic.php index eb44e9fa91..39c82c9dc4 100644 --- a/includes/MimeMagic.php +++ b/includes/MimeMagic.php @@ -9,8 +9,24 @@ * the file mime.types in the includes directory. */ define('MM_WELL_KNOWN_MIME_TYPES',<<getTypesForExtension( $ext ); - if ( is_null( $m ) ) return NULL; + if ( is_null( $m ) ) return null; $m = trim( $m ); $m = preg_replace( '/\s.*$/', '', $m ); @@ -307,7 +345,7 @@ class MimeMagic { $ext = $this->getExtensionsForType( $mime ); if ( !$ext ) { - return NULL; //unknown + return null; //unknown } $ext = explode( ' ', $ext ); @@ -351,10 +389,17 @@ class MimeMagic { */ function isRecognizableExtension( $extension ) { static $types = array( + // Types recognized by getimagesize() 'gif', 'jpeg', 'jpg', 'png', 'swf', 'psd', 'bmp', 'tiff', 'tif', 'jpc', 'jp2', 'jpx', 'jb2', 'swc', 'iff', 'wbmp', - 'xbm', 'djvu' + 'xbm', + + // Formats we recognize magic numbers for + 'djvu', 'ogg', 'ogv', 'mid', 'pdf', 'wmf', 'xcf', + + // XML formats we sure hope we recognize reliably + 'svg', ); return in_array( strtolower( $extension ), $types ); } @@ -365,154 +410,219 @@ class MimeMagic { * or misinterpreter by the default mime detection (namely xml based formats like XHTML or SVG). * * @param string $file The file to check - * @param mixed $ext The file extension, or true to extract it from the filename. + * @param mixed $ext The file extension, or true to extract it from the filename. * Set it to false to ignore the extension. * * @return string the mime type of $file */ function guessMimeType( $file, $ext = true ) { - $mime = $this->detectMimeType( $file, $ext ); + $mime = $this->doGuessMimeType( $file, $ext ); + + if( !$mime ) { + wfDebug( __METHOD__.": internal type detection failed for $file (.$ext)...\n" ); + $mime = $this->detectMimeType( $file, $ext ); + } + + if ( isset( $this->mMimeTypeAliases[$mime] ) ) { + $mime = $this->mMimeTypeAliases[$mime]; + } + wfDebug(__METHOD__.": final mime type of $file: $mime\n"); + return $mime; + } + + function doGuessMimeType( $file, $ext = true ) { // Read a chunk of the file + wfSuppressWarnings(); $f = fopen( $file, "rt" ); + wfRestoreWarnings(); if( !$f ) return "unknown/unknown"; $head = fread( $f, 1024 ); + fseek( $f, -65558, SEEK_END ); + $tail = fread( $f, 65558 ); // 65558 = maximum size of a zip EOCDR fclose( $f ); - $sub4 = substr( $head, 0, 4 ); - if ( $sub4 == "\x01\x00\x09\x00" || $sub4 == "\xd7\xcd\xc6\x9a" ) { - // WMF kill kill kill - // Note that WMF may have a bare header, no magic number. - // The former of the above two checks is theoretically prone to false positives - $mime = "application/x-msmetafile"; - } - - if ( strpos( $mime, "text/" ) === 0 || $mime === "application/xml" ) { - - $xml_type = NULL; - $script_type = NULL; - - /* - * look for XML formats (XHTML and SVG) - */ - if ($mime === "text/sgml" || - $mime === "text/plain" || - $mime === "text/html" || - $mime === "text/xml" || - $mime === "application/xml") { - - if ( substr( $head, 0, 5 ) == " 'audio/midi', + 'OggS' => 'application/ogg', - if ( preg_match( '%%sim', - $head, $match ) ) { - $doctype = $match[1]; - } - if ( preg_match( '%<(\w+).*>%sim', $head, $match ) ) { - $tag = $match[1]; - } - - #print "
ANALYSING $file ($mime): doctype= $doctype; tag= $tag
"; + // Image formats... + // Note that WMF may have a bare header, no magic number. + "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives? + "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile', + '%PDF' => 'application/pdf', + 'gimp xcf' => 'image/x-xcf', + + // Some forbidden fruit... + 'MZ' => 'application/octet-stream', // DOS/Windows executable + "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary + "\x7fELF" => 'application/octet-stream', // ELF binary + ); - if ( strpos( $doctype, "-//W3C//DTD SVG" ) === 0 ) { - $mime = "image/svg+xml"; - } elseif ( $tag === "svg" ) { - $mime = "image/svg+xml"; - } elseif ( strpos( $doctype, "-//W3C//DTD XHTML" ) === 0 ) { - $mime = "text/html"; - } elseif ( $tag === "html" ) { - $mime = "text/html"; - } - } + foreach( $headers as $magic => $candidate ) { + if( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) { + wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" ); + return $candidate; } + } - /* - * look for shell scripts - */ - if ( !$xml_type ) { - $script_type = NULL; - - # detect by shebang - if ( substr( $head, 0, 2) == "#!" ) { - $script_type = "ASCII"; - } elseif ( substr( $head, 0, 5) == "\xef\xbb\xbf#!" ) { - $script_type = "UTF-8"; - } elseif ( substr( $head, 0, 7) == "\xfe\xff\x00#\x00!" ) { - $script_type = "UTF-16BE"; - } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) { - $script_type= "UTF-16LE"; - } + /* + * Look for PHP. Check for this before HTML/XML... Warning: this is a + * heuristic, and won't match a file with a lot of non-PHP before. It + * will also match text files which could be PHP. :) + * + * FIXME: For this reason, the check is probably useless -- an attacker + * could almost certainly just pad the file with a lot of nonsense to + * circumvent the check in any case where it would be a security + * problem. On the other hand, it causes harmful false positives (bug + * 16583). The heuristic has been cut down to exclude three-character + * strings like "wellFormed ) { + global $wgXMLMimeTypes; + if( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) { + return $wgXMLMimeTypes[$xml->getRootElement()]; + } else { + return 'application/xml'; + } + } - $match = array(); + /* + * look for shell scripts + */ + $script_type = null; + + # detect by shebang + if ( substr( $head, 0, 2) == "#!" ) { + $script_type = "ASCII"; + } elseif ( substr( $head, 0, 5) == "\xef\xbb\xbf#!" ) { + $script_type = "UTF-8"; + } elseif ( substr( $head, 0, 7) == "\xfe\xff\x00#\x00!" ) { + $script_type = "UTF-16BE"; + } elseif ( substr( $head, 0, 7 ) == "\xff\xfe#\x00!" ) { + $script_type= "UTF-16LE"; + } - if ( preg_match( '%/?([^\s]+/)(\w+)%', $head, $match ) ) { - $mime = "application/x-{$match[2]}"; + if ( $script_type ) { + if ( $script_type !== "UTF-8" && $script_type !== "ASCII") { + // Quick and dirty fold down to ASCII! + $pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' ); + $chars = unpack( $pack[$script_type], substr( $head, 2 ) ); + $head = ''; + foreach( $chars as $codepoint ) { + if( $codepoint < 128 ) { + $head .= chr( $codepoint ); + } else { + $head .= '?'; } } } - /* - * look for PHP - */ - if( !$xml_type && !$script_type ) { + $match = array(); - if( ( strpos( $head, 'detectZipType( $head ); + } - $mime = "application/x-php"; - } - } + wfSuppressWarnings(); + $gis = getimagesize( $file ); + wfRestoreWarnings(); + if( $gis && isset( $gis['mime'] ) ) { + $mime = $gis['mime']; + wfDebug( __METHOD__.": getimagesize detected $file as $mime\n" ); + return $mime; } - if ( isset( $this->mMimeTypeAliases[$mime] ) ) { - $mime = $this->mMimeTypeAliases[$mime]; + // Also test DjVu + $deja = new DjVuImage( $file ); + if( $deja->isValid() ) { + wfDebug( __METHOD__.": detected $file as image/vnd.djvu\n" ); + return 'image/vnd.djvu'; } - wfDebug(__METHOD__.": final mime type of $file: $mime\n"); - return $mime; + return false; + } + + /** + * Detect application-specific file type of a given ZIP file from its + * header data. Currently works for OpenDocument types... + * If can't tell, returns 'application/zip'. + * + * @param string $header Some reasonably-sized chunk of file header + * @return string + */ + function detectZipType( $header ) { + $opendocTypes = array( + 'chart-template', + 'chart', + 'formula-template', + 'formula', + 'graphics-template', + 'graphics', + 'image-template', + 'image', + 'presentation-template', + 'presentation', + 'spreadsheet-template', + 'spreadsheet', + 'text-template', + 'text-master', + 'text-web', + 'text' ); + + // http://lists.oasis-open.org/archives/office/200505/msg00006.html + $types = '(?:' . implode( '|', $opendocTypes ) . ')'; + $opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/"; + wfDebug( __METHOD__.": $opendocRegex\n" ); + + if( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) { + $mime = $matches[1]; + wfDebug( __METHOD__.": detected $mime from ZIP archive\n" ); + return $mime; + } else { + wfDebug( __METHOD__.": unable to identify type of ZIP archive\n" ); + return 'application/zip'; + } } /** Internal mime type detection, please use guessMimeType() for application code instead. * Detection is done using an external program, if $wgMimeDetectorCommand is set. * Otherwise, the fileinfo extension and mime_content_type are tried (in this order), if they are available. - * If the dections fails and $ext is not false, the mime type is guessed from the file extension, using + * If the dections fails and $ext is not false, the mime type is guessed from the file extension, using * guessTypesForExtension. * If the mime type is still unknown, getimagesize is used to detect the mime type if the file is an image. * If no mime type can be determined, this function returns "unknown/unknown". * * @param string $file The file to check - * @param mixed $ext The file extension, or true to extract it from the filename. + * @param mixed $ext The file extension, or true to extract it from the filename. * Set it to false to ignore the extension. * * @return string the mime type of $file @@ -521,7 +631,7 @@ class MimeMagic { function detectMimeType( $file, $ext = true ) { global $wgMimeDetectorCommand; - $m = NULL; + $m = null; if ( $wgMimeDetectorCommand ) { $fn = wfEscapeShellArg( $file ); $m = `$wgMimeDetectorCommand $fn`; @@ -557,15 +667,6 @@ class MimeMagic { # see http://www.php.net/manual/en/ref.mime-magic.php for details. $m = mime_content_type($file); - - if ( $m == 'text/plain' ) { - // mime_content_type sometimes considers DJVU files to be text/plain. - $deja = new DjVuImage( $file ); - if( $deja->isValid() ) { - wfDebug( __METHOD__.": (re)detected $file as image/vnd.djvu\n" ); - $m = 'image/vnd.djvu'; - } - } } else { wfDebug( __METHOD__.": no magic mime detector found!\n" ); } @@ -577,73 +678,27 @@ class MimeMagic { $m = strtolower( $m ); if ( strpos( $m, 'unknown' ) !== false ) { - $m = NULL; + $m = null; } else { wfDebug( __METHOD__.": magic mime type of $file: $m\n" ); return $m; } } - # if still not known, use getimagesize to find out the type of image - # TODO: skip things that do not have a well-known image extension? Would that be safe? - wfSuppressWarnings(); - $gis = getimagesize( $file ); - wfRestoreWarnings(); - - $notAnImage = false; - - if ( $gis && is_array($gis) && $gis[2] ) { - - switch ( $gis[2] ) { - case IMAGETYPE_GIF: $m = "image/gif"; break; - case IMAGETYPE_JPEG: $m = "image/jpeg"; break; - case IMAGETYPE_PNG: $m = "image/png"; break; - case IMAGETYPE_SWF: $m = "application/x-shockwave-flash"; break; - case IMAGETYPE_PSD: $m = "application/photoshop"; break; - case IMAGETYPE_BMP: $m = "image/bmp"; break; - case IMAGETYPE_TIFF_II: $m = "image/tiff"; break; - case IMAGETYPE_TIFF_MM: $m = "image/tiff"; break; - case IMAGETYPE_JPC: $m = "image"; break; - case IMAGETYPE_JP2: $m = "image/jpeg2000"; break; - case IMAGETYPE_JPX: $m = "image/jpeg2000"; break; - case IMAGETYPE_JB2: $m = "image"; break; - case IMAGETYPE_SWC: $m = "application/x-shockwave-flash"; break; - case IMAGETYPE_IFF: $m = "image/vnd.xiff"; break; - case IMAGETYPE_WBMP: $m = "image/vnd.wap.wbmp"; break; - case IMAGETYPE_XBM: $m = "image/x-xbitmap"; break; - } - - if ( $m ) { - wfDebug( __METHOD__.": image mime type of $file: $m\n" ); - return $m; - } - else { - $notAnImage = true; - } - } else { - // Also test DjVu - $deja = new DjVuImage( $file ); - if( $deja->isValid() ) { - wfDebug( __METHOD__.": detected $file as image/vnd.djvu\n" ); - return 'image/vnd.djvu'; - } - } - # if desired, look at extension as a fallback. if ( $ext === true ) { $i = strrpos( $file, '.' ); $ext = strtolower( $i ? substr( $file, $i + 1 ) : '' ); } if ( $ext ) { - $m = $this->guessTypesForExtension( $ext ); - - # TODO: if $notAnImage is set, do not trust the file extension if - # the results is one of the image types that should have been recognized - # by getimagesize - - if ( $m ) { - wfDebug( __METHOD__.": extension mime type of $file: $m\n" ); - return $m; + if( $this->isRecognizableExtension( $ext ) ) { + wfDebug( __METHOD__. ": refusing to guess mime type for .$ext file, we should have recognized it\n" ); + } else { + $m = $this->guessTypesForExtension( $ext ); + if ( $m ) { + wfDebug( __METHOD__.": extension mime type of $file: $m\n" ); + return $m; + } } } @@ -668,7 +723,7 @@ class MimeMagic { * * @return (int?string?) a value to be used with the MEDIATYPE_xxx constants. */ - function getMediaType( $path = NULL, $mime = NULL ) { + function getMediaType( $path = null, $mime = null ) { if( !$mime && !$path ) return MEDIATYPE_UNKNOWN; # If mime type is unknown, guess it @@ -701,7 +756,7 @@ class MimeMagic { } # Check for entry for file extension - $e = NULL; + $e = null; if ( $path ) { $i = strrpos( $path, '.' ); $e = strtolower( $i ? substr( $path, $i + 1 ) : '' ); @@ -740,7 +795,7 @@ class MimeMagic { if ( !$m ) return MEDIATYPE_UNKNOWN; $m = explode( ' ', $m ); - } else { + } else { # Normalize mime type if ( isset( $this->mMimeTypeAliases[$extMime] ) ) { $extMime = $this->mMimeTypeAliases[$extMime]; @@ -759,6 +814,27 @@ class MimeMagic { return MEDIATYPE_UNKNOWN; } -} -?> + /** + * Get the MIME types that various versions of Internet Explorer would + * detect from a chunk of the content. + * + * @param string $fileName The file name (unused at present) + * @param string $chunk The first 256 bytes of the file + * @param string $proposed The MIME type proposed by the server + */ + public function getIEMimeTypes( $fileName, $chunk, $proposed ) { + $ca = $this->getIEContentAnalyzer(); + return $ca->getRealMimesFromData( $fileName, $chunk, $proposed ); + } + + /** + * Get a cached instance of IEContentAnalyzer + */ + protected function getIEContentAnalyzer() { + if ( is_null( $this->mIEAnalyzer ) ) { + $this->mIEAnalyzer = new IEContentAnalyzer; + } + return $this->mIEAnalyzer; + } +}