Fix documentation introduced in r64403
[lhc/web/wiklou.git] / includes / IEContentAnalyzer.php
index 9a83b17..a2ef1a0 100644 (file)
-<?php\r
-\r
-/**\r
- * This class simulates Microsoft Internet Explorer's terribly broken and \r
- * insecure MIME type detection algorithm. It can be used to check web uploads\r
- * with an apparently safe type, to see if IE will reinterpret them to produce \r
- * something dangerous.\r
- *\r
- * It is full of bugs and strange design choices and should not under any \r
- * circumstances be used to determine a MIME type to present to a user or \r
- * client. (Apple Safari developers, this means you too.)\r
- *\r
- * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have \r
- * attempted to ensure that this code works in exactly the same way as Internet \r
- * Explorer, it does not share any source code, or creative choices such as \r
- * variable names, thus I (Tim Starling) claim copyright on it. \r
- *\r
- * It may be redistributed without restriction. To aid reuse, this class does\r
- * not depend on any MediaWiki module.\r
- */\r
-class IEContentAnalyzer {\r
-       /**\r
-        * Relevant data taken from the type table in IE 5\r
-        */\r
-       protected $baseTypeTable = array(\r
-               'ambiguous' /*1*/ => array(\r
-                       'text/plain', \r
-                       'application/octet-stream', \r
-                       'application/x-netcdf', // [sic]\r
-               ),\r
-               'text' /*3*/ => array(\r
-                       'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',\r
-                       'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'\r
-               ),\r
-               'binary' /*4*/ => array(\r
-                       'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',\r
-                       'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp', \r
-                       'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi', \r
-                       'video/x-msvideo', 'video/mpeg', 'application/x-compressed',\r
-                       'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',\r
-                       'application/x-msdownload'\r
-               ),\r
-               'html' /*5*/ => array( 'text/html' ),\r
-       );\r
-\r
-       /**\r
-        * Changes to the type table in later versions of IE\r
-        */\r
-       protected $addedTypes = array(\r
-               'ie07' => array(\r
-                       'text' => array( 'text/xml', 'application/xml' )\r
-               ),\r
-       );\r
-\r
-       /**\r
-        * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a\r
-        * typical Windows installation.\r
-        *\r
-        * Used for extension to MIME type mapping if detection fails.\r
-        */\r
-       protected $registry = array(\r
-               '.323' => 'text/h323',\r
-               '.3g2' => 'video/3gpp2',\r
-               '.3gp' => 'video/3gpp',\r
-               '.3gp2' => 'video/3gpp2',\r
-               '.3gpp' => 'video/3gpp',\r
-               '.aac' => 'audio/aac',\r
-               '.ac3' => 'audio/ac3',\r
-               '.accda' => 'application/msaccess',\r
-               '.accdb' => 'application/msaccess',\r
-               '.accdc' => 'application/msaccess',\r
-               '.accde' => 'application/msaccess',\r
-               '.accdr' => 'application/msaccess',\r
-               '.accdt' => 'application/msaccess',\r
-               '.ade' => 'application/msaccess',\r
-               '.adp' => 'application/msaccess',\r
-               '.adts' => 'audio/aac',\r
-               '.ai' => 'application/postscript',\r
-               '.aif' => 'audio/aiff',\r
-               '.aifc' => 'audio/aiff',\r
-               '.aiff' => 'audio/aiff',\r
-               '.amc' => 'application/x-mpeg',\r
-               '.application' => 'application/x-ms-application',\r
-               '.asf' => 'video/x-ms-asf',\r
-               '.asx' => 'video/x-ms-asf',\r
-               '.au' => 'audio/basic',\r
-               '.avi' => 'video/avi',\r
-               '.bmp' => 'image/bmp',\r
-               '.caf' => 'audio/x-caf',\r
-               '.cat' => 'application/vnd.ms-pki.seccat',\r
-               '.cbo' => 'application/sha',\r
-               '.cdda' => 'audio/aiff',\r
-               '.cer' => 'application/x-x509-ca-cert',\r
-               '.conf' => 'text/plain',\r
-               '.crl' => 'application/pkix-crl',\r
-               '.crt' => 'application/x-x509-ca-cert',\r
-               '.css' => 'text/css',\r
-               '.csv' => 'application/vnd.ms-excel',\r
-               '.der' => 'application/x-x509-ca-cert',\r
-               '.dib' => 'image/bmp',\r
-               '.dif' => 'video/x-dv',\r
-               '.dll' => 'application/x-msdownload',\r
-               '.doc' => 'application/msword',\r
-               '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',\r
-               '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',\r
-               '.dot' => 'application/msword',\r
-               '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',\r
-               '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',\r
-               '.dv' => 'video/x-dv',\r
-               '.dwfx' => 'model/vnd.dwfx+xps',\r
-               '.edn' => 'application/vnd.adobe.edn',\r
-               '.eml' => 'message/rfc822',\r
-               '.eps' => 'application/postscript',\r
-               '.etd' => 'application/x-ebx',\r
-               '.exe' => 'application/x-msdownload',\r
-               '.fdf' => 'application/vnd.fdf',\r
-               '.fif' => 'application/fractals',\r
-               '.gif' => 'image/gif',\r
-               '.gsm' => 'audio/x-gsm',\r
-               '.hqx' => 'application/mac-binhex40',\r
-               '.hta' => 'application/hta',\r
-               '.htc' => 'text/x-component',\r
-               '.htm' => 'text/html',\r
-               '.html' => 'text/html',\r
-               '.htt' => 'text/webviewhtml',\r
-               '.hxa' => 'application/xml',\r
-               '.hxc' => 'application/xml',\r
-               '.hxd' => 'application/octet-stream',\r
-               '.hxe' => 'application/xml',\r
-               '.hxf' => 'application/xml',\r
-               '.hxh' => 'application/octet-stream',\r
-               '.hxi' => 'application/octet-stream',\r
-               '.hxk' => 'application/xml',\r
-               '.hxq' => 'application/octet-stream',\r
-               '.hxr' => 'application/octet-stream',\r
-               '.hxs' => 'application/octet-stream',\r
-               '.hxt' => 'application/xml',\r
-               '.hxv' => 'application/xml',\r
-               '.hxw' => 'application/octet-stream',\r
-               '.ico' => 'image/x-icon',\r
-               '.iii' => 'application/x-iphone',\r
-               '.ins' => 'application/x-internet-signup',\r
-               '.iqy' => 'text/x-ms-iqy',\r
-               '.isp' => 'application/x-internet-signup',\r
-               '.jfif' => 'image/jpeg',\r
-               '.jnlp' => 'application/x-java-jnlp-file',\r
-               '.jpe' => 'image/jpeg',\r
-               '.jpeg' => 'image/jpeg',\r
-               '.jpg' => 'image/jpeg',\r
-               '.jtx' => 'application/x-jtx+xps',\r
-               '.latex' => 'application/x-latex',\r
-               '.log' => 'text/plain',\r
-               '.m1v' => 'video/mpeg',\r
-               '.m2v' => 'video/mpeg',\r
-               '.m3u' => 'audio/x-mpegurl',\r
-               '.mac' => 'image/x-macpaint',\r
-               '.man' => 'application/x-troff-man',\r
-               '.mda' => 'application/msaccess',\r
-               '.mdb' => 'application/msaccess',\r
-               '.mde' => 'application/msaccess',\r
-               '.mfp' => 'application/x-shockwave-flash',\r
-               '.mht' => 'message/rfc822',\r
-               '.mhtml' => 'message/rfc822',\r
-               '.mid' => 'audio/mid',\r
-               '.midi' => 'audio/mid',\r
-               '.mod' => 'video/mpeg',\r
-               '.mov' => 'video/quicktime',\r
-               '.mp2' => 'video/mpeg',\r
-               '.mp2v' => 'video/mpeg',\r
-               '.mp3' => 'audio/mpeg',\r
-               '.mp4' => 'video/mp4',\r
-               '.mpa' => 'video/mpeg',\r
-               '.mpe' => 'video/mpeg',\r
-               '.mpeg' => 'video/mpeg',\r
-               '.mpf' => 'application/vnd.ms-mediapackage',\r
-               '.mpg' => 'video/mpeg',\r
-               '.mpv2' => 'video/mpeg',\r
-               '.mqv' => 'video/quicktime',\r
-               '.NMW' => 'application/nmwb',\r
-               '.nws' => 'message/rfc822',\r
-               '.odc' => 'text/x-ms-odc',\r
-               '.ols' => 'application/vnd.ms-publisher',\r
-               '.p10' => 'application/pkcs10',\r
-               '.p12' => 'application/x-pkcs12',\r
-               '.p7b' => 'application/x-pkcs7-certificates',\r
-               '.p7c' => 'application/pkcs7-mime',\r
-               '.p7m' => 'application/pkcs7-mime',\r
-               '.p7r' => 'application/x-pkcs7-certreqresp',\r
-               '.p7s' => 'application/pkcs7-signature',\r
-               '.pct' => 'image/pict',\r
-               '.pdf' => 'application/pdf',\r
-               '.pdx' => 'application/vnd.adobe.pdx',\r
-               '.pfx' => 'application/x-pkcs12',\r
-               '.pic' => 'image/pict',\r
-               '.pict' => 'image/pict',\r
-               '.pinstall' => 'application/x-picasa-detect',\r
-               '.pko' => 'application/vnd.ms-pki.pko',\r
-               '.png' => 'image/png',\r
-               '.pnt' => 'image/x-macpaint',\r
-               '.pntg' => 'image/x-macpaint',\r
-               '.pot' => 'application/vnd.ms-powerpoint',\r
-               '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',\r
-               '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',\r
-               '.ppa' => 'application/vnd.ms-powerpoint',\r
-               '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',\r
-               '.pps' => 'application/vnd.ms-powerpoint',\r
-               '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',\r
-               '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',\r
-               '.ppt' => 'application/vnd.ms-powerpoint',\r
-               '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',\r
-               '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',\r
-               '.prf' => 'application/pics-rules',\r
-               '.ps' => 'application/postscript',\r
-               '.pub' => 'application/vnd.ms-publisher',\r
-               '.pwz' => 'application/vnd.ms-powerpoint',\r
-               '.py' => 'text/plain',\r
-               '.pyw' => 'text/plain',\r
-               '.qht' => 'text/x-html-insertion',\r
-               '.qhtm' => 'text/x-html-insertion',\r
-               '.qt' => 'video/quicktime',\r
-               '.qti' => 'image/x-quicktime',\r
-               '.qtif' => 'image/x-quicktime',\r
-               '.qtl' => 'application/x-quicktimeplayer',\r
-               '.rat' => 'application/rat-file',\r
-               '.rmf' => 'application/vnd.adobe.rmf',\r
-               '.rmi' => 'audio/mid',\r
-               '.rqy' => 'text/x-ms-rqy',\r
-               '.rtf' => 'application/msword',\r
-               '.sct' => 'text/scriptlet',\r
-               '.sd2' => 'audio/x-sd2',\r
-               '.sdp' => 'application/sdp',\r
-               '.shtml' => 'text/html',\r
-               '.sit' => 'application/x-stuffit',\r
-               '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',\r
-               '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',\r
-               '.slk' => 'application/vnd.ms-excel',\r
-               '.snd' => 'audio/basic',\r
-               '.so' => 'application/x-apachemodule',\r
-               '.sol' => 'text/plain',\r
-               '.sor' => 'text/plain',\r
-               '.spc' => 'application/x-pkcs7-certificates',\r
-               '.spl' => 'application/futuresplash',\r
-               '.sst' => 'application/vnd.ms-pki.certstore',\r
-               '.stl' => 'application/vnd.ms-pki.stl',\r
-               '.swf' => 'application/x-shockwave-flash',\r
-               '.thmx' => 'application/vnd.ms-officetheme',\r
-               '.tif' => 'image/tiff',\r
-               '.tiff' => 'image/tiff',\r
-               '.txt' => 'text/plain',\r
-               '.uls' => 'text/iuls',\r
-               '.vcf' => 'text/x-vcard',\r
-               '.vdx' => 'application/vnd.ms-visio.viewer',\r
-               '.vsd' => 'application/vnd.ms-visio.viewer',\r
-               '.vss' => 'application/vnd.ms-visio.viewer',\r
-               '.vst' => 'application/vnd.ms-visio.viewer',\r
-               '.vsx' => 'application/vnd.ms-visio.viewer',\r
-               '.vtx' => 'application/vnd.ms-visio.viewer',\r
-               '.wav' => 'audio/wav',\r
-               '.wax' => 'audio/x-ms-wax',\r
-               '.wbk' => 'application/msword',\r
-               '.wdp' => 'image/vnd.ms-photo',\r
-               '.wiz' => 'application/msword',\r
-               '.wm' => 'video/x-ms-wm',\r
-               '.wma' => 'audio/x-ms-wma',\r
-               '.wmd' => 'application/x-ms-wmd',\r
-               '.wmv' => 'video/x-ms-wmv',\r
-               '.wmx' => 'video/x-ms-wmx',\r
-               '.wmz' => 'application/x-ms-wmz',\r
-               '.wpl' => 'application/vnd.ms-wpl',\r
-               '.wsc' => 'text/scriptlet',\r
-               '.wvx' => 'video/x-ms-wvx',\r
-               '.xaml' => 'application/xaml+xml',\r
-               '.xbap' => 'application/x-ms-xbap',\r
-               '.xdp' => 'application/vnd.adobe.xdp+xml',\r
-               '.xfdf' => 'application/vnd.adobe.xfdf',\r
-               '.xht' => 'application/xhtml+xml',\r
-               '.xhtml' => 'application/xhtml+xml',\r
-               '.xla' => 'application/vnd.ms-excel',\r
-               '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',\r
-               '.xlk' => 'application/vnd.ms-excel',\r
-               '.xll' => 'application/vnd.ms-excel',\r
-               '.xlm' => 'application/vnd.ms-excel',\r
-               '.xls' => 'application/vnd.ms-excel',\r
-               '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',\r
-               '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',\r
-               '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',\r
-               '.xlt' => 'application/vnd.ms-excel',\r
-               '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',\r
-               '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',\r
-               '.xlw' => 'application/vnd.ms-excel',\r
-               '.xml' => 'text/xml',\r
-               '.xps' => 'application/vnd.ms-xpsdocument',\r
-               '.xsl' => 'text/xml',\r
-       );\r
-\r
-       /** \r
-        * IE versions which have been analysed to bring you this class, and for \r
-        * which some substantive difference exists. These will appear as keys \r
-        * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.\r
-        */\r
-       protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );\r
-\r
-       /**\r
-        * Type table with versions expanded \r
-        */\r
-       protected $typeTable = array();\r
-\r
-       /**\r
-        * Constructor. Fills $this->typeTable with one type table per version.\r
-        *\r
-        * Versions are walked in the order listed in $this->versions, starting\r
-        * from $this->baseTypeTable. Additions from $this->addedTypes accumulate,\r
-        * so a type added for one version is also present for every version\r
-        * listed after it.\r
-        */\r
-       function __construct() {\r
-               // Construct versioned type arrays from the base type array plus additions \r
-               $types = $this->baseTypeTable;\r
-               foreach ( $this->versions as $version ) {\r
-                       if ( isset( $this->addedTypes[$version] ) ) {\r
-                               foreach ( $this->addedTypes[$version] as $format => $addedTypes ) {\r
-                                       $types[$format] = array_merge( $types[$format], $addedTypes );\r
-                               }\r
-                       }\r
-                       $this->typeTable[$version] = $types;\r
-               }\r
-       }\r
-\r
-       /**\r
-        * Get the MIME types from getMimesFromData(), but convert the result from IE's \r
-        * idiosyncratic private types into something other apps will understand.\r
-        *\r
-        * Each detected type is passed through translateMimeType().\r
-        *\r
-        * @param string $fileName The file name. Its extension can decide the\r
-        *   result when detection from the data is inconclusive.\r
-        * @param string $chunk The first 256 bytes of the file (only the first 255\r
-        *   are analysed)\r
-        * @param string $proposed The MIME type proposed by the server\r
-        *\r
-        * @return array Map of IE version to detected mime type\r
-        */\r
-       public function getRealMimesFromData( $fileName, $chunk, $proposed ) {\r
-               $types = $this->getMimesFromData( $fileName, $chunk, $proposed );\r
-               $types = array_map( array( $this, 'translateMimeType' ), $types );\r
-               return $types;\r
-       }\r
-\r
-       /**\r
-        * Translate a MIME type from IE's idiosyncratic private types into\r
-        * more commonly understood type strings\r
-        *\r
-        * Types with no entry in the translation table are returned unchanged.\r
-        *\r
-        * @param string $type The MIME type as reported by the IE emulation\r
-        *\r
-        * @return string The translated MIME type, or $type itself if it has no\r
-        *   translation\r
-        */\r
-       public function translateMimeType( $type ) {\r
-               static $table = array(\r
-                       'image/pjpeg' => 'image/jpeg',\r
-                       'image/x-png' => 'image/png',\r
-                       'image/x-wmf' => 'application/x-msmetafile',\r
-                       'image/bmp' => 'image/x-bmp',\r
-                       'application/x-zip-compressed' => 'application/zip',\r
-                       'application/x-compressed' => 'application/x-compress',\r
-                       'application/x-gzip-compressed' => 'application/x-gzip',\r
-                       'audio/mid' => 'audio/midi',\r
-               );\r
-               if ( isset( $table[$type] ) ) {\r
-                       $type = $table[$type];\r
-               }\r
-               return $type;\r
-       }\r
-\r
-       /**\r
-        * Get the untranslated MIME types for all known versions\r
-        *\r
-        * Calls getMimeTypeForVersion() once for every entry in $this->versions.\r
-        *\r
-        * @param string $fileName The file name. Its extension can decide the\r
-        *   result when detection from the data is inconclusive.\r
-        * @param string $chunk The first 256 bytes of the file (only the first 255\r
-        *   are analysed)\r
-        * @param string $proposed The MIME type proposed by the server\r
-        *\r
-        * @return array Map of IE version to detected mime type\r
-        */\r
-       public function getMimesFromData( $fileName, $chunk, $proposed ) {\r
-               $types = array();\r
-               foreach ( $this->versions as $version ) {\r
-                       $types[$version] = $this->getMimeTypeForVersion( $version, $fileName, $chunk, $proposed );\r
-               }\r
-               return $types;\r
-       }\r
-\r
-       /**\r
-        * Get the MIME type for a given named version\r
-        *\r
-        * Outline of the decision sequence, in order:\r
-        *  - Anything from the first semicolon onwards is dropped from $proposed.\r
-        *  - If getDataFormat() reports 'unknown' for the proposed type, and it is\r
-        *    not multipart/mixed or multipart/x-mixed-replace, the proposed type\r
-        *    is returned without looking at the data.\r
-        *  - An empty chunk also returns the proposed type.\r
-        *  - The chunk is cut to 255 bytes, then sampled with sampleData() and\r
-        *    matched against checkBinaryHeaders() and checkTextHeaders().\r
-        *  - A series of early returns handles proposed types confirmed by the\r
-        *    data, followed by the markers reported by sampleData() (CDF, RSS,\r
-        *    Atom, XML, HTML, XBM, BinHex, scriptlet).\r
-        *  - Otherwise a counter-based heuristic classifies the data as binary or\r
-        *    text; that decides whether the binary or the text header match is\r
-        *    preferred, with application/octet-stream or text/plain as fallback.\r
-        *  - If that type is 'ambiguous' according to getDataFormat(), the\r
-        *    proposed type is used when its format agrees with the data, then the\r
-        *    registry entry for the file extension, and finally the detected type.\r
-        *\r
-        * NOTE(review): the registry lookup uses the extension exactly as it\r
-        * appears in $fileName (no case folding), so 'a.HTML' does not match the\r
-        * '.html' key -- confirm whether that is intended.\r
-        *\r
-        * @param string $version One of the names in $this->versions. Compared as\r
-        *   a string (>= 'ie07') and searched for 'strict' / 'nohtml'.\r
-        * @param string $fileName The file name; only the part from the last dot\r
-        *   onwards is used, for the registry lookup\r
-        * @param string $chunk The leading bytes of the file\r
-        * @param string $proposed The MIME type proposed by the server\r
-        *\r
-        * @return string The MIME type this version is expected to settle on\r
-        */\r
-       protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {\r
-               // Strip text after a semicolon\r
-               $semiPos = strpos( $proposed, ';' );\r
-               if ( $semiPos !== false ) {\r
-                       $proposed = substr( $proposed, 0, $semiPos );\r
-               }\r
-\r
-               $proposedFormat = $this->getDataFormat( $version, $proposed );\r
-               if ( $proposedFormat == 'unknown'\r
-                       && $proposed != 'multipart/mixed'\r
-                       && $proposed != 'multipart/x-mixed-replace' )\r
-               {\r
-                       return $proposed;\r
-               }\r
-               if ( strval( $chunk ) === '' ) {\r
-                       return $proposed;\r
-               }\r
-\r
-               // Truncate chunk at 255 bytes\r
-               $chunk = substr( $chunk, 0, 255 );\r
-\r
-               // IE does the Check*Headers() calls last, and instead does the following image \r
-               // type checks by directly looking for the magic numbers. What I do here should \r
-               // have the same effect since the magic number checks are identical in both cases.\r
-               $result = $this->sampleData( $version, $chunk );\r
-               $sampleFound = $result['found'];\r
-               $counters = $result['counters'];\r
-               $binaryType = $this->checkBinaryHeaders( $version, $chunk );\r
-               $textType = $this->checkTextHeaders( $version, $chunk );\r
-\r
-               if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {\r
-                       return 'text/html';\r
-               }\r
-               if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {\r
-                       return 'image/gif';\r
-               }\r
-               if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )\r
-                       && $binaryType == 'image/pjpeg' ) \r
-               {\r
-                       return $proposed;\r
-               }\r
-               // PNG check added in IE 7\r
-               if ( $version >= 'ie07'\r
-                       && ( $proposed == 'image/x-png' || $proposed == 'image/png' )\r
-                       && $binaryType == 'image/x-png' )\r
-               {\r
-                       return $proposed;\r
-               }\r
-\r
-               // CDF was removed in IE 7 so it won't be in $sampleFound for later versions\r
-               if ( isset( $sampleFound['cdf'] ) ) {\r
-                       return 'application/x-cdf';\r
-               }\r
-\r
-               // RSS and Atom were added in IE 7 so they won't be in $sampleFound for \r
-               // previous versions\r
-               if ( isset( $sampleFound['rss'] ) ) {\r
-                       return 'application/rss+xml';\r
-               }\r
-               if ( isset( $sampleFound['rdf-tag'] )\r
-                       && isset( $sampleFound['rdf-url'] )\r
-                       && isset( $sampleFound['rdf-purl'] ) )\r
-               {\r
-                       return 'application/rss+xml';\r
-               }\r
-               if ( isset( $sampleFound['atom'] ) ) {\r
-                       return 'application/atom+xml';\r
-               }\r
-\r
-               if ( isset( $sampleFound['xml'] ) ) {\r
-                       // TODO: I'm not sure under what circumstances this flag is enabled\r
-                       if ( strpos( $version, 'strict' ) !== false ) {\r
-                               if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {\r
-                                       return 'text/xml';\r
-                               }\r
-                       } else {\r
-                               return 'text/xml';\r
-                       }\r
-               }\r
-               if ( isset( $sampleFound['html'] ) ) {\r
-                       // TODO: I'm not sure under what circumstances this flag is enabled\r
-                       if ( strpos( $version, 'nohtml' ) !== false ) {\r
-                               if ( $proposed == 'text/plain' ) {\r
-                                       return 'text/html';\r
-                               }\r
-                       } else {\r
-                               return 'text/html';\r
-                       }\r
-               }\r
-               if ( isset( $sampleFound['xbm'] ) ) {\r
-                       return 'image/x-bitmap';\r
-               }\r
-               if ( isset( $sampleFound['binhex'] ) ) {\r
-                       return 'application/macbinhex40';\r
-               }\r
-               if ( isset( $sampleFound['scriptlet'] ) ) {\r
-                       if ( strpos( $version, 'strict' ) !== false ) {\r
-                               if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {\r
-                                       return 'text/scriptlet';\r
-                               }\r
-                       } else {\r
-                               return 'text/scriptlet';\r
-                       }\r
-               }\r
-\r
-               // Freaky heuristics to determine if the data is text or binary\r
-               // The heuristic is of course broken for non-ASCII text\r
-               if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] ) \r
-                       < ( $counters['ctrl'] + $counters['high'] ) * 16 ) \r
-               {\r
-                       $kindOfBinary = true;\r
-                       $type = $binaryType ? $binaryType : $textType;\r
-                       if ( $type === false ) {\r
-                               $type = 'application/octet-stream';\r
-                       }\r
-               } else {\r
-                       $kindOfBinary = false;\r
-                       $type = $textType ? $textType : $binaryType;\r
-                       if ( $type === false ) {\r
-                               $type = 'text/plain';\r
-                       }\r
-               }\r
-\r
-               // Check if the output format is ambiguous\r
-               // This generally means that detection failed, real types aren't ambiguous\r
-               $detectedFormat = $this->getDataFormat( $version, $type );\r
-               if ( $detectedFormat != 'ambiguous' ) {\r
-                       return $type;\r
-               }\r
-\r
-               if ( $proposedFormat != 'ambiguous' ) {\r
-                       // FormatAgreesWithData()\r
-                       if ( $proposedFormat == 'text' && !$kindOfBinary ) {\r
-                               return $proposed;\r
-                       }\r
-                       if ( $proposedFormat == 'binary' && $kindOfBinary ) {\r
-                               return $proposed;\r
-                       }\r
-                       if ( $proposedFormat == 'html' ) {\r
-                               return $proposed;\r
-                       }\r
-               }\r
-\r
-               // Find a MIME type by searching the registry for the file extension.\r
-               $dotPos = strrpos( $fileName, '.' );\r
-               if ( $dotPos === false ) {\r
-                       return $type;\r
-               }\r
-               $ext = substr( $fileName, $dotPos );\r
-               if ( isset( $this->registry[$ext] ) ) {\r
-                       return $this->registry[$ext];\r
-               }\r
-\r
-               // TODO: If the extension has an application registered to it, IE will return \r
-               // application/octet-stream. We'll skip that, so we could erroneously \r
-               // return text/plain or application/x-netcdf where application/octet-stream\r
-               // would be correct.\r
-\r
-               return $type;\r
-       }\r
-\r
-       /**\r
-        * Check for text headers at the start of the chunk\r
-        * Confirmed same in 5 and 7.\r
-        *\r
-        * Recognises the PDF, PostScript and RTF prefixes and a leading 'begin'.\r
-        * The checks run in that order and the first match wins.\r
-        *\r
-        * @param string $version IE version name (not used by this method)\r
-        * @param string $chunk Data from the start of the file\r
-        *\r
-        * @return string|bool The MIME type for the first matching prefix, or\r
-        *   false if none matches\r
-        */\r
-       private function checkTextHeaders( $version, $chunk ) {\r
-               $chunk2 = substr( $chunk, 0, 2 );\r
-               $chunk4 = substr( $chunk, 0, 4 );\r
-               $chunk5 = substr( $chunk, 0, 5 );\r
-               if ( $chunk4 == '%PDF' ) {\r
-                       return 'application/pdf';\r
-               }\r
-               if ( $chunk2 == '%!' ) {\r
-                       return 'application/postscript';\r
-               }\r
-               if ( $chunk5 == '{\\rtf' ) {\r
-                       return 'text/richtext';\r
-               }\r
-               if ( $chunk5 == 'begin' ) {\r
-                       return 'application/base64';\r
-               }\r
-               return false;\r
-       }\r
-\r
-       /**\r
-        * Check for binary headers at the start of the chunk\r
-        * Confirmed same in 5 and 7.\r
-        *\r
-        * The magic number checks run in the order written and the first match\r
-        * wins. The returned strings are IE's own type names, some of which are\r
-        * non-standard (see translateMimeType() for the conversions).\r
-        *\r
-        * @param string $version IE version name (not used by this method)\r
-        * @param string $chunk Data from the start of the file\r
-        *\r
-        * @return string|bool The MIME type for the first matching signature, or\r
-        *   false if none matches\r
-        */\r
-       private function checkBinaryHeaders( $version, $chunk ) {\r
-               $chunk2 = substr( $chunk, 0, 2 );\r
-               $chunk3 = substr( $chunk, 0, 3 );\r
-               $chunk4 = substr( $chunk, 0, 4 );\r
-               $chunk5 = substr( $chunk, 0, 5 );\r
-               $chunk8 = substr( $chunk, 0, 8 );\r
-               if ( $chunk5 == 'GIF87' || $chunk5 == 'GIF89' ) {\r
-                       return 'image/gif';\r
-               }\r
-               if ( $chunk2 == "\xff\xd8" ) {\r
-                       return 'image/pjpeg'; // actually plain JPEG but this is what IE returns\r
-               }\r
-\r
-               if ( $chunk2 == 'BM' \r
-                       && substr( $chunk, 6, 2 ) == "\000\000"\r
-                       && substr( $chunk, 8, 2 ) != "\000\000" )\r
-               {\r
-                       return 'image/bmp'; // another non-standard MIME\r
-               }\r
-               if ( $chunk4 == 'RIFF' \r
-                       && substr( $chunk, 8, 4 ) == 'WAVE' )\r
-               {\r
-                       return 'audio/wav';\r
-               }\r
-               // These were integer literals in IE\r
-               // Perhaps the author was not sure what the target endianness was\r
-               if ( $chunk4 == ".sd\000"\r
-                       || $chunk4 == ".snd"\r
-                       || $chunk4 == "\000ds."\r
-                       || $chunk4 == "dns." )\r
-               {\r
-                       return 'audio/basic';\r
-               }\r
-               if ( $chunk3 == "MM\000" ) {\r
-                       return 'image/tiff';\r
-               }\r
-               if ( $chunk2 == 'MZ' ) {\r
-                       return 'application/x-msdownload';\r
-               }\r
-               if ( $chunk8 == "\x89PNG\x0d\x0a\x1a\x0a" ) {\r
-                       return 'image/x-png'; // [sic]\r
-               }\r
-               if ( strlen( $chunk ) >= 5 ) {\r
-                       $byte2 = ord( $chunk[2] );\r
-                       $byte4 = ord( $chunk[4] );\r
-                       if ( $byte2 >= 3 && $byte2 <= 31 && $byte4 == 0 && $chunk2 == 'JG' ) {\r
-                               return 'image/x-jg';\r
-                       }\r
-               }\r
-               // More endian confusion?\r
-               if ( $chunk4 == 'MROF' ) {\r
-                       return 'audio/x-aiff';\r
-               }\r
-               $chunk4_8 = substr( $chunk, 8, 4 );\r
-               if ( $chunk4 == 'FORM' && ( $chunk4_8 == 'AIFF' || $chunk4_8 == 'AIFC' ) ) {\r
-                       return 'audio/x-aiff';\r
-               }\r
-               if ( $chunk4 == 'RIFF' && $chunk4_8 == 'AVI ' ) {\r
-                       return 'video/avi';\r
-               }\r
-               if ( $chunk4 == "\x00\x00\x01\xb3" || $chunk4 == "\x00\x00\x01\xba" ) {\r
-                       return 'video/mpeg';\r
-               }\r
-               if ( $chunk4 == "\001\000\000\000"\r
-                       && substr( $chunk, 40, 4 ) == ' EMF' )\r
-               {\r
-                       return 'image/x-emf';\r
-               }\r
-               if ( $chunk4 == "\xd7\xcd\xc6\x9a" ) {\r
-                       return 'image/x-wmf';\r
-               }\r
-               if ( $chunk4 == "\xca\xfe\xba\xbe" ) {\r
-                       return 'application/java';\r
-               }\r
-               if ( $chunk2 == 'PK' ) {\r
-                       return 'application/x-zip-compressed';\r
-               }\r
-               if ( $chunk2 == "\x1f\x9d" ) {\r
-                       return 'application/x-compressed';\r
-               }\r
-               if ( $chunk2 == "\x1f\x8b" ) {\r
-                       return 'application/x-gzip-compressed';\r
-               }\r
-               // Skip redundant check for ZIP\r
-               if ( $chunk5 == "MThd\000" ) {\r
-                       return 'audio/mid';\r
-               }\r
-               if ( $chunk4 == '%PDF' ) {\r
-                       return 'application/pdf';\r
-               }\r
-               return false;\r
-       }\r
-\r
-       /**\r
-        * Do heuristic checks on the bulk of the data sample.\r
-        * Search for HTML tags.\r
-        */\r
-       protected function sampleData( $version, $chunk ) {\r
-               $found = array();\r
-               $counters = array(\r
-                       'ctrl' => 0,\r
-                       'high' => 0,\r
-                       'low' => 0,\r
-                       'lf' => 0,\r
-                       'cr' => 0,\r
-                       'ff' => 0\r
-               );\r
-               $htmlTags = array(\r
-                       'html',\r
-                       'head',\r
-                       'title',\r
-                       'body',\r
-                       'script',\r
-                       'a href',\r
-                       'pre',\r
-                       'img',\r
-                       'plaintext',\r
-                       'table'\r
-               );\r
-               $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';\r
-               $rdfPurl = 'http://purl.org/rss/1.0/';\r
-               $xbmMagic1 = '#define';\r
-               $xbmMagic2 = '_width';\r
-               $xbmMagic3 = '_bits';\r
-               $binhexMagic = 'converted with BinHex';\r
-\r
-               for ( $offset = 0; $offset < strlen( $chunk ); $offset++ ) {\r
-                       $curChar = $chunk[$offset];\r
-                       if ( $curChar == "\x0a" ) {\r
-                               $counters['lf']++;\r
-                               continue;\r
-                       } elseif ( $curChar == "\x0d" ) {\r
-                               $counters['cr']++;\r
-                               continue;\r
-                       } elseif ( $curChar == "\x0c" ) {\r
-                               $counters['ff']++;\r
-                               continue;\r
-                       } elseif ( $curChar == "\t" ) {\r
-                               $counters['low']++;\r
-                               continue;\r
-                       } elseif ( ord( $curChar ) < 32 ) {\r
-                               $counters['ctrl']++;\r
-                               continue;\r
-                       } elseif ( ord( $curChar ) >= 128 ) {\r
-                               $counters['high']++;\r
-                               continue;\r
-                       }\r
-\r
-                       $counters['low']++;\r
-                       if ( $curChar == '<' ) {\r
-                               // XML\r
-                               $remainder = substr( $chunk, $offset + 1 );\r
-                               if ( !strncasecmp( $remainder, '?XML', 4 ) ) {\r
-                                       $nextChar = substr( $chunk, $offset + 5, 1 );\r
-                                       if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {\r
-                                               $found['xml'] = true;\r
-                                       }\r
-                               }\r
-                               // Scriptlet (JSP)\r
-                               if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {\r
-                                       $found['scriptlet'] = true;\r
-                                       break;\r
-                               }\r
-                               // HTML\r
-                               foreach ( $htmlTags as $tag ) {\r
-                                       if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {\r
-                                               $found['html'] = true;\r
-                                       }\r
-                               }\r
-                               // Skip broken check for additional tags (HR etc.)\r
-\r
-                               // CHANNEL replaced by RSS, RDF and FEED in IE 7\r
-                               if ( $version < 'ie07' ) {\r
-                                       if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {\r
-                                               $found['cdf'] = true;\r
-                                       }\r
-                               } else {\r
-                                       // RSS\r
-                                       if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {\r
-                                               $found['rss'] = true;\r
-                                               break; // return from SampleData\r
-                                       }\r
-                                       if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {\r
-                                               $found['rdf-tag'] = true;\r
-                                               // no break\r
-                                       }\r
-                                       if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {\r
-                                               $found['atom'] = true;\r
-                                               break;\r
-                                       }\r
-                               }\r
-                               continue;\r
-                       }\r
-                       // Skip broken check for -->\r
-\r
-                       // RSS URL checks\r
-                       // For some reason both URLs must appear before it is recognised\r
-                       $remainder = substr( $chunk, $offset );\r
-                       if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {\r
-                               $found['rdf-url'] = true;\r
-                               if ( isset( $found['rdf-tag'] )\r
-                                       && isset( $found['rdf-purl'] ) ) // [sic]\r
-                               {\r
-                                       break;\r
-                               }\r
-                               continue;\r
-                       }\r
-\r
-                       if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {\r
-                               if ( isset( $found['rdf-tag'] ) \r
-                                       && isset( $found['rdf-url'] ) ) // [sic]\r
-                               {\r
-                                       break;\r
-                               }\r
-                               continue;\r
-                       }\r
-\r
-                       // XBM checks\r
-                       if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {\r
-                               $found['xbm1'] = true;\r
-                               continue;\r
-                       }\r
-                       if ( $curChar == '_' ) {\r
-                               if ( isset( $found['xbm2'] ) ) {\r
-                                       if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {\r
-                                               $found['xbm'] = true;\r
-                                               break;\r
-                                       }\r
-                               } elseif ( isset( $found['xbm1'] ) ) {\r
-                                       if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {\r
-                                               $found['xbm2'] = true;\r
-                                       }\r
-                               }\r
-                       }\r
-\r
-                       // BinHex\r
-                       if ( !strncasecmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {\r
-                               $found['binhex'] = true;\r
-                       }\r
-               }\r
-               return array( 'found' => $found, 'counters' => $counters );\r
-       }\r
-\r
-       protected function getDataFormat( $version, $type ) {\r
-               $types = $this->typeTable[$version];\r
-               if ( $type == '(null)' || strval( $type ) === '' ) {\r
-                       return 'ambiguous';\r
-               }\r
-               foreach ( $types as $format => $list ) {\r
-                       if ( in_array( $type, $list ) ) {\r
-                               return $format;\r
-                       }\r
-               }\r
-               return 'unknown';\r
-       }\r
-}\r
-\r
+<?php
+
+/**
+ * This class simulates Microsoft Internet Explorer's terribly broken and 
+ * insecure MIME type detection algorithm. It can be used to check web uploads
+ * with an apparently safe type, to see if IE will reinterpret them to produce 
+ * something dangerous.
+ *
+ * It is full of bugs and strange design choices and should not under any 
+ * circumstances be used to determine a MIME type to present to a user or 
+ * client. (Apple Safari developers, this means you too.)
+ *
+ * This class is based on a disassembly of IE 5.0, 6.0 and 7.0. Although I have 
+ * attempted to ensure that this code works in exactly the same way as Internet 
+ * Explorer, it does not share any source code, or creative choices such as 
+ * variable names, thus I (Tim Starling) claim copyright on it. 
+ *
+ * It may be redistributed without restriction. To aid reuse, this class does
+ * not depend on any MediaWiki module.
+ */
+class IEContentAnalyzer {
+       /**
+        * Relevant data taken from the type table in IE 5
+        */
+       protected $baseTypeTable = array(
+               'ambiguous' /*1*/ => array(
+                       'text/plain', 
+                       'application/octet-stream', 
+                       'application/x-netcdf', // [sic]
+               ),
+               'text' /*3*/ => array(
+                       'text/richtext', 'image/x-bitmap', 'application/postscript', 'application/base64',
+                       'application/macbinhex40', 'application/x-cdf', 'text/scriptlet'
+               ),
+               'binary' /*4*/ => array(
+                       'application/pdf', 'audio/x-aiff', 'audio/basic', 'audio/wav', 'image/gif',
+                       'image/pjpeg', 'image/jpeg', 'image/tiff', 'image/x-png', 'image/png', 'image/bmp', 
+                       'image/x-jg', 'image/x-art', 'image/x-emf', 'image/x-wmf', 'video/avi', 
+                       'video/x-msvideo', 'video/mpeg', 'application/x-compressed',
+                       'application/x-zip-compressed', 'application/x-gzip-compressed', 'application/java',
+                       'application/x-msdownload'
+               ),
+               'html' /*5*/ => array( 'text/html' ),
+       );
+
+       /**
+        * Changes to the type table in later versions of IE
+        */
+       protected $addedTypes = array(
+               'ie07' => array(
+                       'text' => array( 'text/xml', 'application/xml' )
+               ),
+       );
+
+       /**
+        * An approximation of the "Content Type" values in HKEY_CLASSES_ROOT in a
+        * typical Windows installation.
+        *
+        * Used for extension to MIME type mapping if detection fails.
+        */
+       protected $registry = array(
+               '.323' => 'text/h323',
+               '.3g2' => 'video/3gpp2',
+               '.3gp' => 'video/3gpp',
+               '.3gp2' => 'video/3gpp2',
+               '.3gpp' => 'video/3gpp',
+               '.aac' => 'audio/aac',
+               '.ac3' => 'audio/ac3',
+               '.accda' => 'application/msaccess',
+               '.accdb' => 'application/msaccess',
+               '.accdc' => 'application/msaccess',
+               '.accde' => 'application/msaccess',
+               '.accdr' => 'application/msaccess',
+               '.accdt' => 'application/msaccess',
+               '.ade' => 'application/msaccess',
+               '.adp' => 'application/msaccess',
+               '.adts' => 'audio/aac',
+               '.ai' => 'application/postscript',
+               '.aif' => 'audio/aiff',
+               '.aifc' => 'audio/aiff',
+               '.aiff' => 'audio/aiff',
+               '.amc' => 'application/x-mpeg',
+               '.application' => 'application/x-ms-application',
+               '.asf' => 'video/x-ms-asf',
+               '.asx' => 'video/x-ms-asf',
+               '.au' => 'audio/basic',
+               '.avi' => 'video/avi',
+               '.bmp' => 'image/bmp',
+               '.caf' => 'audio/x-caf',
+               '.cat' => 'application/vnd.ms-pki.seccat',
+               '.cbo' => 'application/sha',
+               '.cdda' => 'audio/aiff',
+               '.cer' => 'application/x-x509-ca-cert',
+               '.conf' => 'text/plain',
+               '.crl' => 'application/pkix-crl',
+               '.crt' => 'application/x-x509-ca-cert',
+               '.css' => 'text/css',
+               '.csv' => 'application/vnd.ms-excel',
+               '.der' => 'application/x-x509-ca-cert',
+               '.dib' => 'image/bmp',
+               '.dif' => 'video/x-dv',
+               '.dll' => 'application/x-msdownload',
+               '.doc' => 'application/msword',
+               '.docm' => 'application/vnd.ms-word.document.macroEnabled.12',
+               '.docx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+               '.dot' => 'application/msword',
+               '.dotm' => 'application/vnd.ms-word.template.macroEnabled.12',
+               '.dotx' => 'application/vnd.openxmlformats-officedocument.wordprocessingml.template',
+               '.dv' => 'video/x-dv',
+               '.dwfx' => 'model/vnd.dwfx+xps',
+               '.edn' => 'application/vnd.adobe.edn',
+               '.eml' => 'message/rfc822',
+               '.eps' => 'application/postscript',
+               '.etd' => 'application/x-ebx',
+               '.exe' => 'application/x-msdownload',
+               '.fdf' => 'application/vnd.fdf',
+               '.fif' => 'application/fractals',
+               '.gif' => 'image/gif',
+               '.gsm' => 'audio/x-gsm',
+               '.hqx' => 'application/mac-binhex40',
+               '.hta' => 'application/hta',
+               '.htc' => 'text/x-component',
+               '.htm' => 'text/html',
+               '.html' => 'text/html',
+               '.htt' => 'text/webviewhtml',
+               '.hxa' => 'application/xml',
+               '.hxc' => 'application/xml',
+               '.hxd' => 'application/octet-stream',
+               '.hxe' => 'application/xml',
+               '.hxf' => 'application/xml',
+               '.hxh' => 'application/octet-stream',
+               '.hxi' => 'application/octet-stream',
+               '.hxk' => 'application/xml',
+               '.hxq' => 'application/octet-stream',
+               '.hxr' => 'application/octet-stream',
+               '.hxs' => 'application/octet-stream',
+               '.hxt' => 'application/xml',
+               '.hxv' => 'application/xml',
+               '.hxw' => 'application/octet-stream',
+               '.ico' => 'image/x-icon',
+               '.iii' => 'application/x-iphone',
+               '.ins' => 'application/x-internet-signup',
+               '.iqy' => 'text/x-ms-iqy',
+               '.isp' => 'application/x-internet-signup',
+               '.jfif' => 'image/jpeg',
+               '.jnlp' => 'application/x-java-jnlp-file',
+               '.jpe' => 'image/jpeg',
+               '.jpeg' => 'image/jpeg',
+               '.jpg' => 'image/jpeg',
+               '.jtx' => 'application/x-jtx+xps',
+               '.latex' => 'application/x-latex',
+               '.log' => 'text/plain',
+               '.m1v' => 'video/mpeg',
+               '.m2v' => 'video/mpeg',
+               '.m3u' => 'audio/x-mpegurl',
+               '.mac' => 'image/x-macpaint',
+               '.man' => 'application/x-troff-man',
+               '.mda' => 'application/msaccess',
+               '.mdb' => 'application/msaccess',
+               '.mde' => 'application/msaccess',
+               '.mfp' => 'application/x-shockwave-flash',
+               '.mht' => 'message/rfc822',
+               '.mhtml' => 'message/rfc822',
+               '.mid' => 'audio/mid',
+               '.midi' => 'audio/mid',
+               '.mod' => 'video/mpeg',
+               '.mov' => 'video/quicktime',
+               '.mp2' => 'video/mpeg',
+               '.mp2v' => 'video/mpeg',
+               '.mp3' => 'audio/mpeg',
+               '.mp4' => 'video/mp4',
+               '.mpa' => 'video/mpeg',
+               '.mpe' => 'video/mpeg',
+               '.mpeg' => 'video/mpeg',
+               '.mpf' => 'application/vnd.ms-mediapackage',
+               '.mpg' => 'video/mpeg',
+               '.mpv2' => 'video/mpeg',
+               '.mqv' => 'video/quicktime',
+               '.NMW' => 'application/nmwb',
+               '.nws' => 'message/rfc822',
+               '.odc' => 'text/x-ms-odc',
+               '.ols' => 'application/vnd.ms-publisher',
+               '.p10' => 'application/pkcs10',
+               '.p12' => 'application/x-pkcs12',
+               '.p7b' => 'application/x-pkcs7-certificates',
+               '.p7c' => 'application/pkcs7-mime',
+               '.p7m' => 'application/pkcs7-mime',
+               '.p7r' => 'application/x-pkcs7-certreqresp',
+               '.p7s' => 'application/pkcs7-signature',
+               '.pct' => 'image/pict',
+               '.pdf' => 'application/pdf',
+               '.pdx' => 'application/vnd.adobe.pdx',
+               '.pfx' => 'application/x-pkcs12',
+               '.pic' => 'image/pict',
+               '.pict' => 'image/pict',
+               '.pinstall' => 'application/x-picasa-detect',
+               '.pko' => 'application/vnd.ms-pki.pko',
+               '.png' => 'image/png',
+               '.pnt' => 'image/x-macpaint',
+               '.pntg' => 'image/x-macpaint',
+               '.pot' => 'application/vnd.ms-powerpoint',
+               '.potm' => 'application/vnd.ms-powerpoint.template.macroEnabled.12',
+               '.potx' => 'application/vnd.openxmlformats-officedocument.presentationml.template',
+               '.ppa' => 'application/vnd.ms-powerpoint',
+               '.ppam' => 'application/vnd.ms-powerpoint.addin.macroEnabled.12',
+               '.pps' => 'application/vnd.ms-powerpoint',
+               '.ppsm' => 'application/vnd.ms-powerpoint.slideshow.macroEnabled.12',
+               '.ppsx' => 'application/vnd.openxmlformats-officedocument.presentationml.slideshow',
+               '.ppt' => 'application/vnd.ms-powerpoint',
+               '.pptm' => 'application/vnd.ms-powerpoint.presentation.macroEnabled.12',
+               '.pptx' => 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+               '.prf' => 'application/pics-rules',
+               '.ps' => 'application/postscript',
+               '.pub' => 'application/vnd.ms-publisher',
+               '.pwz' => 'application/vnd.ms-powerpoint',
+               '.py' => 'text/plain',
+               '.pyw' => 'text/plain',
+               '.qht' => 'text/x-html-insertion',
+               '.qhtm' => 'text/x-html-insertion',
+               '.qt' => 'video/quicktime',
+               '.qti' => 'image/x-quicktime',
+               '.qtif' => 'image/x-quicktime',
+               '.qtl' => 'application/x-quicktimeplayer',
+               '.rat' => 'application/rat-file',
+               '.rmf' => 'application/vnd.adobe.rmf',
+               '.rmi' => 'audio/mid',
+               '.rqy' => 'text/x-ms-rqy',
+               '.rtf' => 'application/msword',
+               '.sct' => 'text/scriptlet',
+               '.sd2' => 'audio/x-sd2',
+               '.sdp' => 'application/sdp',
+               '.shtml' => 'text/html',
+               '.sit' => 'application/x-stuffit',
+               '.sldm' => 'application/vnd.ms-powerpoint.slide.macroEnabled.12',
+               '.sldx' => 'application/vnd.openxmlformats-officedocument.presentationml.slide',
+               '.slk' => 'application/vnd.ms-excel',
+               '.snd' => 'audio/basic',
+               '.so' => 'application/x-apachemodule',
+               '.sol' => 'text/plain',
+               '.sor' => 'text/plain',
+               '.spc' => 'application/x-pkcs7-certificates',
+               '.spl' => 'application/futuresplash',
+               '.sst' => 'application/vnd.ms-pki.certstore',
+               '.stl' => 'application/vnd.ms-pki.stl',
+               '.swf' => 'application/x-shockwave-flash',
+               '.thmx' => 'application/vnd.ms-officetheme',
+               '.tif' => 'image/tiff',
+               '.tiff' => 'image/tiff',
+               '.txt' => 'text/plain',
+               '.uls' => 'text/iuls',
+               '.vcf' => 'text/x-vcard',
+               '.vdx' => 'application/vnd.ms-visio.viewer',
+               '.vsd' => 'application/vnd.ms-visio.viewer',
+               '.vss' => 'application/vnd.ms-visio.viewer',
+               '.vst' => 'application/vnd.ms-visio.viewer',
+               '.vsx' => 'application/vnd.ms-visio.viewer',
+               '.vtx' => 'application/vnd.ms-visio.viewer',
+               '.wav' => 'audio/wav',
+               '.wax' => 'audio/x-ms-wax',
+               '.wbk' => 'application/msword',
+               '.wdp' => 'image/vnd.ms-photo',
+               '.wiz' => 'application/msword',
+               '.wm' => 'video/x-ms-wm',
+               '.wma' => 'audio/x-ms-wma',
+               '.wmd' => 'application/x-ms-wmd',
+               '.wmv' => 'video/x-ms-wmv',
+               '.wmx' => 'video/x-ms-wmx',
+               '.wmz' => 'application/x-ms-wmz',
+               '.wpl' => 'application/vnd.ms-wpl',
+               '.wsc' => 'text/scriptlet',
+               '.wvx' => 'video/x-ms-wvx',
+               '.xaml' => 'application/xaml+xml',
+               '.xbap' => 'application/x-ms-xbap',
+               '.xdp' => 'application/vnd.adobe.xdp+xml',
+               '.xfdf' => 'application/vnd.adobe.xfdf',
+               '.xht' => 'application/xhtml+xml',
+               '.xhtml' => 'application/xhtml+xml',
+               '.xla' => 'application/vnd.ms-excel',
+               '.xlam' => 'application/vnd.ms-excel.addin.macroEnabled.12',
+               '.xlk' => 'application/vnd.ms-excel',
+               '.xll' => 'application/vnd.ms-excel',
+               '.xlm' => 'application/vnd.ms-excel',
+               '.xls' => 'application/vnd.ms-excel',
+               '.xlsb' => 'application/vnd.ms-excel.sheet.binary.macroEnabled.12',
+               '.xlsm' => 'application/vnd.ms-excel.sheet.macroEnabled.12',
+               '.xlsx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+               '.xlt' => 'application/vnd.ms-excel',
+               '.xltm' => 'application/vnd.ms-excel.template.macroEnabled.12',
+               '.xltx' => 'application/vnd.openxmlformats-officedocument.spreadsheetml.template',
+               '.xlw' => 'application/vnd.ms-excel',
+               '.xml' => 'text/xml',
+               '.xps' => 'application/vnd.ms-xpsdocument',
+               '.xsl' => 'text/xml',
+       );
+
+       /** 
+        * IE versions which have been analysed to bring you this class, and for 
+        * which some substantive difference exists. These will appear as keys 
+        * in the return value of getRealMimesFromData(). The names are chosen to sort correctly.
+        */
+       protected $versions = array( 'ie05', 'ie06', 'ie07', 'ie07.strict', 'ie07.nohtml' );
+
+       /**
+        * Type table with versions expanded 
+        */
+       protected $typeTable = array();
+
+       /**
+        * Build the per-version type tables.
+        *
+        * Starts from $baseTypeTable and walks $versions in order, merging in
+        * the $addedTypes entry for each version that has one. Additions are
+        * cumulative: a type added for one version stays in the table of every
+        * version listed after it, because the working copy is never reset.
+        */
+       public function __construct() {
+               $types = $this->baseTypeTable;
+               foreach ( $this->versions as $version ) {
+                       if ( isset( $this->addedTypes[$version] ) ) {
+                               foreach ( $this->addedTypes[$version] as $format => $extraTypes ) {
+                                       // Tolerate a format with no entry in the base table, so that
+                                       // array_merge() is never handed an undefined value
+                                       $existing = isset( $types[$format] ) ? $types[$format] : array();
+                                       $types[$format] = array_merge( $existing, $extraTypes );
+                               }
+                       }
+                       $this->typeTable[$version] = $types;
+               }
+       }
+
+       /**
+        * Run getMimesFromData() and pass every detected type through
+        * translateMimeType(), so that IE's idiosyncratic private type names are
+        * replaced by something other apps will understand.
+        *
+        * @param $fileName String: the file name; its extension is used for a
+        *   registry lookup when detection from the content is inconclusive
+        * @param $chunk String: the start of the file; only the first 255 bytes
+        *   are examined
+        * @param $proposed String: the MIME type proposed by the server
+        *
+        * @return Array: map of IE version to detected mime type
+        */
+       public function getRealMimesFromData( $fileName, $chunk, $proposed ) {
+               return array_map(
+                       array( $this, 'translateMimeType' ),
+                       $this->getMimesFromData( $fileName, $chunk, $proposed )
+               );
+       }
+
+       /**
+        * Map one of IE's idiosyncratic private MIME type names to its more
+        * commonly understood equivalent.
+        *
+        * @param $type String: MIME type to translate
+        *
+        * @return String: the translated type, or $type unchanged when the
+        *   translation table has no entry for it
+        */
+       public function translateMimeType( $type ) {
+               static $translations = array(
+                       'image/pjpeg' => 'image/jpeg',
+                       'image/x-png' => 'image/png',
+                       'image/x-wmf' => 'application/x-msmetafile',
+                       'image/bmp' => 'image/x-bmp',
+                       'application/x-zip-compressed' => 'application/zip',
+                       'application/x-compressed' => 'application/x-compress',
+                       'application/x-gzip-compressed' => 'application/x-gzip',
+                       'audio/mid' => 'audio/midi',
+               );
+               return isset( $translations[$type] ) ? $translations[$type] : $type;
+       }
+
+       /**
+        * Detect the MIME type once for each known IE version, without
+        * translating IE's private type names.
+        *
+        * @param $fileName String: the file name; its extension is used for a
+        *   registry lookup when detection from the content is inconclusive
+        * @param $chunk String: the start of the file; only the first 255 bytes
+        *   are examined
+        * @param $proposed String: the MIME type proposed by the server
+        *
+        * @return Array: map of IE version to detected mime type
+        */
+       public function getMimesFromData( $fileName, $chunk, $proposed ) {
+               $detected = array();
+               foreach ( $this->versions as $ieVersion ) {
+                       $detected[$ieVersion] = $this->getMimeTypeForVersion(
+                               $ieVersion, $fileName, $chunk, $proposed );
+               }
+               return $detected;
+       }
+
+       /**
+        * Get the MIME type for a given named version
+        */
+       protected function getMimeTypeForVersion( $version, $fileName, $chunk, $proposed ) {
+               // Strip text after a semicolon
+               $semiPos = strpos( $proposed, ';' );
+               if ( $semiPos !== false ) {
+                       $proposed = substr( $proposed, 0, $semiPos );
+               }
+
+               $proposedFormat = $this->getDataFormat( $version, $proposed );
+               if ( $proposedFormat == 'unknown'
+                       && $proposed != 'multipart/mixed'
+                       && $proposed != 'multipart/x-mixed-replace' )
+               {
+                       return $proposed;
+               }
+               if ( strval( $chunk ) === '' ) {
+                       return $proposed;
+               }
+
+               // Truncate chunk at 255 bytes
+               $chunk = substr( $chunk, 0, 255 );
+
+               // IE does the Check*Headers() calls last, and instead does the following image 
+               // type checks by directly looking for the magic numbers. What I do here should 
+               // have the same effect since the magic number checks are identical in both cases.
+               $result = $this->sampleData( $version, $chunk );
+               $sampleFound = $result['found'];
+               $counters = $result['counters'];
+               $binaryType = $this->checkBinaryHeaders( $version, $chunk );
+               $textType = $this->checkTextHeaders( $version, $chunk );
+
+               if ( $proposed == 'text/html' && isset( $sampleFound['html'] ) ) {
+                       return 'text/html';
+               }
+               if ( $proposed == 'image/gif' && $binaryType == 'image/gif' ) {
+                       return 'image/gif';
+               }
+               if ( ( $proposed == 'image/pjpeg' || $proposed == 'image/jpeg' )
+                       && $binaryType == 'image/pjpeg' ) 
+               {
+                       return $proposed;
+               }
+               // PNG check added in IE 7
+               if ( $version >= 'ie07'
+                       && ( $proposed == 'image/x-png' || $proposed == 'image/png' )
+                       && $binaryType == 'image/x-png' )
+               {
+                       return $proposed;
+               }
+
+               // CDF was removed in IE 7 so it won't be in $sampleFound for later versions
+               if ( isset( $sampleFound['cdf'] ) ) {
+                       return 'application/x-cdf';
+               }
+
+               // RSS and Atom were added in IE 7 so they won't be in $sampleFound for 
+               // previous versions
+               if ( isset( $sampleFound['rss'] ) ) {
+                       return 'application/rss+xml';
+               }
+               if ( isset( $sampleFound['rdf-tag'] )
+                       && isset( $sampleFound['rdf-url'] )
+                       && isset( $sampleFound['rdf-purl'] ) )
+               {
+                       return 'application/rss+xml';
+               }
+               if ( isset( $sampleFound['atom'] ) ) {
+                       return 'application/atom+xml';
+               }
+
+               if ( isset( $sampleFound['xml'] ) ) {
+                       // TODO: I'm not sure under what circumstances this flag is enabled
+                       if ( strpos( $version, 'strict' ) !== false ) {
+                               if ( $proposed == 'text/html' || $proposed == 'text/xml' ) {
+                                       return 'text/xml';
+                               }
+                       } else {
+                               return 'text/xml';
+                       }
+               }
+               if ( isset( $sampleFound['html'] ) ) {
+                       // TODO: I'm not sure under what circumstances this flag is enabled
+                       if ( strpos( $version, 'nohtml' ) !== false ) {
+                               if ( $proposed == 'text/plain' ) {
+                                       return 'text/html';
+                               }
+                       } else {
+                               return 'text/html';
+                       }
+               }
+               if ( isset( $sampleFound['xbm'] ) ) {
+                       return 'image/x-bitmap';
+               }
+               if ( isset( $sampleFound['binhex'] ) ) {
+                       return 'application/macbinhex40';
+               }
+               if ( isset( $sampleFound['scriptlet'] ) ) {
+                       if ( strpos( $version, 'strict' ) !== false ) {
+                               if ( $proposed == 'text/plain' || $proposed == 'text/scriptlet' ) {
+                                       return 'text/scriptlet';
+                               }
+                       } else {
+                               return 'text/scriptlet';
+                       }
+               }
+
+               // Freaky heuristics to determine if the data is text or binary
+               // The heuristic is of course broken for non-ASCII text
+               if ( $counters['ctrl'] != 0 && ( $counters['ff'] + $counters['low'] ) 
+                       < ( $counters['ctrl'] + $counters['high'] ) * 16 ) 
+               {
+                       $kindOfBinary = true;
+                       $type = $binaryType ? $binaryType : $textType;
+                       if ( $type === false ) {
+                               $type = 'application/octet-stream';
+                       }
+               } else {
+                       $kindOfBinary = false;
+                       $type = $textType ? $textType : $binaryType;
+                       if ( $type === false ) {
+                               $type = 'text/plain';
+                       }
+               }
+
+               // Check if the output format is ambiguous
+               // This generally means that detection failed, real types aren't ambiguous
+               $detectedFormat = $this->getDataFormat( $version, $type );
+               if ( $detectedFormat != 'ambiguous' ) {
+                       return $type;
+               }
+
+               if ( $proposedFormat != 'ambiguous' ) {
+                       // FormatAgreesWithData()
+                       if ( $proposedFormat == 'text' && !$kindOfBinary ) {
+                               return $proposed;
+                       }
+                       if ( $proposedFormat == 'binary' && $kindOfBinary ) {
+                               return $proposed;
+                       }
+                       if ( $proposedFormat == 'html' ) {
+                               return $proposed;
+                       }
+               }
+
+               // Find a MIME type by searching the registry for the file extension.
+               $dotPos = strrpos( $fileName, '.' );
+               if ( $dotPos === false ) {
+                       return $type;
+               }
+               $ext = substr( $fileName, $dotPos );
+               if ( isset( $this->registry[$ext] ) ) {
+                       return $this->registry[$ext];
+               }
+
+               // TODO: If the extension has an application registered to it, IE will return 
+               // application/octet-stream. We'll skip that, so we could erroneously 
+               // return text/plain or application/x-netcdf where application/octet-stream
+               // would be correct.
+
+               return $type;
+       }
+
+       /**
+        * Look for a text-format signature at the very beginning of the sample.
+        * Confirmed same in 5 and 7.
+        *
+        * @param $version String: IE version key; not consulted by this check
+        * @param $chunk String: leading bytes of the file being examined
+        * @return String|false: MIME type of the first signature that matches,
+        *    or false when the sample starts with none of them
+        */
+       private function checkTextHeaders( $version, $chunk ) {
+               // Signature => MIME type. Tested in this order, first hit wins.
+               $signatures = array(
+                       '%PDF' => 'application/pdf',
+                       '%!' => 'application/postscript',
+                       '{\\rtf' => 'text/richtext',
+                       'begin' => 'application/base64',
+               );
+               foreach ( $signatures as $magic => $mimeType ) {
+                       // Byte-wise, case-sensitive prefix match
+                       if ( strncmp( $chunk, $magic, strlen( $magic ) ) === 0 ) {
+                               return $mimeType;
+                       }
+               }
+               return false;
+       }
+
+       /**
+        * Look for a binary-format magic number at the beginning of the sample.
+        * The checks run in a fixed order and the first one that matches decides
+        * the result.
+        * Confirmed same in 5 and 7.
+        *
+        * @param $version String: IE version key; not consulted by this check
+        * @param $chunk String: leading bytes of the file being examined
+        * @return String|false: MIME type for the matched magic number, or false
+        *    when nothing matches
+        */
+       private function checkBinaryHeaders( $version, $chunk ) {
+               $head2 = substr( $chunk, 0, 2 );
+               $head3 = substr( $chunk, 0, 3 );
+               $head4 = substr( $chunk, 0, 4 );
+               $head5 = substr( $chunk, 0, 5 );
+               $head8 = substr( $chunk, 0, 8 );
+               // The four bytes at offset 8, used by the RIFF and FORM checks
+               $tagAt8 = substr( $chunk, 8, 4 );
+
+               // The GIF signature is compared without regard to case
+               if ( in_array( strtoupper( $head5 ), array( 'GIF87', 'GIF89' ), true ) ) {
+                       return 'image/gif';
+               }
+               if ( $head2 === "\xff\xd8" ) {
+                       // actually plain JPEG but this is what IE returns
+                       return 'image/pjpeg';
+               }
+
+               // "BM" plus four zero bytes at offsets 6 to 9
+               if ( $head2 === 'BM'
+                       && substr( $chunk, 6, 2 ) === "\000\000"
+                       && substr( $chunk, 8, 2 ) === "\000\000" )
+               {
+                       // another non-standard MIME
+                       return 'image/bmp';
+               }
+               if ( $head4 === 'RIFF' && $tagAt8 === 'WAVE' ) {
+                       return 'audio/wav';
+               }
+               // These were integer literals in IE
+               // Perhaps the author was not sure what the target endianness was
+               if ( in_array( $head4, array( ".sd\000", ".snd", "\000ds.", "dns." ), true ) ) {
+                       return 'audio/basic';
+               }
+               if ( $head3 === "MM\000" ) {
+                       return 'image/tiff';
+               }
+               if ( $head2 === 'MZ' ) {
+                       return 'application/x-msdownload';
+               }
+               if ( $head8 === "\x89PNG\x0d\x0a\x1a\x0a" ) {
+                       // [sic]
+                       return 'image/x-png';
+               }
+               // "JG", then a byte in the range 3..31 at offset 2 and a zero byte
+               // at offset 4. Needs at least five bytes of data.
+               if ( $head2 === 'JG' && strlen( $chunk ) >= 5 ) {
+                       $thirdByte = ord( $chunk[2] );
+                       if ( $thirdByte >= 3 && $thirdByte <= 31 && ord( $chunk[4] ) === 0 ) {
+                               return 'image/x-jg';
+                       }
+               }
+               // 'MROF' is 'FORM' reversed: more endian confusion?
+               if ( $head4 === 'MROF'
+                       || ( $head4 === 'FORM' && in_array( $tagAt8, array( 'AIFF', 'AIFC' ), true ) ) )
+               {
+                       return 'audio/x-aiff';
+               }
+               if ( $head4 === 'RIFF' && $tagAt8 === 'AVI ' ) {
+                       return 'video/avi';
+               }
+               if ( in_array( $head4, array( "\x00\x00\x01\xb3", "\x00\x00\x01\xba" ), true ) ) {
+                       return 'video/mpeg';
+               }
+               if ( $head4 === "\001\000\000\000" && substr( $chunk, 40, 4 ) === ' EMF' ) {
+                       return 'image/x-emf';
+               }
+               if ( $head4 === "\xd7\xcd\xc6\x9a" ) {
+                       return 'image/x-wmf';
+               }
+               if ( $head4 === "\xca\xfe\xba\xbe" ) {
+                       return 'application/java';
+               }
+               if ( $head2 === 'PK' ) {
+                       return 'application/x-zip-compressed';
+               }
+               if ( $head2 === "\x1f\x9d" ) {
+                       return 'application/x-compressed';
+               }
+               if ( $head2 === "\x1f\x8b" ) {
+                       return 'application/x-gzip-compressed';
+               }
+               // Skip redundant check for ZIP
+               if ( $head5 === "MThd\000" ) {
+                       return 'audio/mid';
+               }
+               if ( $head4 === '%PDF' ) {
+                       return 'application/pdf';
+               }
+               return false;
+       }
+
+       /**
+        * Do heuristic checks on the bulk of the data sample.
+        * Search for HTML tags.
+        *
+        * The chunk is walked one byte at a time. Every byte is tallied into
+        * exactly one counter:
+        *   - 'lf', 'cr', 'ff': bytes 0x0a, 0x0d and 0x0c respectively
+        *   - 'ctrl': any other byte below 32, except tab
+        *   - 'high': bytes of 128 and above
+        *   - 'low': tab, plus every byte from 32 to 127
+        * Only bytes from 32 to 127 go on to the signature checks; all the other
+        * bytes, tab included, are counted and then skipped.
+        *
+        * At a '<' the text following it is compared, ignoring case, with:
+        *   - '?XML' followed by ':', a space or a tab: sets 'xml'
+        *   - 'SCRIPTLET': sets 'scriptlet' and stops the scan
+        *   - any entry of $htmlTags: sets 'html'
+        *   - when $version sorts before 'ie07' as a string, 'CHANNEL': sets 'cdf'
+        *   - otherwise 'RSS' (sets 'rss', stops the scan), 'rdf:RDF' (sets
+        *     'rdf-tag') and 'FEED' (sets 'atom', stops the scan)
+        * A '<' never reaches the checks described below.
+        *
+        * At any other byte in the 32 to 127 range, the text starting at that byte
+        * is compared with:
+        *   - the RDF syntax namespace URL: sets 'rdf-url', and stops the scan if
+        *     'rdf-tag' and 'rdf-purl' are both set. Nothing in this method sets
+        *     'rdf-purl', so that early exit is never taken here.
+        *   - the RSS 1.0 purl.org URL: sets nothing, but stops the scan if
+        *     'rdf-tag' and 'rdf-url' are both set
+        *   - '#define': sets 'xbm1'
+        *   - at a '_' only: '_width' sets 'xbm2' once 'xbm1' is set; '_bits' sets
+        *     'xbm' and stops the scan once 'xbm2' is set
+        *   - 'converted with BinHex', the only case-sensitive match: sets 'binhex'
+        * A match on either URL or on '#define' skips the remaining checks for
+        * that byte.
+        *
+        * @param $version String: IE version key; only compared against 'ie07'
+        *    here. NOTE(review): presumably of the form 'ie05', 'ie06', 'ie07',
+        *    possibly with a suffix -- confirm against the caller.
+        * @param $chunk String: the data sample to scan
+        * @return Array with two members: 'found' is an array whose keys are the
+        *    flags named above (each present key maps to true), and 'counters' is
+        *    the array of byte tallies. Counters only cover the bytes visited
+        *    before the scan stopped.
+        */
+       protected function sampleData( $version, $chunk ) {
+               $found = array();
+               $counters = array(
+                       'ctrl' => 0,
+                       'high' => 0,
+                       'low' => 0,
+                       'lf' => 0,
+                       'cr' => 0,
+                       'ff' => 0
+               );
+               $htmlTags = array(
+                       'html',
+                       'head',
+                       'title',
+                       'body',
+                       'script',
+                       'a href',
+                       'pre',
+                       'img',
+                       'plaintext',
+                       'table'
+               );
+               $rdfUrl = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
+               $rdfPurl = 'http://purl.org/rss/1.0/';
+               $xbmMagic1 = '#define';
+               $xbmMagic2 = '_width';
+               $xbmMagic3 = '_bits';
+               $binhexMagic = 'converted with BinHex';
+
+               for ( $offset = 0; $offset < strlen( $chunk ); $offset++ ) {
+                       $curChar = $chunk[$offset];
+                       if ( $curChar == "\x0a" ) {
+                               $counters['lf']++;
+                               continue;
+                       } elseif ( $curChar == "\x0d" ) {
+                               $counters['cr']++;
+                               continue;
+                       } elseif ( $curChar == "\x0c" ) {
+                               $counters['ff']++;
+                               continue;
+                       } elseif ( $curChar == "\t" ) { // tab is tallied as ordinary text
+                               $counters['low']++;
+                               continue;
+                       } elseif ( ord( $curChar ) < 32 ) {
+                               $counters['ctrl']++;
+                               continue;
+                       } elseif ( ord( $curChar ) >= 128 ) {
+                               $counters['high']++;
+                               continue;
+                       }
+
+                       $counters['low']++;
+                       if ( $curChar == '<' ) {
+                               // XML
+                               $remainder = substr( $chunk, $offset + 1 ); // text after the '<'
+                               if ( !strncasecmp( $remainder, '?XML', 4 ) ) {
+                                       $nextChar = substr( $chunk, $offset + 5, 1 ); // byte right after '<?XML'
+                                       if ( $nextChar == ':' || $nextChar == ' ' || $nextChar == "\t" ) {
+                                               $found['xml'] = true;
+                                       }
+                               }
+                               // Scriptlet (JSP)
+                               if ( !strncasecmp( $remainder, 'SCRIPTLET', 9 ) ) {
+                                       $found['scriptlet'] = true;
+                                       break; // stop scanning the sample
+                               }
+                               // HTML
+                               foreach ( $htmlTags as $tag ) {
+                                       if ( !strncasecmp( $remainder, $tag, strlen( $tag ) ) ) {
+                                               $found['html'] = true;
+                                       }
+                               }
+                               // Skip broken check for additional tags (HR etc.)
+
+                               // CHANNEL replaced by RSS, RDF and FEED in IE 7 (plain string comparison on $version)
+                               if ( $version < 'ie07' ) {
+                                       if ( !strncasecmp( $remainder, 'CHANNEL', 7 ) ) {
+                                               $found['cdf'] = true;
+                                       }
+                               } else {
+                                       // RSS
+                                       if ( !strncasecmp( $remainder, 'RSS', 3 ) ) {
+                                               $found['rss'] = true;
+                                               break; // return from SampleData
+                                       }
+                                       if ( !strncasecmp( $remainder, 'rdf:RDF', 7 ) ) {
+                                               $found['rdf-tag'] = true;
+                                               // no break
+                                       }
+                                       if ( !strncasecmp( $remainder, 'FEED', 4 ) ) {
+                                               $found['atom'] = true;
+                                               break;
+                                       }
+                               }
+                               continue; // a '<' is never tested against the checks below
+                       }
+                       // Skip broken check for -->
+
+                       // RSS URL checks
+                       // For some reason both URLs must appear before it is recognised
+                       $remainder = substr( $chunk, $offset ); // text starting at the current byte
+                       if ( !strncasecmp( $remainder, $rdfUrl, strlen( $rdfUrl ) ) ) {
+                               $found['rdf-url'] = true;
+                               if ( isset( $found['rdf-tag'] )
+                                       && isset( $found['rdf-purl'] ) ) // [sic] -- 'rdf-purl' is never set in this method
+                               {
+                                       break;
+                               }
+                               continue;
+                       }
+
+                       if ( !strncasecmp( $remainder, $rdfPurl, strlen( $rdfPurl ) ) ) {
+                               if ( isset( $found['rdf-tag'] ) 
+                                       && isset( $found['rdf-url'] ) ) // [sic]
+                               {
+                                       break;
+                               }
+                               continue;
+                       }
+
+                       // XBM checks: '#define', then '_width', then '_bits', in that order
+                       if ( !strncasecmp( $remainder, $xbmMagic1, strlen( $xbmMagic1 ) ) ) {
+                               $found['xbm1'] = true;
+                               continue;
+                       }
+                       if ( $curChar == '_' ) {
+                               if ( isset( $found['xbm2'] ) ) {
+                                       if ( !strncasecmp( $remainder, $xbmMagic3, strlen( $xbmMagic3 ) ) ) {
+                                               $found['xbm'] = true;
+                                               break;
+                                       }
+                               } elseif ( isset( $found['xbm1'] ) ) {
+                                       if ( !strncasecmp( $remainder, $xbmMagic2, strlen( $xbmMagic2 ) ) ) {
+                                               $found['xbm2'] = true;
+                                       }
+                               }
+                       }
+
+                       // BinHex (case-sensitive, unlike the checks above)
+                       if ( !strncmp( $remainder, $binhexMagic, strlen( $binhexMagic ) ) ) {
+                               $found['binhex'] = true;
+                       }
+               }
+               return array( 'found' => $found, 'counters' => $counters );
+       }
+
+       /**
+        * Find which format category of the type table a MIME type belongs to.
+        *
+        * Each entry of $this->typeTable[$version] maps a format name to a list
+        * of MIME types; the name of the first list containing $type is returned.
+        *
+        * @param $version String: key into $this->typeTable
+        * @param $type String|false: MIME type to classify
+        * @return String: 'ambiguous' when $type is empty or the literal
+        *    '(null)'; otherwise the format name whose list contains $type; or
+        *    'unknown' when no list contains it or $version has no table
+        */
+       protected function getDataFormat( $version, $type ) {
+               // Normalise once so that every comparison below is string against
+               // string. A loose comparison would let values such as 0 or true
+               // match arbitrary MIME strings.
+               $type = strval( $type );
+               if ( $type === '' || $type === '(null)' ) {
+                       return 'ambiguous';
+               }
+               // An unrecognised version has no table to search. Report 'unknown'
+               // without raising an undefined-index notice.
+               if ( !isset( $this->typeTable[$version] ) ) {
+                       return 'unknown';
+               }
+               foreach ( $this->typeTable[$version] as $format => $list ) {
+                       if ( in_array( $type, $list, true ) ) {
+                               return $format;
+                       }
+               }
+               return 'unknown';
+       }
+}
+