X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FMimeMagic.php;h=cde30bc009978117c0550ec0d7558a7737f1de69;hb=92ec8a2bec1c34474e5450eec34203c19f253908;hp=77a306214a679070911008cf697a1fb741b60efd;hpb=d3ddd6efdbfefa8065a2abcdbcb019b3effd209b;p=lhc%2Fweb%2Fwiklou.git

diff --git a/includes/MimeMagic.php b/includes/MimeMagic.php
index 77a306214a..cde30bc009 100644
--- a/includes/MimeMagic.php
+++ b/includes/MimeMagic.php
@@ -1,23 +1,42 @@
 <?php
-/** Module defining helper functions for detecting and dealing with mime types.
+/**
+ * Module defining helper functions for detecting and dealing with mime types.
  *
+ * @file
  */
 
- /** Defines a set of well known mime types
+/**
+ * Defines a set of well known mime types
  * This is used as a fallback to mime.types files.
  * An extensive list of well known mime types is provided by
  * the file mime.types in the includes directory.
  */
 define('MM_WELL_KNOWN_MIME_TYPES',<<<END_STRING
-application/ogg ogg ogm
+application/ogg ogx ogg ogm ogv oga spx
 application/pdf pdf
+application/vnd.oasis.opendocument.chart odc
+application/vnd.oasis.opendocument.chart-template otc
+application/vnd.oasis.opendocument.formula odf
+application/vnd.oasis.opendocument.formula-template otf
+application/vnd.oasis.opendocument.graphics odg
+application/vnd.oasis.opendocument.graphics-template otg
+application/vnd.oasis.opendocument.image odi
+application/vnd.oasis.opendocument.image-template oti
+application/vnd.oasis.opendocument.presentation odp
+application/vnd.oasis.opendocument.presentation-template otp
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.spreadsheet-template ots
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.text-template ott
+application/vnd.oasis.opendocument.text-master otm
+application/vnd.oasis.opendocument.text-web oth
 application/x-javascript js
 application/x-shockwave-flash swf
 audio/midi mid midi kar
 audio/mpeg mpga mpa mp2 mp3
 audio/x-aiff aif aiff aifc
 audio/x-wav wav
-audio/ogg ogg
+audio/ogg oga spx ogg
 image/x-bmp bmp
 image/gif gif
 image/jpeg jpeg jpg jpe
@@ -29,18 +48,35 @@ image/x-portable-pixmap ppm
 image/x-xcf xcf
 text/plain txt
 text/html html htm
-video/ogg ogm ogg
+video/ogg ogv ogm ogg
 video/mpeg mpg mpeg
 END_STRING
 );
 
- /** Defines a set of well known mime info entries
+/**
+ * Defines a set of well known mime info entries
  * This is used as a fallback to mime.info files.
  * An extensive list of well known mime types is provided by
  * the file mime.info in the includes directory.
  */
 define('MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
 application/pdf [OFFICE]
+application/vnd.oasis.opendocument.chart [OFFICE]
+application/vnd.oasis.opendocument.chart-template [OFFICE]
+application/vnd.oasis.opendocument.formula [OFFICE]
+application/vnd.oasis.opendocument.formula-template [OFFICE]
+application/vnd.oasis.opendocument.graphics [OFFICE]
+application/vnd.oasis.opendocument.graphics-template [OFFICE]
+application/vnd.oasis.opendocument.image [OFFICE]
+application/vnd.oasis.opendocument.image-template [OFFICE]
+application/vnd.oasis.opendocument.presentation [OFFICE]
+application/vnd.oasis.opendocument.presentation-template [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
+application/vnd.oasis.opendocument.text [OFFICE]
+application/vnd.oasis.opendocument.text-template [OFFICE]
+application/vnd.oasis.opendocument.text-master [OFFICE]
+application/vnd.oasis.opendocument.text-web [OFFICE]
 text/javascript application/x-javascript [EXECUTABLE]
 application/x-shockwave-flash [MULTIMEDIA]
 audio/midi [AUDIO]
@@ -48,7 +84,7 @@ audio/x-aiff [AUDIO]
 audio/x-wav [AUDIO]
 audio/mp3 audio/mpeg [AUDIO]
 application/ogg audio/ogg video/ogg [MULTIMEDIA]
-image/x-bmp image/bmp [BITMAP]
+image/x-bmp image/x-ms-bmp image/bmp [BITMAP]
 image/gif [BITMAP]
 image/jpeg [BITMAP]
 image/png [BITMAP]
@@ -70,10 +106,10 @@ END_STRING
 global $wgLoadFileinfoExtension;
 
 if ($wgLoadFileinfoExtension) {
-	if(!extension_loaded('fileinfo')) dl('fileinfo.' . PHP_SHLIB_SUFFIX);
+	wfDl( 'fileinfo' );
 }
 
-/** 
+/**
  * Implements functions related to mime types such as detection and mapping to
  * file extension.
  *
@@ -86,19 +122,23 @@ class MimeMagic {
 	* Mapping of media types to arrays of mime types.
 	* This is used by findMediaType and getMediaType, respectively
 	*/
-	var $mMediaTypes= NULL;
+	var $mMediaTypes= null;
 
 	/** Map of mime type aliases
 	*/
-	var $mMimeTypeAliases= NULL;
+	var $mMimeTypeAliases= null;
 
 	/** map of mime types to file extensions (as a space seprarated list)
 	*/
-	var $mMimeToExt= NULL;
+	var $mMimeToExt= null;
 
 	/** map of file extensions types to mime types (as a space seprarated list)
 	*/
-	var $mExtToMime= NULL;
+	var $mExtToMime= null;
+
+	/** IEContentAnalyzer instance
+	 */
+	var $mIEAnalyzer;
 
 	/** The singleton instance
 	 */
@@ -120,7 +160,7 @@ class MimeMagic {
 		if ( $wgMimeTypeFile == 'includes/mime.types' ) {
 			$wgMimeTypeFile = "$IP/$wgMimeTypeFile";
 		}
-		
+
 		if ( $wgMimeTypeFile ) {
 			if ( is_file( $wgMimeTypeFile ) and is_readable( $wgMimeTypeFile ) ) {
 				wfDebug( __METHOD__.": loading mime types from $wgMimeTypeFile\n" );
@@ -292,7 +332,7 @@ class MimeMagic {
 	*/
 	function guessTypesForExtension( $ext ) {
 		$m = $this->getTypesForExtension( $ext );
-		if ( is_null( $m ) ) return NULL;
+		if ( is_null( $m ) ) return null;
 
 		$m = trim( $m );
 		$m = preg_replace( '/\s.*$/', '', $m );
@@ -309,7 +349,7 @@ class MimeMagic {
 		$ext = $this->getExtensionsForType( $mime );
 
 		if ( !$ext ) {
-			return NULL;  //unknown
+			return null;  //unknown
 		}
 
 		$ext = explode( ' ', $ext );
@@ -358,28 +398,84 @@ class MimeMagic {
 			'bmp', 'tiff', 'tif', 'jpc', 'jp2',
 			'jpx', 'jb2', 'swc', 'iff', 'wbmp',
 			'xbm',
-			
+
 			// Formats we recognize magic numbers for
-			'djvu', 'ogg', 'mid', 'pdf', 'wmf', 'xcf',
-			
+			'djvu', 'ogx', 'ogg', 'ogv', 'oga', 'spx',
+			'mid', 'pdf', 'wmf', 'xcf', 'webm', 'mkv', 'mka',
+
 			// XML formats we sure hope we recognize reliably
 			'svg',
 		);
 		return in_array( strtolower( $extension ), $types );
 	}
 
+	/** improves a mime type using the file extension. Some file formats are very generic,
+	* so their mime type is not very meaningful. A more useful mime type can be derived 
+	* by looking at the file extension. Typically, this method would be called on the 
+	* result of guessMimeType().
+	* 
+	* Currently, this method does the following:
+	*
+	* If $mime is "unknown/unknown" and isRecognizableExtension( $ext ) returns false,
+	* return the result of guessTypesForExtension($ext). 
+	*
+	* If $mime is "application/x-opc+zip" and isMatchingExtension( $ext, $mime )
+	* gives true, return the result of guessTypesForExtension($ext); otherwise "application/zip".
+	*
+	* @param $mime String: the mime type, typically guessed from a file's content.
+	* @param $ext String: the file extension, as taken from the file name
+	*
+	* @return string the mime type
+	*/
+	function improveTypeFromExtension( $mime, $ext ) {
+		if ( $mime === "unknown/unknown" ) {
+			if( $this->isRecognizableExtension( $ext ) ) {
+				wfDebug( __METHOD__. ": refusing to guess mime type for .$ext file, " .
+					"we should have recognized it\n" );
+			} else {
+				/* Not something we can detect, so simply 
+				* trust the file extension */
+				$mime = $this->guessTypesForExtension( $ext );
+			}
+		}
+		else if ( $mime === "application/x-opc+zip" ) {
+			if ( $this->isMatchingExtension( $ext, $mime ) ) {
+				/* A known file extension for an OPC file,
+				* find the proper mime type for that file extension */
+				$mime = $this->guessTypesForExtension( $ext );
+			} else {
+				wfDebug( __METHOD__. ": refusing to guess better type for $mime file, " . 
+					".$ext is not a known OPC extension.\n" );
+				$mime = "application/zip";
+			}
+		}
+
+		if ( isset( $this->mMimeTypeAliases[$mime] ) ) {
+			$mime = $this->mMimeTypeAliases[$mime];
+		}
+
+		wfDebug(__METHOD__.": improved mime type for .$ext: $mime\n");
+		return $mime;
+	}
 
 	/** mime type detection. This uses detectMimeType to detect the mime type of the file,
 	* but applies additional checks to determine some well known file formats that may be missed
-	* or misinterpreter by the default mime detection (namely xml based formats like XHTML or SVG).
+	* or misinterpreted by the default mime detection (namely XML based formats like XHTML or SVG,
+	* as well as ZIP based formats like OPC/ODF files).
 	*
-	* @param string $file The file to check
-	* @param mixed $ext The file extension, or true to extract it from the filename. 
-	*                   Set it to false to ignore the extension.
+	* @param $file String: the file to check
+	* @param $ext Mixed: the file extension, or true (default) to extract it from the filename.
+	*             Set it to false to ignore the extension. DEPRECATED! Set to false, use 
+	*             improveTypeFromExtension($mime, $ext) later to improve mime type.
 	*
 	* @return string the mime type of $file
 	*/
 	function guessMimeType( $file, $ext = true ) {
+		if( $ext ) { # TODO: make $ext default to false. Or better, remove it.
+			wfDebug( __METHOD__.": WARNING: use of the \$ext parameter is deprecated. " .
+				"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
+		}
+
 		$mime = $this->doGuessMimeType( $file, $ext );
 
 		if( !$mime ) {
@@ -391,38 +487,42 @@ class MimeMagic {
 			$mime = $this->mMimeTypeAliases[$mime];
 		}
 
-		wfDebug(__METHOD__.": final mime type of $file: $mime\n");
+		wfDebug(__METHOD__.": guessed mime type of $file: $mime\n");
 		return $mime;
 	}
-	
-	function doGuessMimeType( $file, $ext = true ) {
+
+	private function doGuessMimeType( $file, $ext ) { # TODO: remove $ext param
 		// Read a chunk of the file
 		wfSuppressWarnings();
 		$f = fopen( $file, "rt" );
 		wfRestoreWarnings();
 		if( !$f ) return "unknown/unknown";
 		$head = fread( $f, 1024 );
+		fseek( $f, -65558, SEEK_END );
+		$tail = fread( $f, 65558 ); // 65558 = maximum size of a zip EOCDR
 		fclose( $f );
 
+		wfDebug( __METHOD__ . ": analyzing head and tail of $file for magic numbers.\n" );
+
 		// Hardcode a few magic number checks...
 		$headers = array(
 			// Multimedia...
 			'MThd'             => 'audio/midi',
 			'OggS'             => 'application/ogg',
-			
+
 			// Image formats...
 			// Note that WMF may have a bare header, no magic number.
 			"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
 			"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
-			'PDF%'             => 'application/pdf',
+			'%PDF'             => 'application/pdf',
 			'gimp xcf'         => 'image/x-xcf',
-			
+
 			// Some forbidden fruit...
 			'MZ'               => 'application/octet-stream', // DOS/Windows executable
 			"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
 			"\x7fELF"          => 'application/octet-stream', // ELF binary
 		);
-		
+
 		foreach( $headers as $magic => $candidate ) {
 			if( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
 				wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
@@ -430,17 +530,37 @@ class MimeMagic {
 			}
 		}
 
+		/* Look for WebM and Matroska files */
+		if( strncmp( $head, pack( "C4", 0x1a, 0x45, 0xdf, 0xa3 ), 4 ) == 0 ) {
+			$doctype = strpos( $head, "\x42\x82" );
+			if( $doctype ) {
+				// Next byte is datasize, then data (a size wider than 1 byte would come from a very stupid muxer)
+				$data = substr($head, $doctype+3, 8);
+				if( strncmp( $data, "matroska", 8 ) == 0 ) {
+					wfDebug( __METHOD__ . ": recognized file as video/x-matroska\n" );
+					return "video/x-matroska";
+				} else if ( strncmp( $data, "webm", 4 ) == 0 ) {
+					wfDebug( __METHOD__ . ": recognized file as video/webm\n" );
+					return "video/webm";
+				}
+			}
+			wfDebug( __METHOD__ . ": unknown EBML file\n" );
+			return "unknown/unknown";
+		}
+
 		/*
-		 * look for PHP
-		 * Check for this before HTML/XML...
-		 * Warning: this is a heuristic, and won't match a file with a lot of non-PHP before.
-		 * It will also match text files which could be PHP. :)
+		 * Look for PHP.  Check for this before HTML/XML...  Warning: this is a
+		 * heuristic, and won't match a file with a lot of non-PHP before.  It
+		 * will also match text files which could be PHP. :)
+		 *
+		 * FIXME: For this reason, the check is probably useless -- an attacker
+		 * could almost certainly just pad the file with a lot of nonsense to
+		 * circumvent the check in any case where it would be a security
+		 * problem.  On the other hand, it causes harmful false positives (bug
+		 * 16583).  The heuristic has been cut down to exclude three-character
+		 * strings like "<? ", but should it be axed completely?
 		 */
 		if( ( strpos( $head, '<?php' ) !== false ) ||
-		    ( strpos( $head, '<? ' ) !== false ) ||
-		    ( strpos( $head, "<?\n" ) !== false ) ||
-		    ( strpos( $head, "<?\t" ) !== false ) ||
-		    ( strpos( $head, "<?=" ) !== false ) ||
 
 		    ( strpos( $head, "<\x00?\x00p\x00h\x00p" ) !== false ) ||
 		    ( strpos( $head, "<\x00?\x00 " ) !== false ) ||
@@ -451,82 +571,24 @@ class MimeMagic {
 			wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
 			return "application/x-php";
 		}
-		
+
 		/*
 		 * look for XML formats (XHTML and SVG)
 		 */
-		$xml_type = NULL;
-		if ( substr( $head, 0, 5 ) == "<?xml" ) {
-			$xml_type = "ASCII";
-		} elseif ( substr( $head, 0, 8 ) == "\xef\xbb\xbf<?xml") {
-			$xml_type = "UTF-8";
-		} elseif ( substr( $head, 0, 12 ) == "\xfe\xff\x00<\x00?\x00x\x00m\x00l" ) {
-			$xml_type = "UTF-16BE";
-		} elseif ( substr( $head, 0, 12 ) == "\xff\xfe<\x00?\x00x\x00m\x00l\x00") {
-			$xml_type = "UTF-16LE";
-		} else {
-			/*
-			echo "WARNING: Undetected xml_type ...\n";
-			for( $i = 0; $i < 10; $i++ ) {
-				$c = ord( $head{$i} );
-				if( $c < 32 || $c > 126 ) {
-					printf( "\\x%02x", $c );
-				} else {
-					print $head{$i};
-				}
-			}
-			echo "\n";
-			*/
-		}
-
-		if( $xml_type == 'UTF-16BE' || $xml_type == 'UTF-16LE' ) {
-			// Quick and dirty fold down to ASCII!
-			$pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' );
-			$chars = unpack( $pack[$xml_type], substr( $head, 2 ) );
-			$head = '';
-			foreach( $chars as $codepoint ) {
-				if( $codepoint < 128 ) {
-					$head .= chr( $codepoint );
-				} else {
-					$head .= '?';
-				}
-			}
-		}
-
-		$match = array();
-		$doctype = "";
-		$tag = "";
-
-		if ( preg_match( '%<!DOCTYPE\s+[\w-]+\s+PUBLIC\s+["'."'".'"](.*?)["'."'".'"].*>%siD', 
-			$head, $match ) ) {
-				$doctype = $match[1];
-			}
-		
-		if( $xml_type || $doctype ) {
-			if ( preg_match( '%<(\w+)\b%si', $head, $match ) ) {
-				$tag = $match[1];
-			}
-
-			#print "<br>ANALYSING $file: doctype= $doctype; tag= $tag<br>";
-
-			if ( strpos( $doctype, "-//W3C//DTD SVG" ) === 0 ) {
-				return "image/svg+xml";
-			} elseif ( $tag === "svg" ) {
-				return "image/svg+xml";
-			} elseif ( strpos( $doctype, "-//W3C//DTD XHTML" ) === 0 ) {
-				return "text/html";
-			} elseif ( $tag === "html" ) {
-				return "text/html";
+		$xml = new XmlTypeCheck( $file );
+		if( $xml->wellFormed ) {
+			global $wgXMLMimeTypes;
+			if( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) {
+				return $wgXMLMimeTypes[$xml->getRootElement()];
 			} else {
-				/// Fixme -- this would be the place to allow additional XML type checks
-				return "application/xml";
+				return 'application/xml';
 			}
 		}
 
 		/*
 		 * look for shell scripts
 		 */
-		$script_type = NULL;
+		$script_type = null;
 
 		# detect by shebang
 		if ( substr( $head, 0, 2) == "#!" ) {
@@ -541,7 +603,17 @@ class MimeMagic {
 
 		if ( $script_type ) {
 			if ( $script_type !== "UTF-8" && $script_type !== "ASCII") {
-				$head = iconv( $script_type, "ASCII//IGNORE", $head);
+				// Quick and dirty fold down to ASCII!
+				$pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' );
+				$chars = unpack( $pack[$script_type], substr( $head, 2 ) );
+				$head = '';
+				foreach( $chars as $codepoint ) {
+					if( $codepoint < 128 ) {
+						$head .= chr( $codepoint );
+					} else {
+						$head .= '?';
+					}
+				}
 			}
 
 			$match = array();
@@ -552,17 +624,21 @@ class MimeMagic {
 				return $mime;
 			}
 		}
-		
+
+		// Check for ZIP variants (before getimagesize)
+		if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
+			wfDebug( __METHOD__.": ZIP header present in $file\n" );
+			return $this->detectZipType( $head, $tail, $ext );
+		}
+
 		wfSuppressWarnings();
 		$gis = getimagesize( $file );
 		wfRestoreWarnings();
-		
+
 		if( $gis && isset( $gis['mime'] ) ) {
 			$mime = $gis['mime'];
 			wfDebug( __METHOD__.": getimagesize detected $file as $mime\n" );
 			return $mime;
-		} else {
-			return false;
 		}
 
 		// Also test DjVu
@@ -571,27 +647,133 @@ class MimeMagic {
 			wfDebug( __METHOD__.": detected $file as image/vnd.djvu\n" );
 			return 'image/vnd.djvu';
 		}
+
+		return false;
+	}
+	
+	/**
+	 * Detect application-specific file type of a given ZIP file from its
+	 * header data.  Currently works for OpenDocument and OpenXML types...
+	 * If the type cannot be determined, returns 'application/zip'.
+	 *
+	 * @param $header String: some reasonably-sized chunk of file header
+	 * @param $tail   String: the tail of the file
+	 * @param $ext Mixed: the file extension, or true to extract it from the filename.
+	 *             Set it to false (default) to ignore the extension. DEPRECATED! Set to false, 
+	 *             use improveTypeFromExtension($mime, $ext) later to improve mime type.
+	 *
+	 * @return string
+	 */
+	function detectZipType( $header, $tail = null, $ext = false ) {
+		if( $ext ) { # TODO: remove $ext param
+			wfDebug( __METHOD__.": WARNING: use of the \$ext parameter is deprecated. " .
+				"Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
+		}
+
+		$mime = 'application/zip';
+		$opendocTypes = array(
+			'chart-template',
+			'chart',
+			'formula-template',
+			'formula',
+			'graphics-template',
+			'graphics',
+			'image-template',
+			'image',
+			'presentation-template',
+			'presentation',
+			'spreadsheet-template',
+			'spreadsheet',
+			'text-template',
+			'text-master',
+			'text-web',
+			'text' );
+
+		// http://lists.oasis-open.org/archives/office/200505/msg00006.html
+		$types = '(?:' . implode( '|', $opendocTypes ) . ')';
+		$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
+
+		$openxmlRegex = "/^\[Content_Types\].xml/";
+
+		if( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
+			$mime = $matches[1];
+			wfDebug( __METHOD__.": detected $mime from ZIP archive\n" );
+		} elseif( preg_match( $openxmlRegex, substr( $header, 30 ) ) ) {
+			$mime = "application/x-opc+zip";
+			# TODO: remove the block below, as soon as improveTypeFromExtension is used everywhere 
+			if( $ext !== true && $ext !== false ) { 
+				/** This is the mode used by getPropsFromPath
+				* These mime types are stored in the database, where we don't really want
+				* x-opc+zip, because we use it only for internal purposes
+				*/
+				if( $this->isMatchingExtension( $ext, $mime) ) {
+					/* A known file extension for an OPC file,
+					* find the proper mime type for that file extension */
+					$mime = $this->guessTypesForExtension( $ext );
+				} else {
+					$mime = "application/zip";
+				}
+			}
+			wfDebug( __METHOD__.": detected an Open Packaging Conventions archive: $mime\n" );
+		} else if( substr( $header, 0, 8 ) == "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" && 
+				($headerpos = strpos( $tail, "PK\x03\x04" ) ) !== false &&
+				preg_match( $openxmlRegex, substr( $tail, $headerpos + 30 ) ) ) {
+			if( substr( $header, 512, 4) == "\xEC\xA5\xC1\x00" ) {
+				$mime = "application/msword";
+			} 
+			switch( substr( $header, 512, 6) ) {
+				case "\xEC\xA5\xC1\x00\x0E\x00":
+				case "\xEC\xA5\xC1\x00\x1C\x00":
+				case "\xEC\xA5\xC1\x00\x43\x00":
+					$mime = "application/vnd.ms-powerpoint";
+					break;
+				case "\xFD\xFF\xFF\xFF\x10\x00":
+				case "\xFD\xFF\xFF\xFF\x1F\x00":
+				case "\xFD\xFF\xFF\xFF\x22\x00":
+				case "\xFD\xFF\xFF\xFF\x23\x00":
+				case "\xFD\xFF\xFF\xFF\x28\x00":
+				case "\xFD\xFF\xFF\xFF\x29\x00":
+				case "\xFD\xFF\xFF\xFF\x10\x02":
+				case "\xFD\xFF\xFF\xFF\x1F\x02":
+				case "\xFD\xFF\xFF\xFF\x22\x02":
+				case "\xFD\xFF\xFF\xFF\x23\x02":
+				case "\xFD\xFF\xFF\xFF\x28\x02":
+				case "\xFD\xFF\xFF\xFF\x29\x02":
+					$mime = "application/vnd.msexcel";
+					break;
+			}
+
+			wfDebug( __METHOD__.": detected a MS Office document with OPC trailer\n");
+		} else {
+			wfDebug( __METHOD__.": unable to identify type of ZIP archive\n" );
+		}
+		return $mime;
 	}
 
 	/** Internal mime type detection, please use guessMimeType() for application code instead.
 	* Detection is done using an external program, if $wgMimeDetectorCommand is set.
 	* Otherwise, the fileinfo extension and mime_content_type are tried (in this order), if they are available.
-	* If the dections fails and $ext is not false, the mime type is guessed from the file extension, using 
+	* If the detection fails and $ext is not false, the mime type is guessed from the file extension, using
 	* guessTypesForExtension.
 	* If the mime type is still unknown, getimagesize is used to detect the mime type if the file is an image.
 	* If no mime type can be determined, this function returns "unknown/unknown".
 	*
-	* @param string $file The file to check
-	* @param mixed $ext The file extension, or true to extract it from the filename. 
-	*                   Set it to false to ignore the extension.
+	* @param $file String: the file to check
+	* @param $ext Mixed: the file extension, or true (default) to extract it from the filename.
+	*             Set it to false to ignore the extension. DEPRECATED! Set to false, use 
+	*             improveTypeFromExtension($mime, $ext) later to improve mime type.
 	*
 	* @return string the mime type of $file
 	* @access private
 	*/
-	function detectMimeType( $file, $ext = true ) {
+	private function detectMimeType( $file, $ext = true ) {
 		global $wgMimeDetectorCommand;
 
-		$m = NULL;
+		if( $ext ) { # TODO:  make $ext default to false. Or better, remove it.
+			wfDebug( __METHOD__.": WARNING: use of the \$ext parameter is deprecated. Use improveTypeFromExtension(\$mime, \$ext) instead.\n" );
+		}
+
+		$m = null;
 		if ( $wgMimeDetectorCommand ) {
 			$fn = wfEscapeShellArg( $file );
 			$m = `$wgMimeDetectorCommand $fn`;
@@ -638,7 +820,7 @@ class MimeMagic {
 			$m = strtolower( $m );
 
 			if ( strpos( $m, 'unknown' ) !== false ) {
-				$m = NULL;
+				$m = null;
 			} else {
 				wfDebug( __METHOD__.": magic mime type of $file: $m\n" );
 				return $m;
@@ -677,13 +859,13 @@ class MimeMagic {
 	* @todo analyse file if need be
 	* @todo look at multiple extension, separately and together.
 	*
-	* @param string $path full path to the image file, in case we have to look at the contents
+	* @param $path String: full path to the image file, in case we have to look at the contents
 	*        (if null, only the mime type is used to determine the media type code).
-	* @param string $mime mime type. If null it will be guessed using guessMimeType.
+	* @param $mime String: mime type. If null it will be guessed using guessMimeType.
 	*
 	* @return (int?string?) a value to be used with the MEDIATYPE_xxx constants.
 	*/
-	function getMediaType( $path = NULL, $mime = NULL ) {
+	function getMediaType( $path = null, $mime = null ) {
 		if( !$mime && !$path ) return MEDIATYPE_UNKNOWN;
 
 		# If mime type is unknown, guess it
@@ -716,7 +898,7 @@ class MimeMagic {
 		}
 
 		# Check for entry for file extension
-		$e = NULL;
+		$e = null;
 		if ( $path ) {
 			$i = strrpos( $path, '.' );
 			$e = strtolower( $i ? substr( $path, $i + 1 ) : '' );
@@ -755,7 +937,7 @@ class MimeMagic {
 			if ( !$m ) return MEDIATYPE_UNKNOWN;
 
 			$m = explode( ' ', $m );
-		} else { 
+		} else {
 			# Normalize mime type
 			if ( isset( $this->mMimeTypeAliases[$extMime] ) ) {
 				$extMime = $this->mMimeTypeAliases[$extMime];
@@ -774,6 +956,27 @@ class MimeMagic {
 
 		return MEDIATYPE_UNKNOWN;
 	}
-}
 
+	/**
+	 * Get the MIME types that various versions of Internet Explorer would 
+	 * detect from a chunk of the content.
+	 *
+	 * @param $fileName String: the file name (unused at present)
+	 * @param $chunk String: the first 256 bytes of the file
+	 * @param $proposed String: the MIME type proposed by the server
+	 */
+	public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
+		$ca = $this->getIEContentAnalyzer();
+		return $ca->getRealMimesFromData( $fileName, $chunk, $proposed );
+	}
 
+	/**
+	 * Get a cached instance of IEContentAnalyzer
+	 */
+	protected function getIEContentAnalyzer() {
+		if ( is_null( $this->mIEAnalyzer ) ) {
+			$this->mIEAnalyzer = new IEContentAnalyzer;
+		}
+		return $this->mIEAnalyzer;
+	}
+}