Hide title if DELETED_ACTION is on, but don't worry about type/action, which isn...
[lhc/web/wiklou.git] / includes / MimeMagic.php
index 90009f5..d52de99 100644 (file)
@@ -9,8 +9,24 @@
  * the file mime.types in the includes directory.
  */
 define('MM_WELL_KNOWN_MIME_TYPES',<<<END_STRING
-application/ogg ogg ogm
+application/ogg ogg ogm ogv
 application/pdf pdf
+application/vnd.oasis.opendocument.chart odc
+application/vnd.oasis.opendocument.chart-template otc
+application/vnd.oasis.opendocument.formula odf
+application/vnd.oasis.opendocument.formula-template otf
+application/vnd.oasis.opendocument.graphics odg
+application/vnd.oasis.opendocument.graphics-template otg
+application/vnd.oasis.opendocument.image odi
+application/vnd.oasis.opendocument.image-template oti
+application/vnd.oasis.opendocument.presentation odp
+application/vnd.oasis.opendocument.presentation-template otp
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.spreadsheet-template ots
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.text-template ott
+application/vnd.oasis.opendocument.text-master otm
+application/vnd.oasis.opendocument.text-web oth
 application/x-javascript js
 application/x-shockwave-flash swf
 audio/midi mid midi kar
@@ -29,7 +45,7 @@ image/x-portable-pixmap ppm
 image/x-xcf xcf
 text/plain txt
 text/html html htm
-video/ogg ogm ogg
+video/ogg ogm ogg ogv
 video/mpeg mpg mpeg
 END_STRING
 );
@@ -41,6 +57,22 @@ END_STRING
  */
 define('MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
 application/pdf [OFFICE]
+application/vnd.oasis.opendocument.chart [OFFICE]
+application/vnd.oasis.opendocument.chart-template [OFFICE]
+application/vnd.oasis.opendocument.formula [OFFICE]
+application/vnd.oasis.opendocument.formula-template [OFFICE]
+application/vnd.oasis.opendocument.graphics [OFFICE]
+application/vnd.oasis.opendocument.graphics-template [OFFICE]
+application/vnd.oasis.opendocument.image [OFFICE]
+application/vnd.oasis.opendocument.image-template [OFFICE]
+application/vnd.oasis.opendocument.presentation [OFFICE]
+application/vnd.oasis.opendocument.presentation-template [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
+application/vnd.oasis.opendocument.text [OFFICE]
+application/vnd.oasis.opendocument.text-template [OFFICE]
+application/vnd.oasis.opendocument.text-master [OFFICE]
+application/vnd.oasis.opendocument.text-web [OFFICE]
 text/javascript application/x-javascript [EXECUTABLE]
 application/x-shockwave-flash [MULTIMEDIA]
 audio/midi [AUDIO]
@@ -73,7 +105,7 @@ if ($wgLoadFileinfoExtension) {
        if(!extension_loaded('fileinfo')) dl('fileinfo.' . PHP_SHLIB_SUFFIX);
 }
 
-/** 
+/**
  * Implements functions related to mime types such as detection and mapping to
  * file extension.
  *
@@ -100,6 +132,10 @@ class MimeMagic {
        */
        var $mExtToMime= NULL;
 
+       /** IEContentAnalyzer instance
+        */
+       var $mIEAnalyzer;
+
        /** The singleton instance
         */
        private static $instance;
@@ -120,7 +156,7 @@ class MimeMagic {
                if ( $wgMimeTypeFile == 'includes/mime.types' ) {
                        $wgMimeTypeFile = "$IP/$wgMimeTypeFile";
                }
-               
+
                if ( $wgMimeTypeFile ) {
                        if ( is_file( $wgMimeTypeFile ) and is_readable( $wgMimeTypeFile ) ) {
                                wfDebug( __METHOD__.": loading mime types from $wgMimeTypeFile\n" );
@@ -358,10 +394,10 @@ class MimeMagic {
                        'bmp', 'tiff', 'tif', 'jpc', 'jp2',
                        'jpx', 'jb2', 'swc', 'iff', 'wbmp',
                        'xbm',
-                       
+
                        // Formats we recognize magic numbers for
-                       'djvu', 'ogg', 'mid', 'pdf', 'wmf', 'xcf',
-                       
+                       'djvu', 'ogg', 'ogv', 'mid', 'pdf', 'wmf', 'xcf',
+
                        // XML formats we sure hope we recognize reliably
                        'svg',
                );
@@ -374,7 +410,7 @@ class MimeMagic {
        * or misinterpreter by the default mime detection (namely xml based formats like XHTML or SVG).
        *
        * @param string $file The file to check
-       * @param mixed $ext The file extension, or true to extract it from the filename. 
+       * @param mixed $ext The file extension, or true to extract it from the filename.
        *                   Set it to false to ignore the extension.
        *
        * @return string the mime type of $file
@@ -394,7 +430,7 @@ class MimeMagic {
                wfDebug(__METHOD__.": final mime type of $file: $mime\n");
                return $mime;
        }
-       
+
        function doGuessMimeType( $file, $ext = true ) {
                // Read a chunk of the file
                wfSuppressWarnings();
@@ -402,6 +438,8 @@ class MimeMagic {
                wfRestoreWarnings();
                if( !$f ) return "unknown/unknown";
                $head = fread( $f, 1024 );
+               fseek( $f, -65558, SEEK_END );
+               $tail = fread( $f, 65558 ); // 65558 = maximum size of a zip EOCDR
                fclose( $f );
 
                // Hardcode a few magic number checks...
@@ -409,20 +447,20 @@ class MimeMagic {
                        // Multimedia...
                        'MThd'             => 'audio/midi',
                        'OggS'             => 'application/ogg',
-                       
+
                        // Image formats...
                        // Note that WMF may have a bare header, no magic number.
                        "\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
                        "\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
-                       'PDF%'             => 'application/pdf',
+                       '%PDF'             => 'application/pdf',
                        'gimp xcf'         => 'image/x-xcf',
-                       
+
                        // Some forbidden fruit...
                        'MZ'               => 'application/octet-stream', // DOS/Windows executable
                        "\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
                        "\x7fELF"          => 'application/octet-stream', // ELF binary
                );
-               
+
                foreach( $headers as $magic => $candidate ) {
                        if( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
                                wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
@@ -451,51 +489,17 @@ class MimeMagic {
                        wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
                        return "application/x-php";
                }
-               
+
                /*
                 * look for XML formats (XHTML and SVG)
                 */
-               $xml_type = NULL;
-               if ( substr( $head, 0, 5 ) == "<?xml" ) {
-                       $xml_type = "ASCII";
-               } elseif ( substr( $head, 0, 8 ) == "\xef\xbb\xbf<?xml") {
-                       $xml_type = "UTF-8";
-               } elseif ( substr( $head, 0, 10 ) == "\xfe\xff\x00<\x00?\x00x\x00m\x00l" ) {
-                       $xml_type = "UTF-16BE";
-               } elseif ( substr( $head, 0, 10 ) == "\xff\xfe<\x00?\x00x\x00m\x00l\x00") {
-                       $xml_type = "UTF-16LE";
-               }
-
-               if ( $xml_type ) {
-                       if ( $xml_type !== "UTF-8" && $xml_type !== "ASCII" ) {
-                               $head = iconv( $xml_type, "ASCII//IGNORE", $head );
-                       }
-
-                       $match = array();
-                       $doctype = "";
-                       $tag = "";
-
-                       if ( preg_match( '%<!DOCTYPE\s+[\w-]+\s+PUBLIC\s+["'."'".'"](.*?)["'."'".'"].*>%sim', 
-                               $head, $match ) ) {
-                                       $doctype = $match[1];
-                               }
-                       if ( preg_match( '%<(\w+)\b%si', $head, $match ) ) {
-                               $tag = $match[1];
-                       }
-
-                       #print "<br>ANALYSING $file: doctype= $doctype; tag= $tag<br>";
-
-                       if ( strpos( $doctype, "-//W3C//DTD SVG" ) === 0 ) {
-                               return "image/svg+xml";
-                       } elseif ( $tag === "svg" ) {
-                               return "image/svg+xml";
-                       } elseif ( strpos( $doctype, "-//W3C//DTD XHTML" ) === 0 ) {
-                               return "text/html";
-                       } elseif ( $tag === "html" ) {
-                               return "text/html";
+               $xml = new XmlTypeCheck( $file );
+               if( $xml->wellFormed ) {
+                       global $wgXMLMimeTypes;
+                       if( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) {
+                               return $wgXMLMimeTypes[$xml->getRootElement()];
                        } else {
-                               /// Fixme -- this would be the place to allow additional XML type checks
-                               return "application/xml";
+                               return 'application/xml';
                        }
                }
 
@@ -517,7 +521,17 @@ class MimeMagic {
 
                if ( $script_type ) {
                        if ( $script_type !== "UTF-8" && $script_type !== "ASCII") {
-                               $head = iconv( $script_type, "ASCII//IGNORE", $head);
+                               // Quick and dirty fold down to ASCII!
+                               $pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' );
+                               $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
+                               $head = '';
+                               foreach( $chars as $codepoint ) {
+                                       if( $codepoint < 128 ) {
+                                               $head .= chr( $codepoint );
+                                       } else {
+                                               $head .= '?';
+                                       }
+                               }
                        }
 
                        $match = array();
@@ -528,17 +542,21 @@ class MimeMagic {
                                return $mime;
                        }
                }
-               
+
+               // Check for ZIP (before getimagesize)
+               if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
+                       wfDebug( __METHOD__.": ZIP header present at end of $file\n" );
+                       return $this->detectZipType( $head );
+               }
+
                wfSuppressWarnings();
                $gis = getimagesize( $file );
                wfRestoreWarnings();
-               
+
                if( $gis && isset( $gis['mime'] ) ) {
                        $mime = $gis['mime'];
                        wfDebug( __METHOD__.": getimagesize detected $file as $mime\n" );
                        return $mime;
-               } else {
-                       return false;
                }
 
                // Also test DjVu
@@ -547,18 +565,62 @@ class MimeMagic {
                        wfDebug( __METHOD__.": detected $file as image/vnd.djvu\n" );
                        return 'image/vnd.djvu';
                }
+
+               return false;
+       }
+       
+       /**
+        * Work out which application format a ZIP container holds, judging only
+        * from the bytes at the start of the file. OpenDocument sub-types are the
+        * only formats recognised; any other archive is reported as the generic
+        * 'application/zip'.
+        *
+        * The match expects a "mimetype" entry right after the first 30 bytes,
+        * immediately followed by the MIME type string itself.
+        *
+        * @param string $header Some reasonably-sized chunk of file header
+        * @return string The OpenDocument MIME type found, or 'application/zip'
+        */
+       function detectZipType( $header ) {
+               // Every "-template"/"-master"/"-web" variant is listed ahead of its bare
+               // base name, so the alternation below tries the longer form first.
+               $subtypes = array(
+                       'chart-template', 'chart',
+                       'formula-template', 'formula',
+                       'graphics-template', 'graphics',
+                       'image-template', 'image',
+                       'presentation-template', 'presentation',
+                       'spreadsheet-template', 'spreadsheet',
+                       'text-template', 'text-master', 'text-web', 'text',
+               );
+
+               // http://lists.oasis-open.org/archives/office/200505/msg00006.html
+               $alternation = '(?:' . implode( '|', $subtypes ) . ')';
+               $pattern = "/^mimetype(application\/vnd\.oasis\.opendocument\.$alternation)/";
+               wfDebug( __METHOD__.": $pattern\n" );
+
+               // 30 bytes is the fixed-size part of a ZIP local file header.
+               $payload = substr( $header, 30 );
+               if( !preg_match( $pattern, $payload, $found ) ) {
+                       wfDebug( __METHOD__.": unable to identify type of ZIP archive\n" );
+                       return 'application/zip';
+               }
+
+               $mime = $found[1];
+               wfDebug( __METHOD__.": detected $mime from ZIP archive\n" );
+               return $mime;
        }
 
        /** Internal mime type detection, please use guessMimeType() for application code instead.
        * Detection is done using an external program, if $wgMimeDetectorCommand is set.
        * Otherwise, the fileinfo extension and mime_content_type are tried (in this order), if they are available.
-       * If the dections fails and $ext is not false, the mime type is guessed from the file extension, using 
+       * If the detection fails and $ext is not false, the mime type is guessed from the file extension, using
        * guessTypesForExtension.
        * If the mime type is still unknown, getimagesize is used to detect the mime type if the file is an image.
        * If no mime type can be determined, this function returns "unknown/unknown".
        *
        * @param string $file The file to check
-       * @param mixed $ext The file extension, or true to extract it from the filename. 
+       * @param mixed $ext The file extension, or true to extract it from the filename.
        *                   Set it to false to ignore the extension.
        *
        * @return string the mime type of $file
@@ -731,7 +793,7 @@ class MimeMagic {
                        if ( !$m ) return MEDIATYPE_UNKNOWN;
 
                        $m = explode( ' ', $m );
-               } else { 
+               } else {
                        # Normalize mime type
                        if ( isset( $this->mMimeTypeAliases[$extMime] ) ) {
                                $extMime = $this->mMimeTypeAliases[$extMime];
@@ -750,6 +812,27 @@ class MimeMagic {
 
                return MEDIATYPE_UNKNOWN;
        }
-}
 
+       /**
+        * Ask the IE content analyzer which MIME types various Internet Explorer
+        * versions would sniff from the start of a file.
+        *
+        * @param string $fileName The file name (unused at present)
+        * @param string $chunk The first 256 bytes of the file
+        * @param string $proposed The MIME type proposed by the server
+        * @return mixed Result of IEContentAnalyzer::getRealMimesFromData(), passed through as-is
+        */
+       public function getIEMimeTypes( $fileName, $chunk, $proposed ) {
+               return $this->getIEContentAnalyzer()->getRealMimesFromData( $fileName, $chunk, $proposed );
+       }
 
+       /**
+        * Return the shared IEContentAnalyzer, creating it on first use.
+        */
+       protected function getIEContentAnalyzer() {
+               if ( $this->mIEAnalyzer === null ) {
+                       $this->mIEAnalyzer = new IEContentAnalyzer();
+               }
+               return $this->mIEAnalyzer;
+       }
+}