* the file mime.types in the includes directory.
*/
define('MM_WELL_KNOWN_MIME_TYPES',<<<END_STRING
-application/ogg ogg ogm
+application/ogg ogg ogm ogv
application/pdf pdf
+application/vnd.oasis.opendocument.chart odc
+application/vnd.oasis.opendocument.chart-template otc
+application/vnd.oasis.opendocument.formula odf
+application/vnd.oasis.opendocument.formula-template otf
+application/vnd.oasis.opendocument.graphics odg
+application/vnd.oasis.opendocument.graphics-template otg
+application/vnd.oasis.opendocument.image odi
+application/vnd.oasis.opendocument.image-template oti
+application/vnd.oasis.opendocument.presentation odp
+application/vnd.oasis.opendocument.presentation-template otp
+application/vnd.oasis.opendocument.spreadsheet ods
+application/vnd.oasis.opendocument.spreadsheet-template ots
+application/vnd.oasis.opendocument.text odt
+application/vnd.oasis.opendocument.text-template ott
+application/vnd.oasis.opendocument.text-master otm
+application/vnd.oasis.opendocument.text-web oth
application/x-javascript js
application/x-shockwave-flash swf
audio/midi mid midi kar
image/x-xcf xcf
text/plain txt
text/html html htm
-video/ogg ogm ogg
+video/ogg ogm ogg ogv
video/mpeg mpg mpeg
END_STRING
);
*/
define('MM_WELL_KNOWN_MIME_INFO', <<<END_STRING
application/pdf [OFFICE]
+application/vnd.oasis.opendocument.chart [OFFICE]
+application/vnd.oasis.opendocument.chart-template [OFFICE]
+application/vnd.oasis.opendocument.formula [OFFICE]
+application/vnd.oasis.opendocument.formula-template [OFFICE]
+application/vnd.oasis.opendocument.graphics [OFFICE]
+application/vnd.oasis.opendocument.graphics-template [OFFICE]
+application/vnd.oasis.opendocument.image [OFFICE]
+application/vnd.oasis.opendocument.image-template [OFFICE]
+application/vnd.oasis.opendocument.presentation [OFFICE]
+application/vnd.oasis.opendocument.presentation-template [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet [OFFICE]
+application/vnd.oasis.opendocument.spreadsheet-template [OFFICE]
+application/vnd.oasis.opendocument.text [OFFICE]
+application/vnd.oasis.opendocument.text-template [OFFICE]
+application/vnd.oasis.opendocument.text-master [OFFICE]
+application/vnd.oasis.opendocument.text-web [OFFICE]
text/javascript application/x-javascript [EXECUTABLE]
application/x-shockwave-flash [MULTIMEDIA]
audio/midi [AUDIO]
if(!extension_loaded('fileinfo')) dl('fileinfo.' . PHP_SHLIB_SUFFIX);
}
-/**
+/**
* Implements functions related to mime types such as detection and mapping to
* file extension.
*
if ( $wgMimeTypeFile == 'includes/mime.types' ) {
$wgMimeTypeFile = "$IP/$wgMimeTypeFile";
}
-
+
if ( $wgMimeTypeFile ) {
if ( is_file( $wgMimeTypeFile ) and is_readable( $wgMimeTypeFile ) ) {
wfDebug( __METHOD__.": loading mime types from $wgMimeTypeFile\n" );
'bmp', 'tiff', 'tif', 'jpc', 'jp2',
'jpx', 'jb2', 'swc', 'iff', 'wbmp',
'xbm',
-
+
// Formats we recognize magic numbers for
- 'djvu', 'ogg', 'mid', 'pdf', 'wmf', 'xcf',
-
+ 'djvu', 'ogg', 'ogv', 'mid', 'pdf', 'wmf', 'xcf',
+
// XML formats we sure hope we recognize reliably
'svg',
);
* or misinterpreter by the default mime detection (namely xml based formats like XHTML or SVG).
*
* @param string $file The file to check
- * @param mixed $ext The file extension, or true to extract it from the filename.
+ * @param mixed $ext The file extension, or true to extract it from the filename.
* Set it to false to ignore the extension.
*
* @return string the mime type of $file
wfDebug(__METHOD__.": final mime type of $file: $mime\n");
return $mime;
}
-
+
function doGuessMimeType( $file, $ext = true ) {
// Read a chunk of the file
wfSuppressWarnings();
wfRestoreWarnings();
if( !$f ) return "unknown/unknown";
$head = fread( $f, 1024 );
+ fseek( $f, -65558, SEEK_END );
+ $tail = fread( $f, 65558 ); // 65558 = maximum size of a zip EOCDR
fclose( $f );
// Hardcode a few magic number checks...
// Multimedia...
'MThd' => 'audio/midi',
'OggS' => 'application/ogg',
-
+
// Image formats...
// Note that WMF may have a bare header, no magic number.
"\x01\x00\x09\x00" => 'application/x-msmetafile', // Possibly prone to false positives?
"\xd7\xcd\xc6\x9a" => 'application/x-msmetafile',
- 'PDF%' => 'application/pdf',
+ '%PDF' => 'application/pdf',
'gimp xcf' => 'image/x-xcf',
-
+
// Some forbidden fruit...
'MZ' => 'application/octet-stream', // DOS/Windows executable
"\xca\xfe\xba\xbe" => 'application/octet-stream', // Mach-O binary
"\x7fELF" => 'application/octet-stream', // ELF binary
);
-
+
foreach( $headers as $magic => $candidate ) {
if( strncmp( $head, $magic, strlen( $magic ) ) == 0 ) {
wfDebug( __METHOD__ . ": magic header in $file recognized as $candidate\n" );
wfDebug( __METHOD__ . ": recognized $file as application/x-php\n" );
return "application/x-php";
}
-
+
/*
* look for XML formats (XHTML and SVG)
*/
- $xml_type = NULL;
- if ( substr( $head, 0, 5 ) == "<?xml" ) {
- $xml_type = "ASCII";
- } elseif ( substr( $head, 0, 8 ) == "\xef\xbb\xbf<?xml") {
- $xml_type = "UTF-8";
- } elseif ( substr( $head, 0, 10 ) == "\xfe\xff\x00<\x00?\x00x\x00m\x00l" ) {
- $xml_type = "UTF-16BE";
- } elseif ( substr( $head, 0, 10 ) == "\xff\xfe<\x00?\x00x\x00m\x00l\x00") {
- $xml_type = "UTF-16LE";
- }
-
- if ( $xml_type ) {
- if ( $xml_type !== "UTF-8" && $xml_type !== "ASCII" ) {
- $head = iconv( $xml_type, "ASCII//IGNORE", $head );
- }
-
- $match = array();
- $doctype = "";
- $tag = "";
-
- if ( preg_match( '%<!DOCTYPE\s+[\w-]+\s+PUBLIC\s+["'."'".'"](.*?)["'."'".'"].*>%sim',
- $head, $match ) ) {
- $doctype = $match[1];
- }
- if ( preg_match( '%<(\w+).*>%sim', $head, $match ) ) {
- $tag = $match[1];
- }
-
- #print "<br>ANALYSING $file ($mime): doctype= $doctype; tag= $tag<br>";
-
- if ( strpos( $doctype, "-//W3C//DTD SVG" ) === 0 ) {
- return "image/svg+xml";
- } elseif ( $tag === "svg" ) {
- return "image/svg+xml";
- } elseif ( strpos( $doctype, "-//W3C//DTD XHTML" ) === 0 ) {
- return "text/html";
- } elseif ( $tag === "html" ) {
- return "text/html";
+ $xml = new XmlTypeCheck( $file );
+ if( $xml->wellFormed ) {
+ global $wgXMLMimeTypes;
+ if( isset( $wgXMLMimeTypes[$xml->getRootElement()] ) ) {
+ return $wgXMLMimeTypes[$xml->getRootElement()];
} else {
- /// Fixme -- this would be the place to allow additional XML type checks
- return "application/xml";
+ return 'application/xml';
}
}
if ( $script_type ) {
if ( $script_type !== "UTF-8" && $script_type !== "ASCII") {
- $head = iconv( $script_type, "ASCII//IGNORE", $head);
+ // Quick and dirty fold down to ASCII!
+ $pack = array( 'UTF-16BE' => 'n*', 'UTF-16LE' => 'v*' );
+ $chars = unpack( $pack[$script_type], substr( $head, 2 ) );
+ $head = '';
+ foreach( $chars as $codepoint ) {
+ if( $codepoint < 128 ) {
+ $head .= chr( $codepoint );
+ } else {
+ $head .= '?';
+ }
+ }
}
$match = array();
return $mime;
}
}
-
+
+ // Check for ZIP (before getimagesize)
+ if ( strpos( $tail, "PK\x05\x06" ) !== false ) {
+ wfDebug( __METHOD__.": ZIP header present at end of $file\n" );
+ return $this->detectZipType( $head );
+ }
+
wfSuppressWarnings();
$gis = getimagesize( $file );
wfRestoreWarnings();
-
+
if( $gis && isset( $gis['mime'] ) ) {
$mime = $gis['mime'];
wfDebug( __METHOD__.": getimagesize detected $file as $mime\n" );
return $mime;
- } else {
- return false;
}
// Also test DjVu
wfDebug( __METHOD__.": detected $file as image/vnd.djvu\n" );
return 'image/vnd.djvu';
}
+
+ return false;
+ }
+
+	/**
+	 * Detect application-specific file type of a given ZIP file from its
+	 * header data. Currently works for OpenDocument types only.
+	 * If the type can't be determined, returns 'application/zip'.
+	 *
+	 * The check skips the first 30 bytes of the header chunk and then expects
+	 * the literal entry name "mimetype" immediately followed by the MIME type
+	 * string itself (see the OASIS message linked in the body).
+	 *
+	 * @param string $header Some reasonably-sized chunk of file header
+	 * @return string The detected OpenDocument MIME type, or 'application/zip'
+	 */
+	function detectZipType( $header ) {
+		// Order matters: the regex below has no trailing anchor and PCRE takes
+		// the first alternative that matches, so every longer name must come
+		// before the shorter name that is a prefix of it. Otherwise e.g. a
+		// "text-template" document would be reported as plain "text".
+		$opendocTypes = array(
+			'chart-template',
+			'chart',
+			'formula-template',
+			'formula',
+			'graphics-template',
+			'graphics',
+			'image-template',
+			'image',
+			'presentation-template',
+			'presentation',
+			'spreadsheet-template',
+			'spreadsheet',
+			'text-template',
+			'text-master',
+			'text-web',
+			'text' );
+
+		// http://lists.oasis-open.org/archives/office/200505/msg00006.html
+		$types = '(?:' . implode( '|', $opendocTypes ) . ')';
+		$opendocRegex = "/^mimetype(application\/vnd\.oasis\.opendocument\.$types)/";
+		wfDebug( __METHOD__.": $opendocRegex\n" );
+
+		// 30 = presumably the fixed-size part of the ZIP local file header,
+		// after which the first entry's file name starts.
+		if( preg_match( $opendocRegex, substr( $header, 30 ), $matches ) ) {
+			$mime = $matches[1];
+			wfDebug( __METHOD__.": detected $mime from ZIP archive\n" );
+			return $mime;
+		} else {
+			wfDebug( __METHOD__.": unable to identify type of ZIP archive\n" );
+			return 'application/zip';
+		}
 	}
/** Internal mime type detection, please use guessMimeType() for application code instead.
* Detection is done using an external program, if $wgMimeDetectorCommand is set.
* Otherwise, the fileinfo extension and mime_content_type are tried (in this order), if they are available.
- * If the dections fails and $ext is not false, the mime type is guessed from the file extension, using
+ * If the detection fails and $ext is not false, the mime type is guessed from the file extension, using
* guessTypesForExtension.
* If the mime type is still unknown, getimagesize is used to detect the mime type if the file is an image.
* If no mime type can be determined, this function returns "unknown/unknown".
*
* @param string $file The file to check
- * @param mixed $ext The file extension, or true to extract it from the filename.
+ * @param mixed $ext The file extension, or true to extract it from the filename.
* Set it to false to ignore the extension.
*
* @return string the mime type of $file
if ( !$m ) return MEDIATYPE_UNKNOWN;
$m = explode( ' ', $m );
- } else {
+ } else {
# Normalize mime type
if ( isset( $this->mMimeTypeAliases[$extMime] ) ) {
$extMime = $this->mMimeTypeAliases[$extMime];
return MEDIATYPE_UNKNOWN;
}
}
-
-