Relax HTML sniffing checks on image upload
authorBrion Vibber <brion@pobox.com>
Thu, 6 Jun 2019 21:54:29 +0000 (14:54 -0700)
committerBrion Vibber <brion@pobox.com>
Fri, 7 Jun 2019 21:21:00 +0000 (14:21 -0700)
Allows uploaded files to include some HTML tag strings that were
previously forbidden in the first 1k or so of the file:
* <a href
* <img
* <pre
* <table
* <title

They are now allowed as long as the IE MIME type detection heuristic
would not change their types. This should reduce the number of false
positive checks in JPEGs with EXIF data with links.

Also deprecates $wgAllowTitlesInSVG and allows it by default.

This should still protect against malformed PNG attacks on old IE
versions, though false positive checks are conceivable on PNG files
containing comments very close to the beginning of the file.

Adds $wgVerifyMimeTypeIE config var to allow disabling the IE checks
entirely, if desired, but leaves it in place by default. These are
more conservative than the checks that were removed.

Added test cases for the old IE5/6 bug and the particular sort of
JPEG metadata that struck false positives previously.

Bug: T27707
Change-Id: I66642a74fce1a1894cad67d62b0da61020db469a

RELEASE-NOTES-1.34
includes/DefaultSettings.php
includes/upload/UploadBase.php
tests/phpunit/data/upload/jpeg-a-href-in-metadata.jpg [new file with mode: 0644]
tests/phpunit/data/upload/png-embedded-breaks-ie5.png [new file with mode: 0644]
tests/phpunit/data/upload/png-plain.png [new file with mode: 0644]
tests/phpunit/includes/upload/UploadBaseTest.php

index dca64bd..db0c732 100644 (file)
@@ -42,6 +42,13 @@ For notes on 1.33.x and older releases, see HISTORY.
   variable $wgCdnMaxageLagged. The previous configuration variable names are
   deprecated, but will be used as the fall back if they are still set.
   Note that wgSquidPurgeUseHostHeader has not been renamed, as it is deprecated.
+* (T27707) File type checks for image uploads have been relaxed to allow files
+  containing some HTML markup in metadata. As a result, the $wgAllowTitlesInSVG
+  setting is no longer applied and is now always true. Note that MSIE 7 may
+  still be able to misinterpret certain malformed PNG files as HTML.
+* Introduced $wgVerifyMimeTypeIE to allow disabling the MSIE 6/7 file type
+  detection heuristic on upload, which is more conservative than the checks
+  that were changed above.
 * …
 
 ==== Removed configuration ====
index ab1afe2..73d05ff 100644 (file)
@@ -1214,17 +1214,12 @@ $wgSVGMaxSize = 5120;
 $wgSVGMetadataCutoff = 262144;
 
 /**
- * Disallow <title> element in SVG files.
+ * Obsolete, no longer used.
+ * SVG file uploads now always allow <title> elements.
  *
- * MediaWiki will reject HTMLesque tags in uploaded files due to idiotic
- * browsers which can not perform basic stuff like MIME detection and which are
- * vulnerable to further idiots uploading crap files as images.
- *
- * When this directive is on, "<title>" will be allowed in files with an
- * "image/svg+xml" MIME type. You should leave this disabled if your web server
- * is misconfigured and doesn't send appropriate MIME types for SVG images.
+ * @deprecated 1.34
  */
-$wgAllowTitlesInSVG = false;
+$wgAllowTitlesInSVG = true;
 
 /**
  * Whether thumbnails should be generated in target language (usually, same as
@@ -1390,6 +1385,16 @@ $wgAntivirusRequired = true;
  */
 $wgVerifyMimeType = true;
 
+/**
+ * Determines whether extra checks for IE type detection should be applied.
+ * This is a conservative check for exactly what IE 6 or so checked for,
+ * and shouldn't trigger on for instance JPEG files containing links in EXIF
+ * metadata.
+ *
+ * @since 1.34
+ */
+$wgVerifyMimeTypeIE = true;
+
 /**
  * Sets the MIME type definition file to use by includes/libs/mime/MimeAnalyzer.php.
  * Set to null, to use built-in defaults only.
index d905aa4..597c277 100644 (file)
@@ -404,7 +404,7 @@ abstract class UploadBase {
         * @return mixed True if the file is verified, an array otherwise
         */
        protected function verifyMimeType( $mime ) {
-               global $wgVerifyMimeType;
+               global $wgVerifyMimeType, $wgVerifyMimeTypeIE;
                if ( $wgVerifyMimeType ) {
                        wfDebug( "mime: <$mime> extension: <{$this->mFinalExtension}>\n" );
                        global $wgMimeTypeBlacklist;
@@ -412,17 +412,19 @@ abstract class UploadBase {
                                return [ 'filetype-badmime', $mime ];
                        }
 
-                       # Check what Internet Explorer would detect
-                       $fp = fopen( $this->mTempPath, 'rb' );
-                       $chunk = fread( $fp, 256 );
-                       fclose( $fp );
-
-                       $magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
-                       $extMime = $magic->guessTypesForExtension( $this->mFinalExtension );
-                       $ieTypes = $magic->getIEMimeTypes( $this->mTempPath, $chunk, $extMime );
-                       foreach ( $ieTypes as $ieType ) {
-                               if ( $this->checkFileExtension( $ieType, $wgMimeTypeBlacklist ) ) {
-                                       return [ 'filetype-bad-ie-mime', $ieType ];
+                       if ( $wgVerifyMimeTypeIE ) {
+                               # Check what Internet Explorer would detect
+                               $fp = fopen( $this->mTempPath, 'rb' );
+                               $chunk = fread( $fp, 256 );
+                               fclose( $fp );
+
+                               $magic = MediaWiki\MediaWikiServices::getInstance()->getMimeAnalyzer();
+                               $extMime = $magic->guessTypesForExtension( $this->mFinalExtension );
+                               $ieTypes = $magic->getIEMimeTypes( $this->mTempPath, $chunk, $extMime );
+                               foreach ( $ieTypes as $ieType ) {
+                                       if ( $this->checkFileExtension( $ieType, $wgMimeTypeBlacklist ) ) {
+                                               return [ 'filetype-bad-ie-mime', $ieType ];
+                                       }
                                }
                        }
                }
@@ -1262,12 +1264,11 @@ abstract class UploadBase {
         * @return bool True if the file contains something looking like embedded scripts
         */
        public static function detectScript( $file, $mime, $extension ) {
-               global $wgAllowTitlesInSVG;
-
                # ugly hack: for text files, always look at the entire file.
                # For binary field, just check the first K.
 
-               if ( strpos( $mime, 'text/' ) === 0 ) {
+               $isText = strpos( $mime, 'text/' ) === 0;
+               if ( $isText ) {
                        $chunk = file_get_contents( $file );
                } else {
                        $fp = fopen( $file, 'rb' );
@@ -1312,36 +1313,19 @@ abstract class UploadBase {
                        }
                }
 
-               /**
-                * Internet Explorer for Windows performs some really stupid file type
-                * autodetection which can cause it to interpret valid image files as HTML
-                * and potentially execute JavaScript, creating a cross-site scripting
-                * attack vectors.
-                *
-                * Apple's Safari browser also performs some unsafe file type autodetection
-                * which can cause legitimate files to be interpreted as HTML if the
-                * web server is not correctly configured to send the right content-type
-                * (or if you're really uploading plain text and octet streams!)
-                *
-                * Returns true if IE is likely to mistake the given file for HTML.
-                * Also returns true if Safari would mistake the given file for HTML
-                * when served with a generic content-type.
-                */
+               // Quick check for HTML heuristics in old IE and Safari.
+               //
+               // The exact heuristics IE uses are checked separately via verifyMimeType(), so we
+               // don't need them all here as it can cause many false positives.
+               //
+               // Check for `<script` and such still to forbid script tags and embedded HTML in SVG:
                $tags = [
-                       '<a href',
                        '<body',
                        '<head',
                        '<html', # also in safari
-                       '<img',
-                       '<pre',
                        '<script', # also in safari
-                       '<table'
                ];
 
-               if ( !$wgAllowTitlesInSVG && $extension !== 'svg' && $mime !== 'image/svg' ) {
-                       $tags[] = '<title';
-               }
-
                foreach ( $tags as $tag ) {
                        if ( strpos( $chunk, $tag ) !== false ) {
                                wfDebug( __METHOD__ . ": found something that may make it be mistaken for html: $tag\n" );
diff --git a/tests/phpunit/data/upload/jpeg-a-href-in-metadata.jpg b/tests/phpunit/data/upload/jpeg-a-href-in-metadata.jpg
new file mode 100644 (file)
index 0000000..5438736
Binary files /dev/null and b/tests/phpunit/data/upload/jpeg-a-href-in-metadata.jpg differ
diff --git a/tests/phpunit/data/upload/png-embedded-breaks-ie5.png b/tests/phpunit/data/upload/png-embedded-breaks-ie5.png
new file mode 100644 (file)
index 0000000..0af03fc
Binary files /dev/null and b/tests/phpunit/data/upload/png-embedded-breaks-ie5.png differ
diff --git a/tests/phpunit/data/upload/png-plain.png b/tests/phpunit/data/upload/png-plain.png
new file mode 100644 (file)
index 0000000..83e9130
Binary files /dev/null and b/tests/phpunit/data/upload/png-plain.png differ
index 58c69e3..cafc846 100644 (file)
@@ -585,6 +585,42 @@ class UploadBaseTest extends MediaWikiTestCase {
                        [ '<?xml version="1.0" encoding="WINDOWS-1252"?><svg></svg>', false ],
                ];
        }
+
+       /**
+        * @covers UploadBase::detectScript
+        * @dataProvider provideDetectScript
+        */
+       public function testDetectScript( $filename, $mime, $extension, $expected, $message ) {
+               $result = $this->upload->detectScript( $filename, $mime, $extension );
+               $this->assertSame( $expected, $result, $message );
+       }
+
+       public static function provideDetectScript() {
+               global $IP;
+               return [
+                       [
+                               "$IP/tests/phpunit/data/upload/png-plain.png",
+                               'image/png',
+                               'png',
+                               false,
+                               'PNG with no suspicious things in it, should pass.'
+                       ],
+                       [
+                               "$IP/tests/phpunit/data/upload/png-embedded-breaks-ie5.png",
+                               'image/png',
+                               'png',
+                               true,
+                               'PNG with embedded data that IE5/6 interprets as HTML; should be rejected.'
+                       ],
+                       [
+                               "$IP/tests/phpunit/data/upload/jpeg-a-href-in-metadata.jpg",
+                               'image/jpeg',
+                               'jpeg',
+                               false,
+                               'JPEG with innocuous HTML in metadata from a flickr photo; should pass (T27707).'
+                       ],
+               ];
+       }
 }
 
 class UploadTestHandler extends UploadBase {