MimeAnalyzer: fix ZIP parsing failure
authorTim Starling <tstarling@wikimedia.org>
Thu, 25 Jul 2019 03:29:44 +0000 (13:29 +1000)
committerTim Starling <tstarling@wikimedia.org>
Thu, 25 Jul 2019 03:40:18 +0000 (13:40 +1000)
unpack() actually returns an array with indexes starting from 1, not
zero, so unpack(...)[0] gives a notice and always returns null. It is
lucky that ZIPs normally have zero-length comments, so this would have
had little impact on file type detection aside from log spam.

Also, add a check to make sure the unpack() will not read beyond
the end of the file. Without this, unpack() could generate a warning.

The bug was introduced by me in f12db3804882272794b.

Add tests. The test files were generated by appending an EOCDR signature
and some extra bytes to 1bit-png.png.

Bug: T223728
Change-Id: I6fab63102d1d8eea92cdcce5ab6d1eb747a0a890

includes/libs/mime/MimeAnalyzer.php
tests/phpunit/data/media/zip-comment-overflow.png [new file with mode: 0644]
tests/phpunit/data/media/zip-kind-of-valid-2.png [new file with mode: 0644]
tests/phpunit/data/media/zip-kind-of-valid.png [new file with mode: 0644]
tests/phpunit/data/media/zip-sig-near-end.png [new file with mode: 0644]
tests/phpunit/includes/libs/mime/MimeAnalyzerTest.php

index 2462174..bafe5e3 100644 (file)
@@ -806,10 +806,10 @@ EOT;
 
                // Check for ZIP variants (before getimagesize)
                $eocdrPos = strpos( $tail, "PK\x05\x06" );
-               if ( $eocdrPos !== false ) {
+               if ( $eocdrPos !== false && $eocdrPos <= strlen( $tail ) - 22 ) {
                        $this->logger->info( __METHOD__ . ": ZIP signature present in $file\n" );
                        // Check if it really is a ZIP file, make sure the EOCDR is at the end (T40432)
-                       $commentLength = unpack( "n", substr( $tail, $eocdrPos + 20 ) )[0];
+                       $commentLength = unpack( "n", substr( $tail, $eocdrPos + 20 ) )[1];
                        if ( $eocdrPos + 22 + $commentLength !== strlen( $tail ) ) {
                                $this->logger->info( __METHOD__ . ": ZIP EOCDR not at end. Not a ZIP file." );
                        } else {
diff --git a/tests/phpunit/data/media/zip-comment-overflow.png b/tests/phpunit/data/media/zip-comment-overflow.png
new file mode 100644 (file)
index 0000000..710831f
Binary files /dev/null and b/tests/phpunit/data/media/zip-comment-overflow.png differ
diff --git a/tests/phpunit/data/media/zip-kind-of-valid-2.png b/tests/phpunit/data/media/zip-kind-of-valid-2.png
new file mode 100644 (file)
index 0000000..c0e7ff6
Binary files /dev/null and b/tests/phpunit/data/media/zip-kind-of-valid-2.png differ
diff --git a/tests/phpunit/data/media/zip-kind-of-valid.png b/tests/phpunit/data/media/zip-kind-of-valid.png
new file mode 100644 (file)
index 0000000..1121af4
Binary files /dev/null and b/tests/phpunit/data/media/zip-kind-of-valid.png differ
diff --git a/tests/phpunit/data/media/zip-sig-near-end.png b/tests/phpunit/data/media/zip-sig-near-end.png
new file mode 100644 (file)
index 0000000..29f3684
Binary files /dev/null and b/tests/phpunit/data/media/zip-sig-near-end.png differ
index 0f23b8c..51ad915 100644 (file)
@@ -163,4 +163,40 @@ class MimeAnalyzerTest extends PHPUnit\Framework\TestCase {
                ];
        }
 
+       function providePngZipConfusion() {
+               return [
+                       [
+                               'An invalid ZIP file due to the signature being too close to the ' .
+                                       'end to accomodate an EOCDR',
+                               'zip-sig-near-end.png',
+                               'image/png',
+                       ],
+                       [
+                               'An invalid ZIP file due to the comment length running beyond the ' .
+                                       'end of the file',
+                               'zip-comment-overflow.png',
+                               'image/png',
+                       ],
+                       [
+                               'A ZIP file similar to the above, but without either of those two ' .
+                                       'problems. Not a valid ZIP file, but it passes MimeAnalyzer\'s ' .
+                                       'definition of a ZIP file. This is mostly a sanity check of the ' .
+                                       'above two tests.',
+                               'zip-kind-of-valid.png',
+                               'application/zip',
+                       ],
+                       [
+                               'As above with non-zero comment length',
+                               'zip-kind-of-valid-2.png',
+                               'application/zip',
+                       ],
+               ];
+       }
+
+       /** @dataProvider providePngZipConfusion */
+       function testPngZipConfusion( $description, $fileName, $expectedType ) {
+               $file = __DIR__ . '/../../../data/media/' . $fileName;
+               $actualType = $this->doGuessMimeType( [ $file, 'png' ] );
+               $this->assertEquals( $expectedType, $actualType, $description );
+       }
 }