Make MSCompoundFileReader::readFile platform-agnostic
author: Máté Szabó <mszabo@wikia-inc.com>
Mon, 8 Jul 2019 16:05:22 +0000 (18:05 +0200)
committer: James D. Forrester <jforrester@wikimedia.org>
Mon, 8 Jul 2019 17:45:42 +0000 (10:45 -0700)
MSCompoundFileReader::readFile uses iconv to convert entry names, which
are stored as UTF-16 with little-endian byte order, to UTF-8. The input
string has no BOM and the call did not specify a byte order, so iconv
had to guess the byte order from a platform-dependent default. As a
result, the method could return different results for the same file in
different environments.

This patch explicitly provides the byte order for the input to be
converted (UTF-16LE) to ensure portability and predictability.

As part of this, move MSCompoundFileReaderTest into the unit test tree.

Bug: T225019
Change-Id: I62154897d303b28c288c3a4f2f5456bedcc81852

includes/libs/mime/MSCompoundFileReader.php
tests/phpunit/includes/libs/mime/MSCompoundFileReaderTest.php [deleted file]
tests/phpunit/unit/includes/libs/mime/MSCompoundFileReaderTest.php [new file with mode: 0644]

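diff --git a/includes/libs/mime/MSCompoundFileReader.php b/includes/libs/mime/MSCompoundFileReader.php
index aea0a02..8afaa38 100644 (file)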
@@ -333,7 +333,7 @@ class MSCompoundFileReader {
                                continue;
                        }
 
-                       $name = iconv( 'UTF-16', 'UTF-8', substr( $entry['name_raw'], 0, $entry['name_length'] - 2 ) );
+                       $name = iconv( 'UTF-16LE', 'UTF-8', substr( $entry['name_raw'], 0, $entry['name_length'] - 2 ) );
 
                        $clsid = $this->decodeClsid( $entry['clsid'] );
                        if ( $type == self::TYPE_ROOT && isset( self::$mimesByClsid[$clsid] ) ) {
diff --git a/tests/phpunit/includes/libs/mime/MSCompoundFileReaderTest.php b/tests/phpunit/includes/libs/mime/MSCompoundFileReaderTest.php
deleted file mode 100644 (file)
index 4509a61..0000000
+++ /dev/null
@@ -1,60 +0,0 @@
-<?php
-/*
- * Copyright 2019 Wikimedia Foundation
- *
- * Licensed under the Apache License, Version 2.0 (the "License"); you may
- * not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software distributed
- * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
- * OF ANY KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations under the License.
- */
-
-/**
- * @group Media
- * @covers MSCompoundFileReader
- */
-class MSCompoundFileReaderTest extends PHPUnit\Framework\TestCase {
-       public static function provideValid() {
-               return [
-                       [ 'calc.xls', 'application/vnd.ms-excel' ],
-                       [ 'excel2016-compat97.xls', 'application/vnd.ms-excel' ],
-                       [ 'gnumeric.xls', 'application/vnd.ms-excel' ],
-                       [ 'impress.ppt', 'application/vnd.ms-powerpoint' ],
-                       [ 'powerpoint2016-compat97.ppt', 'application/vnd.ms-powerpoint' ],
-                       [ 'word2016-compat97.doc', 'application/msword' ],
-                       [ 'writer.doc', 'application/msword' ],
-               ];
-       }
-
-       /** @dataProvider provideValid */
-       public function testReadFile( $fileName, $expectedMime ) {
-               global $IP;
-
-               $info = MSCompoundFileReader::readFile( "$IP/tests/phpunit/data/MSCompoundFileReader/$fileName" );
-               $this->assertTrue( $info['valid'] );
-               $this->assertSame( $expectedMime, $info['mime'] );
-       }
-
-       public static function provideInvalid() {
-               return [
-                       [ 'dir-beyond-end.xls', 'ERROR_READ_PAST_END' ],
-                       [ 'fat-loop.xls', 'ERROR_INVALID_FORMAT' ],
-                       [ 'invalid-signature.xls', 'ERROR_INVALID_SIGNATURE' ],
-               ];
-       }
-
-       /** @dataProvider provideInvalid */
-       public function testReadFileInvalid( $fileName, $expectedError ) {
-               global $IP;
-
-               $info = MSCompoundFileReader::readFile( "$IP/tests/phpunit/data/MSCompoundFileReader/$fileName" );
-               $this->assertFalse( $info['valid'] );
-               $this->assertSame( constant( MSCompoundFileReader::class . '::' . $expectedError ),
-                       $info['errorCode'] );
-       }
-}
diff --git a/tests/phpunit/unit/includes/libs/mime/MSCompoundFileReaderTest.php b/tests/phpunit/unit/includes/libs/mime/MSCompoundFileReaderTest.php
new file mode 100644 (file)
index 0000000..4509a61
--- /dev/null
@@ -0,0 +1,60 @@
+<?php
+/*
+ * Copyright 2019 Wikimedia Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software distributed
+ * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
+ * OF ANY KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations under the License.
+ */
+
+/**
+ * @group Media
+ * @covers MSCompoundFileReader
+ */
+class MSCompoundFileReaderTest extends PHPUnit\Framework\TestCase {
+       public static function provideValid() {
+               return [
+                       [ 'calc.xls', 'application/vnd.ms-excel' ],
+                       [ 'excel2016-compat97.xls', 'application/vnd.ms-excel' ],
+                       [ 'gnumeric.xls', 'application/vnd.ms-excel' ],
+                       [ 'impress.ppt', 'application/vnd.ms-powerpoint' ],
+                       [ 'powerpoint2016-compat97.ppt', 'application/vnd.ms-powerpoint' ],
+                       [ 'word2016-compat97.doc', 'application/msword' ],
+                       [ 'writer.doc', 'application/msword' ],
+               ];
+       }
+
+       /** @dataProvider provideValid */
+       public function testReadFile( $fileName, $expectedMime ) {
+               global $IP;
+
+               $info = MSCompoundFileReader::readFile( "$IP/tests/phpunit/data/MSCompoundFileReader/$fileName" );
+               $this->assertTrue( $info['valid'] );
+               $this->assertSame( $expectedMime, $info['mime'] );
+       }
+
+       public static function provideInvalid() {
+               return [
+                       [ 'dir-beyond-end.xls', 'ERROR_READ_PAST_END' ],
+                       [ 'fat-loop.xls', 'ERROR_INVALID_FORMAT' ],
+                       [ 'invalid-signature.xls', 'ERROR_INVALID_SIGNATURE' ],
+               ];
+       }
+
+       /** @dataProvider provideInvalid */
+       public function testReadFileInvalid( $fileName, $expectedError ) {
+               global $IP;
+
+               $info = MSCompoundFileReader::readFile( "$IP/tests/phpunit/data/MSCompoundFileReader/$fileName" );
+               $this->assertFalse( $info['valid'] );
+               $this->assertSame( constant( MSCompoundFileReader::class . '::' . $expectedError ),
+                       $info['errorCode'] );
+       }
+}