resourceloader: Strip leading BOM when concatenating files
authorDerk-Jan Hartman <hartman.wiki@gmail.com>
Wed, 11 May 2016 11:53:59 +0000 (13:53 +0200)
committerTimo Tijhof <krinklemail@gmail.com>
Tue, 31 May 2016 18:47:19 +0000 (19:47 +0100)
We read files and concatenate their contents. Files may start with a BOM character.
BOM characters are only allowed at the beginning of a file, not half way.
Stripping it should be safe, since we already assume that everything is UTF-8.

Change-Id: I14ad698a684e78976e873e9ae2c367475550a063

includes/resourceloader/ResourceLoaderFileModule.php
tests/phpunit/data/css/bom.css [new file with mode: 0644]
tests/phpunit/includes/resourceloader/ResourceLoaderFileModuleTest.php

index 1e7329a..b06553a 100644 (file)
@@ -468,7 +468,7 @@ class ResourceLoaderFileModule extends ResourceLoaderModule {
                if ( !file_exists( $localPath ) ) {
                        throw new MWException( __METHOD__ . ": skip function file not found: \"$localPath\"" );
                }
-               $contents = file_get_contents( $localPath );
+               $contents = $this->stripBom( file_get_contents( $localPath ) );
                if ( $this->getConfig()->get( 'ResourceLoaderValidateStaticJS' ) ) {
                        $contents = $this->validateScriptFile( $localPath, $contents );
                }
@@ -810,7 +810,7 @@ class ResourceLoaderFileModule extends ResourceLoaderModule {
                        if ( !file_exists( $localPath ) ) {
                                throw new MWException( __METHOD__ . ": script file not found: \"$localPath\"" );
                        }
-                       $contents = file_get_contents( $localPath );
+                       $contents = $this->stripBom( file_get_contents( $localPath ) );
                        if ( $this->getConfig()->get( 'ResourceLoaderValidateStaticJS' ) ) {
                                // Static files don't really need to be checked as often; unlike
                                // on-wiki module they shouldn't change unexpectedly without
@@ -882,7 +882,7 @@ class ResourceLoaderFileModule extends ResourceLoaderModule {
                        $style = $this->compileLessFile( $localPath, $context );
                        $this->hasGeneratedStyles = true;
                } else {
-                       $style = file_get_contents( $localPath );
+                       $style = $this->stripBom( file_get_contents( $localPath ) );
                }
 
                if ( $flip ) {
@@ -990,7 +990,7 @@ class ResourceLoaderFileModule extends ResourceLoaderModule {
                        $localPath = $this->getLocalPath( $templatePath );
                        if ( file_exists( $localPath ) ) {
                                $content = file_get_contents( $localPath );
-                               $templates[$alias] = $content;
+                               $templates[$alias] = $this->stripBom( $content );
                        } else {
                                $msg = __METHOD__ . ": template file not found: \"$localPath\"";
                                wfDebugLog( 'resourceloader', $msg );
@@ -999,4 +999,20 @@ class ResourceLoaderFileModule extends ResourceLoaderModule {
                }
                return $templates;
        }
+
+       /**
+        * Takes an input string and removes the UTF-8 BOM character if present
+        *
+        * We need to remove these after reading a file, because we concatenate our files and
+        * the BOM character is not valid in the middle of a string.
+        * We already assume UTF-8 everywhere, so this should be safe.
+        *
+        * @return string input minus the intial BOM char
+        */
+       protected function stripBom( $input ) {
+               if ( substr_compare( "\xef\xbb\xbf", $input, 0, 3 ) === 0 ) {
+                       return substr( $input, 3 );
+               }
+               return $input;
+       }
 }
diff --git a/tests/phpunit/data/css/bom.css b/tests/phpunit/data/css/bom.css
new file mode 100644 (file)
index 0000000..3382ea4
--- /dev/null
@@ -0,0 +1 @@
+.efbbbf_bom_char_at_start_of_file {}
index 90d8e9f..2dec02b 100644 (file)
@@ -226,4 +226,24 @@ class ResourceLoaderFileModuleTest extends ResourceLoaderTestCase {
 
                $this->assertEquals( $rl->getTemplates(), $expected );
        }
+
+       public function testBomConcatenation() {
+               $basePath = __DIR__ . '/../../data/css';
+               $testModule = new ResourceLoaderFileModule( [
+                       'localBasePath' => $basePath,
+                       'styles' => [ 'bom.css' ],
+                       ] );
+               $this->assertEquals(
+                       substr( file_get_contents( "$basePath/bom.css" ), 0, 10 ),
+                       "\xef\xbb\xbf.efbbbf",
+                       'File has leading BOM'
+               );
+
+               $contextLtr = $this->getResourceLoaderContext( 'en', 'ltr' );
+               $this->assertEquals(
+                       $testModule->getStyles( $contextLtr ),
+                       [ 'all' => ".efbbbf_bom_char_at_start_of_file {}\n" ],
+                       'Leading BOM removed when concatenating files'
+               );
+       }
 }