Merge "CSSMin: Support parenthesis and quotes in url references"
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Fri, 5 May 2017 18:58:22 +0000 (18:58 +0000)
committerGerrit Code Review <gerrit@wikimedia.org>
Fri, 5 May 2017 18:58:22 +0000 (18:58 +0000)
includes/libs/CSSMin.php
tests/phpunit/includes/libs/CSSMinTest.php

index bba07e2..c504f35 100644 (file)
@@ -38,7 +38,7 @@ class CSSMin {
         * Internet Explorer data URI length limit. See encodeImageAsDataURI().
         */
        const DATA_URI_SIZE_LIMIT = 32768;
-       const URL_REGEX = 'url\(\s*[\'"]?(?P<file>[^\?\)\'"]*?)(?P<query>\?[^\)\'"]*?|)[\'"]?\s*\)';
+
        const EMBED_REGEX = '\/\*\s*\@embed\s*\*\/';
        const COMMENT_REGEX = '\/\*.*?\*\/';
 
@@ -72,8 +72,9 @@ class CSSMin {
                $files = [];
 
                $rFlags = PREG_OFFSET_CAPTURE | PREG_SET_ORDER;
-               if ( preg_match_all( '/' . self::URL_REGEX . '/', $stripped, $matches, $rFlags ) ) {
+               if ( preg_match_all( '/' . self::getUrlRegex() . '/', $stripped, $matches, $rFlags ) ) {
                        foreach ( $matches as $match ) {
+                               self::processUrlMatch( $match, $rFlags );
                                $url = $match['file'][0];
 
                                // Skip fully-qualified and protocol-relative URLs and data URIs
@@ -266,7 +267,7 @@ class CSSMin {
                // appears in the rule itself, e.g. in a quoted string. You are advised
                // not to use such characters in file names. We also match start/end of
                // the string to be consistent in edge-cases ('@import url(…)').
-               $pattern = '/(?:^|[;{])\K[^;{}]*' . CSSMin::URL_REGEX . '[^;}]*(?=[;}]|$)/';
+               $pattern = '/(?:^|[;{])\K[^;{}]*' . self::getUrlRegex() . '[^;}]*(?=[;}]|$)/';
 
                $source = preg_replace_callback(
                        $pattern,
@@ -290,13 +291,14 @@ class CSSMin {
 
                                // Build two versions of current rule: with remapped URLs
                                // and with embedded data: URIs (where possible).
-                               $pattern = '/(?P<embed>' . CSSMin::EMBED_REGEX . '\s*|)' . CSSMin::URL_REGEX . '/';
+                               $pattern = '/(?P<embed>' . CSSMin::EMBED_REGEX . '\s*|)' . self::getUrlRegex() . '/';
 
                                $ruleWithRemapped = preg_replace_callback(
                                        $pattern,
                                        function ( $match ) use ( $local, $remote ) {
-                                               $remapped = CSSMin::remapOne( $match['file'], $match['query'], $local, $remote, false );
+                                               self::processUrlMatch( $match );
 
+                                               $remapped = CSSMin::remapOne( $match['file'], $match['query'], $local, $remote, false );
                                                return CSSMin::buildUrlValue( $remapped );
                                        },
                                        $rule
@@ -309,6 +311,8 @@ class CSSMin {
                                        $ruleWithEmbedded = preg_replace_callback(
                                                $pattern,
                                                function ( $match ) use ( $embedAll, $local, $remote, &$mimeTypes ) {
+                                                       self::processUrlMatch( $match );
+
                                                        $embed = $embedAll || $match['embed'];
                                                        $embedded = CSSMin::remapOne(
                                                                $match['file'],
@@ -385,6 +389,69 @@ class CSSMin {
                return false;
        }
 
+       private static function getUrlRegex() {
+               static $urlRegex;
+               if ( $urlRegex === null ) {
+                       // Match these three variants separately to avoid broken urls when
+                       // e.g. a double quoted url contains a parenthesis, or when a
+                       // single quoted url contains a double quote, etc.
+                       // Note: PCRE doesn't support multiple capture groups with the same name by default.
+                       // - PCRE 6.7 introduced the "J" modifier (PCRE_INFO_JCHANGED for PCRE_DUPNAMES).
+                       //   https://secure.php.net/manual/en/reference.pcre.pattern.modifiers.php
+                       //   However this isn't useful since it just ignores all but the first one.
+                       //   Also, while the modifier was introduced in PCRE 6.7 (PHP 5.2+) it was
+                       //   not exposed to public preg_* functions until PHP 5.6.0.
+                       // - PCRE 8.36 fixed this to work as expected (e.g. merge conceptually to
+                       //   only return the one matched in the part that actually matched).
+                       //   However MediaWiki supports 5.5.9, which has PCRE 8.32
+                       //   Per https://secure.php.net/manual/en/pcre.installation.php:
+                       //   - PCRE 8.32 (PHP 5.5.0)
+                       //   - PCRE 8.34 (PHP 5.5.10, PHP 5.6.0)
+                       //   - PCRE 8.37 (PHP 5.5.26, PHP 5.6.9, PHP 7.0.0)
+                       //   Workaround by using different groups and merge via processUrlMatch().
+                       // - Using string concatenation for class constant or member assignments
+                       //   is only supported in PHP 5.6. Use a getter method for now.
+                       $urlRegex = '(' .
+                               // Unquoted url
+                               'url\(\s*(?P<file0>[^\'"][^\?\)]*?)(?P<query0>\?[^\)]*?|)\s*\)' .
+                               // Single quoted url
+                               '|url\(\s*\'(?P<file1>[^\?\']*?)(?P<query1>\?[^\']*?|)\'\s*\)' .
+                               // Double quoted url
+                               '|url\(\s*"(?P<file2>[^\?"]*?)(?P<query2>\?[^"]*?|)"\s*\)' .
+                               ')';
+               }
+               return $urlRegex;
+       }
+
+       private static function processUrlMatch( array &$match, $flags = 0 ) {
+               if ( $flags & PREG_SET_ORDER ) {
+                       // preg_match_all with PREG_SET_ORDER will return each group in each
+                       // match array, and if it didn't match, instead of the sub array
+                       // being an empty array it is `[ '', -1 ]`...
+                       if ( isset( $match['file0'] ) && $match['file0'][1] !== -1 ) {
+                               $match['file'] = $match['file0'];
+                               $match['query'] = $match['query0'];
+                       } elseif ( isset( $match['file1'] ) && $match['file1'][1] !== -1 ) {
+                               $match['file'] = $match['file1'];
+                               $match['query'] = $match['query1'];
+                       } else {
+                               $match['file'] = $match['file2'];
+                               $match['query'] = $match['query2'];
+                       }
+               } else {
+                       if ( isset( $match['file0'] ) && $match['file0'] !== '' ) {
+                               $match['file'] = $match['file0'];
+                               $match['query'] = $match['query0'];
+                       } elseif ( isset( $match['file1'] ) && $match['file1'] !== '' ) {
+                               $match['file'] = $match['file1'];
+                               $match['query'] = $match['query1'];
+                       } else {
+                               $match['file'] = $match['file2'];
+                               $match['query'] = $match['query2'];
+                       }
+               }
+       }
+
        /**
         * Remap or embed a CSS URL path.
         *
index 42f08cc..d0121b1 100644 (file)
@@ -147,6 +147,45 @@ class CSSMinTest extends MediaWikiTestCase {
                ];
        }
 
+       public static function provideIsRemoteUrl() {
+               return [
+                       [ true, 'http://localhost/w/red.gif?123' ],
+                       [ true, 'https://example.org/x.png' ],
+                       [ true, '//example.org/x.y.z/image.png' ],
+                       [ true, '//localhost/styles.css?query=yes' ],
+                       [ true, '' ],
+                       [ false, 'x.gif' ],
+                       [ false, '/x.gif' ],
+                       [ false, './x.gif' ],
+                       [ false, '../x.gif' ],
+               ];
+       }
+
+       /**
+        * @dataProvider provideIsRemoteUrl
+        * @cover CSSMin::isRemoteUrl
+        */
+       public function testIsRemoteUrl( $expect, $url ) {
+               $this->assertEquals( CSSMinTestable::isRemoteUrl( $url ), $expect );
+       }
+
+       public static function provideIsLocalUrls() {
+               return [
+                       [ false, 'x.gif' ],
+                       [ true, '/x.gif' ],
+                       [ false, './x.gif' ],
+                       [ false, '../x.gif' ],
+               ];
+       }
+
+       /**
+        * @dataProvider provideIsLocalUrls
+        * @cover CSSMin::isLocalUrl
+        */
+       public function testIsLocalUrl( $expect, $url ) {
+               $this->assertEquals( CSSMinTestable::isLocalUrl( $url ), $expect );
+       }
+
        /**
         * This tests funky parameters to CSSMin::remap. testRemapRemapping tests
         * the basic functionality.
@@ -211,45 +250,6 @@ class CSSMinTest extends MediaWikiTestCase {
                $this->assertEquals( $expectedOutput, $realOutput, "CSSMin::remap: $message" );
        }
 
-       public static function provideIsRemoteUrl() {
-               return [
-                       [ true, 'http://localhost/w/red.gif?123' ],
-                       [ true, 'https://example.org/x.png' ],
-                       [ true, '//example.org/x.y.z/image.png' ],
-                       [ true, '//localhost/styles.css?query=yes' ],
-                       [ true, '' ],
-                       [ false, 'x.gif' ],
-                       [ false, '/x.gif' ],
-                       [ false, './x.gif' ],
-                       [ false, '../x.gif' ],
-               ];
-       }
-
-       /**
-        * @dataProvider provideIsRemoteUrl
-        * @cover CSSMin::isRemoteUrl
-        */
-       public function testIsRemoteUrl( $expect, $url ) {
-               $this->assertEquals( CSSMinTestable::isRemoteUrl( $url ), $expect );
-       }
-
-       public static function provideIsLocalUrls() {
-               return [
-                       [ false, 'x.gif' ],
-                       [ true, '/x.gif' ],
-                       [ false, './x.gif' ],
-                       [ false, '../x.gif' ],
-               ];
-       }
-
-       /**
-        * @dataProvider provideIsLocalUrls
-        * @cover CSSMin::isLocalUrl
-        */
-       public function testIsLocalUrl( $expect, $url ) {
-               $this->assertEquals( CSSMinTestable::isLocalUrl( $url ), $expect );
-       }
-
        public static function provideRemapRemappingCases() {
                // red.gif and green.gif are one-pixel 35-byte GIFs.
                // large.png is a 35K PNG that should be non-embeddable.
@@ -307,8 +307,8 @@ class CSSMinTest extends MediaWikiTestCase {
                        ],
                        [
                                'Remote URL (unnecessary quotes not preserved)',
-                               'foo { background: url("http://example.org/w/foo.png"); }',
-                               'foo { background: url(http://example.org/w/foo.png); }',
+                               'foo { background: url("http://example.org/w/unnecessary-quotes.png"); }',
+                               'foo { background: url(http://example.org/w/unnecessary-quotes.png); }',
                        ],
                        [
                                'Embedded file',
@@ -410,9 +410,39 @@ class CSSMinTest extends MediaWikiTestCase {
                                '@import url(http://doc.example.org/styles.css)',
                        ],
                        [
-                               '@import rule to URL (should we remap this?)',
-                               '@import url(//localhost/styles.css?query=yes)',
-                               '@import url(//localhost/styles.css?query=yes)',
+                               '@import rule to local file (should we remap this?)',
+                               '@import url(/styles.css)',
+                               '@import url(http://doc.example.org/styles.css)',
+                       ],
+                       [
+                               '@import rule to URL',
+                               '@import url(//localhost/styles.css?query=val)',
+                               '@import url(//localhost/styles.css?query=val)',
+                       ],
+                       [
+                               'Background URL (double quotes)',
+                               'foo { background: url("//localhost/styles.css?quoted=double") }',
+                               'foo { background: url(//localhost/styles.css?quoted=double) }',
+                       ],
+                       [
+                               'Background URL (single quotes)',
+                               'foo { background: url(\'//localhost/styles.css?quoted=single\') }',
+                               'foo { background: url(//localhost/styles.css?quoted=single) }',
+                       ],
+                       [
+                               'Background URL (containing parentheses; T60473)',
+                               'foo { background: url("//localhost/styles.css?query=(parens)") }',
+                               'foo { background: url("//localhost/styles.css?query=(parens)") }',
+                       ],
+                       [
+                               'Background URL (double quoted, containing single quotes; T60473)',
+                               'foo { background: url("//localhost/styles.css?quote=\'") }',
+                               'foo { background: url("//localhost/styles.css?quote=\'") }',
+                       ],
+                       [
+                               'Background URL (single quoted, containing double quotes; T60473)',
+                               'foo { background: url(\'//localhost/styles.css?quote="\') }',
+                               'foo { background: url("//localhost/styles.css?quote=\"") }',
                        ],
                        [
                                'Simple case with comments before url',