CSSMin: Support parenthesis and quotes in url references
authorTimo Tijhof <krinklemail@gmail.com>
Thu, 4 May 2017 02:54:52 +0000 (19:54 -0700)
committerKrinkle <krinklemail@gmail.com>
Fri, 5 May 2017 18:33:24 +0000 (18:33 +0000)
Previously they were often being cut short due to the url pattern
ending at the first single quote, double quote or closing parenthesis
regardless of which of those started the url match.

Running benchmarkCSSMin.php before and after the change doesn't seem
produce consistent improvement or regression. Repeated runs with count=100
with and without this change both have a median between 2.6ms and 2.9ms
using PHP 5.6, and between 2.6ms and 2.8ms using HHVM 3.12.

Bug: T60473
Change-Id: I6d6a077ad76588f3ed81b1901a26b7e56d2157ee

includes/libs/CSSMin.php
tests/phpunit/includes/libs/CSSMinTest.php

index bba07e2..c504f35 100644 (file)
@@ -38,7 +38,7 @@ class CSSMin {
         * Internet Explorer data URI length limit. See encodeImageAsDataURI().
         */
        const DATA_URI_SIZE_LIMIT = 32768;
-       const URL_REGEX = 'url\(\s*[\'"]?(?P<file>[^\?\)\'"]*?)(?P<query>\?[^\)\'"]*?|)[\'"]?\s*\)';
+
        const EMBED_REGEX = '\/\*\s*\@embed\s*\*\/';
        const COMMENT_REGEX = '\/\*.*?\*\/';
 
@@ -72,8 +72,9 @@ class CSSMin {
                $files = [];
 
                $rFlags = PREG_OFFSET_CAPTURE | PREG_SET_ORDER;
-               if ( preg_match_all( '/' . self::URL_REGEX . '/', $stripped, $matches, $rFlags ) ) {
+               if ( preg_match_all( '/' . self::getUrlRegex() . '/', $stripped, $matches, $rFlags ) ) {
                        foreach ( $matches as $match ) {
+                               self::processUrlMatch( $match, $rFlags );
                                $url = $match['file'][0];
 
                                // Skip fully-qualified and protocol-relative URLs and data URIs
@@ -266,7 +267,7 @@ class CSSMin {
                // appears in the rule itself, e.g. in a quoted string. You are advised
                // not to use such characters in file names. We also match start/end of
                // the string to be consistent in edge-cases ('@import url(…)').
-               $pattern = '/(?:^|[;{])\K[^;{}]*' . CSSMin::URL_REGEX . '[^;}]*(?=[;}]|$)/';
+               $pattern = '/(?:^|[;{])\K[^;{}]*' . self::getUrlRegex() . '[^;}]*(?=[;}]|$)/';
 
                $source = preg_replace_callback(
                        $pattern,
@@ -290,13 +291,14 @@ class CSSMin {
 
                                // Build two versions of current rule: with remapped URLs
                                // and with embedded data: URIs (where possible).
-                               $pattern = '/(?P<embed>' . CSSMin::EMBED_REGEX . '\s*|)' . CSSMin::URL_REGEX . '/';
+                               $pattern = '/(?P<embed>' . CSSMin::EMBED_REGEX . '\s*|)' . self::getUrlRegex() . '/';
 
                                $ruleWithRemapped = preg_replace_callback(
                                        $pattern,
                                        function ( $match ) use ( $local, $remote ) {
-                                               $remapped = CSSMin::remapOne( $match['file'], $match['query'], $local, $remote, false );
+                                               self::processUrlMatch( $match );
 
+                                               $remapped = CSSMin::remapOne( $match['file'], $match['query'], $local, $remote, false );
                                                return CSSMin::buildUrlValue( $remapped );
                                        },
                                        $rule
@@ -309,6 +311,8 @@ class CSSMin {
                                        $ruleWithEmbedded = preg_replace_callback(
                                                $pattern,
                                                function ( $match ) use ( $embedAll, $local, $remote, &$mimeTypes ) {
+                                                       self::processUrlMatch( $match );
+
                                                        $embed = $embedAll || $match['embed'];
                                                        $embedded = CSSMin::remapOne(
                                                                $match['file'],
@@ -385,6 +389,69 @@ class CSSMin {
                return false;
        }
 
+       private static function getUrlRegex() {
+               static $urlRegex;
+               if ( $urlRegex === null ) {
+                       // Match these three variants separately to avoid broken urls when
+                       // e.g. a double quoted url contains a parenthesis, or when a
+                       // single quoted url contains a double quote, etc.
+                       // Note: PCRE doesn't support multiple capture groups with the same name by default.
+                       // - PCRE 6.7 introduced the "J" modifier (PCRE_INFO_JCHANGED for PCRE_DUPNAMES).
+                       //   https://secure.php.net/manual/en/reference.pcre.pattern.modifiers.php
+                       //   However this isn't useful since it just ignores all but the first one.
+                       //   Also, while the modifier was introduced in PCRE 6.7 (PHP 5.2+) it was
+                       //   not exposed to public preg_* functions until PHP 5.6.0.
+                       // - PCRE 8.36 fixed this to work as expected (e.g. merge conceptually to
+                       //   only return the one matched in the part that actually matched).
+                       //   However MediaWiki supports 5.5.9, which has PCRE 8.32
+                       //   Per https://secure.php.net/manual/en/pcre.installation.php:
+                       //   - PCRE 8.32 (PHP 5.5.0)
+                       //   - PCRE 8.34 (PHP 5.5.10, PHP 5.6.0)
+                       //   - PCRE 8.37 (PHP 5.5.26, PHP 5.6.9, PHP 7.0.0)
+                       //   Workaround by using different groups and merge via processUrlMatch().
+                       // - Using string concatenation for class constant or member assignments
+                       //   is only supported in PHP 5.6. Use a getter method for now.
+                       $urlRegex = '(' .
+                               // Unquoted url
+                               'url\(\s*(?P<file0>[^\'"][^\?\)]*?)(?P<query0>\?[^\)]*?|)\s*\)' .
+                               // Single quoted url
+                               '|url\(\s*\'(?P<file1>[^\?\']*?)(?P<query1>\?[^\']*?|)\'\s*\)' .
+                               // Double quoted url
+                               '|url\(\s*"(?P<file2>[^\?"]*?)(?P<query2>\?[^"]*?|)"\s*\)' .
+                               ')';
+               }
+               return $urlRegex;
+       }
+
+       private static function processUrlMatch( array &$match, $flags = 0 ) {
+               if ( $flags & PREG_SET_ORDER ) {
+                       // preg_match_all with PREG_SET_ORDER will return each group in each
+                       // match array, and if it didn't match, instead of the sub array
+                       // being an empty array it is `[ '', -1 ]`...
+                       if ( isset( $match['file0'] ) && $match['file0'][1] !== -1 ) {
+                               $match['file'] = $match['file0'];
+                               $match['query'] = $match['query0'];
+                       } elseif ( isset( $match['file1'] ) && $match['file1'][1] !== -1 ) {
+                               $match['file'] = $match['file1'];
+                               $match['query'] = $match['query1'];
+                       } else {
+                               $match['file'] = $match['file2'];
+                               $match['query'] = $match['query2'];
+                       }
+               } else {
+                       if ( isset( $match['file0'] ) && $match['file0'] !== '' ) {
+                               $match['file'] = $match['file0'];
+                               $match['query'] = $match['query0'];
+                       } elseif ( isset( $match['file1'] ) && $match['file1'] !== '' ) {
+                               $match['file'] = $match['file1'];
+                               $match['query'] = $match['query1'];
+                       } else {
+                               $match['file'] = $match['file2'];
+                               $match['query'] = $match['query2'];
+                       }
+               }
+       }
+
        /**
         * Remap or embed a CSS URL path.
         *
index 42f08cc..d0121b1 100644 (file)
@@ -147,6 +147,45 @@ class CSSMinTest extends MediaWikiTestCase {
                ];
        }
 
+       public static function provideIsRemoteUrl() {
+               return [
+                       [ true, 'http://localhost/w/red.gif?123' ],
+                       [ true, 'https://example.org/x.png' ],
+                       [ true, '//example.org/x.y.z/image.png' ],
+                       [ true, '//localhost/styles.css?query=yes' ],
+                       [ true, '' ],
+                       [ false, 'x.gif' ],
+                       [ false, '/x.gif' ],
+                       [ false, './x.gif' ],
+                       [ false, '../x.gif' ],
+               ];
+       }
+
+       /**
+        * @dataProvider provideIsRemoteUrl
+        * @cover CSSMin::isRemoteUrl
+        */
+       public function testIsRemoteUrl( $expect, $url ) {
+               $this->assertEquals( CSSMinTestable::isRemoteUrl( $url ), $expect );
+       }
+
+       public static function provideIsLocalUrls() {
+               return [
+                       [ false, 'x.gif' ],
+                       [ true, '/x.gif' ],
+                       [ false, './x.gif' ],
+                       [ false, '../x.gif' ],
+               ];
+       }
+
+       /**
+        * @dataProvider provideIsLocalUrls
+        * @cover CSSMin::isLocalUrl
+        */
+       public function testIsLocalUrl( $expect, $url ) {
+               $this->assertEquals( CSSMinTestable::isLocalUrl( $url ), $expect );
+       }
+
        /**
         * This tests funky parameters to CSSMin::remap. testRemapRemapping tests
         * the basic functionality.
@@ -211,45 +250,6 @@ class CSSMinTest extends MediaWikiTestCase {
                $this->assertEquals( $expectedOutput, $realOutput, "CSSMin::remap: $message" );
        }
 
-       public static function provideIsRemoteUrl() {
-               return [
-                       [ true, 'http://localhost/w/red.gif?123' ],
-                       [ true, 'https://example.org/x.png' ],
-                       [ true, '//example.org/x.y.z/image.png' ],
-                       [ true, '//localhost/styles.css?query=yes' ],
-                       [ true, '' ],
-                       [ false, 'x.gif' ],
-                       [ false, '/x.gif' ],
-                       [ false, './x.gif' ],
-                       [ false, '../x.gif' ],
-               ];
-       }
-
-       /**
-        * @dataProvider provideIsRemoteUrl
-        * @cover CSSMin::isRemoteUrl
-        */
-       public function testIsRemoteUrl( $expect, $url ) {
-               $this->assertEquals( CSSMinTestable::isRemoteUrl( $url ), $expect );
-       }
-
-       public static function provideIsLocalUrls() {
-               return [
-                       [ false, 'x.gif' ],
-                       [ true, '/x.gif' ],
-                       [ false, './x.gif' ],
-                       [ false, '../x.gif' ],
-               ];
-       }
-
-       /**
-        * @dataProvider provideIsLocalUrls
-        * @cover CSSMin::isLocalUrl
-        */
-       public function testIsLocalUrl( $expect, $url ) {
-               $this->assertEquals( CSSMinTestable::isLocalUrl( $url ), $expect );
-       }
-
        public static function provideRemapRemappingCases() {
                // red.gif and green.gif are one-pixel 35-byte GIFs.
                // large.png is a 35K PNG that should be non-embeddable.
@@ -307,8 +307,8 @@ class CSSMinTest extends MediaWikiTestCase {
                        ],
                        [
                                'Remote URL (unnecessary quotes not preserved)',
-                               'foo { background: url("http://example.org/w/foo.png"); }',
-                               'foo { background: url(http://example.org/w/foo.png); }',
+                               'foo { background: url("http://example.org/w/unnecessary-quotes.png"); }',
+                               'foo { background: url(http://example.org/w/unnecessary-quotes.png); }',
                        ],
                        [
                                'Embedded file',
@@ -410,9 +410,39 @@ class CSSMinTest extends MediaWikiTestCase {
                                '@import url(http://doc.example.org/styles.css)',
                        ],
                        [
-                               '@import rule to URL (should we remap this?)',
-                               '@import url(//localhost/styles.css?query=yes)',
-                               '@import url(//localhost/styles.css?query=yes)',
+                               '@import rule to local file (should we remap this?)',
+                               '@import url(/styles.css)',
+                               '@import url(http://doc.example.org/styles.css)',
+                       ],
+                       [
+                               '@import rule to URL',
+                               '@import url(//localhost/styles.css?query=val)',
+                               '@import url(//localhost/styles.css?query=val)',
+                       ],
+                       [
+                               'Background URL (double quotes)',
+                               'foo { background: url("//localhost/styles.css?quoted=double") }',
+                               'foo { background: url(//localhost/styles.css?quoted=double) }',
+                       ],
+                       [
+                               'Background URL (single quotes)',
+                               'foo { background: url(\'//localhost/styles.css?quoted=single\') }',
+                               'foo { background: url(//localhost/styles.css?quoted=single) }',
+                       ],
+                       [
+                               'Background URL (containing parentheses; T60473)',
+                               'foo { background: url("//localhost/styles.css?query=(parens)") }',
+                               'foo { background: url("//localhost/styles.css?query=(parens)") }',
+                       ],
+                       [
+                               'Background URL (double quoted, containing single quotes; T60473)',
+                               'foo { background: url("//localhost/styles.css?quote=\'") }',
+                               'foo { background: url("//localhost/styles.css?quote=\'") }',
+                       ],
+                       [
+                               'Background URL (single quoted, containing double quotes; T60473)',
+                               'foo { background: url(\'//localhost/styles.css?quote="\') }',
+                               'foo { background: url("//localhost/styles.css?quote=\"") }',
                        ],
                        [
                                'Simple case with comments before url',