From 3fe1056d7063c20cf7def479020fd499e1a4a0fe Mon Sep 17 00:00:00 2001 From: Trevor Parscal Date: Fri, 21 Jan 2011 00:03:58 +0000 Subject: [PATCH] Addresses issues raised in an excellent review of r80656. --- includes/AutoLoader.php | 1 - includes/DefaultSettings.php | 4 + includes/libs/JavaScriptDistiller.php | 320 ++++++++++++++++++++++---- includes/libs/ParseMaster.php | 214 ----------------- 4 files changed, 282 insertions(+), 257 deletions(-) delete mode 100644 includes/libs/ParseMaster.php diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index a02f681fb4..0764d84a40 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -180,7 +180,6 @@ $wgAutoloadLocalClasses = array( 'PageHistory' => 'includes/HistoryPage.php', 'PageHistoryPager' => 'includes/HistoryPage.php', 'Pager' => 'includes/Pager.php', - 'ParseMaster' => 'includes/libs/ParseMaster.php', 'PasswordError' => 'includes/User.php', 'PatrolLog' => 'includes/PatrolLog.php', 'PhpHttpRequest' => 'includes/HttpFunctions.php', diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 90db83939e..7e8c7c886e 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -2448,6 +2448,10 @@ $wgResourceLoaderDebug = false; */ $wgResourceLoaderUseESI = false; +/** + * Enable removal of some of the vertical whitespace (like \r and \n) from + * JavaScript code when minifying. + */ $wgResourceLoaderMinifyJSVerticalSpace = false; /** @} */ # End of resource loader settings } diff --git a/includes/libs/JavaScriptDistiller.php b/includes/libs/JavaScriptDistiller.php index 53b4d7012f..8d1e0d0816 100644 --- a/includes/libs/JavaScriptDistiller.php +++ b/includes/libs/JavaScriptDistiller.php @@ -16,12 +16,69 @@ class JavaScriptDistiller { * JSMin::minify, this produces < 1% larger output (after gzip) in approx. 25% of the time. * * @param $script String: JavaScript code to minify + * @param $stripVerticalSpace Boolean: Try to remove as much vertical whitespace as possible */ - public static function stripWhiteSpace( $script, $collapseVertical = false ) { - // This parser is based on regular expressions, which all get or'd together, so rules take - // precedence in the order they are added. We can use it to minify by armoring certain - // regions by matching them and replacing them with the full match, leaving the remaining - // regions around for further matching and replacing. + public static function stripWhiteSpace( $script, $stripVerticalSpace = false ) { + $script = self::stripComments( $script ); + $script = self::stripHorizontalSpace( $script ); + // If requested, make some vertical whitespace collapsing as well + if ( $collapseVertical ) { + $script = self::stripVerticalSpace( $script ); + } + // Done + return $script; + } + + private static function stripComments( $script ) { + $parser = self::createParser(); + // Remove comments + $parser->add( '/\\/\\/[^\\r\\n]*[\\r\\n]/' ); + $parser->add( '/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//' ); + // Execute and return + return $parser->exec( $script ); + } + + private static function stripHorizontalSpace( $script ) { + $parser = self::createParser(); + // Collapse horizontal whitespaces between variable names into a single space + $parser->add( '/(\\b|\\$)[ \\t]+(\\b|\\$)/', '$2 $3' ); + // Collapse horizontal whitespaces between unary operators into a single space + $parser->add( '/([+\\-])[ \\t]+([+\\-])/', '$2 $3' ); + // Remove all remaining un-protected horizontal whitespace + $parser->add( '/[ \\t]+/'); + // Collapse multiple vertical whitespaces with some horizontal spaces between them + $parser->add( '/[\\r\\n]+[ \\t]*[\\r\\n]+/', "\n" ); + // Execute and return + return $parser->exec($script); + } + + private static function stripVerticalSpace( $script ) { + $parser = self::createParser(); + // Collapse whitespaces between and after a ){ pair (function definitions) + $parser->add( '/\\)\\s+\\{\\s+/', '){' ); + // Collapse whitespaces between and after a ({ pair (JSON argument) + $parser->add( '/\\(\\s+\\{\\s+/', '({' ); + // Collapse whitespaces between a parenthesis and a period (call chaining) + $parser->add( '/\\)\\s+\\./', ').'); + // Collapse vertical whitespaces which come directly after a semicolon or a comma + $parser->add( '/([;,])\\s+/', '$2' ); + // Collapse whitespaces between multiple parenthesis/brackets of similar direction + $parser->add( '/([\\)\\}])\\s+([\\)\\}])/', '$2$3' ); + $parser->add( '/([\\(\\{])\\s+([\\(\\{])/', '$2$3' ); + return $parser->exec( $script ); + } + + /* + * Creates an instance of ParseMaster and protects sensitive JavaScript regions. + * + * This parser is based on regular expressions, which all get or'd together, so rules take + * precedence in the order they are added. We can use it to minify by armoring certain regions + * by matching them and replacing them with the full match, leaving the remaining regions around + * for further matching and replacing. When creating rules please note that because ParseMaster + * "or"s all of the rules together in a single pattern, encapsulating them in parenthesis, $1 + * represents the whole match for a given rule, and $2 is the first submatch. + */ + private static function createParser() { $parser = new ParseMaster(); // There is a bug in ParseMaster that causes a backslash at the end of a line to be changed // to \s if we use a backslash as the escape character. We work around this by using an @@ -30,46 +87,225 @@ class JavaScriptDistiller { // Protect strings. The original code had [^\'\\v] here, but that didn't armor multiline // strings correctly. This also armors multiline strings that don't have backslashes at the // end of the line (these are invalid), but that's fine because we're just armoring here. - $parser->add('/\'[^\']*\'/', '$1' ); - $parser->add('/"[^"]*"/', '$1' ); - // Remove comments - $parser->add('/\\/\\/[^\v]*[\v]/', ' '); - $parser->add('/\\/\\*[^*]*\\*+([^\\/][^*]*\\*+)*\\//', ' '); + $parser->add( '/\'[^\']*\'/', '$1' ); + $parser->add( '/"[^"]*"/', '$1' ); // Protect regular expressions - $parser->add('/\\h+(\\/[^\\/\\v\\*][^\\/\\v]*\\/g?i?)/', '$2'); // IGNORE - $parser->add('/[^\\w\\x24\\/\'"*)\\?:]\\/[^\\/\\v\\*][^\\/\\v]*\\/g?i?/', '$1'); - // Remove: ;;; doSomething(); - $parser->add('/;;;[^\\v]+[\\v]/'); - // Remove redundant semi-colons - $parser->add('/\\(;;\\)/', '$1'); // protect for (;;) loops - $parser->add('/;+\\h*([};])/', '$2'); - // Apply all rules defined up to this point - $script = $parser->exec($script); - // If requested, make some vertical whitespace collapsing as well - if ( $collapseVertical ) { - // Collapse whitespaces between and after a ){ pair (function definitions) - $parser->add('/\\)\\s+\\{\\s+/', '){'); - // Collapse whitespaces between and after a ({ pair (JSON argument) - $parser->add('/\\(\\s+\\{\\s+/', '({'); - // Collapse whitespaces between a parenthesis and a period (call chaining) - $parser->add('/\\)\\s+\\./', ').'); - // Collapse vertical whitespaces which come directly after a semicolon or a comma - $parser->add('/([;,])\\s+/', '$2'); - // Collapse whitespaces between multiple parenthesis/brackets of similar direction - $parser->add('/([\\)\\}])\\s+([\\)\\}])/', '$2$3'); - $parser->add('/([\\(\\{])\\s+([\\(\\{])/', '$2$3'); + $parser->add( '/[ \\t]+(\\/[^\\/\\r\\n\\*][^\\/\\r\\n]*\\/g?i?)/', '$2' ); + $parser->add( '/[^\\w\\$\\/\'"*)\\?:]\\/[^\\/\\r\\n\\*][^\\/\\r\\n]*\\/g?i?/', '$1' ); + return $parser; + } +} + +/** + * ParseMaster, version 1.0.2 (2005-08-19) Copyright 2005, Dean Edwards + * A multi-pattern parser. + * License: http://creativecommons.org/licenses/LGPL/2.1/ + * + * This is the PHP version of the ParseMaster component of Dean Edwards' (http://dean.edwards.name/) + * Packer, which was originally written in JavaScript. It was ported to PHP by Nicolas Martin. + * + * Original Source: http://joliclic.free.fr/php/javascript-packer/en/ + * + * Changes should be pushed back upstream. + */ +class ParseMaster { + public $ignoreCase = false; + public $escapeChar = ''; + + // constants + const EXPRESSION = 0; + const REPLACEMENT = 1; + const LENGTH = 2; + + // used to determine nesting levels + private $GROUPS = '/\\(/';//g + private $SUB_REPLACE = '/\\$\\d/'; + private $INDEXED = '/^\\$\\d+$/'; + private $TRIM = '/([\'"])\\1\\.(.*)\\.\\1\\1$/'; + private $ESCAPE = '/\\\./';//g + private $QUOTE = '/\'/'; + private $DELETED = '/\\x01[^\\x01]*\\x01/';//g + + public function add($expression, $replacement = '') { + // count the number of sub-expressions + // - add one because each pattern is itself a sub-expression + $length = 1 + preg_match_all($this->GROUPS, $this->_internalEscape((string)$expression), $out); + + // treat only strings $replacement + if (is_string($replacement)) { + // does the pattern deal with sub-expressions? + if (preg_match($this->SUB_REPLACE, $replacement)) { + // a simple lookup? (e.g. "$2") + if (preg_match($this->INDEXED, $replacement)) { + // store the index (used for fast retrieval of matched strings) + $replacement = (int)(substr($replacement, 1)) - 1; + } else { // a complicated lookup (e.g. "Hello $2 $1") + // build a function to do the lookup + $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement)) + ? '"' : "'"; + $replacement = array( + 'fn' => '_backReferences', + 'data' => array( + 'replacement' => $replacement, + 'length' => $length, + 'quote' => $quote + ) + ); + } + } } - // Collapse horizontal whitespaces between variable names into a single space - $parser->add('/(\\b|\\x24)\\h+(\\b|\\x24)/', '$2 $3'); - // Collapse horizontal whitespaces between urinary operators into a single space - $parser->add('/([+\\-])\\h+([+\\-])/', '$2 $3'); - // Collapse all remaining un-protected horizontal whitespace - $parser->add('/\\h+/', ''); - // Collapse multiple vertical whitespaces with some horizontal spaces between them - $parser->add('/\\v+\\h*\\v*/', "\n"); + // pass the modified arguments + if (!empty($expression)) $this->_add($expression, $replacement, $length); + else $this->_add('/^$/', $replacement, $length); + } + + public function exec($string) { + // execute the global replacement + $this->_escaped = array(); - // Done - return $parser->exec($script); + // simulate the _patterns.toSTring of Dean + $regexp = '/'; + foreach ($this->_patterns as $reg) { + $regexp .= '(' . substr($reg[self::EXPRESSION], 1, -1) . ')|'; + } + $regexp = substr($regexp, 0, -1) . '/'; + $regexp .= ($this->ignoreCase) ? 'i' : ''; + + $string = $this->_escape($string, $this->escapeChar); + $string = preg_replace_callback( + $regexp, + array( + &$this, + '_replacement' + ), + $string + ); + $string = $this->_unescape($string, $this->escapeChar); + + return preg_replace($this->DELETED, '', $string); + } + + public function reset() { + // clear the patterns collection so that this object may be re-used + $this->_patterns = array(); + } + + // private + private $_escaped = array(); // escaped characters + private $_patterns = array(); // patterns stored by index + + // create and add a new pattern to the patterns collection + private function _add() { + $arguments = func_get_args(); + $this->_patterns[] = $arguments; + } + + // this is the global replace function (it's quite complicated) + private function _replacement($arguments) { + if (empty($arguments)) return ''; + $i = 1; $j = 0; + // loop through the patterns + while (isset($this->_patterns[$j])) { + $pattern = $this->_patterns[$j++]; + // do we have a result? + if (isset($arguments[$i]) && ($arguments[$i] != '')) { + $replacement = $pattern[self::REPLACEMENT]; + + if (is_array($replacement) && isset($replacement['fn'])) { + + if (isset($replacement['data'])) $this->buffer = $replacement['data']; + return call_user_func(array(&$this, $replacement['fn']), $arguments, $i); + + } elseif (is_int($replacement)) { + return $arguments[$replacement + $i]; + + } + $delete = ($this->escapeChar == '' || + strpos($arguments[$i], $this->escapeChar) === false) + ? '' : "\x01" . $arguments[$i] . "\x01"; + return $delete . $replacement; + + // skip over references to sub-expressions + } else { + $i += $pattern[self::LENGTH]; + } + } + } + + private function _backReferences($match, $offset) { + $replacement = $this->buffer['replacement']; + $quote = $this->buffer['quote']; + $i = $this->buffer['length']; + while ($i) { + $replacement = str_replace('$'.$i--, $match[$offset + $i], $replacement); + } + return $replacement; + } + + private function _replace_name($match, $offset){ + $length = strlen($match[$offset + 2]); + $start = $length - max($length - strlen($match[$offset + 3]), 0); + return substr($match[$offset + 1], $start, $length) . $match[$offset + 4]; + } + + private function _replace_encoded($match, $offset) { + return $this->buffer[$match[$offset]]; + } + + + // php : we cannot pass additional data to preg_replace_callback, + // and we cannot use &$this in create_function, so let's go to lower level + private $buffer; + + // encode escaped characters + private function _escape($string, $escapeChar) { + if ($escapeChar) { + $this->buffer = $escapeChar; + return preg_replace_callback( + '/\\' . $escapeChar . '(.)' .'/', + array(&$this, '_escapeBis'), + $string + ); + + } else { + return $string; + } + } + private function _escapeBis($match) { + $this->_escaped[] = $match[1]; + return $this->buffer; + } + + // decode escaped characters + private function _unescape($string, $escapeChar) { + if ($escapeChar) { + $regexp = '/'.'\\'.$escapeChar.'/'; + $this->buffer = array('escapeChar'=> $escapeChar, 'i' => 0); + return preg_replace_callback + ( + $regexp, + array(&$this, '_unescapeBis'), + $string + ); + + } else { + return $string; + } + } + private function _unescapeBis() { + if (isset($this->_escaped[$this->buffer['i']]) + && $this->_escaped[$this->buffer['i']] != '') + { + $temp = $this->_escaped[$this->buffer['i']]; + } else { + $temp = ''; + } + $this->buffer['i']++; + return $this->buffer['escapeChar'] . $temp; + } + + private function _internalEscape($string) { + return preg_replace($this->ESCAPE, '', $string); } } diff --git a/includes/libs/ParseMaster.php b/includes/libs/ParseMaster.php deleted file mode 100644 index a95600ecbc..0000000000 --- a/includes/libs/ParseMaster.php +++ /dev/null @@ -1,214 +0,0 @@ -GROUPS, $this->_internalEscape((string)$expression), $out); - - // treat only strings $replacement - if (is_string($replacement)) { - // does the pattern deal with sub-expressions? - if (preg_match($this->SUB_REPLACE, $replacement)) { - // a simple lookup? (e.g. "$2") - if (preg_match($this->INDEXED, $replacement)) { - // store the index (used for fast retrieval of matched strings) - $replacement = (int)(substr($replacement, 1)) - 1; - } else { // a complicated lookup (e.g. "Hello $2 $1") - // build a function to do the lookup - $quote = preg_match($this->QUOTE, $this->_internalEscape($replacement)) - ? '"' : "'"; - $replacement = array( - 'fn' => '_backReferences', - 'data' => array( - 'replacement' => $replacement, - 'length' => $length, - 'quote' => $quote - ) - ); - } - } - } - // pass the modified arguments - if (!empty($expression)) $this->_add($expression, $replacement, $length); - else $this->_add('/^$/', $replacement, $length); - } - - public function exec($string) { - // execute the global replacement - $this->_escaped = array(); - - // simulate the _patterns.toSTring of Dean - $regexp = '/'; - foreach ($this->_patterns as $reg) { - $regexp .= '(' . substr($reg[self::EXPRESSION], 1, -1) . ')|'; - } - $regexp = substr($regexp, 0, -1) . '/'; - $regexp .= ($this->ignoreCase) ? 'i' : ''; - - $string = $this->_escape($string, $this->escapeChar); - $string = preg_replace_callback( - $regexp, - array( - &$this, - '_replacement' - ), - $string - ); - $string = $this->_unescape($string, $this->escapeChar); - - return preg_replace($this->DELETED, '', $string); - } - - public function reset() { - // clear the patterns collection so that this object may be re-used - $this->_patterns = array(); - } - - // private - private $_escaped = array(); // escaped characters - private $_patterns = array(); // patterns stored by index - - // create and add a new pattern to the patterns collection - private function _add() { - $arguments = func_get_args(); - $this->_patterns[] = $arguments; - } - - // this is the global replace function (it's quite complicated) - private function _replacement($arguments) { - if (empty($arguments)) return ''; - - $i = 1; $j = 0; - // loop through the patterns - while (isset($this->_patterns[$j])) { - $pattern = $this->_patterns[$j++]; - // do we have a result? - if (isset($arguments[$i]) && ($arguments[$i] != '')) { - $replacement = $pattern[self::REPLACEMENT]; - - if (is_array($replacement) && isset($replacement['fn'])) { - - if (isset($replacement['data'])) $this->buffer = $replacement['data']; - return call_user_func(array(&$this, $replacement['fn']), $arguments, $i); - - } elseif (is_int($replacement)) { - return $arguments[$replacement + $i]; - - } - $delete = ($this->escapeChar == '' || - strpos($arguments[$i], $this->escapeChar) === false) - ? '' : "\x01" . $arguments[$i] . "\x01"; - return $delete . $replacement; - - // skip over references to sub-expressions - } else { - $i += $pattern[self::LENGTH]; - } - } - } - - private function _backReferences($match, $offset) { - $replacement = $this->buffer['replacement']; - $quote = $this->buffer['quote']; - $i = $this->buffer['length']; - while ($i) { - $replacement = str_replace('$'.$i--, $match[$offset + $i], $replacement); - } - return $replacement; - } - - private function _replace_name($match, $offset){ - $length = strlen($match[$offset + 2]); - $start = $length - max($length - strlen($match[$offset + 3]), 0); - return substr($match[$offset + 1], $start, $length) . $match[$offset + 4]; - } - - private function _replace_encoded($match, $offset) { - return $this->buffer[$match[$offset]]; - } - - - // php : we cannot pass additional data to preg_replace_callback, - // and we cannot use &$this in create_function, so let's go to lower level - private $buffer; - - // encode escaped characters - private function _escape($string, $escapeChar) { - if ($escapeChar) { - $this->buffer = $escapeChar; - return preg_replace_callback( - '/\\' . $escapeChar . '(.)' .'/', - array(&$this, '_escapeBis'), - $string - ); - - } else { - return $string; - } - } - private function _escapeBis($match) { - $this->_escaped[] = $match[1]; - return $this->buffer; - } - - // decode escaped characters - private function _unescape($string, $escapeChar) { - if ($escapeChar) { - $regexp = '/'.'\\'.$escapeChar.'/'; - $this->buffer = array('escapeChar'=> $escapeChar, 'i' => 0); - return preg_replace_callback - ( - $regexp, - array(&$this, '_unescapeBis'), - $string - ); - - } else { - return $string; - } - } - private function _unescapeBis() { - if (isset($this->_escaped[$this->buffer['i']]) - && $this->_escaped[$this->buffer['i']] != '') - { - $temp = $this->_escaped[$this->buffer['i']]; - } else { - $temp = ''; - } - $this->buffer['i']++; - return $this->buffer['escapeChar'] . $temp; - } - - private function _internalEscape($string) { - return preg_replace($this->ESCAPE, '', $string); - } -} -- 2.20.1