Merge "Add line breaks to the output of Special:Watchlist"
[lhc/web/wiklou.git] / includes / libs / jsminplus.php
index 4afa43a..7c4e32b 100644 (file)
@@ -1,7 +1,6 @@
 <?php
-
 /**
- * JSMinPlus version 1.3
+ * JSMinPlus version 1.4
  *
  * Minifies a javascript file using a javascript parser
  *
  * Usage: $minified = JSMinPlus::minify($script [, $filename])
  *
  * Versionlog (see also changelog.txt):
- * 17-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
+ * 23-07-2011 - remove dynamic creation of OP_* and KEYWORD_* defines and declare them on top
+ *              reduce memory footprint by minifying by block-scope
+ *              some small byte-saving and performance improvements
+ * 12-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
  * 18-04-2009 - fixed crashbug in PHP 5.2.9 and several other bugfixes
  * 12-04-2009 - some small bugfixes and performance improvements
  * 09-04-2009 - initial open sourced version 1.0
  *
  * Latest version of this script: http://files.tweakers.net/jsminplus/jsminplus.zip
  *
+ * @file
  */
 
 /* ***** BEGIN LICENSE BLOCK *****
@@ -45,7 +48,7 @@
  * the Initial Developer. All Rights Reserved.
  *
  * Contributor(s): Tino Zijdel <crisp@tweakers.net>
- * PHP port, modifications and minifier routine are (C) 2009
+ * PHP port, modifications and minifier routine are (C) 2009-2011
  *
  * Alternatively, the contents of this file may be used under the terms of
  * either the GNU General Public License Version 2 or later (the "GPL"), or
@@ -85,10 +88,89 @@ define('JS_SETTER', 111);
 define('JS_GROUP', 112);
 define('JS_LIST', 113);
 
+define('JS_MINIFIED', 999);
+
 define('DECLARED_FORM', 0);
 define('EXPRESSED_FORM', 1);
 define('STATEMENT_FORM', 2);
 
+/* Operators */
+define('OP_SEMICOLON', ';');
+define('OP_COMMA', ',');
+define('OP_HOOK', '?');
+define('OP_COLON', ':');
+define('OP_OR', '||');
+define('OP_AND', '&&');
+define('OP_BITWISE_OR', '|');
+define('OP_BITWISE_XOR', '^');
+define('OP_BITWISE_AND', '&');
+define('OP_STRICT_EQ', '===');
+define('OP_EQ', '==');
+define('OP_ASSIGN', '=');
+define('OP_STRICT_NE', '!==');
+define('OP_NE', '!=');
+define('OP_LSH', '<<');
+define('OP_LE', '<=');
+define('OP_LT', '<');
+define('OP_URSH', '>>>');
+define('OP_RSH', '>>');
+define('OP_GE', '>=');
+define('OP_GT', '>');
+define('OP_INCREMENT', '++');
+define('OP_DECREMENT', '--');
+define('OP_PLUS', '+');
+define('OP_MINUS', '-');
+define('OP_MUL', '*');
+define('OP_DIV', '/');
+define('OP_MOD', '%');
+define('OP_NOT', '!');
+define('OP_BITWISE_NOT', '~');
+define('OP_DOT', '.');
+define('OP_LEFT_BRACKET', '[');
+define('OP_RIGHT_BRACKET', ']');
+define('OP_LEFT_CURLY', '{');
+define('OP_RIGHT_CURLY', '}');
+define('OP_LEFT_PAREN', '(');
+define('OP_RIGHT_PAREN', ')');
+define('OP_CONDCOMMENT_END', '@*/');
+
+define('OP_UNARY_PLUS', 'U+');
+define('OP_UNARY_MINUS', 'U-');
+
+/* Keywords */
+define('KEYWORD_BREAK', 'break');
+define('KEYWORD_CASE', 'case');
+define('KEYWORD_CATCH', 'catch');
+define('KEYWORD_CONST', 'const');
+define('KEYWORD_CONTINUE', 'continue');
+define('KEYWORD_DEBUGGER', 'debugger');
+define('KEYWORD_DEFAULT', 'default');
+define('KEYWORD_DELETE', 'delete');
+define('KEYWORD_DO', 'do');
+define('KEYWORD_ELSE', 'else');
+define('KEYWORD_ENUM', 'enum');
+define('KEYWORD_FALSE', 'false');
+define('KEYWORD_FINALLY', 'finally');
+define('KEYWORD_FOR', 'for');
+define('KEYWORD_FUNCTION', 'function');
+define('KEYWORD_IF', 'if');
+define('KEYWORD_IN', 'in');
+define('KEYWORD_INSTANCEOF', 'instanceof');
+define('KEYWORD_NEW', 'new');
+define('KEYWORD_NULL', 'null');
+define('KEYWORD_RETURN', 'return');
+define('KEYWORD_SWITCH', 'switch');
+define('KEYWORD_THIS', 'this');
+define('KEYWORD_THROW', 'throw');
+define('KEYWORD_TRUE', 'true');
+define('KEYWORD_TRY', 'try');
+define('KEYWORD_TYPEOF', 'typeof');
+define('KEYWORD_VAR', 'var');
+define('KEYWORD_VOID', 'void');
+define('KEYWORD_WHILE', 'while');
+define('KEYWORD_WITH', 'with');
+
+
 class JSMinPlus
 {
        private $parser;
@@ -110,7 +192,7 @@ class JSMinPlus
 
        private function __construct()
        {
-               $this->parser = new JSParser();
+               $this->parser = new JSParser($this);
        }
 
        public static function minify($js, $filename='')
@@ -139,22 +221,18 @@ class JSMinPlus
                return false;
        }
 
-       private function parseTree($n, $noBlockGrouping = false)
+       public function parseTree($n, $noBlockGrouping = false)
        {
                $s = '';
 
                switch ($n->type)
                {
-                       case KEYWORD_FUNCTION:
-                               $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
-                               $params = $n->params;
-                               for ($i = 0, $j = count($params); $i < $j; $i++)
-                                       $s .= ($i ? ',' : '') . $params[$i];
-                               $s .= '){' . $this->parseTree($n->body, true) . '}';
+                       case JS_MINIFIED:
+                               $s = $n->value;
                        break;
 
                        case JS_SCRIPT:
-                               // we do nothing with funDecls or varDecls
+                               // we do nothing yet with funDecls or varDecls
                                $noBlockGrouping = true;
                        // FALL THROUGH
 
@@ -201,6 +279,14 @@ class JSMinPlus
                                }
                        break;
 
+                       case KEYWORD_FUNCTION:
+                               $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '(';
+                               $params = $n->params;
+                               for ($i = 0, $j = count($params); $i < $j; $i++)
+                                       $s .= ($i ? ',' : '') . $params[$i];
+                               $s .= '){' . $this->parseTree($n->body, true) . '}';
+                       break;
+
                        case KEYWORD_IF:
                                $s = 'if(' . $this->parseTree($n->condition) . ')';
                                $thenPart = $this->parseTree($n->thenPart);
@@ -307,19 +393,14 @@ class JSMinPlus
                        break;
 
                        case KEYWORD_THROW:
-                               $s = 'throw ' . $this->parseTree($n->exception);
-                       break;
-
                        case KEYWORD_RETURN:
-                               $s = 'return';
+                               $s = $n->type;
                                if ($n->value)
                                {
                                        $t = $this->parseTree($n->value);
                                        if (strlen($t))
                                        {
-                                               if (    $t[0] != '(' && $t[0] != '[' && $t[0] != '{' &&
-                                                       $t[0] != '"' && $t[0] != "'" && $t[0] != '/'
-                                               )
+                                               if ($this->isWordChar($t[0]) || $t[0] == '\\')
                                                        $s .= ' ';
 
                                                $s .= $t;
@@ -345,6 +426,40 @@ class JSMinPlus
                                }
                        break;
 
+                       case KEYWORD_IN:
+                       case KEYWORD_INSTANCEOF:
+                               $left = $this->parseTree($n->treeNodes[0]);
+                               $right = $this->parseTree($n->treeNodes[1]);
+
+                               $s = $left;
+
+                               if ($this->isWordChar(substr($left, -1)))
+                                       $s .= ' ';
+
+                               $s .= $n->type;
+
+                               if ($this->isWordChar($right[0]) || $right[0] == '\\')
+                                       $s .= ' ';
+
+                               $s .= $right;
+                       break;
+
+                       case KEYWORD_DELETE:
+                       case KEYWORD_TYPEOF:
+                               $right = $this->parseTree($n->treeNodes[0]);
+
+                               $s = $n->type;
+
+                               if ($this->isWordChar($right[0]) || $right[0] == '\\')
+                                       $s .= ' ';
+
+                               $s .= $right;
+                       break;
+
+                       case KEYWORD_VOID:
+                               $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
+                       break;
+
                        case KEYWORD_DEBUGGER:
                                throw new Exception('NOT IMPLEMENTED: DEBUGGER');
                        break;
@@ -419,26 +534,6 @@ class JSMinPlus
                                }
                        break;
 
-                       case KEYWORD_IN:
-                               $s = $this->parseTree($n->treeNodes[0]) . ' in ' . $this->parseTree($n->treeNodes[1]);
-                       break;
-
-                       case KEYWORD_INSTANCEOF:
-                               $s = $this->parseTree($n->treeNodes[0]) . ' instanceof ' . $this->parseTree($n->treeNodes[1]);
-                       break;
-
-                       case KEYWORD_DELETE:
-                               $s = 'delete ' . $this->parseTree($n->treeNodes[0]);
-                       break;
-
-                       case KEYWORD_VOID:
-                               $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
-                       break;
-
-                       case KEYWORD_TYPEOF:
-                               $s = 'typeof ' . $this->parseTree($n->treeNodes[0]);
-                       break;
-
                        case OP_NOT:
                        case OP_BITWISE_NOT:
                        case OP_UNARY_PLUS:
@@ -528,13 +623,33 @@ class JSMinPlus
                                $s .= '}';
                        break;
 
+                       case TOKEN_NUMBER:
+                               $s = $n->value;
+                               if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m))
+                                       $s = $m[1] . 'e' . strlen($m[2]);
+                       break;
+
                        case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
-                       case TOKEN_IDENTIFIER: case TOKEN_NUMBER: case TOKEN_STRING: case TOKEN_REGEXP:
+                       case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_REGEXP:
                                $s = $n->value;
                        break;
 
                        case JS_GROUP:
-                               $s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
+                               if (in_array(
+                                       $n->treeNodes[0]->type,
+                                       array(
+                                               JS_ARRAY_INIT, JS_GROUP, // JS_OBJECT_INIT keeps its parens: unwrapped `({}).x` can start a block statement
+                                               TOKEN_STRING, TOKEN_REGEXP, TOKEN_IDENTIFIER, // TOKEN_NUMBER keeps its parens: unwrapped `(1).x` reads as `1.x`
+                                               KEYWORD_NULL, KEYWORD_THIS, KEYWORD_TRUE, KEYWORD_FALSE
+                                       )
+                               ))
+                               {
+                                       $s = $this->parseTree($n->treeNodes[0]);
+                               }
+                               else
+                               {
+                                       $s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
+                               }
                        break;
 
                        default:
@@ -548,11 +663,17 @@ class JSMinPlus
        {
                return preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/', $string) && !in_array($string, $this->reserved);
        }
+
+       private function isWordChar($char)
+       {
+               return $char == '_' || $char == '$' || ctype_alnum($char) || $char >= "\x80"; // bytes >= 0x80 are part of a UTF-8 encoded identifier character
+       }
 }
 
 class JSParser
 {
        private $t;
+       private $minifier;
 
        private $opPrecedence = array(
                ';' => 0,
@@ -602,8 +723,9 @@ class JSParser
                TOKEN_CONDCOMMENT_START => 1, TOKEN_CONDCOMMENT_END => 1
        );
 
-       public function __construct()
+       public function __construct($minifier=null)
        {
+               $this->minifier = $minifier;
                $this->t = new JSTokenizer();
        }
 
@@ -627,6 +749,19 @@ class JSParser
                $n->funDecls = $x->funDecls;
                $n->varDecls = $x->varDecls;
 
+               // minify by scope
+               if ($this->minifier)
+               {
+                       $n->value = $this->minifier->parseTree($n);
+
+                       // clear tree from node to save memory
+                       $n->treeNodes = null;
+                       $n->funDecls = null;
+                       $n->varDecls = null;
+
+                       $n->type = JS_MINIFIED;
+               }
+
                return $n;
        }
 
@@ -885,7 +1020,7 @@ class JSParser
 
                        case KEYWORD_THROW:
                                $n = new JSNode($this->t);
-                               $n->exception = $this->Expression($x);
+                               $n->value = $this->Expression($x);
                        break;
 
                        case KEYWORD_RETURN:
@@ -1536,7 +1671,7 @@ class JSNode
 
                if (($numargs = func_num_args()) > 2)
                {
-                       $args = func_get_args();;
+                       $args = func_get_args();
                        for ($i = 2; $i < $numargs; $i++)
                                $this->addNode($args[$i]);
                }
@@ -1600,44 +1735,11 @@ class JSTokenizer
        );
 
        private $opTypeNames = array(
-               ';'     => 'SEMICOLON',
-               ','     => 'COMMA',
-               '?'     => 'HOOK',
-               ':'     => 'COLON',
-               '||'    => 'OR',
-               '&&'    => 'AND',
-               '|'     => 'BITWISE_OR',
-               '^'     => 'BITWISE_XOR',
-               '&'     => 'BITWISE_AND',
-               '==='   => 'STRICT_EQ',
-               '=='    => 'EQ',
-               '='     => 'ASSIGN',
-               '!=='   => 'STRICT_NE',
-               '!='    => 'NE',
-               '<<'    => 'LSH',
-               '<='    => 'LE',
-               '<'     => 'LT',
-               '>>>'   => 'URSH',
-               '>>'    => 'RSH',
-               '>='    => 'GE',
-               '>'     => 'GT',
-               '++'    => 'INCREMENT',
-               '--'    => 'DECREMENT',
-               '+'     => 'PLUS',
-               '-'     => 'MINUS',
-               '*'     => 'MUL',
-               '/'     => 'DIV',
-               '%'     => 'MOD',
-               '!'     => 'NOT',
-               '~'     => 'BITWISE_NOT',
-               '.'     => 'DOT',
-               '['     => 'LEFT_BRACKET',
-               ']'     => 'RIGHT_BRACKET',
-               '{'     => 'LEFT_CURLY',
-               '}'     => 'RIGHT_CURLY',
-               '('     => 'LEFT_PAREN',
-               ')'     => 'RIGHT_PAREN',
-               '@*/'   => 'CONDCOMMENT_END'
+               ';', ',', '?', ':', '||', '&&', '|', '^',
+               '&', '===', '==', '=', '!==', '!=', '<<', '<=',
+               '<', '>>>', '>>', '>=', '>', '++', '--', '+',
+               '-', '*', '/', '%', '!', '~', '.', '[',
+               ']', '{', '}', '(', ')', '@*/'
        );
 
        private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%');
@@ -1645,17 +1747,7 @@ class JSTokenizer
 
        public function __construct()
        {
-               $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', array_keys($this->opTypeNames))) . ')#';
-
-               // this is quite a hidden yet convenient place to create the defines for operators and keywords
-               foreach ($this->opTypeNames as $operand => $name)
-                       define('OP_' . $name, $operand);
-
-               define('OP_UNARY_PLUS', 'U+');
-               define('OP_UNARY_MINUS', 'U-');
-
-               foreach ($this->keywords as $keyword)
-                       define('KEYWORD_' . strtoupper($keyword), $keyword);
+               $this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#';
        }
 
        public function init($source, $filename = '', $lineno = 1)
@@ -1806,22 +1898,38 @@ class JSTokenizer
                {
                        switch ($input[0])
                        {
-                               case '0': case '1': case '2': case '3': case '4':
-                               case '5': case '6': case '7': case '8': case '9':
-                                       if (preg_match('/^\d+\.\d*(?:[eE][-+]?\d+)?|^\d+(?:\.\d*)?[eE][-+]?\d+/', $input, $match))
+                               case '0':
+                                       // hexadecimal; isset() guards against a lone "0" being the last byte of $input
+                                       if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match))
                                        {
                                                $tt = TOKEN_NUMBER;
+                                               break;
+                                       }
+                               // FALL THROUGH
+
+                               case '1': case '2': case '3': case '4': case '5':
+                               case '6': case '7': case '8': case '9':
+                                       // should always match
+                                       preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
+                                       $tt = TOKEN_NUMBER;
+                               break;
+
+                               case "'":
+                                       if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match))
+                                       {
+                                               $tt = TOKEN_STRING;
                                        }
-                                       else if (preg_match('/^0[xX][\da-fA-F]+|^0[0-7]*|^\d+/', $input, $match))
+                                       else
                                        {
-                                               // this should always match because of \d+
-                                               $tt = TOKEN_NUMBER;
+                                               if ($chunksize)
+                                                       return $this->get(null); // retry with a full chunk fetch
+
+                                               throw $this->newSyntaxError('Unterminated string literal');
                                        }
                                break;
 
                                case '"':
-                               case "'":
-                                       if (preg_match('/^"(?:\\\\(?:.|\r?\n)|[^\\\\"\r\n]+)*"|^\'(?:\\\\(?:.|\r?\n)|[^\\\\\'\r\n]+)*\'/', $input, $match))
+                                       if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match))
                                        {
                                                $tt = TOKEN_STRING;
                                        }
@@ -1921,13 +2029,55 @@ class JSTokenizer
                                break;
 
                                default:
-                                       // FIXME: add support for unicode and unicode escape sequence \uHHHH
-                                       if (preg_match('/^[$\w]+/', $input, $match))
+                                       // Fast path for identifiers: word chars followed by whitespace or various other tokens.
+                                       // Note we don't need to exclude digits in the first char, as they've already been found
+                                       // above.
+                                       if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}\(\)@])/', $input, $match))
                                        {
-                                               $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
+                                               // Character classes per ECMA-262 edition 5.1 section 7.6
+                                               // Per spec, must accept Unicode 3.0, *may* accept later versions.
+                                               // We'll take whatever PCRE understands, which should be more recent.
+                                               $identifierStartChars = "\\p{L}\\p{Nl}" .  # UnicodeLetter
+                                                                       "\$" .
+                                                                       "_";
+                                               $identifierPartChars  = $identifierStartChars .
+                                                                       "\\p{Mn}\\p{Mc}" . # UnicodeCombiningMark
+                                                                       "\\p{Nd}" .        # UnicodeDigit
+                                                                       "\\p{Pc}";         # UnicodeConnectorPunctuation
+                                               $unicodeEscape = "\\\\u[0-9A-Fa-f]{4}";    # \uHHHH: exactly four hex digits (no '-' in the class)
+                                               $identifierRegex = "/^" .
+                                                                  "(?:[$identifierStartChars]|$unicodeEscape)" .
+                                                                  "(?:[$identifierPartChars]|$unicodeEscape)*" .
+                                                                  "/uS";
+                                               if (preg_match($identifierRegex, $input, $match))
+                                               {
+                                                       if (strpos($match[0], '\\') !== false) {
+                                                               // Per ECMA-262 edition 5.1, section 7.6 escape sequences should behave as if they were
+                                                               // the original chars, but only within the boundaries of the identifier.
+                                                               $decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/',
+                                                                               array(__CLASS__, 'unicodeEscapeCallback'),
+                                                                               $match[0]);
+
+                                                               // Since our original regex didn't de-escape the originals, we need to check for validity again.
+                                                               // No need to worry about token boundaries, as anything outside the identifier is illegal!
+                                                               if (!preg_match("/^[$identifierStartChars][$identifierPartChars]*$/u", $decoded)) {
+                                                                       throw $this->newSyntaxError('Illegal token');
+                                                               }
+
+                                                               // Per spec it _ought_ to work to use these escapes for keywords as well...
+                                                               // but IE rejects them as invalid, while Firefox and Chrome treat them as identifiers
+                                                               // that don't match the keyword.
+                                                               if (in_array($decoded, $this->keywords)) {
+                                                                       throw $this->newSyntaxError('Illegal token');
+                                                               }
+
+                                                               // TODO: save the decoded form for output?
+                                                       }
+                                               }
+                                               else
+                                                       throw $this->newSyntaxError('Illegal token');
                                        }
-                                       else
-                                               throw $this->newSyntaxError('Illegal token');
+                                       $tt = in_array($match[0], $this->keywords) ? $match[0] : TOKEN_IDENTIFIER;
                        }
                }
 
@@ -1965,6 +2115,11 @@ class JSTokenizer
        {
                return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
        }
+
+       public static function unicodeEscapeCallback($m)
+       {
+               return html_entity_decode('&#x' . $m[1]. ';', ENT_QUOTES, 'UTF-8'); // $m[1] holds the hex digits of a \uHHHH escape; decoded as a numeric entity to UTF-8
+       }
 }
 
 class JSToken
@@ -1976,5 +2131,3 @@ class JSToken
        public $lineno;
        public $assignOp;
 }
-
-?>
\ No newline at end of file