f250217f008a87725faa6e921aba6572029bfd9a
[lhc/web/wiklou.git] / includes / libs / jsminplus.php
1 <?php
2 /**
3 * JSMinPlus version 1.4
4 *
5 * Minifies a javascript file using a javascript parser
6 *
7 * This implements a PHP port of Brendan Eich's Narcissus open source javascript engine (in javascript)
8 * References: http://en.wikipedia.org/wiki/Narcissus_(JavaScript_engine)
9 * Narcissus sourcecode: http://mxr.mozilla.org/mozilla/source/js/narcissus/
10 * JSMinPlus weblog: http://crisp.tweakblogs.net/blog/cat/716
11 *
12 * Tino Zijdel <crisp@tweakers.net>
13 *
14 * Usage: $minified = JSMinPlus::minify($script [, $filename])
15 *
16 * Versionlog (see also changelog.txt):
17 * 23-07-2011 - remove dynamic creation of OP_* and KEYWORD_* defines and declare them on top
18 * reduce memory footprint by minifying by block-scope
19 * some small byte-saving and performance improvements
20 * 12-05-2009 - fixed hook:colon precedence, fixed empty body in loop and if-constructs
21 * 18-04-2009 - fixed crashbug in PHP 5.2.9 and several other bugfixes
22 * 12-04-2009 - some small bugfixes and performance improvements
23 * 09-04-2009 - initial open sourced version 1.0
24 *
25 * Latest version of this script: http://files.tweakers.net/jsminplus/jsminplus.zip
26 *
27 * @file
28 */
29
30 /* ***** BEGIN LICENSE BLOCK *****
31 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
32 *
33 * The contents of this file are subject to the Mozilla Public License Version
34 * 1.1 (the "License"); you may not use this file except in compliance with
35 * the License. You may obtain a copy of the License at
36 * http://www.mozilla.org/MPL/
37 *
38 * Software distributed under the License is distributed on an "AS IS" basis,
39 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
40 * for the specific language governing rights and limitations under the
41 * License.
42 *
43 * The Original Code is the Narcissus JavaScript engine.
44 *
45 * The Initial Developer of the Original Code is
46 * Brendan Eich <brendan@mozilla.org>.
47 * Portions created by the Initial Developer are Copyright (C) 2004
48 * the Initial Developer. All Rights Reserved.
49 *
50 * Contributor(s): Tino Zijdel <crisp@tweakers.net>
51 * PHP port, modifications and minifier routine are (C) 2009-2011
52 *
53 * Alternatively, the contents of this file may be used under the terms of
54 * either the GNU General Public License Version 2 or later (the "GPL"), or
55 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
56 * in which case the provisions of the GPL or the LGPL are applicable instead
57 * of those above. If you wish to allow use of your version of this file only
58 * under the terms of either the GPL or the LGPL, and not to allow others to
59 * use your version of this file under the terms of the MPL, indicate your
60 * decision by deleting the provisions above and replace them with the notice
61 * and other provisions required by the GPL or the LGPL. If you do not delete
62 * the provisions above, a recipient may use your version of this file under
63 * the terms of any one of the MPL, the GPL or the LGPL.
64 *
65 * ***** END LICENSE BLOCK ***** */
66
/*
 * Token types. These are plain integers so they cannot collide with the
 * operator/keyword constants further down, which are strings.
 */
define('TOKEN_END', 1);
define('TOKEN_NUMBER', 2);
define('TOKEN_IDENTIFIER', 3);
define('TOKEN_STRING', 4);
define('TOKEN_REGEXP', 5);
define('TOKEN_NEWLINE', 6);
define('TOKEN_CONDCOMMENT_START', 7);
define('TOKEN_CONDCOMMENT_END', 8);

/*
 * Node types used in the syntax tree for constructs that have no single
 * operator or keyword of their own (see the switch in JSMinPlus::parseTree()).
 */
define('JS_SCRIPT', 100);
define('JS_BLOCK', 101);
define('JS_LABEL', 102);
define('JS_FOR_IN', 103);
define('JS_CALL', 104);
define('JS_NEW_WITH_ARGS', 105);
define('JS_INDEX', 106);
define('JS_ARRAY_INIT', 107);
define('JS_OBJECT_INIT', 108);
define('JS_PROPERTY_INIT', 109);
define('JS_GETTER', 110);
define('JS_SETTER', 111);
define('JS_GROUP', 112);
define('JS_LIST', 113);

/* Type given to a script node once JSParser::Script() has replaced its subtree by minified text */
define('JS_MINIFIED', 999);

/* Function forms, stored in a function node's functionForm property */
define('DECLARED_FORM', 0);
define('EXPRESSED_FORM', 1);
define('STATEMENT_FORM', 2);

/* Operators */
define('OP_SEMICOLON', ';');
define('OP_COMMA', ',');
define('OP_HOOK', '?');
define('OP_COLON', ':');
define('OP_OR', '||');
define('OP_AND', '&&');
define('OP_BITWISE_OR', '|');
define('OP_BITWISE_XOR', '^');
define('OP_BITWISE_AND', '&');
define('OP_STRICT_EQ', '===');
define('OP_EQ', '==');
define('OP_ASSIGN', '=');
define('OP_STRICT_NE', '!==');
define('OP_NE', '!=');
define('OP_LSH', '<<');
define('OP_LE', '<=');
define('OP_LT', '<');
define('OP_URSH', '>>>');
define('OP_RSH', '>>');
define('OP_GE', '>=');
define('OP_GT', '>');
define('OP_INCREMENT', '++');
define('OP_DECREMENT', '--');
define('OP_PLUS', '+');
define('OP_MINUS', '-');
define('OP_MUL', '*');
define('OP_DIV', '/');
define('OP_MOD', '%');
define('OP_NOT', '!');
define('OP_BITWISE_NOT', '~');
define('OP_DOT', '.');
define('OP_LEFT_BRACKET', '[');
define('OP_RIGHT_BRACKET', ']');
define('OP_LEFT_CURLY', '{');
define('OP_RIGHT_CURLY', '}');
define('OP_LEFT_PAREN', '(');
define('OP_RIGHT_PAREN', ')');
define('OP_CONDCOMMENT_END', '@*/');

/*
 * Node types for unary plus/minus. The 'U' prefix keeps them distinct from the
 * binary OP_PLUS/OP_MINUS types; presumably the prefix is never emitted, as
 * parseTree() prints the node's value rather than its type for these.
 */
define('OP_UNARY_PLUS', 'U+');
define('OP_UNARY_MINUS', 'U-');

/* Keywords */
define('KEYWORD_BREAK', 'break');
define('KEYWORD_CASE', 'case');
define('KEYWORD_CATCH', 'catch');
define('KEYWORD_CONST', 'const');
define('KEYWORD_CONTINUE', 'continue');
define('KEYWORD_DEBUGGER', 'debugger');
define('KEYWORD_DEFAULT', 'default');
define('KEYWORD_DELETE', 'delete');
define('KEYWORD_DO', 'do');
define('KEYWORD_ELSE', 'else');
define('KEYWORD_ENUM', 'enum');
define('KEYWORD_FALSE', 'false');
define('KEYWORD_FINALLY', 'finally');
define('KEYWORD_FOR', 'for');
define('KEYWORD_FUNCTION', 'function');
define('KEYWORD_IF', 'if');
define('KEYWORD_IN', 'in');
define('KEYWORD_INSTANCEOF', 'instanceof');
define('KEYWORD_NEW', 'new');
define('KEYWORD_NULL', 'null');
define('KEYWORD_RETURN', 'return');
define('KEYWORD_SWITCH', 'switch');
define('KEYWORD_THIS', 'this');
define('KEYWORD_THROW', 'throw');
define('KEYWORD_TRUE', 'true');
define('KEYWORD_TRY', 'try');
define('KEYWORD_TYPEOF', 'typeof');
define('KEYWORD_VAR', 'var');
define('KEYWORD_VOID', 'void');
define('KEYWORD_WHILE', 'while');
define('KEYWORD_WITH', 'with');
172
173
/**
 * Minifier front-end: parses javascript with JSParser and serialises the
 * resulting syntax tree back to source text using as few bytes as possible.
 *
 * Usage: $minified = JSMinPlus::minify($script [, $filename])
 */
class JSMinPlus
{
    /** Parser instance; it calls back into parseTree() to minify scope by scope */
    private $parser;

    /** Words that may not be used as a bare property name / dotted member */
    private $reserved = array(
        'break', 'case', 'catch', 'continue', 'default', 'delete', 'do',
        'else', 'finally', 'for', 'function', 'if', 'in', 'instanceof',
        'new', 'return', 'switch', 'this', 'throw', 'try', 'typeof', 'var',
        'void', 'while', 'with',
        // Words reserved for future use
        'abstract', 'boolean', 'byte', 'char', 'class', 'const', 'debugger',
        'double', 'enum', 'export', 'extends', 'final', 'float', 'goto',
        'implements', 'import', 'int', 'interface', 'long', 'native',
        'package', 'private', 'protected', 'public', 'short', 'static',
        'super', 'synchronized', 'throws', 'transient', 'volatile',
        // These are not reserved, but should be taken into account
        // in isValidIdentifier (See jslint source code)
        'arguments', 'eval', 'true', 'false', 'Infinity', 'NaN', 'null', 'undefined'
    );

    /**
     * Private: instances are only created through minify().
     */
    private function __construct()
    {
        $this->parser = new JSParser($this);
    }

    /**
     * Minify a piece of javascript.
     *
     * @param string $js       javascript source
     * @param string $filename name passed on to the parser together with the source
     * @return string|false minified source, or false when parsing/minifying failed
     *                      (the error message is echoed in that case)
     */
    public static function minify($js, $filename='')
    {
        static $instance;

        // this is a singleton
        if(!$instance)
            $instance = new JSMinPlus();

        return $instance->min($js, $filename);
    }

    /**
     * Parse and serialise $js; see minify() for the contract.
     *
     * Exceptions raised by the parser or by parseTree() are not propagated:
     * their message is printed and false is returned instead.
     */
    private function min($js, $filename)
    {
        try
        {
            $n = $this->parser->parse($js, $filename, 1);
            return $this->parseTree($n);
        }
        catch(Exception $e)
        {
            echo $e->getMessage() . "\n";
        }

        return false;
    }

    /**
     * Serialise a syntax tree node (recursively) to minified javascript.
     *
     * @param object $n               node to serialise
     * @param bool   $noBlockGrouping when true a multi-statement block is NOT
     *                                wrapped in curly braces (the caller adds them)
     * @return string minified source for the node
     * @throws Exception for node types that are unknown or not implemented
     */
    public function parseTree($n, $noBlockGrouping = false)
    {
        $s = '';

        switch ($n->type)
        {
            case JS_MINIFIED:
                // subtree was already serialised by the parser
                $s = $n->value;
                break;

            case JS_SCRIPT:
                // we do nothing yet with funDecls or varDecls
                $noBlockGrouping = true;
                // FALL THROUGH

            case JS_BLOCK:
                $childs = $n->treeNodes;
                $lastType = 0;
                // $c counts the statements that actually produced output
                for ($c = 0, $i = 0, $j = count($childs); $i < $j; $i++)
                {
                    $type = $childs[$i]->type;
                    $t = $this->parseTree($childs[$i]);
                    if (strlen($t))
                    {
                        if ($c)
                        {
                            $s = rtrim($s, ';');

                            if ($type == KEYWORD_FUNCTION && $childs[$i]->functionForm == DECLARED_FORM)
                            {
                                // put declared functions on a new line
                                $s .= "\n";
                            }
                            elseif ($type == KEYWORD_VAR && $type == $lastType)
                            {
                                // multiple var-statements can go into one
                                // (drop the leading 'var ' of the second statement)
                                $t = ',' . substr($t, 4);
                            }
                            else
                            {
                                // add terminator
                                $s .= ';';
                            }
                        }

                        $s .= $t;

                        $c++;
                        $lastType = $type;
                    }
                }

                if ($c > 1 && !$noBlockGrouping)
                {
                    $s = '{' . $s . '}';
                }
                break;

            case KEYWORD_FUNCTION:
                $s .= 'function' . ($n->name ? ' ' . $n->name : '') . '('
                    . implode(',', $n->params)
                    . '){' . $this->parseTree($n->body, true) . '}';
                break;

            case KEYWORD_IF:
                $s = 'if(' . $this->parseTree($n->condition) . ')';
                $thenPart = $this->parseTree($n->thenPart);
                $elsePart = $n->elsePart ? $this->parseTree($n->elsePart) : null;

                // empty if-statement
                if ($thenPart == '')
                    $thenPart = ';';

                if ($elsePart)
                {
                    // be careful and always make a block out of the thenPart; could be more optimized but is a lot of trouble
                    if ($thenPart != ';' && $thenPart[0] != '{')
                        $thenPart = '{' . $thenPart . '}';

                    $s .= $thenPart . 'else';

                    // we could check for more, but that hardly ever applies so go for performance
                    if ($elsePart[0] != '{')
                        $s .= ' ';

                    $s .= $elsePart;
                }
                else
                {
                    $s .= $thenPart;
                }
                break;

            case KEYWORD_SWITCH:
                $s = 'switch(' . $this->parseTree($n->discriminant) . '){';
                $cases = $n->cases;
                for ($i = 0, $j = count($cases); $i < $j; $i++)
                {
                    $case = $cases[$i];
                    if ($case->type == KEYWORD_CASE)
                        $s .= 'case' . ($case->caseLabel->type != TOKEN_STRING ? ' ' : '') . $this->parseTree($case->caseLabel) . ':';
                    else
                        $s .= 'default:';

                    $statement = $this->parseTree($case->statements, true);
                    if ($statement)
                    {
                        $s .= $statement;
                        // no terminator for last statement
                        if ($i + 1 < $j)
                            $s .= ';';
                    }
                }
                $s .= '}';
                break;

            case KEYWORD_FOR:
                $s = 'for(' . ($n->setup ? $this->parseTree($n->setup) : '')
                    . ';' . ($n->condition ? $this->parseTree($n->condition) : '')
                    . ';' . ($n->update ? $this->parseTree($n->update) : '') . ')';

                // an empty loop body still needs a statement
                $body = $this->parseTree($n->body);
                if ($body == '')
                    $body = ';';

                $s .= $body;
                break;

            case KEYWORD_WHILE:
                $s = 'while(' . $this->parseTree($n->condition) . ')';

                $body = $this->parseTree($n->body);
                if ($body == '')
                    $body = ';';

                $s .= $body;
                break;

            case JS_FOR_IN:
                $s = 'for(' . ($n->varDecl ? $this->parseTree($n->varDecl) : $this->parseTree($n->iterator)) . ' in ' . $this->parseTree($n->object) . ')';

                $body = $this->parseTree($n->body);
                if ($body == '')
                    $body = ';';

                $s .= $body;
                break;

            case KEYWORD_DO:
                $s = 'do{' . $this->parseTree($n->body, true) . '}while(' . $this->parseTree($n->condition) . ')';
                break;

            case KEYWORD_BREAK:
            case KEYWORD_CONTINUE:
                $s = $n->value . ($n->label ? ' ' . $n->label : '');
                break;

            case KEYWORD_TRY:
                $s = 'try{' . $this->parseTree($n->tryBlock, true) . '}';
                $catchClauses = $n->catchClauses;
                for ($i = 0, $j = count($catchClauses); $i < $j; $i++)
                {
                    $t = $catchClauses[$i];
                    $s .= 'catch(' . $t->varName . ($t->guard ? ' if ' . $this->parseTree($t->guard) : '') . '){' . $this->parseTree($t->block, true) . '}';
                }
                if ($n->finallyBlock)
                    $s .= 'finally{' . $this->parseTree($n->finallyBlock, true) . '}';
                break;

            case KEYWORD_THROW:
            case KEYWORD_RETURN:
                $s = $n->type;
                if ($n->value)
                {
                    $t = $this->parseTree($n->value);
                    if (strlen($t))
                    {
                        // a separating space is only needed when the keyword
                        // would otherwise run into the expression
                        if ($this->isWordChar($t[0]) || $t[0] == '\\')
                            $s .= ' ';

                        $s .= $t;
                    }
                }
                break;

            case KEYWORD_WITH:
                $s = 'with(' . $this->parseTree($n->object) . ')' . $this->parseTree($n->body);
                break;

            case KEYWORD_VAR:
            case KEYWORD_CONST:
                $s = $n->value . ' ';
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                {
                    $t = $childs[$i];
                    $s .= ($i ? ',' : '') . $t->name;
                    $u = $t->initializer;
                    if ($u)
                        $s .= '=' . $this->parseTree($u);
                }
                break;

            case KEYWORD_IN:
            case KEYWORD_INSTANCEOF:
                $left = $this->parseTree($n->treeNodes[0]);
                $right = $this->parseTree($n->treeNodes[1]);

                $s = $left;

                if ($this->isWordChar(substr($left, -1)))
                    $s .= ' ';

                $s .= $n->type;

                if ($this->isWordChar($right[0]) || $right[0] == '\\')
                    $s .= ' ';

                $s .= $right;
                break;

            case KEYWORD_DELETE:
            case KEYWORD_TYPEOF:
                $right = $this->parseTree($n->treeNodes[0]);

                $s = $n->type;

                if ($this->isWordChar($right[0]) || $right[0] == '\\')
                    $s .= ' ';

                $s .= $right;
                break;

            case KEYWORD_VOID:
                $s = 'void(' . $this->parseTree($n->treeNodes[0]) . ')';
                break;

            case KEYWORD_DEBUGGER:
                throw new Exception('NOT IMPLEMENTED: DEBUGGER');

            case TOKEN_CONDCOMMENT_START:
            case TOKEN_CONDCOMMENT_END:
                $s = $n->value . ($n->type == TOKEN_CONDCOMMENT_START ? ' ' : '');
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                    $s .= $this->parseTree($childs[$i]);
                break;

            case OP_SEMICOLON:
                // expression statement; an empty statement has no expression
                if ($expression = $n->expression)
                    $s = $this->parseTree($expression);
                break;

            case JS_LABEL:
                $s = $n->label . ':' . $this->parseTree($n->statement);
                break;

            case OP_COMMA:
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                    $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
                break;

            case OP_ASSIGN:
                // value holds the full operator text, e.g. '=' or '+='
                $s = $this->parseTree($n->treeNodes[0]) . $n->value . $this->parseTree($n->treeNodes[1]);
                break;

            case OP_HOOK:
                $s = $this->parseTree($n->treeNodes[0]) . '?' . $this->parseTree($n->treeNodes[1]) . ':' . $this->parseTree($n->treeNodes[2]);
                break;

            case OP_OR: case OP_AND:
            case OP_BITWISE_OR: case OP_BITWISE_XOR: case OP_BITWISE_AND:
            case OP_EQ: case OP_NE: case OP_STRICT_EQ: case OP_STRICT_NE:
            case OP_LT: case OP_LE: case OP_GE: case OP_GT:
            case OP_LSH: case OP_RSH: case OP_URSH:
            case OP_MUL: case OP_DIV: case OP_MOD:
                $s = $this->parseTree($n->treeNodes[0]) . $n->type . $this->parseTree($n->treeNodes[1]);
                break;

            case OP_PLUS:
            case OP_MINUS:
                $left = $this->parseTree($n->treeNodes[0]);
                $right = $this->parseTree($n->treeNodes[1]);

                switch ($n->treeNodes[1]->type)
                {
                    case OP_PLUS:
                    case OP_MINUS:
                    case OP_INCREMENT:
                    case OP_DECREMENT:
                    case OP_UNARY_PLUS:
                    case OP_UNARY_MINUS:
                        // keep a space so that e.g. 'a+ +b' does not become 'a++b'
                        $s = $left . $n->type . ' ' . $right;
                        break;

                    case TOKEN_STRING:
                        // combine concatenated strings with same quote style;
                        // only safe when the left operand really contributes a
                        // string literal to this concatenation: "1-'a'+'b'"
                        // must not become "1-'ab'"
                        if ($n->type == OP_PLUS &&
                            $this->endsWithStringOperand($n->treeNodes[0]) &&
                            substr($left, -1) == $right[0]
                        )
                        {
                            $s = substr($left, 0, -1) . substr($right, 1);
                            break;
                        }
                        // FALL THROUGH

                    default:
                        $s = $left . $n->type . $right;
                }
                break;

            case OP_NOT:
            case OP_BITWISE_NOT:
            case OP_UNARY_PLUS:
            case OP_UNARY_MINUS:
                $s = $n->value . $this->parseTree($n->treeNodes[0]);
                break;

            case OP_INCREMENT:
            case OP_DECREMENT:
                if ($n->postfix)
                    $s = $this->parseTree($n->treeNodes[0]) . $n->value;
                else
                    $s = $n->value . $this->parseTree($n->treeNodes[0]);
                break;

            case OP_DOT:
                $left = $this->parseTree($n->treeNodes[0]);
                // JS_GROUP drops the parentheses around a number, but in
                // '5.toFixed' the dot would be read as a decimal point;
                // restore them for plain integer literals
                if (ctype_digit($left))
                    $left = '(' . $left . ')';

                $s = $left . '.' . $this->parseTree($n->treeNodes[1]);
                break;

            case JS_INDEX:
                $s = $this->parseTree($n->treeNodes[0]);
                // See if we can replace named index with a dot saving 3 bytes
                if ( $n->treeNodes[0]->type == TOKEN_IDENTIFIER &&
                    $n->treeNodes[1]->type == TOKEN_STRING &&
                    $this->isValidIdentifier(substr($n->treeNodes[1]->value, 1, -1))
                )
                    $s .= '.' . substr($n->treeNodes[1]->value, 1, -1);
                else
                    $s .= '[' . $this->parseTree($n->treeNodes[1]) . ']';
                break;

            case JS_LIST:
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                    $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
                break;

            case JS_CALL:
                $s = $this->parseTree($n->treeNodes[0]) . '(' . $this->parseTree($n->treeNodes[1]) . ')';
                break;

            case KEYWORD_NEW:
            case JS_NEW_WITH_ARGS:
                $s = 'new ' . $this->parseTree($n->treeNodes[0]) . '(' . ($n->type == JS_NEW_WITH_ARGS ? $this->parseTree($n->treeNodes[1]) : '') . ')';
                break;

            case JS_ARRAY_INIT:
                $s = '[';
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                {
                    $s .= ($i ? ',' : '') . $this->parseTree($childs[$i]);
                }
                $s .= ']';
                break;

            case JS_OBJECT_INIT:
                $s = '{';
                $childs = $n->treeNodes;
                for ($i = 0, $j = count($childs); $i < $j; $i++)
                {
                    $t = $childs[$i];
                    if ($i)
                        $s .= ',';
                    if ($t->type == JS_PROPERTY_INIT)
                    {
                        // Ditch the quotes when the index is a valid identifier
                        if ( $t->treeNodes[0]->type == TOKEN_STRING &&
                            $this->isValidIdentifier(substr($t->treeNodes[0]->value, 1, -1))
                        )
                            $s .= substr($t->treeNodes[0]->value, 1, -1);
                        else
                            $s .= $t->treeNodes[0]->value;

                        $s .= ':' . $this->parseTree($t->treeNodes[1]);
                    }
                    else
                    {
                        // getter/setter; the parameter list is joined with
                        // implode() so the counters of the surrounding
                        // property loop ($i, $j) stay untouched
                        $s .= $t->type == JS_GETTER ? 'get' : 'set';
                        $s .= ' ' . $t->name . '(' . implode(',', $t->params)
                            . '){' . $this->parseTree($t->body, true) . '}';
                    }
                }
                $s .= '}';
                break;

            case TOKEN_NUMBER:
                $s = $n->value;
                // 1000 -> 1e3 etc.; only pays off with three or more trailing zeros
                if (preg_match('/^([1-9]+)(0{3,})$/', $s, $m))
                    $s = $m[1] . 'e' . strlen($m[2]);
                break;

            case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
            case TOKEN_IDENTIFIER: case TOKEN_STRING: case TOKEN_REGEXP:
                $s = $n->value;
                break;

            case JS_GROUP:
                // parentheses around a single primary expression are redundant
                if (in_array(
                    $n->treeNodes[0]->type,
                    array(
                        JS_ARRAY_INIT, JS_OBJECT_INIT, JS_GROUP,
                        TOKEN_NUMBER, TOKEN_STRING, TOKEN_REGEXP, TOKEN_IDENTIFIER,
                        KEYWORD_NULL, KEYWORD_THIS, KEYWORD_TRUE, KEYWORD_FALSE
                    )
                ))
                {
                    $s = $this->parseTree($n->treeNodes[0]);
                }
                else
                {
                    $s = '(' . $this->parseTree($n->treeNodes[0]) . ')';
                }
                break;

            default:
                throw new Exception('UNKNOWN TOKEN TYPE: ' . $n->type);
        }

        return $s;
    }

    /**
     * Tell whether the text produced for $node ends in a string literal that
     * is an operand of a '+' concatenation (or is a string literal itself),
     * so that a following string literal may be merged into it.
     */
    private function endsWithStringOperand($node)
    {
        if ($node->type == TOKEN_STRING)
            return true;

        // (x + 'a') + 'b' equals x + 'ab' because x + 'a' is always a string
        return $node->type == OP_PLUS && $node->treeNodes[1]->type == TOKEN_STRING;
    }

    /**
     * Tell whether $string can be written as a bare property name: it must be
     * a plain ASCII identifier and not one of the words in $this->reserved.
     */
    private function isValidIdentifier($string)
    {
        return preg_match('/^[a-zA-Z_][a-zA-Z0-9_]*$/', $string) && !in_array($string, $this->reserved);
    }

    /**
     * Tell whether $char can be part of an identifier or keyword, i.e. whether
     * it needs whitespace to be separated from an adjacent keyword.
     */
    private function isWordChar($char)
    {
        return $char == '_' || $char == '$' || ctype_alnum($char);
    }
}
672
673 class JSParser
674 {
    /** Tokenizer (JSTokenizer instance created in the constructor) */
    private $t;
    /** Optional minifier; when set, Script() serialises each script node through its parseTree() */
    private $minifier;

    /**
     * Operator precedence, keyed by node type. A higher number binds tighter:
     * Expression() reduces the operator on top of its stack while that
     * operator's precedence is greater than the precedence of the incoming one.
     */
    private $opPrecedence = array(
        ';' => 0,
        ',' => 1,
        '=' => 2, '?' => 2, ':' => 2,
        // The above all have to have the same precedence, see bug 330975
        '||' => 4,
        '&&' => 5,
        '|' => 6,
        '^' => 7,
        '&' => 8,
        '==' => 9, '!=' => 9, '===' => 9, '!==' => 9,
        '<' => 10, '<=' => 10, '>=' => 10, '>' => 10, 'in' => 10, 'instanceof' => 10,
        '<<' => 11, '>>' => 11, '>>>' => 11,
        '+' => 12, '-' => 12,
        '*' => 13, '/' => 13, '%' => 13,
        'delete' => 14, 'void' => 14, 'typeof' => 14,
        '!' => 14, '~' => 14, 'U+' => 14, 'U-' => 14,
        '++' => 15, '--' => 15,
        'new' => 16,
        '.' => 17,
        JS_NEW_WITH_ARGS => 0, JS_INDEX => 0, JS_CALL => 0,
        JS_ARRAY_INIT => 0, JS_OBJECT_INIT => 0, JS_GROUP => 0
    );

    /**
     * Number of operands per operator, keyed by node type.
     * NOTE(review): the consumer of this table is not part of this excerpt;
     * presumably the negative arity of ',' marks it as taking a variable
     * number of operands - confirm against reduce().
     */
    private $opArity = array(
        ',' => -2,
        '=' => 2,
        '?' => 3,
        '||' => 2,
        '&&' => 2,
        '|' => 2,
        '^' => 2,
        '&' => 2,
        '==' => 2, '!=' => 2, '===' => 2, '!==' => 2,
        '<' => 2, '<=' => 2, '>=' => 2, '>' => 2, 'in' => 2, 'instanceof' => 2,
        '<<' => 2, '>>' => 2, '>>>' => 2,
        '+' => 2, '-' => 2,
        '*' => 2, '/' => 2, '%' => 2,
        'delete' => 1, 'void' => 1, 'typeof' => 1,
        '!' => 1, '~' => 1, 'U+' => 1, 'U-' => 1,
        '++' => 1, '--' => 1,
        'new' => 1,
        '.' => 2,
        JS_NEW_WITH_ARGS => 2, JS_INDEX => 2, JS_CALL => 2,
        JS_ARRAY_INIT => 1, JS_OBJECT_INIT => 1, JS_GROUP => 1,
        TOKEN_CONDCOMMENT_START => 1, TOKEN_CONDCOMMENT_END => 1
    );
725
/**
 * Create a parser with its own tokenizer.
 *
 * @param object|null $minifier when given, every script node is serialised
 *                              through $minifier->parseTree() while parsing
 */
public function __construct($minifier=null)
{
    $this->t = new JSTokenizer();
    $this->minifier = $minifier;
}
731
/**
 * Parse a complete script.
 *
 * The three arguments are handed to the tokenizer's init() unchanged
 * ($s source text, $f file name, $l starting line number).
 *
 * @return object root node of the script
 * @throws Exception (syntax error) when input is left over after the script
 */
public function parse($s, $f, $l)
{
    // initialize tokenizer
    $this->t->init($s, $f, $l);

    // top-level code is parsed in a non-function context
    $context = new JSCompilerContext(false);
    $root = $this->Script($context);

    if ($this->t->isDone())
        return $root;

    throw $this->t->newSyntaxError('Syntax error');
}
744
/**
 * Parse a script (top-level code or a function body) in context $x.
 *
 * Without a minifier the node is returned as a JS_SCRIPT tree. With a
 * minifier the tree is serialised right away, stored in the node's value and
 * discarded, and the node is returned with type JS_MINIFIED.
 */
private function Script($x)
{
    $script = $this->Statements($x);
    $script->type = JS_SCRIPT;
    $script->funDecls = $x->funDecls;
    $script->varDecls = $x->varDecls;

    if (!$this->minifier)
        return $script;

    // minify by scope
    $script->value = $this->minifier->parseTree($script);

    // the text is all we need from here on; drop the tree to save memory
    $script->treeNodes = null;
    $script->funDecls = null;
    $script->varDecls = null;

    $script->type = JS_MINIFIED;

    return $script;
}
767
/**
 * Parse statements until the input ends or a closing curly brace is next,
 * and return them as one JS_BLOCK node. The block is on the context's
 * statement stack while its statements are being parsed.
 */
private function Statements($x)
{
    $block = new JSNode($this->t, JS_BLOCK);
    array_push($x->stmtStack, $block);

    for (;;)
    {
        // the closing brace itself is left for the caller to consume
        if ($this->t->isDone() || $this->t->peek() == OP_RIGHT_CURLY)
            break;

        $block->addNode($this->Statement($x));
    }

    array_pop($x->stmtStack);

    return $block;
}
780
/**
 * Parse a mandatory '{ ... }' block and return its statements as a block node.
 */
private function Block($x)
{
    $tokenizer = $this->t;

    $tokenizer->mustMatch(OP_LEFT_CURLY);
    $statements = $this->Statements($x);
    $tokenizer->mustMatch(OP_RIGHT_CURLY);

    return $statements;
}
789
/**
 * Parse a single statement in context $x and return its node.
 *
 * @param object $x compiler context (statement stack, function/loop state)
 * @return object statement node
 * @throws Exception (syntax error) on malformed input
 */
private function Statement($x)
{
    $tt = $this->t->get();
    $n2 = null;

    // Cases for statements ending in a right curly return early, avoiding the
    // common semicolon insertion magic after this switch.
    switch ($tt)
    {
        case KEYWORD_FUNCTION:
            // only a function directly inside a script body is a declaration
            return $this->FunctionDefinition(
                $x,
                true,
                count($x->stmtStack) > 1 ? STATEMENT_FORM : DECLARED_FORM
            );

        case OP_LEFT_CURLY:
            $n = $this->Statements($x);
            $this->t->mustMatch(OP_RIGHT_CURLY);
            return $n;

        case KEYWORD_IF:
            $n = new JSNode($this->t);
            $n->condition = $this->ParenExpression($x);
            array_push($x->stmtStack, $n);
            $n->thenPart = $this->Statement($x);
            $n->elsePart = $this->t->match(KEYWORD_ELSE) ? $this->Statement($x) : null;
            array_pop($x->stmtStack);
            return $n;

        case KEYWORD_SWITCH:
            $n = new JSNode($this->t);
            $this->t->mustMatch(OP_LEFT_PAREN);
            $n->discriminant = $this->Expression($x);
            $this->t->mustMatch(OP_RIGHT_PAREN);
            $n->cases = array();
            $n->defaultIndex = -1;

            array_push($x->stmtStack, $n);

            $this->t->mustMatch(OP_LEFT_CURLY);

            while (($tt = $this->t->get()) != OP_RIGHT_CURLY)
            {
                switch ($tt)
                {
                    case KEYWORD_DEFAULT:
                        if ($n->defaultIndex >= 0)
                            throw $this->t->newSyntaxError('More than one switch default');
                        // FALL THROUGH
                    case KEYWORD_CASE:
                        $n2 = new JSNode($this->t);
                        if ($tt == KEYWORD_DEFAULT)
                            $n->defaultIndex = count($n->cases);
                        else
                            $n2->caseLabel = $this->Expression($x, OP_COLON);
                        break;
                    default:
                        throw $this->t->newSyntaxError('Invalid switch case');
                }

                $this->t->mustMatch(OP_COLON);
                // statements run up to the next case/default label or the closing brace
                $n2->statements = new JSNode($this->t, JS_BLOCK);
                while (($tt = $this->t->peek()) != KEYWORD_CASE && $tt != KEYWORD_DEFAULT && $tt != OP_RIGHT_CURLY)
                    $n2->statements->addNode($this->Statement($x));

                array_push($n->cases, $n2);
            }

            array_pop($x->stmtStack);
            return $n;

        case KEYWORD_FOR:
            $n = new JSNode($this->t);
            $n->isLoop = true;
            $this->t->mustMatch(OP_LEFT_PAREN);

            if (($tt = $this->t->peek()) != OP_SEMICOLON)
            {
                $x->inForLoopInit = true;
                if ($tt == KEYWORD_VAR || $tt == KEYWORD_CONST)
                {
                    $this->t->get();
                    $n2 = $this->Variables($x);
                }
                else
                {
                    $n2 = $this->Expression($x);
                }
                $x->inForLoopInit = false;
            }

            if ($n2 && $this->t->match(KEYWORD_IN))
            {
                $n->type = JS_FOR_IN;
                if ($n2->type == KEYWORD_VAR)
                {
                    if (count($n2->treeNodes) != 1)
                    {
                        // report through the same factory as every other
                        // syntax error in this parser
                        throw $this->t->newSyntaxError('Invalid for..in left-hand side');
                    }

                    // NB: n2[0].type == IDENTIFIER and n2[0].value == n2[0].name.
                    $n->iterator = $n2->treeNodes[0];
                    $n->varDecl = $n2;
                }
                else
                {
                    $n->iterator = $n2;
                    $n->varDecl = null;
                }

                $n->object = $this->Expression($x);
            }
            else
            {
                $n->setup = $n2 ? $n2 : null;
                $this->t->mustMatch(OP_SEMICOLON);
                $n->condition = $this->t->peek() == OP_SEMICOLON ? null : $this->Expression($x);
                $this->t->mustMatch(OP_SEMICOLON);
                $n->update = $this->t->peek() == OP_RIGHT_PAREN ? null : $this->Expression($x);
            }

            $this->t->mustMatch(OP_RIGHT_PAREN);
            $n->body = $this->nest($x, $n);
            return $n;

        case KEYWORD_WHILE:
            $n = new JSNode($this->t);
            $n->isLoop = true;
            $n->condition = $this->ParenExpression($x);
            $n->body = $this->nest($x, $n);
            return $n;

        case KEYWORD_DO:
            $n = new JSNode($this->t);
            $n->isLoop = true;
            $n->body = $this->nest($x, $n, KEYWORD_WHILE);
            $n->condition = $this->ParenExpression($x);
            if (!$x->ecmaStrictMode)
            {
                // <script language="JavaScript"> (without version hints) may need
                // automatic semicolon insertion without a newline after do-while.
                // See http://bugzilla.mozilla.org/show_bug.cgi?id=238945.
                $this->t->match(OP_SEMICOLON);
                return $n;
            }
            break;

        case KEYWORD_BREAK:
        case KEYWORD_CONTINUE:
            $n = new JSNode($this->t);

            if ($this->t->peekOnSameLine() == TOKEN_IDENTIFIER)
            {
                $this->t->get();
                $n->label = $this->t->currentToken()->value;
            }

            // walk the statement stack outwards to find the jump target
            $ss = $x->stmtStack;
            $i = count($ss);
            $label = $n->label;
            if ($label)
            {
                do
                {
                    if (--$i < 0)
                        throw $this->t->newSyntaxError('Label not found');
                }
                while ($ss[$i]->label != $label);
            }
            else
            {
                // unlabelled: nearest loop, or for 'break' also nearest switch
                do
                {
                    if (--$i < 0)
                        throw $this->t->newSyntaxError('Invalid ' . $tt);
                }
                while (!$ss[$i]->isLoop && ($tt != KEYWORD_BREAK || $ss[$i]->type != KEYWORD_SWITCH));
            }

            $n->target = $ss[$i];
            break;

        case KEYWORD_TRY:
            $n = new JSNode($this->t);
            $n->tryBlock = $this->Block($x);
            $n->catchClauses = array();

            while ($this->t->match(KEYWORD_CATCH))
            {
                $n2 = new JSNode($this->t);
                $this->t->mustMatch(OP_LEFT_PAREN);
                $n2->varName = $this->t->mustMatch(TOKEN_IDENTIFIER)->value;

                if ($this->t->match(KEYWORD_IF))
                {
                    if ($x->ecmaStrictMode)
                        throw $this->t->newSyntaxError('Illegal catch guard');

                    if (count($n->catchClauses) && !end($n->catchClauses)->guard)
                        throw $this->t->newSyntaxError('Guarded catch after unguarded');

                    $n2->guard = $this->Expression($x);
                }
                else
                {
                    $n2->guard = null;
                }

                $this->t->mustMatch(OP_RIGHT_PAREN);
                $n2->block = $this->Block($x);
                array_push($n->catchClauses, $n2);
            }

            if ($this->t->match(KEYWORD_FINALLY))
                $n->finallyBlock = $this->Block($x);

            if (!count($n->catchClauses) && !$n->finallyBlock)
                throw $this->t->newSyntaxError('Invalid try statement');
            return $n;

        case KEYWORD_CATCH:
        case KEYWORD_FINALLY:
            // string concatenation ('.'), not arithmetic '+'
            throw $this->t->newSyntaxError($tt . ' without preceding try');

        case KEYWORD_THROW:
            $n = new JSNode($this->t);
            $n->value = $this->Expression($x);
            break;

        case KEYWORD_RETURN:
            if (!$x->inFunction)
                throw $this->t->newSyntaxError('Invalid return');

            $n = new JSNode($this->t);
            // a return value must start on the same line as the keyword
            $tt = $this->t->peekOnSameLine();
            if ($tt != TOKEN_END && $tt != TOKEN_NEWLINE && $tt != OP_SEMICOLON && $tt != OP_RIGHT_CURLY)
                $n->value = $this->Expression($x);
            else
                $n->value = null;
            break;

        case KEYWORD_WITH:
            $n = new JSNode($this->t);
            $n->object = $this->ParenExpression($x);
            $n->body = $this->nest($x, $n);
            return $n;

        case KEYWORD_VAR:
        case KEYWORD_CONST:
            $n = $this->Variables($x);
            break;

        case TOKEN_CONDCOMMENT_START:
        case TOKEN_CONDCOMMENT_END:
            $n = new JSNode($this->t);
            return $n;

        case KEYWORD_DEBUGGER:
            $n = new JSNode($this->t);
            break;

        case TOKEN_NEWLINE:
        case OP_SEMICOLON:
            // empty statement
            $n = new JSNode($this->t, OP_SEMICOLON);
            $n->expression = null;
            return $n;

        default:
            if ($tt == TOKEN_IDENTIFIER)
            {
                // an identifier followed by a colon is a label
                $this->t->scanOperand = false;
                $tt = $this->t->peek();
                $this->t->scanOperand = true;
                if ($tt == OP_COLON)
                {
                    $label = $this->t->currentToken()->value;
                    $ss = $x->stmtStack;
                    for ($i = count($ss) - 1; $i >= 0; --$i)
                    {
                        if ($ss[$i]->label == $label)
                            throw $this->t->newSyntaxError('Duplicate label');
                    }

                    $this->t->get();
                    $n = new JSNode($this->t, JS_LABEL);
                    $n->label = $label;
                    $n->statement = $this->nest($x, $n);

                    return $n;
                }
            }

            // expression statement: push the token back and parse it as an expression
            $n = new JSNode($this->t, OP_SEMICOLON);
            $this->t->unget();
            $n->expression = $this->Expression($x);
            $n->end = $n->expression->end;
            break;
    }

    // semicolon insertion: on the same line only a terminator may follow
    if ($this->t->lineno == $this->t->currentToken()->lineno)
    {
        $tt = $this->t->peekOnSameLine();
        if ($tt != TOKEN_END && $tt != TOKEN_NEWLINE && $tt != OP_SEMICOLON && $tt != OP_RIGHT_CURLY)
            throw $this->t->newSyntaxError('Missing ; before statement');
    }

    $this->t->match(OP_SEMICOLON);

    return $n;
}
1107
/**
 * Parse a function (or getter/setter) starting at its name; the introducing
 * token has already been consumed and determines the node's initial type.
 *
 * @param object $x            enclosing compiler context
 * @param bool   $requireName  whether a missing name is a syntax error
 * @param int    $functionForm DECLARED_FORM, EXPRESSED_FORM or STATEMENT_FORM
 * @return object function node; declared functions are also added to $x->funDecls
 */
private function FunctionDefinition($x, $requireName, $functionForm)
{
    $tokenizer = $this->t;
    $func = new JSNode($tokenizer);

    // anything but the 'function' keyword is an accessor: 'get' or 'set'
    if ($func->type != KEYWORD_FUNCTION)
    {
        if ($func->value == 'get')
            $func->type = JS_GETTER;
        else
            $func->type = JS_SETTER;
    }

    if ($tokenizer->match(TOKEN_IDENTIFIER))
    {
        $func->name = $tokenizer->currentToken()->value;
    }
    elseif ($requireName)
    {
        throw $tokenizer->newSyntaxError('Missing function identifier');
    }

    // formal parameters: comma separated identifiers up to ')'
    $tokenizer->mustMatch(OP_LEFT_PAREN);
    $func->params = array();

    for ($tt = $tokenizer->get(); $tt != OP_RIGHT_PAREN; $tt = $tokenizer->get())
    {
        if ($tt != TOKEN_IDENTIFIER)
            throw $tokenizer->newSyntaxError('Missing formal parameter');

        array_push($func->params, $tokenizer->currentToken()->value);

        if ($tokenizer->peek() != OP_RIGHT_PAREN)
            $tokenizer->mustMatch(OP_COMMA);
    }

    // the body is a script of its own, parsed in a fresh function context
    $tokenizer->mustMatch(OP_LEFT_CURLY);
    $func->body = $this->Script(new JSCompilerContext(true));
    $tokenizer->mustMatch(OP_RIGHT_CURLY);

    $func->end = $tokenizer->currentToken()->end;
    $func->functionForm = $functionForm;

    if ($functionForm == DECLARED_FORM)
        array_push($x->funDecls, $func);

    return $func;
}
1148
/**
 * Parse the declaration list of a var/const statement; the keyword itself
 * has already been consumed and is the type of the returned node.
 *
 * @param object $x compiler context; every declared variable is also added
 *                  to $x->varDecls
 * @return object declaration node with one child node per variable
 */
private function Variables($x)
{
    $declaration = new JSNode($this->t);

    while (true)
    {
        $this->t->mustMatch(TOKEN_IDENTIFIER);

        $variable = new JSNode($this->t);
        $variable->name = $variable->value;

        if ($this->t->match(OP_ASSIGN))
        {
            // compound assignment ('+=' etc.) is not a valid initializer
            if ($this->t->currentToken()->assignOp)
                throw $this->t->newSyntaxError('Invalid variable initialization');

            $variable->initializer = $this->Expression($x, OP_COMMA);
        }

        $variable->readOnly = $declaration->type == KEYWORD_CONST;

        $declaration->addNode($variable);
        array_push($x->varDecls, $variable);

        // another declaration follows only after a comma
        if (!$this->t->match(OP_COMMA))
            break;
    }

    return $declaration;
}
1177
/**
 * Parses a single expression using an operator stack and an operand stack.
 *
 * Tokens are read until one is found that cannot continue the expression
 * (or until $stop is seen outside any bracket, curly, paren or hook opened
 * by this call). Binary and unary operators are pushed on $operators and
 * folded into $operands by reduce() according to $this->opPrecedence.
 * Array and object initialisers are parsed by recursive descent.
 *
 * The token that ended the expression is pushed back with unget(), and the
 * tokenizer is left in operand-scanning mode.
 *
 * @param object $x    context; its bracketLevel, curlyLevel, parenLevel and
 *                     hookLevel counters are read and updated while parsing
 * @param mixed  $stop optional token type that terminates the expression when
 *                     it is met at the nesting level this call started at
 * @return JSNode the node left on top of the operand stack
 * @throws Exception on unbalanced brackets/parens/hooks, a missing operand,
 *                   an invalid label or an invalid object property name
 */
private function Expression($x, $stop=false)
{
	$operators = array();
	$operands = array();
	$n = false;

	// Remember the nesting levels on entry so we can tell "ours" from the caller's.
	$bl = $x->bracketLevel;
	$cl = $x->curlyLevel;
	$pl = $x->parenLevel;
	$hl = $x->hookLevel;

	while (($tt = $this->t->get()) != TOKEN_END)
	{
		if ($tt == $stop &&
			$x->bracketLevel == $bl &&
			$x->curlyLevel == $cl &&
			$x->parenLevel == $pl &&
			$x->hookLevel == $hl
		)
		{
			// Stop only if tt matches the optional stop parameter, and that
			// token is not quoted by some kind of bracket.
			break;
		}

		switch ($tt)
		{
			case OP_SEMICOLON:
				// NB: cannot be empty, Statement handled that.
				break 2;

			case OP_HOOK:
				if ($this->t->scanOperand)
					break 2;

				while (	!empty($operators) &&
					$this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
				)
					$this->reduce($operators, $operands);

				array_push($operators, new JSNode($this->t));

				++$x->hookLevel;
				$this->t->scanOperand = true;
				$n = $this->Expression($x);

				if (!$this->t->match(OP_COLON))
					break 2;

				--$x->hookLevel;
				array_push($operands, $n);
				break;

			case OP_COLON:
				// Inside a conditional the colon ends the "then" part.
				if ($x->hookLevel)
					break 2;

				throw $this->t->newSyntaxError('Invalid label');

			case OP_ASSIGN:
				if ($this->t->scanOperand)
					break 2;

				// Use >, not >=, for right-associative ASSIGN
				while (	!empty($operators) &&
					$this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
				)
					$this->reduce($operators, $operands);

				array_push($operators, new JSNode($this->t));
				end($operands)->assignOp = $this->t->currentToken()->assignOp;
				$this->t->scanOperand = true;
				break;

			case KEYWORD_IN:
				// An in operator should not be parsed if we're parsing the head of
				// a for (...) loop, unless it is in the then part of a conditional
				// expression, or parenthesized somehow.
				if ($x->inForLoopInit && !$x->hookLevel &&
					!$x->bracketLevel && !$x->curlyLevel &&
					!$x->parenLevel
				)
					break 2;
			// FALL THROUGH
			case OP_COMMA:
				// A comma operator should not be parsed if we're parsing the then part
				// of a conditional expression unless it's parenthesized somehow.
				if ($tt == OP_COMMA && $x->hookLevel &&
					!$x->bracketLevel && !$x->curlyLevel &&
					!$x->parenLevel
				)
					break 2;
			// Treat comma as left-associative so reduce can fold left-heavy
			// COMMA trees into a single array.
			// FALL THROUGH
			case OP_OR:
			case OP_AND:
			case OP_BITWISE_OR:
			case OP_BITWISE_XOR:
			case OP_BITWISE_AND:
			case OP_EQ: case OP_NE: case OP_STRICT_EQ: case OP_STRICT_NE:
			case OP_LT: case OP_LE: case OP_GE: case OP_GT:
			case KEYWORD_INSTANCEOF:
			case OP_LSH: case OP_RSH: case OP_URSH:
			case OP_PLUS: case OP_MINUS:
			case OP_MUL: case OP_DIV: case OP_MOD:
			case OP_DOT:
				if ($this->t->scanOperand)
					break 2;

				while (	!empty($operators) &&
					$this->opPrecedence[end($operators)->type] >= $this->opPrecedence[$tt]
				)
					$this->reduce($operators, $operands);

				if ($tt == OP_DOT)
				{
					$this->t->mustMatch(TOKEN_IDENTIFIER);
					array_push($operands, new JSNode($this->t, OP_DOT, array_pop($operands), new JSNode($this->t)));
				}
				else
				{
					array_push($operators, new JSNode($this->t));
					$this->t->scanOperand = true;
				}
				break;

			case KEYWORD_DELETE: case KEYWORD_VOID: case KEYWORD_TYPEOF:
			case OP_NOT: case OP_BITWISE_NOT: case OP_UNARY_PLUS: case OP_UNARY_MINUS:
			case KEYWORD_NEW:
				if (!$this->t->scanOperand)
					break 2;

				array_push($operators, new JSNode($this->t));
				break;

			case OP_INCREMENT: case OP_DECREMENT:
				if ($this->t->scanOperand)
				{
					array_push($operators, new JSNode($this->t)); // prefix increment or decrement
				}
				else
				{
					// Don't cross a line boundary for postfix {in,de}crement.
					$prevIndex = ($this->t->tokenIndex + $this->t->lookahead - 1) & 3;
					$t = isset($this->t->tokens[$prevIndex]) ? $this->t->tokens[$prevIndex] : null;
					if ($t && $t->lineno != $this->t->lineno)
						break 2;

					// Use >, not >=, so postfix has higher precedence than prefix.
					// The emptiness check is part of the loop condition because
					// reduce() pops an operator and may drain the stack.
					while (	!empty($operators) &&
						$this->opPrecedence[end($operators)->type] > $this->opPrecedence[$tt]
					)
						$this->reduce($operators, $operands);

					$n = new JSNode($this->t, $tt, array_pop($operands));
					$n->postfix = true;
					array_push($operands, $n);
				}
				break;

			case KEYWORD_FUNCTION:
				if (!$this->t->scanOperand)
					break 2;

				array_push($operands, $this->FunctionDefinition($x, false, EXPRESSED_FORM));
				$this->t->scanOperand = false;
				break;

			case KEYWORD_NULL: case KEYWORD_THIS: case KEYWORD_TRUE: case KEYWORD_FALSE:
			case TOKEN_IDENTIFIER: case TOKEN_NUMBER: case TOKEN_STRING: case TOKEN_REGEXP:
				if (!$this->t->scanOperand)
					break 2;

				array_push($operands, new JSNode($this->t));
				$this->t->scanOperand = false;
				break;

			case TOKEN_CONDCOMMENT_START:
			case TOKEN_CONDCOMMENT_END:
				if ($this->t->scanOperand)
					array_push($operators, new JSNode($this->t));
				else
					array_push($operands, new JSNode($this->t));
				break;

			case OP_LEFT_BRACKET:
				if ($this->t->scanOperand)
				{
					// Array initialiser. Parse using recursive descent, as the
					// sub-grammar here is not an operator grammar.
					$n = new JSNode($this->t, JS_ARRAY_INIT);
					while (($tt = $this->t->peek()) != OP_RIGHT_BRACKET)
					{
						if ($tt == OP_COMMA)
						{
							// Elision: record a hole in the array.
							$this->t->get();
							$n->addNode(null);
							continue;
						}

						$n->addNode($this->Expression($x, OP_COMMA));
						if (!$this->t->match(OP_COMMA))
							break;
					}

					$this->t->mustMatch(OP_RIGHT_BRACKET);
					array_push($operands, $n);
					$this->t->scanOperand = false;
				}
				else
				{
					// Property indexing operator.
					array_push($operators, new JSNode($this->t, JS_INDEX));
					$this->t->scanOperand = true;
					++$x->bracketLevel;
				}
				break;

			case OP_RIGHT_BRACKET:
				if ($this->t->scanOperand || $x->bracketLevel == $bl)
					break 2;

				// Fold everything back to the matching index operator.
				while ($this->reduce($operators, $operands)->type != JS_INDEX)
					continue;

				--$x->bracketLevel;
				break;

			case OP_LEFT_CURLY:
				if (!$this->t->scanOperand)
					break 2;

				// Object initialiser. As for array initialisers (see above),
				// parse using recursive descent.
				++$x->curlyLevel;
				$n = new JSNode($this->t, JS_OBJECT_INIT);
				while (!$this->t->match(OP_RIGHT_CURLY))
				{
					do
					{
						$tt = $this->t->get();
						$tv = $this->t->currentToken()->value;
						if (($tv == 'get' || $tv == 'set') && $this->t->peek() == TOKEN_IDENTIFIER)
						{
							if ($x->ecmaStrictMode)
								throw $this->t->newSyntaxError('Illegal property accessor');

							$n->addNode($this->FunctionDefinition($x, true, EXPRESSED_FORM));
						}
						else
						{
							switch ($tt)
							{
								case TOKEN_IDENTIFIER:
								case TOKEN_NUMBER:
								case TOKEN_STRING:
									$id = new JSNode($this->t);
									break;

								case OP_RIGHT_CURLY:
									if ($x->ecmaStrictMode)
										throw $this->t->newSyntaxError('Illegal trailing ,');
									// Leaves the inner switch, the do-while and the outer while.
									break 3;

								default:
									throw $this->t->newSyntaxError('Invalid property name');
							}

							$this->t->mustMatch(OP_COLON);
							$n->addNode(new JSNode($this->t, JS_PROPERTY_INIT, $id, $this->Expression($x, OP_COMMA)));
						}
					}
					while ($this->t->match(OP_COMMA));

					$this->t->mustMatch(OP_RIGHT_CURLY);
					break;
				}

				array_push($operands, $n);
				$this->t->scanOperand = false;
				--$x->curlyLevel;
				break;

			case OP_RIGHT_CURLY:
				if (!$this->t->scanOperand && $x->curlyLevel != $cl)
					throw new Exception('PANIC: right curly botch');
				break 2;

			case OP_LEFT_PAREN:
				if ($this->t->scanOperand)
				{
					array_push($operators, new JSNode($this->t, JS_GROUP));
				}
				else
				{
					while (	!empty($operators) &&
						$this->opPrecedence[end($operators)->type] > $this->opPrecedence[KEYWORD_NEW]
					)
						$this->reduce($operators, $operands);

					// Handle () now, to regularize the n-ary case for n > 0.
					// We must set scanOperand in case there are arguments and
					// the first one is a regexp or unary+/-.
					$n = end($operators);
					$this->t->scanOperand = true;
					if ($this->t->match(OP_RIGHT_PAREN))
					{
						if ($n && $n->type == KEYWORD_NEW)
						{
							array_pop($operators);
							$n->addNode(array_pop($operands));
						}
						else
						{
							$n = new JSNode($this->t, JS_CALL, array_pop($operands), new JSNode($this->t, JS_LIST));
						}

						array_push($operands, $n);
						$this->t->scanOperand = false;
						break;
					}

					if ($n && $n->type == KEYWORD_NEW)
						$n->type = JS_NEW_WITH_ARGS;
					else
						array_push($operators, new JSNode($this->t, JS_CALL));
				}

				++$x->parenLevel;
				break;

			case OP_RIGHT_PAREN:
				if ($this->t->scanOperand || $x->parenLevel == $pl)
					break 2;

				// Fold back to whatever opened this parenthesis.
				while (($tt = $this->reduce($operators, $operands)->type) != JS_GROUP &&
					$tt != JS_CALL && $tt != JS_NEW_WITH_ARGS
				)
				{
					continue;
				}

				if ($tt != JS_GROUP)
				{
					// Normalise the argument side of a call into a JS_LIST node.
					$n = end($operands);
					if ($n->treeNodes[1]->type != OP_COMMA)
						$n->treeNodes[1] = new JSNode($this->t, JS_LIST, $n->treeNodes[1]);
					else
						$n->treeNodes[1]->type = JS_LIST;
				}

				--$x->parenLevel;
				break;

			// Automatic semicolon insertion means we may scan across a newline
			// and into the beginning of another statement. If so, break out of
			// the while loop and let the t.scanOperand logic handle errors.
			default:
				break 2;
		}
	}

	if ($x->hookLevel != $hl)
		throw $this->t->newSyntaxError('Missing : in conditional expression');

	if ($x->parenLevel != $pl)
		throw $this->t->newSyntaxError('Missing ) in parenthetical');

	if ($x->bracketLevel != $bl)
		throw $this->t->newSyntaxError('Missing ] in index expression');

	if ($this->t->scanOperand)
		throw $this->t->newSyntaxError('Missing operand');

	// Resume default mode, scanning for operands, not operators.
	$this->t->scanOperand = true;
	$this->t->unget();

	while (count($operators))
		$this->reduce($operators, $operands);

	return array_pop($operands);
}
1563
/**
 * Parses an expression that must be wrapped in parentheses.
 *
 * @param object $x context passed through to Expression()
 * @return JSNode the expression found between the parentheses
 * @throws Exception when either parenthesis is missing
 */
private function ParenExpression($x)
{
	$tokenizer = $this->t;

	$tokenizer->mustMatch(OP_LEFT_PAREN);
	$inner = $this->Expression($x);
	$tokenizer->mustMatch(OP_RIGHT_PAREN);

	return $inner;
}
1572
1573 // Statement stack and nested statement handler.
/**
 * Parses one statement nested inside $node.
 *
 * $node is pushed on the context's statement stack for the duration of the
 * parse and popped afterwards. When $end is given, that token type must
 * follow the nested statement.
 *
 * @param object $x    context holding the statement stack
 * @param mixed  $node enclosing statement node
 * @param mixed  $end  optional token type required after the statement
 * @return mixed the parsed statement
 */
private function nest($x, $node, $end = false)
{
	$x->stmtStack[] = $node;
	$statement = $this->statement($x);
	array_pop($x->stmtStack);

	if (!$end)
		return $statement;

	$this->t->mustMatch($end);

	return $statement;
}
1585
/**
 * Pops the top operator and combines it with its operands.
 *
 * The number of operands comes from $this->opArity. An arity of -2 marks an
 * operator whose left-leaning chains are flattened: when the operand below
 * the top already has the same type, the top operand is appended to it and
 * that existing node is returned. Otherwise the operator is treated as binary.
 *
 * @param array $operators operator stack (modified in place)
 * @param array $operands  operand stack (modified in place)
 * @return JSNode the node that now holds the combined operands
 */
private function reduce(&$operators, &$operands)
{
	$node = array_pop($operators);
	$type = $node->type;
	$arity = $this->opArity[$type];
	$depth = count($operands);

	if ($arity == -2)
	{
		// Flatten left-associative trees
		if ($depth >= 2)
		{
			$target = $operands[$depth - 2];
			if ($target->type == $type)
			{
				$target->addNode(array_pop($operands));
				return $target;
			}
		}

		$arity = 2;
	}

	// Children are attached through addNode() so start and end stay up to date.
	$children = array_splice($operands, $depth - $arity);
	$i = 0;
	while ($i < $arity)
	{
		$node->addNode($children[$i]);
		$i++;
	}

	// Include closing bracket or postfix operator in [start,end]
	$tokenEnd = $this->t->currentToken()->end;
	if ($node->end < $tokenEnd)
		$node->end = $tokenEnd;

	$operands[] = $node;

	return $node;
}
1622 }
1623
/**
 * Per-scope parser state: flags, nesting counters for the expression
 * parser, the stack of enclosing statements and the declarations collected
 * for the scope.
 */
class JSCompilerContext
{
	// Scope flags.
	public $inFunction = false, $inForLoopInit = false, $ecmaStrictMode = false;

	// Nesting depth counters.
	public $bracketLevel = 0, $curlyLevel = 0, $parenLevel = 0, $hookLevel = 0;

	// Enclosing statements and collected declarations.
	public $stmtStack = array(), $funDecls = array(), $varDecls = array();

	/**
	 * @param mixed $inFunction stored as-is in $inFunction
	 */
	public function __construct($inFunction)
	{
		$this->inFunction = $inFunction;
	}
}
1643
/**
 * A node of the parse tree.
 *
 * The core fields (type, value, lineno, start, end) are private and reached
 * through __get/__set. Any other name assigned to a node (for example
 * `name`, `params`, `body`) is stored as a dynamic property, so the class
 * opts in to dynamic properties explicitly. The attribute line below is an
 * ordinary `#` comment for PHP versions older than 8.0.
 */
#[\AllowDynamicProperties]
class JSNode
{
	private $type;
	private $value;
	private $lineno;
	private $start;
	private $end;

	public $treeNodes = array();
	public $funDecls = array();
	public $varDecls = array();

	/**
	 * Builds a node from the tokenizer's current token.
	 *
	 * Any arguments after $type are appended as child nodes, in order.
	 *
	 * @param object $t    tokenizer; must provide currentToken() and a public $lineno
	 * @param mixed  $type node type; when falsy the current token's type is used
	 */
	public function __construct($t, $type=0)
	{
		if ($token = $t->currentToken())
		{
			$this->type = $type ? $type : $token->type;
			$this->value = $token->value;
			$this->lineno = $token->lineno;
			$this->start = $token->start;
			$this->end = $token->end;
		}
		else
		{
			// No token has been read yet: only type and line are known.
			$this->type = $type;
			$this->lineno = $t->lineno;
		}

		if (($numargs = func_num_args()) > 2)
		{
			$args = func_get_args();
			for ($i = 2; $i < $numargs; $i++)
				$this->addNode($args[$i]);
		}
	}

	// we don't want to bloat our object with all kind of specific properties, so we use overloading
	public function __set($name, $value)
	{
		$this->$name = $value;
	}

	/**
	 * Returns the named property, or null when it is unset or null.
	 */
	public function __get($name)
	{
		if (isset($this->$name))
			return $this->$name;

		return null;
	}

	/**
	 * Appends a child (null is allowed and stored as a hole) and widens this
	 * node's [start,end] range to cover a non-null child.
	 *
	 * @param JSNode|null $node
	 */
	public function addNode($node)
	{
		if ($node !== null)
		{
			if ($node->start < $this->start)
				$this->start = $node->start;
			if ($this->end < $node->end)
				$this->end = $node->end;
		}

		$this->treeNodes[] = $node;
	}
}
1707
/**
 * Splits javascript source into tokens on demand.
 *
 * Tokens are kept in a ring of four slots (indices are masked with `& 3`),
 * which allows up to three tokens of lookahead via unget(). Whether a `/`
 * starts a regular expression and whether `+`/`-` are unary depends on the
 * public $scanOperand flag, which the parser toggles.
 */
class JSTokenizer
{
	private $cursor = 0;
	private $source;

	public $tokens = array();
	public $tokenIndex = 0;
	public $lookahead = 0;
	public $scanNewlines = false;
	public $scanOperand = true;

	public $filename;
	public $lineno;

	private $keywords = array(
		'break',
		'case', 'catch', 'const', 'continue',
		'debugger', 'default', 'delete', 'do',
		'else', 'enum',
		'false', 'finally', 'for', 'function',
		'if', 'in', 'instanceof',
		'new', 'null',
		'return',
		'switch',
		'this', 'throw', 'true', 'try', 'typeof',
		'var', 'void',
		'while', 'with'
	);

	// Longer operators come before their prefixes so the alternation in
	// $opRegExp picks the longest one.
	private $opTypeNames = array(
		';', ',', '?', ':', '||', '&&', '|', '^',
		'&', '===', '==', '=', '!==', '!=', '<<', '<=',
		'<', '>>>', '>>', '>=', '>', '++', '--', '+',
		'-', '*', '/', '%', '!', '~', '.', '[',
		']', '{', '}', '(', ')', '@*/'
	);

	// Operators that form a compound assignment when followed by '='.
	private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%');
	private $opRegExp;

	public function __construct()
	{
		$this->opRegExp = '#^(' . implode('|', array_map('preg_quote', $this->opTypeNames)) . ')#';
	}

	/**
	 * Resets the tokenizer to the start of a new source string.
	 *
	 * @param string $source   javascript source
	 * @param string $filename name used in error messages; '[inline]' when empty
	 * @param int    $lineno   line number of the first line
	 */
	public function init($source, $filename = '', $lineno = 1)
	{
		$this->source = $source;
		$this->filename = $filename ? $filename : '[inline]';
		$this->lineno = $lineno;

		$this->cursor = 0;
		$this->tokens = array();
		$this->tokenIndex = 0;
		$this->lookahead = 0;
		$this->scanNewlines = false;
		$this->scanOperand = true;
	}

	/**
	 * Returns the unread source from the cursor on, limited to $chunksize
	 * bytes when $chunksize is truthy.
	 */
	public function getInput($chunksize)
	{
		if ($chunksize)
			return substr($this->source, $this->cursor, $chunksize);

		return substr($this->source, $this->cursor);
	}

	public function isDone()
	{
		return $this->peek() == TOKEN_END;
	}

	/**
	 * Consumes the next token if it has type $tt; otherwise pushes it back.
	 *
	 * @return bool true when the token matched and was consumed
	 */
	public function match($tt)
	{
		// unget() returns nothing, so a mismatch evaluates to false.
		return $this->get() == $tt || $this->unget();
	}

	/**
	 * Like match(), but a mismatch is a syntax error.
	 *
	 * @return JSToken the matched token
	 * @throws Exception when the next token is not of type $tt
	 */
	public function mustMatch($tt)
	{
		if (!$this->match($tt))
			throw $this->newSyntaxError('Unexpected token; token ' . $tt . ' expected');

		return $this->currentToken();
	}

	/**
	 * Returns the type of the next token without consuming it.
	 */
	public function peek()
	{
		if ($this->lookahead)
		{
			$next = $this->tokens[($this->tokenIndex + $this->lookahead) & 3];
			if ($this->scanNewlines && $next->lineno != $this->lineno)
				$tt = TOKEN_NEWLINE;
			else
				$tt = $next->type;
		}
		else
		{
			$tt = $this->get();
			$this->unget();
		}

		return $tt;
	}

	/**
	 * peek() with newline scanning switched on for the duration of the call.
	 */
	public function peekOnSameLine()
	{
		$this->scanNewlines = true;
		$tt = $this->peek();
		$this->scanNewlines = false;

		return $tt;
	}

	/**
	 * Returns the token in the current ring slot, or null when that slot has
	 * not been filled (no token read yet, or the first token was pushed back).
	 */
	public function currentToken()
	{
		return isset($this->tokens[$this->tokenIndex]) ? $this->tokens[$this->tokenIndex] : null;
	}

	/**
	 * Reads the next token and returns its type.
	 *
	 * Tokens previously pushed back with unget() are replayed first. Otherwise
	 * whitespace and comments are skipped and one token is scanned from a
	 * chunk of the source; when a chunk is too short to decide, the scan is
	 * retried on the whole remaining source.
	 *
	 * @param int|null $chunksize bytes of source to look at; null/0 means all
	 * @return mixed token type
	 * @throws Exception on an unterminated string or an illegal token
	 */
	public function get($chunksize = 1000)
	{
		while($this->lookahead)
		{
			$this->lookahead--;
			$this->tokenIndex = ($this->tokenIndex + 1) & 3;
			$token = $this->tokens[$this->tokenIndex];
			if ($token->type != TOKEN_NEWLINE || $this->scanNewlines)
				return $token->type;
		}

		$conditional_comment = false;

		// strip whitespace and comments
		while(true)
		{
			$input = $this->getInput($chunksize);

			// whitespace handling; gobble up \r as well (effectively we don't have support for MAC newlines!)
			$re = $this->scanNewlines ? '/^[ \r\t]+/' : '/^\s+/';
			if (preg_match($re, $input, $match))
			{
				$spaces = $match[0];
				$spacelen = strlen($spaces);
				$this->cursor += $spacelen;
				if (!$this->scanNewlines)
					$this->lineno += substr_count($spaces, "\n");

				if ($spacelen == $chunksize)
					continue; // complete chunk contained whitespace

				$input = $this->getInput($chunksize);
				if ($input == '' || $input[0] != '/')
					break;
			}

			// Comments
			if (!preg_match('/^\/(?:\*(@(?:cc_on|if|elif|else|end))?.*?\*\/|\/[^\n]*)/s', $input, $match))
			{
				if (!$chunksize)
					break;

				// retry with a full chunk fetch; this also prevents breakage of long regular expressions (which will never match a comment)
				$chunksize = null;
				continue;
			}

			// check if this is a conditional (JScript) comment
			if (!empty($match[1]))
			{
				$match[0] = '/*' . $match[1];
				$conditional_comment = true;
				break;
			}
			else
			{
				$this->cursor += strlen($match[0]);
				$this->lineno += substr_count($match[0], "\n");
			}
		}

		if ($input == '')
		{
			$tt = TOKEN_END;
			$match = array('');
		}
		elseif ($conditional_comment)
		{
			$tt = TOKEN_CONDCOMMENT_START;
		}
		else
		{
			switch ($input[0])
			{
				case '0':
					// hexadecimal; the isset() guard covers a source that ends in "0"
					if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match))
					{
						$tt = TOKEN_NUMBER;
						break;
					}
				// FALL THROUGH

				case '1': case '2': case '3': case '4': case '5':
				case '6': case '7': case '8': case '9':
					// should always match
					preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
					$tt = TOKEN_NUMBER;
				break;

				case "'":
					if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match))
					{
						$tt = TOKEN_STRING;
					}
					else
					{
						if ($chunksize)
							return $this->get(null); // retry with a full chunk fetch

						throw $this->newSyntaxError('Unterminated string literal');
					}
				break;

				case '"':
					if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match))
					{
						$tt = TOKEN_STRING;
					}
					else
					{
						if ($chunksize)
							return $this->get(null); // retry with a full chunk fetch

						throw $this->newSyntaxError('Unterminated string literal');
					}
				break;

				case '/':
					if ($this->scanOperand && preg_match('/^\/((?:\\\\.|\[(?:\\\\.|[^\]])*\]|[^\/])+)\/([gimy]*)/', $input, $match))
					{
						$tt = TOKEN_REGEXP;
						break;
					}
				// FALL THROUGH

				case '|':
				case '^':
				case '&':
				case '<':
				case '>':
				case '+':
				case '-':
				case '*':
				case '%':
				case '=':
				case '!':
					// should always match
					preg_match($this->opRegExp, $input, $match);
					$op = $match[0];
					$oplen = strlen($op);
					// The isset() guard covers an operator that is the last
					// character of the source.
					if (in_array($op, $this->assignOps, true) && isset($input[$oplen]) && $input[$oplen] == '=')
					{
						$tt = OP_ASSIGN;
						$match[0] .= '=';
					}
					else
					{
						$tt = $op;
						if ($this->scanOperand)
						{
							if ($op == OP_PLUS)
								$tt = OP_UNARY_PLUS;
							elseif ($op == OP_MINUS)
								$tt = OP_UNARY_MINUS;
						}
						$op = null;
					}
				break;

				case '.':
					if (preg_match('/^\.\d+(?:[eE][-+]?\d+)?/', $input, $match))
					{
						$tt = TOKEN_NUMBER;
						break;
					}
				// FALL THROUGH

				case ';':
				case ',':
				case '?':
				case ':':
				case '~':
				case '[':
				case ']':
				case '{':
				case '}':
				case '(':
				case ')':
					// these are all single
					$match = array($input[0]);
					$tt = $input[0];
				break;

				case '@':
					// check end of conditional comment
					if (substr($input, 0, 3) == '@*/')
					{
						$match = array('@*/');
						$tt = TOKEN_CONDCOMMENT_END;
					}
					else
						throw $this->newSyntaxError('Illegal token');
				break;

				case "\n":
					if ($this->scanNewlines)
					{
						$match = array("\n");
						$tt = TOKEN_NEWLINE;
					}
					else
						throw $this->newSyntaxError('Illegal token');
				break;

				default:
					// Fast path for identifiers: word chars followed by whitespace or various other tokens.
					// Note we don't need to exclude digits in the first char, as they've already been found
					// above.
					if (!preg_match('/^[$\w]+(?=[\s\/\|\^\&<>\+\-\*%=!.;,\?:~\[\]\{\}\(\)@])/', $input, $match))
					{
						// Character classes per ECMA-262 edition 5.1 section 7.6
						// Per spec, must accept Unicode 3.0, *may* accept later versions.
						// We'll take whatever PCRE understands, which should be more recent.
						$identifierStartChars = "\\p{L}\\p{Nl}" .  # UnicodeLetter
						                        "\$" .
						                        "_";
						$identifierPartChars  = $identifierStartChars .
						                        "\\p{Mn}\\p{Mc}" . # UnicodeCombiningMark
						                        "\\p{Nd}" .        # UnicodeDigit
						                        "\\p{Pc}";         # UnicodeConnectorPunctuation
						// Exactly four hex digits; no '-' inside the class.
						$unicodeEscape = "\\\\u[0-9A-Fa-f]{4}";
						$identifierRegex = "/^" .
						                   "(?:[$identifierStartChars]|$unicodeEscape)" .
						                   "(?:[$identifierPartChars]|$unicodeEscape)*" .
						                   "/uS";
						if (preg_match($identifierRegex, $input, $match))
						{
							if (strpos($match[0], '\\') !== false) {
								// Per ECMA-262 edition 5.1, section 7.6 escape sequences should behave as if they were
								// the original chars, but only within the boundaries of the identifier.
								$decoded = preg_replace_callback('/\\\\u([0-9A-Fa-f]{4})/',
										array(__CLASS__, 'unicodeEscapeCallback'),
										$match[0]);

								// Since our original regex didn't de-escape the originals, we need to check for validity again.
								// No need to worry about token boundaries, as anything outside the identifier is illegal!
								if (!preg_match("/^[$identifierStartChars][$identifierPartChars]*$/u", $decoded)) {
									throw $this->newSyntaxError('Illegal token');
								}

								// Per spec it _ought_ to work to use these escapes for keywords words as well...
								// but IE rejects them as invalid, while Firefox and Chrome treat them as identifiers
								// that don't match the keyword.
								if (in_array($decoded, $this->keywords, true)) {
									throw $this->newSyntaxError('Illegal token');
								}

								// TODO: save the decoded form for output?
							}
						}
						else
							throw $this->newSyntaxError('Illegal token');
					}
					// Keywords use their own text as token type.
					$tt = in_array($match[0], $this->keywords, true) ? $match[0] : TOKEN_IDENTIFIER;
			}
		}

		// Advance to the next ring slot, creating its token object on first use.
		$this->tokenIndex = ($this->tokenIndex + 1) & 3;

		if (!isset($this->tokens[$this->tokenIndex]))
			$this->tokens[$this->tokenIndex] = new JSToken();

		$token = $this->tokens[$this->tokenIndex];
		$token->type = $tt;

		if ($tt == OP_ASSIGN)
			$token->assignOp = $op;

		$token->start = $this->cursor;

		$token->value = $match[0];
		$this->cursor += strlen($match[0]);

		$token->end = $this->cursor;
		$token->lineno = $this->lineno;

		return $tt;
	}

	/**
	 * Pushes the current token back so the next get() returns it again.
	 *
	 * @throws Exception when this would make the lookahead reach four tokens
	 */
	public function unget()
	{
		if (++$this->lookahead == 4)
			throw $this->newSyntaxError('PANIC: too much lookahead!');

		$this->tokenIndex = ($this->tokenIndex - 1) & 3;
	}

	/**
	 * Builds (does not throw) an exception carrying file name and line number.
	 *
	 * @param string $m message
	 * @return Exception
	 */
	public function newSyntaxError($m)
	{
		return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
	}

	/**
	 * preg_replace_callback() helper: turns the four hex digits captured
	 * from a \uXXXX escape into the corresponding UTF-8 character.
	 */
	public static function unicodeEscapeCallback($m)
	{
		return html_entity_decode('&#x' . $m[1]. ';', ENT_QUOTES, 'UTF-8');
	}
}
2124
/**
 * Plain record for one scanned token. All fields are filled in by the
 * tokenizer: the token type and text, the start and end offsets in the
 * source, the line number, and the operator of a compound assignment.
 */
class JSToken
{
	public $type, $value;
	public $start, $end;
	public $lineno;
	public $assignOp;
}