Merge "Remove dead code from Title::secureAndSplit()"
[lhc/web/wiklou.git] / includes / libs / JavaScriptMinifier.php
1 <?php
2 /**
3 * JavaScript Minifier
4 *
5 * @file
6 * @author Paul Copperman <paul.copperman@gmail.com>
7 * @license Apache-2.0
8 * @license MIT
9 * @license GPL-2.0-or-later
10 * @license LGPL-2.1-or-later
11 */
12
13 /**
14 * This class is meant to safely minify javascript code, while leaving syntactically correct
15 * programs intact. Other libraries, such as JSMin require a certain coding style to work
16 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
17 * slow, because they construct a complete parse tree before outputting the code minified.
18 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
19 * fast enough to be used for on-the-fly minifying.
20 *
21 * This class was written with ECMA-262 Edition 3 in mind ("ECMAScript 3"). Parsing features
22 * new to ECMAScript 5 or later might not be supported. However, Edition 5.1 better reflects
23 * how actual JS engines worked and work and is simpler and more readable prose. As such,
24 * the below code will refer to sections of the 5.1 specification.
25 *
26 * See <https://www.ecma-international.org/ecma-262/5.1/>.
27 */
class JavaScriptMinifier {

	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT = 0;
	const CONDITION = 1;
	const PROPERTY_ASSIGNMENT = 2;
	const EXPRESSION = 3;
	const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP = 5;
	const EXPRESSION_FUNC = 6;
	const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP = 8;
	const EXPRESSION_TERNARY_FUNC = 9;
	const PAREN_EXPRESSION = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP = 11;
	const PAREN_EXPRESSION_FUNC = 12;
	const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP = 101; // unary operators
	const TYPE_INCR_OP = 102; // ++ and --
	const TYPE_BIN_OP = 103; // binary operators
	const TYPE_ADD_OP = 104; // + and - which can be either unary or binary ops
	const TYPE_HOOK = 105; // ?
	const TYPE_COLON = 106; // :
	const TYPE_COMMA = 107; // ,
	const TYPE_SEMICOLON = 108; // ;
	const TYPE_BRACE_OPEN = 109; // {
	const TYPE_BRACE_CLOSE = 110; // }
	const TYPE_PAREN_OPEN = 111; // ( and [
	const TYPE_PAREN_CLOSE = 112; // ) and ]
	const TYPE_RETURN = 113; // keywords: break, continue, return, throw
	const TYPE_IF = 114; // keywords: catch, for, with, switch, while, if
	const TYPE_DO = 115; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC = 116; // keywords: function
	const TYPE_LITERAL = 117; // all literals, identifiers and unrecognised tokens

	/* Actions that a state/token pair of the state machine may request */
	const ACTION_GOTO = 201; // switch to the given state
	const ACTION_PUSH = 202; // push the given state onto the stack
	const ACTION_POP = 203; // switch to the state on top of the stack

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/**
	 * Maximum line length
	 *
	 * This is not a strict maximum, but a guideline. Longer lines will be
	 * produced when literals (e.g. quoted strings) longer than this are
	 * encountered, or when required to guard against semicolon insertion.
	 *
	 * This is a private member (instead of constant) to allow tests to
	 * set it to 1, to verify ASI and line-breaking behaviour.
	 */
	private static $maxLineLength = 1000;

	/**
	 * Returns minified JavaScript code.
	 *
	 * The input is tokenised in a single pass. Whitespace and comments are dropped,
	 * a newline is kept wherever removing it could change the result of automatic
	 * semicolon insertion, and the literals `true` / `false` are shortened to
	 * `!0` / `!1` where they directly follow an operator.
	 *
	 * Unterminated string and regexp literals are passed through up to the end
	 * of the input rather than being rejected.
	 *
	 * @param string $s JavaScript code to minify
	 * @return string|false Minified code, or false (the result of parseError())
	 *  when a malformed hexadecimal or decimal numeric literal is encountered
	 */
	public static function minify( $s ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : Characters which can be combined without whitespace between them.
		$opChars = [
			// ECMAScript 5.1 § 7.7 Punctuators
			// Unlike the spec, these are individual symbols, not sequences.
			'{' => true,
			'}' => true,
			'(' => true,
			')' => true,
			'[' => true,
			']' => true,
			'.' => true,
			';' => true,
			',' => true,
			'<' => true,
			'>' => true,
			'=' => true,
			'!' => true,
			'+' => true,
			'-' => true,
			'*' => true,
			'%' => true,
			'&' => true,
			'|' => true,
			'^' => true,
			'~' => true,
			'?' => true,
			':' => true,
			'/' => true,
			// ECMAScript 5.1 § 7.8.4 String Literals
			'"' => true,
			"'" => true,
		];

		// $tokenTypes : Map keywords and operators to their corresponding token type
		$tokenTypes = [
			// ECMAScript 5.1 § 11.4 Unary Operators
			// ECMAScript 5.1 § 11.6 Additive Operators
			// UnaryExpression includes PostfixExpression, which includes 'new'.
			'new' => self::TYPE_UN_OP,
			'delete' => self::TYPE_UN_OP,
			'void' => self::TYPE_UN_OP,
			'typeof' => self::TYPE_UN_OP,
			'++' => self::TYPE_INCR_OP,
			'--' => self::TYPE_INCR_OP,
			'+' => self::TYPE_ADD_OP,
			'-' => self::TYPE_ADD_OP,
			'~' => self::TYPE_UN_OP,
			'!' => self::TYPE_UN_OP,
			// ECMAScript 5.1 § 11.5 Multiplicative Operators
			'*' => self::TYPE_BIN_OP,
			'/' => self::TYPE_BIN_OP,
			'%' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.7 Bitwise Shift Operators
			'<<' => self::TYPE_BIN_OP,
			'>>' => self::TYPE_BIN_OP,
			'>>>' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.8 Relational Operators
			'<' => self::TYPE_BIN_OP,
			'>' => self::TYPE_BIN_OP,
			'<=' => self::TYPE_BIN_OP,
			'>=' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.9 Equality Operators
			'==' => self::TYPE_BIN_OP,
			'!=' => self::TYPE_BIN_OP,
			'===' => self::TYPE_BIN_OP,
			'!==' => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'in' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.10 Binary Bitwise Operators
			'&' => self::TYPE_BIN_OP,
			'^' => self::TYPE_BIN_OP,
			'|' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.11 Binary Logical Operators
			'&&' => self::TYPE_BIN_OP,
			'||' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.12 Conditional Operator
			// Also known as ternary.
			'?' => self::TYPE_HOOK,
			':' => self::TYPE_COLON,
			// ECMAScript 5.1 § 11.13 Assignment Operators
			'=' => self::TYPE_BIN_OP,
			'*=' => self::TYPE_BIN_OP,
			'/=' => self::TYPE_BIN_OP,
			'%=' => self::TYPE_BIN_OP,
			'+=' => self::TYPE_BIN_OP,
			'-=' => self::TYPE_BIN_OP,
			'<<=' => self::TYPE_BIN_OP,
			'>>=' => self::TYPE_BIN_OP,
			'>>>=' => self::TYPE_BIN_OP,
			'&=' => self::TYPE_BIN_OP,
			'^=' => self::TYPE_BIN_OP,
			'|=' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.14 Comma Operator
			',' => self::TYPE_COMMA,

			// The keywords that disallow LineTerminator before their
			// (sometimes optional) Expression or Identifier.
			//
			//    keyword ;
			//    keyword [no LineTerminator here] Identifier ;
			//    keyword [no LineTerminator here] Expression ;
			//
			// See also ECMAScript 5.1:
			// - § 12.7 The continue Statement
			// - § 12.8 The break Statement
			// - § 12.9 The return Statement
			// - § 12.13 The throw Statement
			'continue' => self::TYPE_RETURN,
			'break' => self::TYPE_RETURN,
			'return' => self::TYPE_RETURN,
			'throw' => self::TYPE_RETURN,

			// The keywords require a parenthesised Expression or Identifier
			// before the next Statement.
			//
			//     keyword ( Expression ) Statement
			//     keyword ( Identifier ) Statement
			//
			// See also ECMAScript 5.1:
			// - § 12.5 The if Statement
			// - § 12.6 Iteration Statements (do, while, for)
			// - § 12.10 The with Statement
			// - § 12.11 The switch Statement
			// - § 12.14 The try Statement (catch)
			'if' => self::TYPE_IF,
			'catch' => self::TYPE_IF,
			'while' => self::TYPE_IF,
			'for' => self::TYPE_IF,
			'switch' => self::TYPE_IF,
			'with' => self::TYPE_IF,

			// The keywords followed by an Identifier, Statement,
			// Expression, or Block.
			//
			//     var Identifier
			//     else Statement
			//     do Statement
			//     case Expression
			//     try Block
			//     finally Block
			//
			// See also ECMAScript 5.1:
			// - § 12.2 Variable Statement
			// - § 12.5 The if Statement (else)
			// - § 12.6 Iteration Statements (do, while, for)
			// - § 12.11 The switch Statement (case)
			// - § 12.14 The try Statement
			'var' => self::TYPE_DO,
			'else' => self::TYPE_DO,
			'do' => self::TYPE_DO,
			'case' => self::TYPE_DO,
			'try' => self::TYPE_DO,
			'finally' => self::TYPE_DO,

			// ECMAScript 5.1 § 13 Function Definition
			'function' => self::TYPE_FUNC,

			// Can be one of:
			// - DecimalLiteral (ECMAScript 5.1 § 7.8.3 Numeric Literals)
			// - MemberExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
			'.' => self::TYPE_BIN_OP,

			// Can be one of:
			// - Block (ECMAScript 5.1 § 12.1 Block)
			// - ObjectLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
			'{' => self::TYPE_BRACE_OPEN,
			'}' => self::TYPE_BRACE_CLOSE,

			// Can be one of:
			// - Parenthesised Identifier or Expression after a
			//   TYPE_IF or TYPE_FUNC keyword.
			// - PrimaryExpression (ECMAScript 5.1 § 11.1 Primary Expressions)
			// - CallExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
			'(' => self::TYPE_PAREN_OPEN,
			')' => self::TYPE_PAREN_CLOSE,

			// Can be one of:
			// - ArrayLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
			'[' => self::TYPE_PAREN_OPEN,
			']' => self::TYPE_PAREN_CLOSE,

			// Can be one of:
			// - End of any statement
			// - EmptyStatement (ECMAScript 5.1 § 12.3 Empty Statement)
			';' => self::TYPE_SEMICOLON,
		];

		// $model : This is the main table for our state machine. For every state/token pair
		// the desired action is defined.
		//
		// The state pushed onto the stack by ACTION_PUSH will be returned to by ACTION_POP.
		//
		// A given state/token pair MAY NOT specify both ACTION_POP and ACTION_GOTO.
		// In the event of such mistake, ACTION_POP is used instead of ACTION_GOTO.
		$model = [
			// Statement - This is the initial state.
			self::STATEMENT => [
				self::TYPE_UN_OP => [
					self::ACTION_GOTO => self::EXPRESSION,
				],
				self::TYPE_INCR_OP => [
					self::ACTION_GOTO => self::EXPRESSION,
				],
				self::TYPE_ADD_OP => [
					self::ACTION_GOTO => self::EXPRESSION,
				],
				self::TYPE_BRACE_OPEN => [
					// Use of '{' in statement context, creates a Block.
					self::ACTION_PUSH => self::STATEMENT,
				],
				self::TYPE_BRACE_CLOSE => [
					// Ends a Block
					self::ACTION_POP => true,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_RETURN => [
					self::ACTION_GOTO => self::EXPRESSION_NO_NL,
				],
				self::TYPE_IF => [
					self::ACTION_GOTO => self::CONDITION,
				],
				self::TYPE_FUNC => [
					self::ACTION_GOTO => self::CONDITION,
				],
				self::TYPE_LITERAL => [
					self::ACTION_GOTO => self::EXPRESSION_OP,
				],
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::STATEMENT,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_COLON => [
					self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
				],
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::PROPERTY_ASSIGNMENT,
					self::ACTION_GOTO => self::STATEMENT,
				],
				self::TYPE_BRACE_CLOSE => [
					self::ACTION_POP => true,
				],
			],
			self::EXPRESSION => [
				self::TYPE_SEMICOLON => [
					self::ACTION_GOTO => self::STATEMENT,
				],
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_OP,
					self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
				],
				self::TYPE_BRACE_CLOSE => [
					self::ACTION_POP => true,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_FUNC => [
					self::ACTION_GOTO => self::EXPRESSION_FUNC,
				],
				self::TYPE_LITERAL => [
					self::ACTION_GOTO => self::EXPRESSION_OP,
				],
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_SEMICOLON => [
					self::ACTION_GOTO => self::STATEMENT,
				],
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_OP,
					self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
				],
				self::TYPE_BRACE_CLOSE => [
					self::ACTION_POP => true,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_FUNC => [
					self::ACTION_GOTO => self::EXPRESSION_FUNC,
				],
				self::TYPE_LITERAL => [
					self::ACTION_GOTO => self::EXPRESSION_OP,
				],
			],
			self::EXPRESSION_OP => [
				self::TYPE_BIN_OP => [
					self::ACTION_GOTO => self::EXPRESSION,
				],
				self::TYPE_ADD_OP => [
					self::ACTION_GOTO => self::EXPRESSION,
				],
				self::TYPE_HOOK => [
					self::ACTION_PUSH => self::EXPRESSION,
					self::ACTION_GOTO => self::EXPRESSION_TERNARY,
				],
				self::TYPE_COLON => [
					self::ACTION_GOTO => self::STATEMENT,
				],
				self::TYPE_COMMA => [
					self::ACTION_GOTO => self::EXPRESSION,
				],
				self::TYPE_SEMICOLON => [
					self::ACTION_GOTO => self::STATEMENT,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_BRACE_CLOSE => [
					self::ACTION_POP => true,
				],
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_OP,
					self::ACTION_GOTO => self::STATEMENT,
				],
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
					self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_FUNC => [
					self::ACTION_GOTO => self::EXPRESSION_TERNARY_FUNC,
				],
				self::TYPE_LITERAL => [
					self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP,
				],
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_BIN_OP => [
					self::ACTION_GOTO => self::EXPRESSION_TERNARY,
				],
				self::TYPE_ADD_OP => [
					self::ACTION_GOTO => self::EXPRESSION_TERNARY,
				],
				self::TYPE_HOOK => [
					self::ACTION_PUSH => self::EXPRESSION_TERNARY,
					self::ACTION_GOTO => self::EXPRESSION_TERNARY,
				],
				self::TYPE_COMMA => [
					self::ACTION_GOTO => self::EXPRESSION_TERNARY,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_COLON => [
					self::ACTION_POP => true,
				],
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::EXPRESSION_TERNARY_OP,
					self::ACTION_GOTO => self::STATEMENT,
				],
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
					self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_PAREN_CLOSE => [
					self::ACTION_POP => true,
				],
				self::TYPE_FUNC => [
					self::ACTION_GOTO => self::PAREN_EXPRESSION_FUNC,
				],
				self::TYPE_LITERAL => [
					self::ACTION_GOTO => self::PAREN_EXPRESSION_OP,
				],
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_BIN_OP => [
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_ADD_OP => [
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_HOOK => [
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_COLON => [
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_COMMA => [
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_SEMICOLON => [
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_PAREN_CLOSE => [
					self::ACTION_POP => true,
				],
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::PAREN_EXPRESSION_OP,
					self::ACTION_GOTO => self::STATEMENT,
				],
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
					self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
				],
				self::TYPE_BRACE_CLOSE => [
					self::ACTION_POP => true,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
				self::TYPE_FUNC => [
					self::ACTION_GOTO => self::PROPERTY_EXPRESSION_FUNC,
				],
				self::TYPE_LITERAL => [
					self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP,
				],
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_BIN_OP => [
					self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
				],
				self::TYPE_ADD_OP => [
					self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
				],
				self::TYPE_HOOK => [
					self::ACTION_GOTO => self::PROPERTY_EXPRESSION,
				],
				self::TYPE_COMMA => [
					self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT,
				],
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
				],
				self::TYPE_BRACE_CLOSE => [
					self::ACTION_POP => true,
				],
				self::TYPE_PAREN_OPEN => [
					self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
					self::ACTION_GOTO => self::PAREN_EXPRESSION,
				],
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => [
					self::ACTION_PUSH => self::PROPERTY_EXPRESSION_OP,
					self::ACTION_GOTO => self::STATEMENT,
				],
			],
		];

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = [
			self::EXPRESSION_NO_NL => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_ADD_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			],
			self::EXPRESSION_OP => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			]
		];

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = [
			self::EXPRESSION_OP => true,
			self::EXPRESSION_TERNARY_OP => true,
			self::PAREN_EXPRESSION_OP => true,
			self::PROPERTY_EXPRESSION_OP => true
		];

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = [];
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling.
			// Note: $end must point past the end of the current token
			// so that `substr($s, $pos, $end - $pos)` would be the entire token.
			// In other words, $end will be the offset of the last relevant character
			// in the stream + 1, or simply put: The offset of the first character
			// of any next token in the stream.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes
				$search = $ch . '\\';
				do{
					// Speculatively add 2 to the end so that if we see a backslash,
					// the next iteration will start 2 characters further (one for the
					// backslash, one for the escaped character).
					// We'll correct this outside the loop.
					$end += strcspn( $s, $search, $end ) + 2;
					// If the last character in our search for a quote or a backslash
					// matched a backslash and we haven't reached the end, keep searching..
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				// Correction (1): Undo speculative add, keep only one (end of string literal)
				$end--;
				if ( $end > $length ) {
					// Correction (2): Loop wrongly assumed an end quote ended the search,
					// but search ended because we've reached the end. Clamp $end to the
					// end of the input. A plain decrement is not enough here: when the
					// input ends with a backslash the speculative add has been applied
					// twice, and $end would still point past the end of the input.
					// TODO: This is invalid and should throw.
					$end = $length;
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal
				for ( ; ; ) {
					// Search until we find "/" (end of regexp), "\" (backslash escapes),
					// or "[" (start of character classes).
					do{
						// Speculatively add 2 to ensure next iteration skips
						// over backslash and escaped character.
						// We'll correct this outside the loop.
						$end += strcspn( $s, '/[\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of regexp)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed end slash was seen
						// String ended without end of regexp. Clamp $end to the end of
						// the input (it may be more than one past the end if the input
						// ends with a backslash).
						// TODO: This is invalid and should throw.
						$end = $length;
						break;
					}
					if ( $s[$end - 1] === '/' ) {
						break;
					}
					// (Implicit else), we must've found the start of a char class,
					// skip until we find "]" (end of char class), or "\" (backslash escape)
					do{
						// Speculatively add 2 for backslash escape.
						// We'll subtract one outside the loop.
						$end += strcspn( $s, ']\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of char class)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed "]" was seen
						// String ended without ending char class or regexp. Clamp $end
						// to the end of the input (see above).
						// TODO: This is invalid and should throw.
						$end = $length;
						break;
					}
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError(
						$s,
						$pos,
						'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
					);
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				// Decimal numeric literal: integer part, optional fraction, optional exponent
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError(
							$s,
							$pos,
							'No decimal digits after e, how many zeroes should be added?'
						);
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = $tokenTypes[$token] ?? self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $lineLength + $end - $pos > self::$maxLineLength &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}
			// Shorten boolean literals that directly follow an operator or a property
			// colon; `x.true` (property access) is left alone.
			if (
				$type === self::TYPE_LITERAL
				&& ( $token === 'true' || $token === 'false' )
				&& ( $state === self::EXPRESSION || $state === self::PROPERTY_EXPRESSION )
				&& $last !== '.'
			) {
				$token = ( $token === 'true' ) ? '!0' : '!1';
			}

			$out .= $token;
			// Length of the token as found in the input. For a boolean shortened above
			// this slightly over-counts, which is harmless because the maximum line
			// length is only a guideline.
			$lineLength += $end - $pos;
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Now that we have output our token, transition into the new state.
			if ( isset( $model[$state][$type][self::ACTION_PUSH] ) &&
				count( $stack ) < self::STACK_LIMIT
			) {
				$stack[] = $model[$state][$type][self::ACTION_PUSH];
			}
			if ( $stack && isset( $model[$state][$type][self::ACTION_POP] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $model[$state][$type][self::ACTION_GOTO] ) ) {
				$state = $model[$state][$type][self::ACTION_GOTO];
			}
		}
		return $out;
	}

	/**
	 * Called by minify() when the input contains a malformed numeric literal.
	 *
	 * The arguments are currently unused; the method only signals failure.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset in the input at which the problem was found
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}