Merge "password: Move commonpasswords.cdb to includes/password/"
[lhc/web/wiklou.git] / includes / libs / JavaScriptMinifier.php
<?php
/**
 * JavaScript Minifier
 *
 * @file
 * @author Paul Copperman <paul.copperman@gmail.com>
 * @license Apache-2.0
 * @license MIT
 * @license GPL-2.0-or-later
 * @license LGPL-2.1-or-later
 */

/**
 * This class is meant to safely minify javascript code, while leaving syntactically correct
 * programs intact. Other libraries, such as JSMin require a certain coding style to work
 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
 * slow, because they construct a complete parse tree before outputting the code minified.
 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
 * fast enough to be used for on-the-fly minifying.
 *
 * This class was written with ECMA-262 Edition 3 in mind ("ECMAScript 3"). Parsing features
 * new to ECMAScript 5 or later might not be supported. However, Edition 5.1 better reflects
 * how actual JS engines worked and work and is simpler and more readable prose. As such,
 * the below code will refer to sections of the 5.1 specification.
 *
 * See <https://www.ecma-international.org/ecma-262/5.1/>.
 */
class JavaScriptMinifier {

	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT = 0;
	const CONDITION = 1;
	const PROPERTY_ASSIGNMENT = 2;
	const EXPRESSION = 3;
	const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP = 5;
	const EXPRESSION_FUNC = 6;
	const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP = 8;
	const EXPRESSION_TERNARY_FUNC = 9;
	const PAREN_EXPRESSION = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP = 11;
	const PAREN_EXPRESSION_FUNC = 12;
	const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP = 101; // unary operators
	const TYPE_INCR_OP = 102; // ++ and --
	const TYPE_BIN_OP = 103; // binary operators
	const TYPE_ADD_OP = 104; // + and - which can be either unary or binary ops
	const TYPE_HOOK = 105; // ?
	const TYPE_COLON = 106; // :
	const TYPE_COMMA = 107; // ,
	const TYPE_SEMICOLON = 108; // ;
	const TYPE_BRACE_OPEN = 109; // {
	const TYPE_BRACE_CLOSE = 110; // }
	const TYPE_PAREN_OPEN = 111; // ( and [
	const TYPE_PAREN_CLOSE = 112; // ) and ]
	const TYPE_RETURN = 113; // keywords: break, continue, return, throw
	const TYPE_IF = 114; // keywords: catch, for, with, switch, while, if
	const TYPE_DO = 115; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC = 116; // keywords: function
	const TYPE_LITERAL = 117; // all literals, identifiers and unrecognised tokens

	// Key used inside the $model table to name the state to transition to
	const ACTION_GOTO = 201;

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/**
	 * Maximum line length
	 *
	 * This is not a strict maximum, but a guideline. Longer lines will be
	 * produced when literals (e.g. quoted strings) longer than this are
	 * encountered, or when required to guard against semicolon insertion.
	 *
	 * This is a private member (instead of constant) to allow tests to
	 * set it to 1, to verify ASI and line-breaking behaviour.
	 */
	private static $maxLineLength = 1000;

	/**
	 * Returns minified JavaScript code.
	 *
	 * The input is scanned once, token by token. Whitespace and comments are dropped,
	 * and a small state machine tracks just enough context to tell a division operator
	 * from the start of a regexp literal and to know where a newline must be kept so
	 * that automatic semicolon insertion still happens in the output.
	 *
	 * As a small size optimisation, the literals `true` and `false` are emitted as
	 * `!0` and `!1` when they appear where an expression operand is expected.
	 *
	 * @param string $s JavaScript code to minify
	 * @return string|false Minified code, or false (the result of parseError()) when a
	 *  malformed numeric literal is encountered
	 */
	public static function minify( $s ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : Characters which can be combined without whitespace between them.
		$opChars = [
			// ECMAScript 5.1 § 7.7 Punctuators
			// Unlike the spec, these are individual symbols, not sequences.
			'{' => true,
			'}' => true,
			'(' => true,
			')' => true,
			'[' => true,
			']' => true,
			'.' => true,
			';' => true,
			',' => true,
			'<' => true,
			'>' => true,
			'=' => true,
			'!' => true,
			'+' => true,
			'-' => true,
			'*' => true,
			'%' => true,
			'&' => true,
			'|' => true,
			'^' => true,
			'~' => true,
			'?' => true,
			':' => true,
			'/' => true,
			// ECMAScript 5.1 § 7.8.4 String Literals
			'"' => true,
			"'" => true,
		];

		// $tokenTypes : Map keywords and operators to their corresponding token type
		$tokenTypes = [
			// ECMAScript 5.1 § 11.4 Unary Operators
			// ECMAScript 5.1 § 11.6 Additive Operators
			// UnaryExpression includes PostfixExpression, which includes 'new'.
			'new' => self::TYPE_UN_OP,
			'delete' => self::TYPE_UN_OP,
			'void' => self::TYPE_UN_OP,
			'typeof' => self::TYPE_UN_OP,
			'++' => self::TYPE_INCR_OP,
			'--' => self::TYPE_INCR_OP,
			'+' => self::TYPE_ADD_OP,
			'-' => self::TYPE_ADD_OP,
			'~' => self::TYPE_UN_OP,
			'!' => self::TYPE_UN_OP,
			// ECMAScript 5.1 § 11.5 Multiplicative Operators
			'*' => self::TYPE_BIN_OP,
			'/' => self::TYPE_BIN_OP,
			'%' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.7 Bitwise Shift Operators
			'<<' => self::TYPE_BIN_OP,
			'>>' => self::TYPE_BIN_OP,
			'>>>' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.8 Relational Operators
			'<' => self::TYPE_BIN_OP,
			'>' => self::TYPE_BIN_OP,
			'<=' => self::TYPE_BIN_OP,
			'>=' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.9 Equality Operators
			'==' => self::TYPE_BIN_OP,
			'!=' => self::TYPE_BIN_OP,
			'===' => self::TYPE_BIN_OP,
			'!==' => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'in' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.10 Binary Bitwise Operators
			'&' => self::TYPE_BIN_OP,
			'^' => self::TYPE_BIN_OP,
			'|' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.11 Binary Logical Operators
			'&&' => self::TYPE_BIN_OP,
			'||' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.12 Conditional Operator
			// Also known as ternary.
			'?' => self::TYPE_HOOK,
			':' => self::TYPE_COLON,
			// ECMAScript 5.1 § 11.13 Assignment Operators
			'=' => self::TYPE_BIN_OP,
			'*=' => self::TYPE_BIN_OP,
			'/=' => self::TYPE_BIN_OP,
			'%=' => self::TYPE_BIN_OP,
			'+=' => self::TYPE_BIN_OP,
			'-=' => self::TYPE_BIN_OP,
			'<<=' => self::TYPE_BIN_OP,
			'>>=' => self::TYPE_BIN_OP,
			'>>>=' => self::TYPE_BIN_OP,
			'&=' => self::TYPE_BIN_OP,
			'^=' => self::TYPE_BIN_OP,
			'|=' => self::TYPE_BIN_OP,
			// ECMAScript 5.1 § 11.14 Comma Operator
			',' => self::TYPE_COMMA,

			// The keywords that disallow LineTerminator before their
			// (sometimes optional) Expression or Identifier.
			//
			//    keyword ;
			//    keyword [no LineTerminator here] Identifier ;
			//    keyword [no LineTerminator here] Expression ;
			//
			// See also ECMAScript 5.1:
			// - § 12.7 The continue Statement
			// - § 12.8 The break Statement
			// - § 12.9 The return Statement
			// - § 12.13 The throw Statement
			'continue' => self::TYPE_RETURN,
			'break' => self::TYPE_RETURN,
			'return' => self::TYPE_RETURN,
			'throw' => self::TYPE_RETURN,

			// The keywords require a parenthesised Expression or Identifier
			// before the next Statement.
			//
			//     keyword ( Expression ) Statement
			//     keyword ( Identifier ) Statement
			//
			// See also ECMAScript 5.1:
			// - § 12.5 The if Statement
			// - § 12.6 Iteration Statements (while, for)
			// - § 12.10 The with Statement
			// - § 12.11 The switch Statement
			// - § 12.14 The try Statement (catch)
			'if' => self::TYPE_IF,
			'catch' => self::TYPE_IF,
			'while' => self::TYPE_IF,
			'for' => self::TYPE_IF,
			'switch' => self::TYPE_IF,
			'with' => self::TYPE_IF,

			// The keywords followed by an Identifier, Statement,
			// Expression, or Block.
			//
			//     var Identifier
			//     else Statement
			//     do Statement
			//     case Expression
			//     try Block
			//     finally Block
			//
			// See also ECMAScript 5.1:
			// - § 12.2 Variable Statement
			// - § 12.5 The if Statement (else)
			// - § 12.6 Iteration Statements (do, while, for)
			// - § 12.11 The switch Statement (case)
			// - § 12.14 The try Statement
			'var' => self::TYPE_DO,
			'else' => self::TYPE_DO,
			'do' => self::TYPE_DO,
			'case' => self::TYPE_DO,
			'try' => self::TYPE_DO,
			'finally' => self::TYPE_DO,

			// ECMAScript 5.1 § 13 Function Definition
			'function' => self::TYPE_FUNC,

			// Can be one of:
			// - DecimalLiteral (ECMAScript 5.1 § 7.8.3 Numeric Literals)
			// - MemberExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
			'.' => self::TYPE_BIN_OP,

			// Can be one of:
			// - Block (ECMAScript 5.1 § 12.1 Block)
			// - ObjectLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
			'{' => self::TYPE_BRACE_OPEN,
			'}' => self::TYPE_BRACE_CLOSE,

			// Can be one of:
			// - Parenthesised Identifier or Expression after a
			//   TYPE_IF or TYPE_FUNC keyword.
			// - PrimaryExpression (ECMAScript 5.1 § 11.1 Primary Expressions)
			// - CallExpression (ECMAScript 5.1 § 11.2 Left-Hand-Side Expressions)
			'(' => self::TYPE_PAREN_OPEN,
			')' => self::TYPE_PAREN_CLOSE,

			// Can be one of:
			// - ArrayLiteral (ECMAScript 5.1 § 11.1 Primary Expressions)
			'[' => self::TYPE_PAREN_OPEN,
			']' => self::TYPE_PAREN_CLOSE,

			// Can be one of:
			// - End of any statement
			// - EmptyStatement (ECMAScript 5.1 § 12.3 Empty Statement)
			';' => self::TYPE_SEMICOLON,
		];

		// $model : This is the main table for our state machine. For every state/token pair
		//          the desired action is defined. Pairs that are absent leave the state
		//          unchanged (unless a $pop rule applies).
		$model = [
			// Statement - This is the initial state.
			self::STATEMENT => [
				self::TYPE_UN_OP => [ self::ACTION_GOTO => self::EXPRESSION ],
				self::TYPE_INCR_OP => [ self::ACTION_GOTO => self::EXPRESSION ],
				self::TYPE_ADD_OP => [ self::ACTION_GOTO => self::EXPRESSION ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_RETURN => [ self::ACTION_GOTO => self::EXPRESSION_NO_NL ],
				self::TYPE_IF => [ self::ACTION_GOTO => self::CONDITION ],
				self::TYPE_FUNC => [ self::ACTION_GOTO => self::CONDITION ],
				self::TYPE_LITERAL => [ self::ACTION_GOTO => self::EXPRESSION_OP ],
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_COLON => [ self::ACTION_GOTO => self::PROPERTY_EXPRESSION ],
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::STATEMENT ],
			],
			self::EXPRESSION => [
				self::TYPE_SEMICOLON => [ self::ACTION_GOTO => self::STATEMENT ],
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_FUNC => [ self::ACTION_GOTO => self::EXPRESSION_FUNC ],
				self::TYPE_LITERAL => [ self::ACTION_GOTO => self::EXPRESSION_OP ],
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_SEMICOLON => [ self::ACTION_GOTO => self::STATEMENT ],
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_FUNC => [ self::ACTION_GOTO => self::EXPRESSION_FUNC ],
				self::TYPE_LITERAL => [ self::ACTION_GOTO => self::EXPRESSION_OP ],
			],
			self::EXPRESSION_OP => [
				self::TYPE_BIN_OP => [ self::ACTION_GOTO => self::EXPRESSION ],
				self::TYPE_ADD_OP => [ self::ACTION_GOTO => self::EXPRESSION ],
				self::TYPE_HOOK => [ self::ACTION_GOTO => self::EXPRESSION_TERNARY ],
				self::TYPE_COLON => [ self::ACTION_GOTO => self::STATEMENT ],
				self::TYPE_COMMA => [ self::ACTION_GOTO => self::EXPRESSION ],
				self::TYPE_SEMICOLON => [ self::ACTION_GOTO => self::STATEMENT ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::STATEMENT ],
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_FUNC => [ self::ACTION_GOTO => self::EXPRESSION_TERNARY_FUNC ],
				self::TYPE_LITERAL => [ self::ACTION_GOTO => self::EXPRESSION_TERNARY_OP ],
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_BIN_OP => [ self::ACTION_GOTO => self::EXPRESSION_TERNARY ],
				self::TYPE_ADD_OP => [ self::ACTION_GOTO => self::EXPRESSION_TERNARY ],
				self::TYPE_HOOK => [ self::ACTION_GOTO => self::EXPRESSION_TERNARY ],
				self::TYPE_COMMA => [ self::ACTION_GOTO => self::EXPRESSION_TERNARY ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::STATEMENT ],
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_FUNC => [ self::ACTION_GOTO => self::PAREN_EXPRESSION_FUNC ],
				self::TYPE_LITERAL => [ self::ACTION_GOTO => self::PAREN_EXPRESSION_OP ],
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_BIN_OP => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_ADD_OP => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_HOOK => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_COLON => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_COMMA => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_SEMICOLON => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::STATEMENT ],
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
				self::TYPE_FUNC => [ self::ACTION_GOTO => self::PROPERTY_EXPRESSION_FUNC ],
				self::TYPE_LITERAL => [ self::ACTION_GOTO => self::PROPERTY_EXPRESSION_OP ],
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_BIN_OP => [ self::ACTION_GOTO => self::PROPERTY_EXPRESSION ],
				self::TYPE_ADD_OP => [ self::ACTION_GOTO => self::PROPERTY_EXPRESSION ],
				self::TYPE_HOOK => [ self::ACTION_GOTO => self::PROPERTY_EXPRESSION ],
				self::TYPE_COMMA => [ self::ACTION_GOTO => self::PROPERTY_ASSIGNMENT ],
				self::TYPE_PAREN_OPEN => [ self::ACTION_GOTO => self::PAREN_EXPRESSION ],
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => [ self::ACTION_GOTO => self::STATEMENT ],
			],
		];

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = [
			self::STATEMENT => [
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP,
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => self::STATEMENT,
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
			],
			self::EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP,
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP,
			],
			self::EXPRESSION_OP => [
				self::TYPE_HOOK => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP,
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP,
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP,
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP,
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP,
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP,
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP,
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
			],
		];

		// $pop : Rules for when to pop a state from the stack
		$pop = [
			self::STATEMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_ASSIGNMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_NO_NL => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_TERNARY_OP => [ self::TYPE_COLON => true ],
			self::PAREN_EXPRESSION => [ self::TYPE_PAREN_CLOSE => true ],
			self::PAREN_EXPRESSION_OP => [ self::TYPE_PAREN_CLOSE => true ],
			self::PROPERTY_EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ],
		];

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = [
			self::EXPRESSION_NO_NL => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_ADD_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true,
			],
			self::EXPRESSION_OP => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true,
			],
		];

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = [
			self::EXPRESSION_OP => true,
			self::EXPRESSION_TERNARY_OP => true,
			self::PAREN_EXPRESSION_OP => true,
			self::PROPERTY_EXPRESSION_OP => true,
		];

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = [];
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling.
			// Note: $end must point past the end of the current token
			// so that `substr($s, $pos, $end - $pos)` would be the entire token.
			// In other words, $end will be the offset of the last relevant character
			// in the stream + 1, or simply put: The offset of the first character
			// of any next token in the stream.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes
				$search = $ch . '\\';
				do {
					// Speculatively add 2 to the end so that if we see a backslash,
					// the next iteration will start 2 characters further (one for the
					// backslash, one for the escaped character).
					// We'll correct this outside the loop.
					$end += strcspn( $s, $search, $end ) + 2;
					// If the last character in our search for a quote or a backslash
					// matched a backslash and we haven't reached the end, keep searching..
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				// Correction (1): Undo speculative add, keep only one (end of string literal)
				$end--;
				if ( $end > $length ) {
					// Correction (2): Loop wrongly assumed an end quote ended the search,
					// but search ended because we've reached the end. Correct $end.
					// TODO: This is invalid and should throw.
					$end--;
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal
				for ( ; ; ) {
					// Search until we find "/" (end of regexp), "\" (backslash escapes),
					// or "[" (start of character classes).
					do {
						// Speculatively add 2 to ensure next iteration skips
						// over backslash and escaped character.
						// We'll correct this outside the loop.
						$end += strcspn( $s, '/[\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of regexp)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed end slash was seen
						// String ended without end of regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
					if ( $s[$end - 1] === '/' ) {
						break;
					}
					// (Implicit else), we must've found the start of a char class,
					// skip until we find "]" (end of char class), or "\" (backslash escape)
					do {
						// Speculatively add 2 for backslash escape.
						// We'll subtract one outside the loop.
						$end += strcspn( $s, ']\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of char class)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed "]" was seen
						// String ended without ending char class or regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError(
						$s,
						$pos,
						'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
					);
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				// Decimal numeric literal: integer part, optional fraction, optional exponent
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError(
							$s,
							$pos,
							'No decimal digits after e, how many zeroes should be added?'
						);
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = $tokenTypes[$token] ?? self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $lineLength + $end - $pos > self::$maxLineLength &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}

			// Shorten boolean literals where an operand is expected. The check on $last
			// avoids rewriting a property access such as `x.true`.
			if (
				$type === self::TYPE_LITERAL
				&& ( $token === 'true' || $token === 'false' )
				&& ( $state === self::EXPRESSION || $state === self::PROPERTY_EXPRESSION )
				&& $last !== '.'
			) {
				$token = ( $token === 'true' ) ? '!0' : '!1';
			}

			$out .= $token;
			// Count what was actually emitted: $token may have been shortened just above,
			// in which case it is no longer $end - $pos bytes long.
			$lineLength += strlen( $token );
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $model[$state][$type][self::ACTION_GOTO] ) ) {
				$state = $model[$state][$type][self::ACTION_GOTO];
			}
		}
		return $out;
	}

	/**
	 * Called by minify() when a malformed numeric literal is found.
	 *
	 * None of the arguments are used yet; the method only yields the value that
	 * minify() hands back to its caller.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset into $fullJavascript passed by minify() for the error
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}