Fix MediaWiki.Commenting.LicenseComment.InvalidLicenseTag errors
[lhc/web/wiklou.git] / includes / libs / JavaScriptMinifier.php
<?php
/**
 * JavaScript Minifier
 *
 * @file
 * @author Paul Copperman <paul.copperman@gmail.com>
 * @license Apache-2.0
 * @license MIT
 * @license GPL-2.0-or-later
 * @license LGPL-2.1-or-later
 */

/**
 * This class is meant to safely minify javascript code, while leaving syntactically correct
 * programs intact. Other libraries, such as JSMin require a certain coding style to work
 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
 * slow, because they construct a complete parse tree before outputting the code minified.
 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
 * fast enough to be used for on-the-fly minifying.
 */
class JavaScriptMinifier {

	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT = 0;
	const CONDITION = 1;
	const PROPERTY_ASSIGNMENT = 2;
	const EXPRESSION = 3;
	const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP = 5;
	const EXPRESSION_FUNC = 6;
	const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP = 8;
	const EXPRESSION_TERNARY_FUNC = 9;
	const PAREN_EXPRESSION = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP = 11;
	const PAREN_EXPRESSION_FUNC = 12;
	const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP = 1; // unary operators
	const TYPE_INCR_OP = 2; // ++ and --
	const TYPE_BIN_OP = 3; // binary operators
	const TYPE_ADD_OP = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK = 5; // ?
	const TYPE_COLON = 6; // :
	const TYPE_COMMA = 7; // ,
	const TYPE_SEMICOLON = 8; // ;
	const TYPE_BRACE_OPEN = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN = 13; // keywords: break, continue, return, throw
	const TYPE_IF = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC = 16; // keywords: function
	const TYPE_LITERAL = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/**
	 * NOTE: This isn't a strict maximum. Longer lines will be produced when
	 * literals (e.g. quoted strings) longer than this are encountered
	 * or when required to guard against semicolon insertion.
	 */
	const MAX_LINE_LENGTH = 1000;

	/**
	 * Returns minified JavaScript code.
	 *
	 * The input is tokenized in a single pass. Whitespace and comments are dropped,
	 * a single space or newline is kept only where removing it would change the
	 * meaning of the program, and the literals `true` / `false` are shortened to
	 * `!0` / `!1` where they appear as an expression operand.
	 *
	 * @param string $s JavaScript code to minify
	 * @return string|bool Minified code, or the return value of parseError()
	 *  (currently always false) when a malformed numeric literal is encountered
	 */
	public static function minify( $s ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = [
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		];

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = [
			'!' => self::TYPE_UN_OP,
			'~' => self::TYPE_UN_OP,
			'delete' => self::TYPE_UN_OP,
			'new' => self::TYPE_UN_OP,
			'typeof' => self::TYPE_UN_OP,
			'void' => self::TYPE_UN_OP,
			'++' => self::TYPE_INCR_OP,
			'--' => self::TYPE_INCR_OP,
			'!=' => self::TYPE_BIN_OP,
			'!==' => self::TYPE_BIN_OP,
			'%' => self::TYPE_BIN_OP,
			'%=' => self::TYPE_BIN_OP,
			'&' => self::TYPE_BIN_OP,
			'&&' => self::TYPE_BIN_OP,
			'&=' => self::TYPE_BIN_OP,
			'*' => self::TYPE_BIN_OP,
			'*=' => self::TYPE_BIN_OP,
			'+=' => self::TYPE_BIN_OP,
			'-=' => self::TYPE_BIN_OP,
			'.' => self::TYPE_BIN_OP,
			'/' => self::TYPE_BIN_OP,
			'/=' => self::TYPE_BIN_OP,
			'<' => self::TYPE_BIN_OP,
			'<<' => self::TYPE_BIN_OP,
			'<<=' => self::TYPE_BIN_OP,
			'<=' => self::TYPE_BIN_OP,
			'=' => self::TYPE_BIN_OP,
			'==' => self::TYPE_BIN_OP,
			'===' => self::TYPE_BIN_OP,
			'>' => self::TYPE_BIN_OP,
			'>=' => self::TYPE_BIN_OP,
			'>>' => self::TYPE_BIN_OP,
			'>>=' => self::TYPE_BIN_OP,
			'>>>' => self::TYPE_BIN_OP,
			'>>>=' => self::TYPE_BIN_OP,
			'^' => self::TYPE_BIN_OP,
			'^=' => self::TYPE_BIN_OP,
			'|' => self::TYPE_BIN_OP,
			'|=' => self::TYPE_BIN_OP,
			'||' => self::TYPE_BIN_OP,
			'in' => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+' => self::TYPE_ADD_OP,
			'-' => self::TYPE_ADD_OP,
			'?' => self::TYPE_HOOK,
			':' => self::TYPE_COLON,
			',' => self::TYPE_COMMA,
			';' => self::TYPE_SEMICOLON,
			'{' => self::TYPE_BRACE_OPEN,
			'}' => self::TYPE_BRACE_CLOSE,
			'(' => self::TYPE_PAREN_OPEN,
			'[' => self::TYPE_PAREN_OPEN,
			')' => self::TYPE_PAREN_CLOSE,
			']' => self::TYPE_PAREN_CLOSE,
			'break' => self::TYPE_RETURN,
			'continue' => self::TYPE_RETURN,
			'return' => self::TYPE_RETURN,
			'throw' => self::TYPE_RETURN,
			'catch' => self::TYPE_IF,
			'for' => self::TYPE_IF,
			'if' => self::TYPE_IF,
			'switch' => self::TYPE_IF,
			'while' => self::TYPE_IF,
			'with' => self::TYPE_IF,
			'case' => self::TYPE_DO,
			'do' => self::TYPE_DO,
			'else' => self::TYPE_DO,
			'finally' => self::TYPE_DO,
			'try' => self::TYPE_DO,
			'var' => self::TYPE_DO,
			'function' => self::TYPE_FUNC
		];

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = [
			self::STATEMENT => [
				self::TYPE_UN_OP => self::EXPRESSION,
				self::TYPE_INCR_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN => self::EXPRESSION_NO_NL,
				self::TYPE_IF => self::CONDITION,
				self::TYPE_FUNC => self::CONDITION,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_COLON => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::EXPRESSION => [
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COLON => self::STATEMENT,
				self::TYPE_COMMA => self::EXPRESSION,
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_BIN_OP => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP => self::PAREN_EXPRESSION,
				self::TYPE_HOOK => self::PAREN_EXPRESSION,
				self::TYPE_COLON => self::PAREN_EXPRESSION,
				self::TYPE_COMMA => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			]
		];

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = [
			self::STATEMENT => [
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => self::STATEMENT
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			],
			self::EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_OP => [
				self::TYPE_HOOK => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			]
		];

		// $pop : Rules for when to pop a state from the stack
		$pop = [
			self::STATEMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_ASSIGNMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_NO_NL => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_TERNARY_OP => [ self::TYPE_COLON => true ],
			self::PAREN_EXPRESSION => [ self::TYPE_PAREN_CLOSE => true ],
			self::PAREN_EXPRESSION_OP => [ self::TYPE_PAREN_CLOSE => true ],
			self::PROPERTY_EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ]
		];

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = [
			self::EXPRESSION_NO_NL => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_ADD_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			],
			self::EXPRESSION_OP => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			]
		];

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = [
			self::EXPRESSION_OP => true,
			self::EXPRESSION_TERNARY_OP => true,
			self::PAREN_EXPRESSION_OP => true,
			self::PROPERTY_EXPRESSION_OP => true
		];

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = [];
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling.
			// Note: $end must point past the end of the current token
			// so that `substr($s, $pos, $end - $pos)` would be the entire token.
			// In other words, $end will be the offset of the last relevant character
			// in the stream + 1, or simply put: The offset of the first character
			// of any next token in the stream.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes
				$search = $ch . '\\';
				do {
					// Speculatively add 2 to the end so that if we see a backslash,
					// the next iteration will start 2 characters further (one for the
					// backslash, one for the escaped character).
					// We'll correct this outside the loop.
					$end += strcspn( $s, $search, $end ) + 2;
					// If the last character in our search for a quote or a backslash
					// matched a backslash and we haven't reached the end, keep searching..
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				// Correction (1): Undo speculative add, keep only one (end of string literal)
				$end--;
				if ( $end > $length ) {
					// Correction (2): Loop wrongly assumed an end quote ended the search,
					// but search ended because we've reached the end. Correct $end.
					// TODO: This is invalid and should throw.
					$end--;
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal
				for ( ; ; ) {
					// Search until we find "/" (end of regexp), "\" (backslash escapes),
					// or "[" (start of character classes).
					do {
						// Speculatively add 2 to ensure next iteration skips
						// over backslash and escaped character.
						// We'll correct this outside the loop.
						$end += strcspn( $s, '/[\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of regexp)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed end slash was seen
						// String ended without end of regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
					if ( $s[$end - 1] === '/' ) {
						break;
					}
					// (Implicit else), we must've found the start of a char class,
					// skip until we find "]" (end of char class), or "\" (backslash escape)
					do {
						// Speculatively add 2 for backslash escape.
						// We'll subtract one outside the loop.
						$end += strcspn( $s, ']\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of char class)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed "]" was seen
						// String ended without ending char class or regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError(
						$s,
						$pos,
						'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
					);
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					// Two consecutive dots are tolerated so that a property access on a
					// number with a trailing decimal point (e.g. "5..toString()") is kept
					// together as one token.
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError(
							$s,
							$pos,
							'No decimal digits after e, how many zeroes should be added?'
						);
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $lineLength + $end - $pos > self::MAX_LINE_LENGTH &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				// The source length of the token is used here; for true/false (shortened
				// below) this is a safe upper bound on the emitted length.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}
			if (
				$type === self::TYPE_LITERAL
				&& ( $token === 'true' || $token === 'false' )
				&& ( $state === self::EXPRESSION || $state === self::PROPERTY_EXPRESSION )
				&& $last !== '.'
			) {
				// Shorten boolean literals, but not property names such as "a.true"
				$token = ( $token === 'true' ) ? '!0' : '!1';
			}

			$out .= $token;
			// Count what was actually emitted: after the true/false shortening above,
			// $token may be shorter than the source span $end - $pos.
			$lineLength += strlen( $token );
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}
		}
		return $out;
	}

	/**
	 * Report a parse error encountered by minify().
	 *
	 * Currently a stub: the arguments are ignored and false is always returned,
	 * which minify() passes straight back to its caller.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset in $fullJavascript where the problem was detected
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}