Merge "Revert "Log the reason why revision->getContent() returns null""
[lhc/web/wiklou.git] / includes / libs / JavaScriptMinifier.php
1 <?php
2 /**
3 * JavaScript Minifier
4 *
5 * @file
6 * @author Paul Copperman <paul.copperman@gmail.com>
7 * @license Choose any of Apache, MIT, GPL, LGPL
8 */
9
/**
 * This class is meant to safely minify javascript code, while leaving syntactically correct
 * programs intact. Other libraries, such as JSMin require a certain coding style to work
 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
 * slow, because they construct a complete parse tree before outputting the code minified.
 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
 * fast enough to be used for on-the-fly minifying.
 */
class JavaScriptMinifier {

	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT = 0;
	const CONDITION = 1;
	const PROPERTY_ASSIGNMENT = 2;
	const EXPRESSION = 3;
	const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP = 5;
	const EXPRESSION_FUNC = 6;
	const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP = 8;
	const EXPRESSION_TERNARY_FUNC = 9;
	const PAREN_EXPRESSION = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP = 11;
	const PAREN_EXPRESSION_FUNC = 12;
	const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP = 1; // unary operators
	const TYPE_INCR_OP = 2; // ++ and --
	const TYPE_BIN_OP = 3; // binary operators
	const TYPE_ADD_OP = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK = 5; // ?
	const TYPE_COLON = 6; // :
	const TYPE_COMMA = 7; // ,
	const TYPE_SEMICOLON = 8; // ;
	const TYPE_BRACE_OPEN = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN = 13; // keywords: break, continue, return, throw
	const TYPE_IF = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC = 16; // keywords: function
	const TYPE_LITERAL = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/**
	 * NOTE: This isn't a strict maximum. Longer lines will be produced when
	 * literals (e.g. quoted strings) longer than this are encountered
	 * or when required to guard against semicolon insertion.
	 */
	const MAX_LINE_LENGTH = 1000;

	/**
	 * Returns minified JavaScript code.
	 *
	 * The input is scanned byte by byte in a single pass. Whitespace and comments are
	 * dropped, tokens are copied to the output, and a small state machine (see the
	 * tables below) tracks just enough context to tell a division operator from a
	 * regexp literal and to know where a newline must be kept so that JavaScript's
	 * automatic semicolon insertion still applies.
	 *
	 * The literals `true` and `false` are shortened to `!0` and `!1` when they appear
	 * at the start of an expression or of an object-literal property value.
	 *
	 * @param string $s JavaScript code to minify
	 * @return string|bool Minified code, or the return value of parseError()
	 *  (currently false) when a malformed numeric literal is encountered
	 */
	public static function minify( $s ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = [
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		];

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = [
			'!' => self::TYPE_UN_OP,
			'~' => self::TYPE_UN_OP,
			'delete' => self::TYPE_UN_OP,
			'new' => self::TYPE_UN_OP,
			'typeof' => self::TYPE_UN_OP,
			'void' => self::TYPE_UN_OP,
			'++' => self::TYPE_INCR_OP,
			'--' => self::TYPE_INCR_OP,
			'!=' => self::TYPE_BIN_OP,
			'!==' => self::TYPE_BIN_OP,
			'%' => self::TYPE_BIN_OP,
			'%=' => self::TYPE_BIN_OP,
			'&' => self::TYPE_BIN_OP,
			'&&' => self::TYPE_BIN_OP,
			'&=' => self::TYPE_BIN_OP,
			'*' => self::TYPE_BIN_OP,
			'*=' => self::TYPE_BIN_OP,
			'+=' => self::TYPE_BIN_OP,
			'-=' => self::TYPE_BIN_OP,
			'.' => self::TYPE_BIN_OP,
			'/' => self::TYPE_BIN_OP,
			'/=' => self::TYPE_BIN_OP,
			'<' => self::TYPE_BIN_OP,
			'<<' => self::TYPE_BIN_OP,
			'<<=' => self::TYPE_BIN_OP,
			'<=' => self::TYPE_BIN_OP,
			'=' => self::TYPE_BIN_OP,
			'==' => self::TYPE_BIN_OP,
			'===' => self::TYPE_BIN_OP,
			'>' => self::TYPE_BIN_OP,
			'>=' => self::TYPE_BIN_OP,
			'>>' => self::TYPE_BIN_OP,
			'>>=' => self::TYPE_BIN_OP,
			'>>>' => self::TYPE_BIN_OP,
			'>>>=' => self::TYPE_BIN_OP,
			'^' => self::TYPE_BIN_OP,
			'^=' => self::TYPE_BIN_OP,
			'|' => self::TYPE_BIN_OP,
			'|=' => self::TYPE_BIN_OP,
			'||' => self::TYPE_BIN_OP,
			'in' => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+' => self::TYPE_ADD_OP,
			'-' => self::TYPE_ADD_OP,
			'?' => self::TYPE_HOOK,
			':' => self::TYPE_COLON,
			',' => self::TYPE_COMMA,
			';' => self::TYPE_SEMICOLON,
			'{' => self::TYPE_BRACE_OPEN,
			'}' => self::TYPE_BRACE_CLOSE,
			'(' => self::TYPE_PAREN_OPEN,
			'[' => self::TYPE_PAREN_OPEN,
			')' => self::TYPE_PAREN_CLOSE,
			']' => self::TYPE_PAREN_CLOSE,
			'break' => self::TYPE_RETURN,
			'continue' => self::TYPE_RETURN,
			'return' => self::TYPE_RETURN,
			'throw' => self::TYPE_RETURN,
			'catch' => self::TYPE_IF,
			'for' => self::TYPE_IF,
			'if' => self::TYPE_IF,
			'switch' => self::TYPE_IF,
			'while' => self::TYPE_IF,
			'with' => self::TYPE_IF,
			'case' => self::TYPE_DO,
			'do' => self::TYPE_DO,
			'else' => self::TYPE_DO,
			'finally' => self::TYPE_DO,
			'try' => self::TYPE_DO,
			'var' => self::TYPE_DO,
			'function' => self::TYPE_FUNC
		];

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = [
			self::STATEMENT => [
				self::TYPE_UN_OP => self::EXPRESSION,
				self::TYPE_INCR_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN => self::EXPRESSION_NO_NL,
				self::TYPE_IF => self::CONDITION,
				self::TYPE_FUNC => self::CONDITION,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_COLON => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::EXPRESSION => [
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COLON => self::STATEMENT,
				self::TYPE_COMMA => self::EXPRESSION,
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_BIN_OP => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP => self::PAREN_EXPRESSION,
				self::TYPE_HOOK => self::PAREN_EXPRESSION,
				self::TYPE_COLON => self::PAREN_EXPRESSION,
				self::TYPE_COMMA => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			]
		];

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = [
			self::STATEMENT => [
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => self::STATEMENT
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			],
			self::EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_OP => [
				self::TYPE_HOOK => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			]
		];

		// $pop : Rules for when to pop a state from the stack
		$pop = [
			self::STATEMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_ASSIGNMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_NO_NL => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_TERNARY_OP => [ self::TYPE_COLON => true ],
			self::PAREN_EXPRESSION => [ self::TYPE_PAREN_CLOSE => true ],
			self::PAREN_EXPRESSION_OP => [ self::TYPE_PAREN_CLOSE => true ],
			self::PROPERTY_EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ]
		];

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = [
			self::EXPRESSION_NO_NL => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_ADD_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			],
			self::EXPRESSION_OP => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			]
		];

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = [
			self::EXPRESSION_OP => true,
			self::EXPRESSION_TERNARY_OP => true,
			self::PAREN_EXPRESSION_OP => true,
			self::PROPERTY_EXPRESSION_OP => true
		];

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = [];
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling.
			// Note: $end must point past the end of the current token
			// so that `substr($s, $pos, $end - $pos)` would be the entire token.
			// In other words, $end will be the offset of the last relevant character
			// in the stream + 1, or simply put: The offset of the first character
			// of any next token in the stream.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes
				$search = $ch . '\\';
				do {
					// Speculatively add 2 to the end so that if we see a backslash,
					// the next iteration will start 2 characters further (one for the
					// backslash, one for the escaped character).
					// We'll correct this outside the loop.
					$end += strcspn( $s, $search, $end ) + 2;
					// If the last character in our search for a quote or a backslash
					// matched a backslash and we haven't reached the end, keep searching..
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				// Correction (1): Undo speculative add, keep only one (end of string literal)
				$end--;
				if ( $end > $length ) {
					// Correction (2): Loop wrongly assumed an end quote ended the search,
					// but search ended because we've reached the end. Correct $end.
					// TODO: This is invalid and should throw.
					$end--;
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal
				for ( ; ; ) {
					// Search until we find "/" (end of regexp), "\" (backslash escapes),
					// or "[" (start of character classes).
					do {
						// Speculatively add 2 to ensure next iteration skips
						// over backslash and escaped character.
						// We'll correct this outside the loop.
						$end += strcspn( $s, '/[\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of regexp)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed end slash was seen
						// String ended without end of regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
					if ( $s[$end - 1] === '/' ) {
						break;
					}
					// (Implicit else), we must've found the start of a char class,
					// skip until we find "]" (end of char class), or "\" (backslash escape)
					do {
						// Speculatively add 2 for backslash escape.
						// We'll subtract one outside the loop.
						$end += strcspn( $s, ']\\', $end ) + 2;
						// If backslash escape, keep searching...
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of regexp)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed "]" was seen
						// String ended without ending char class or regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError(
						$s,
						$pos,
						'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
					);
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				// Decimal numeric literal: integer part, optional dot(s) and
				// fraction digits, optional exponent.
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					// Two consecutive dots are tolerated here (as in `1..toString()`);
					// three or more are rejected.
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError(
							$s,
							$pos,
							'No decimal digits after e, how many zeroes should be added?'
						);
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// Malformed input (an unterminated string or regexp literal whose last
			// character is a backslash) can leave $end one past the end of the input
			// even after the corrections above. Clamp it so that $s[$end - 1] below
			// stays in bounds and $end - $pos really is strlen( $token ).
			if ( $end > $length ) {
				$end = $length;
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $lineLength + $end - $pos > self::MAX_LINE_LENGTH &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create an HTML comment opener: "a < !--b" must not become
			// "a<!--b", because "<!--" starts a single-line comment (see the comment
			// handling at the top of this loop).
			} elseif ( $token === '--' && $last === '!' && substr( $out, -2 ) === '<!' ) {
				$out .= ' ';
				$lineLength++;
			}
			if (
				$type === self::TYPE_LITERAL
				&& ( $token === 'true' || $token === 'false' )
				&& ( $state === self::EXPRESSION || $state === self::PROPERTY_EXPRESSION )
				&& $last !== '.'
			) {
				// Shorten boolean literals; skipped after "." so that property names
				// such as x.true are left alone.
				$token = ( $token === 'true' ) ? '!0' : '!1';
			}

			$out .= $token;
			// Length of the token as it appeared in the input
			$lineLength += $end - $pos;
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}
		}
		return $out;
	}

	/**
	 * Called by minify() when it meets a numeric literal it cannot tokenize.
	 *
	 * None of the arguments are used yet; the method only signals failure.
	 *
	 * @param string $fullJavascript The complete input passed to minify()
	 * @param int $position Byte offset in $fullJavascript at which the problem was detected
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}