Merge "mw.Feedback: If the message is posted remotely, link the title correctly"
[lhc/web/wiklou.git] / includes / libs / JavaScriptMinifier.php
<?php
/**
 * JavaScript Minifier
 *
 * @file
 * @author Paul Copperman <paul.copperman@gmail.com>
 * @license Choose any of Apache, MIT, GPL, LGPL
 */

/**
 * This class is meant to safely minify JavaScript code, while leaving syntactically correct
 * programs intact. Other libraries, such as JSMin, require a certain coding style to work
 * correctly. On the other hand, libraries like jsminplus, which do parse the code correctly,
 * are rather slow, because they construct a complete parse tree before outputting the
 * minified code. So this class is meant to allow arbitrary (but syntactically correct) input,
 * while being fast enough to be used for on-the-fly minifying.
 */
class JavaScriptMinifier {

	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT = 0;
	const CONDITION = 1;
	const PROPERTY_ASSIGNMENT = 2;
	const EXPRESSION = 3;
	const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP = 5;
	const EXPRESSION_FUNC = 6;
	const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP = 8;
	const EXPRESSION_TERNARY_FUNC = 9;
	const PAREN_EXPRESSION = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP = 11;
	const PAREN_EXPRESSION_FUNC = 12;
	const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP = 1; // unary operators
	const TYPE_INCR_OP = 2; // ++ and --
	const TYPE_BIN_OP = 3; // binary operators
	const TYPE_ADD_OP = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK = 5; // ?
	const TYPE_COLON = 6; // :
	const TYPE_COMMA = 7; // ,
	const TYPE_SEMICOLON = 8; // ;
	const TYPE_BRACE_OPEN = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN = 13; // keywords: break, continue, return, throw
	const TYPE_IF = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC = 16; // keywords: function
	const TYPE_LITERAL = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/**
	 * NOTE: This isn't a strict maximum. Longer lines will be produced when
	 *       literals (e.g. quoted strings) longer than this are encountered
	 *       or when required to guard against semicolon insertion.
	 */
	const MAX_LINE_LENGTH = 1000;

	/**
	 * Returns minified JavaScript code.
	 *
	 * The input is scanned once, token by token. Whitespace and comments are dropped, and a
	 * small state machine tracks just enough context to tell a division operator from the
	 * start of a regexp literal and to know where a newline must be kept so that JavaScript's
	 * automatic semicolon insertion still applies. The literals `true` and `false` are
	 * rewritten to `!0` and `!1` when they appear at the start of an expression.
	 *
	 * Unterminated string or regexp literals are not reported; the rest of the input is
	 * emitted as one token.
	 *
	 * @param string $s JavaScript code to minify
	 * @return string|false Minified code, or the result of parseError() (currently false)
	 *  when a malformed numeric literal is encountered
	 */
	public static function minify( $s ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = [
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		];

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = [
			'!' => self::TYPE_UN_OP,
			'~' => self::TYPE_UN_OP,
			'delete' => self::TYPE_UN_OP,
			'new' => self::TYPE_UN_OP,
			'typeof' => self::TYPE_UN_OP,
			'void' => self::TYPE_UN_OP,
			'++' => self::TYPE_INCR_OP,
			'--' => self::TYPE_INCR_OP,
			'!=' => self::TYPE_BIN_OP,
			'!==' => self::TYPE_BIN_OP,
			'%' => self::TYPE_BIN_OP,
			'%=' => self::TYPE_BIN_OP,
			'&' => self::TYPE_BIN_OP,
			'&&' => self::TYPE_BIN_OP,
			'&=' => self::TYPE_BIN_OP,
			'*' => self::TYPE_BIN_OP,
			'*=' => self::TYPE_BIN_OP,
			'+=' => self::TYPE_BIN_OP,
			'-=' => self::TYPE_BIN_OP,
			'.' => self::TYPE_BIN_OP,
			'/' => self::TYPE_BIN_OP,
			'/=' => self::TYPE_BIN_OP,
			'<' => self::TYPE_BIN_OP,
			'<<' => self::TYPE_BIN_OP,
			'<<=' => self::TYPE_BIN_OP,
			'<=' => self::TYPE_BIN_OP,
			'=' => self::TYPE_BIN_OP,
			'==' => self::TYPE_BIN_OP,
			'===' => self::TYPE_BIN_OP,
			'>' => self::TYPE_BIN_OP,
			'>=' => self::TYPE_BIN_OP,
			'>>' => self::TYPE_BIN_OP,
			'>>=' => self::TYPE_BIN_OP,
			'>>>' => self::TYPE_BIN_OP,
			'>>>=' => self::TYPE_BIN_OP,
			'^' => self::TYPE_BIN_OP,
			'^=' => self::TYPE_BIN_OP,
			'|' => self::TYPE_BIN_OP,
			'|=' => self::TYPE_BIN_OP,
			'||' => self::TYPE_BIN_OP,
			'in' => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+' => self::TYPE_ADD_OP,
			'-' => self::TYPE_ADD_OP,
			'?' => self::TYPE_HOOK,
			':' => self::TYPE_COLON,
			',' => self::TYPE_COMMA,
			';' => self::TYPE_SEMICOLON,
			'{' => self::TYPE_BRACE_OPEN,
			'}' => self::TYPE_BRACE_CLOSE,
			'(' => self::TYPE_PAREN_OPEN,
			'[' => self::TYPE_PAREN_OPEN,
			')' => self::TYPE_PAREN_CLOSE,
			']' => self::TYPE_PAREN_CLOSE,
			'break' => self::TYPE_RETURN,
			'continue' => self::TYPE_RETURN,
			'return' => self::TYPE_RETURN,
			'throw' => self::TYPE_RETURN,
			'catch' => self::TYPE_IF,
			'for' => self::TYPE_IF,
			'if' => self::TYPE_IF,
			'switch' => self::TYPE_IF,
			'while' => self::TYPE_IF,
			'with' => self::TYPE_IF,
			'case' => self::TYPE_DO,
			'do' => self::TYPE_DO,
			'else' => self::TYPE_DO,
			'finally' => self::TYPE_DO,
			'try' => self::TYPE_DO,
			'var' => self::TYPE_DO,
			'function' => self::TYPE_FUNC
		];

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = [
			self::STATEMENT => [
				self::TYPE_UN_OP => self::EXPRESSION,
				self::TYPE_INCR_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN => self::EXPRESSION_NO_NL,
				self::TYPE_IF => self::CONDITION,
				self::TYPE_FUNC => self::CONDITION,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_COLON => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::EXPRESSION => [
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			],
			self::EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COLON => self::STATEMENT,
				self::TYPE_COMMA => self::EXPRESSION,
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_BIN_OP => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP => self::PAREN_EXPRESSION,
				self::TYPE_HOOK => self::PAREN_EXPRESSION,
				self::TYPE_COLON => self::PAREN_EXPRESSION,
				self::TYPE_COMMA => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_BIN_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::STATEMENT
			]
		];

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = [
			self::STATEMENT => [
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::CONDITION => [
				self::TYPE_PAREN_OPEN => self::STATEMENT
			],
			self::PROPERTY_ASSIGNMENT => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			],
			self::EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_NO_NL => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_OP => [
				self::TYPE_HOOK => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			],
			self::EXPRESSION_TERNARY => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_OP => [
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::EXPRESSION_TERNARY_FUNC => [
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			],
			self::PAREN_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_OP => [
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PAREN_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_OP => [
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			],
			self::PROPERTY_EXPRESSION_FUNC => [
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			]
		];

		// $pop : Rules for when to pop a state from the stack
		$pop = [
			self::STATEMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_ASSIGNMENT => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_NO_NL => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ],
			self::EXPRESSION_TERNARY_OP => [ self::TYPE_COLON => true ],
			self::PAREN_EXPRESSION => [ self::TYPE_PAREN_CLOSE => true ],
			self::PAREN_EXPRESSION_OP => [ self::TYPE_PAREN_CLOSE => true ],
			self::PROPERTY_EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ],
			self::PROPERTY_EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ]
		];

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = [
			self::EXPRESSION_NO_NL => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_ADD_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			],
			self::EXPRESSION_OP => [
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			]
		];

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = [
			self::EXPRESSION_OP => true,
			self::EXPRESSION_TERNARY_OP => true,
			self::PAREN_EXPRESSION_OP => true,
			self::PROPERTY_EXPRESSION_OP => true
		];

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = [];
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling.
			// Note: $end must point past the end of the current token
			// so that `substr($s, $pos, $end - $pos)` would be the entire token.
			// In other words, $end will be the offset of the last relevant character
			// in the stream + 1, or simply put: The offset of the first character
			// of any next token in the stream.
			// It must never exceed $length, because `$s[$end - 1]` is read further down.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes
				$search = $ch . '\\';
				do {
					// Speculatively add 2 to the end so that if we see a backslash,
					// the next iteration will start 2 characters further (one for the
					// backslash, one for the escaped character).
					// We'll correct this outside the loop.
					$end += strcspn( $s, $search, $end ) + 2;
					// If the last character in our search for a quote or a backslash
					// matched a backslash, keep searching. Only do so while $end is still
					// inside the input: a backslash as the very last byte has no escaped
					// character after it, and searching on would start beyond the string.
				} while ( $end <= $length && $s[$end - 2] === '\\' );
				// Correction (1): Undo speculative add, keep only one (end of string literal)
				$end--;
				if ( $end > $length ) {
					// Correction (2): Loop wrongly assumed an end quote ended the search,
					// but search ended because we've reached the end. Correct $end.
					// TODO: This is invalid and should throw.
					$end--;
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal
				for ( ; ; ) {
					// Search until we find "/" (end of regexp), "\" (backslash escapes),
					// or "[" (start of character classes).
					do {
						// Speculatively add 2 to ensure next iteration skips
						// over backslash and escaped character.
						// We'll correct this outside the loop.
						$end += strcspn( $s, '/[\\', $end ) + 2;
						// If backslash escape (with room for the escaped character),
						// keep searching...
					} while ( $end <= $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of regexp)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed end slash was seen
						// String ended without end of regexp. Correct $end.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
					if ( $s[$end - 1] === '/' ) {
						break;
					}
					// (Implicit else), we must've found the start of a char class,
					// skip until we find "]" (end of char class), or "\" (backslash escape)
					do {
						// Speculatively add 2 for backslash escape.
						// We'll subtract one outside the loop.
						$end += strcspn( $s, ']\\', $end ) + 2;
						// If backslash escape (with room for the escaped character),
						// keep searching...
					} while ( $end <= $length && $s[$end - 2] === '\\' );
					// Correction (1): Undo speculative add, keep only one (end of char class)
					$end--;
					if ( $end > $length ) {
						// Correction (2): Loop wrongly assumed "]" was seen, but the input
						// ended inside the character class. Correct $end and stop, otherwise
						// $end stays beyond the input.
						// TODO: This is invalid and should throw.
						$end--;
						break;
					}
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError(
						$s,
						$pos,
						'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...'
					);
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					// Up to two dots are accepted here; three or more are rejected.
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError(
							$s,
							$pos,
							'No decimal digits after e, how many zeroes should be added?'
						);
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $lineLength + $end - $pos > self::MAX_LINE_LENGTH &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) {
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}
			// Shorten boolean literals at the start of an expression: true => !0, false => !1.
			// Skipped after a dot, where the word is a property name rather than a literal.
			if (
				$type === self::TYPE_LITERAL
				&& ( $token === 'true' || $token === 'false' )
				&& ( $state === self::EXPRESSION || $state === self::PROPERTY_EXPRESSION )
				&& $last !== '.'
			) {
				$token = ( $token === 'true' ) ? '!0' : '!1';
			}

			$out .= $token;
			$lineLength += $end - $pos; // length of the token as it appeared in the input
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}
		}
		return $out;
	}

	/**
	 * Report a syntax error found by minify().
	 *
	 * This is currently a stub: all arguments are ignored and false is returned, which
	 * minify() hands straight back to its caller.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset into $fullJavascript at which the problem was found
	 * @param string $errorMsg Description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}