Merge "(bug 45937) API: Check amlang in meta=allmessages"
[lhc/web/wiklou.git] / includes / libs / JavaScriptMinifier.php
<?php
/**
 * JavaScript Minifier
 *
 * @file
 * @author Paul Copperman <paul.copperman@gmail.com>
 * @license Choose any of Apache, MIT, GPL, LGPL
 */

/**
 * This class is meant to safely minify javascript code, while leaving syntactically correct
 * programs intact. Other libraries, such as JSMin require a certain coding style to work
 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
 * slow, because they construct a complete parse tree before outputting the code minified.
 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
 * fast enough to be used for on-the-fly minifying.
 */
class JavaScriptMinifier {

	/* Class constants */
	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT = 0;
	const CONDITION = 1;
	const PROPERTY_ASSIGNMENT = 2;
	const EXPRESSION = 3;
	const EXPRESSION_NO_NL = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP = 5;
	const EXPRESSION_FUNC = 6;
	const EXPRESSION_TERNARY = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP = 8;
	const EXPRESSION_TERNARY_FUNC = 9;
	const PAREN_EXPRESSION = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP = 11;
	const PAREN_EXPRESSION_FUNC = 12;
	const PROPERTY_EXPRESSION = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP = 1; // unary operators
	const TYPE_INCR_OP = 2; // ++ and --
	const TYPE_BIN_OP = 3; // binary operators
	const TYPE_ADD_OP = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK = 5; // ?
	const TYPE_COLON = 6; // :
	const TYPE_COMMA = 7; // ,
	const TYPE_SEMICOLON = 8; // ;
	const TYPE_BRACE_OPEN = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN = 13; // keywords: break, continue, return, throw
	const TYPE_IF = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC = 16; // keywords: function
	const TYPE_LITERAL = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage
	const STACK_LIMIT = 1000;

	/* Static functions */

	/**
	 * Returns minified JavaScript code.
	 *
	 * NOTE: $maxLineLength isn't a strict maximum. Longer lines will be produced when
	 *       literals (e.g. quoted strings) longer than $maxLineLength are encountered
	 *       or when required to guard against semicolon insertion.
	 *
	 * Unterminated string or regexp literals at the end of the input are emitted as-is
	 * (truncated at the end of the input) instead of reading past the end of $s.
	 *
	 * @param string $s JavaScript code to minify
	 * @param bool $statementsOnOwnLine Whether to put each statement on its own line
	 * @param int $maxLineLength Maximum length of a single line, or -1 for no maximum.
	 * @return string|bool Minified code, or the result of parseError() (currently false)
	 *   when a malformed numeric literal is encountered
	 */
	public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = array(
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		);

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = array(
			'!' => self::TYPE_UN_OP,
			'~' => self::TYPE_UN_OP,
			'delete' => self::TYPE_UN_OP,
			'new' => self::TYPE_UN_OP,
			'typeof' => self::TYPE_UN_OP,
			'void' => self::TYPE_UN_OP,
			'++' => self::TYPE_INCR_OP,
			'--' => self::TYPE_INCR_OP,
			'!=' => self::TYPE_BIN_OP,
			'!==' => self::TYPE_BIN_OP,
			'%' => self::TYPE_BIN_OP,
			'%=' => self::TYPE_BIN_OP,
			'&' => self::TYPE_BIN_OP,
			'&&' => self::TYPE_BIN_OP,
			'&=' => self::TYPE_BIN_OP,
			'*' => self::TYPE_BIN_OP,
			'*=' => self::TYPE_BIN_OP,
			'+=' => self::TYPE_BIN_OP,
			'-=' => self::TYPE_BIN_OP,
			'.' => self::TYPE_BIN_OP,
			'/' => self::TYPE_BIN_OP,
			'/=' => self::TYPE_BIN_OP,
			'<' => self::TYPE_BIN_OP,
			'<<' => self::TYPE_BIN_OP,
			'<<=' => self::TYPE_BIN_OP,
			'<=' => self::TYPE_BIN_OP,
			'=' => self::TYPE_BIN_OP,
			'==' => self::TYPE_BIN_OP,
			'===' => self::TYPE_BIN_OP,
			'>' => self::TYPE_BIN_OP,
			'>=' => self::TYPE_BIN_OP,
			'>>' => self::TYPE_BIN_OP,
			'>>=' => self::TYPE_BIN_OP,
			'>>>' => self::TYPE_BIN_OP,
			'>>>=' => self::TYPE_BIN_OP,
			'^' => self::TYPE_BIN_OP,
			'^=' => self::TYPE_BIN_OP,
			'|' => self::TYPE_BIN_OP,
			'|=' => self::TYPE_BIN_OP,
			'||' => self::TYPE_BIN_OP,
			'in' => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+' => self::TYPE_ADD_OP,
			'-' => self::TYPE_ADD_OP,
			'?' => self::TYPE_HOOK,
			':' => self::TYPE_COLON,
			',' => self::TYPE_COMMA,
			';' => self::TYPE_SEMICOLON,
			'{' => self::TYPE_BRACE_OPEN,
			'}' => self::TYPE_BRACE_CLOSE,
			'(' => self::TYPE_PAREN_OPEN,
			'[' => self::TYPE_PAREN_OPEN,
			')' => self::TYPE_PAREN_CLOSE,
			']' => self::TYPE_PAREN_CLOSE,
			'break' => self::TYPE_RETURN,
			'continue' => self::TYPE_RETURN,
			'return' => self::TYPE_RETURN,
			'throw' => self::TYPE_RETURN,
			'catch' => self::TYPE_IF,
			'for' => self::TYPE_IF,
			'if' => self::TYPE_IF,
			'switch' => self::TYPE_IF,
			'while' => self::TYPE_IF,
			'with' => self::TYPE_IF,
			'case' => self::TYPE_DO,
			'do' => self::TYPE_DO,
			'else' => self::TYPE_DO,
			'finally' => self::TYPE_DO,
			'try' => self::TYPE_DO,
			'var' => self::TYPE_DO,
			'function' => self::TYPE_FUNC
		);

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = array(
			self::STATEMENT => array(
				self::TYPE_UN_OP => self::EXPRESSION,
				self::TYPE_INCR_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN => self::EXPRESSION_NO_NL,
				self::TYPE_IF => self::CONDITION,
				self::TYPE_FUNC => self::CONDITION,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_COLON => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION => array(
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_BIN_OP => self::EXPRESSION,
				self::TYPE_ADD_OP => self::EXPRESSION,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COLON => self::STATEMENT,
				self::TYPE_COMMA => self::EXPRESSION,
				self::TYPE_SEMICOLON => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_BIN_OP => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_BIN_OP => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP => self::PAREN_EXPRESSION,
				self::TYPE_HOOK => self::PAREN_EXPRESSION,
				self::TYPE_COLON => self::PAREN_EXPRESSION,
				self::TYPE_COMMA => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_BIN_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			)
		);

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::STATEMENT
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			),
			self::EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_HOOK => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_HOOK => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			)
		);

		// $pop : Rules for when to pop a state from the stack
		$pop = array(
			self::STATEMENT => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_ASSIGNMENT => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_NO_NL => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_TERNARY_OP => array( self::TYPE_COLON => true ),
			self::PAREN_EXPRESSION => array( self::TYPE_PAREN_CLOSE => true ),
			self::PAREN_EXPRESSION_OP => array( self::TYPE_PAREN_CLOSE => true ),
			self::PROPERTY_EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
		);

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = array(
			self::EXPRESSION_NO_NL => array(
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_ADD_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			),
			self::EXPRESSION_OP => array(
				self::TYPE_UN_OP => true,
				self::TYPE_INCR_OP => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN => true,
				self::TYPE_IF => true,
				self::TYPE_DO => true,
				self::TYPE_FUNC => true,
				self::TYPE_LITERAL => true
			)
		);

		// Rules for when newlines should be inserted if
		// $statementsOnOwnLine is enabled.
		// $newlineBefore is checked before switching state,
		// $newlineAfter is checked after
		$newlineBefore = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_CLOSE => true,
			),
		);
		$newlineAfter = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_CLOSE => true,
				self::TYPE_SEMICOLON => true,
			),
		);

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = array(
			self::EXPRESSION_OP => true,
			self::EXPRESSION_TERNARY_OP => true,
			self::PAREN_EXPRESSION_OP => true,
			self::PROPERTY_EXPRESSION_OP => true
		);

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = array();
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling. $end will point past the end of it.
			$end = $pos + 1;
			// Handle string literals
			if ( $ch === "'" || $ch === '"' ) {
				// Search to the end of the string literal, skipping over backslash escapes.
				// Each step speculatively jumps two characters past the match: one for the
				// matched character itself and one for a possibly escaped character.
				// Only keep scanning while $end is still a valid offset into $s; a
				// backslash as the very last input character must not trigger another
				// strcspn() call with an offset beyond the string.
				$search = $ch . '\\';
				do {
					$end += strcspn( $s, $search, $end ) + 2;
				} while ( $end <= $length && $s[$end - 2] === '\\' );
				// Undo the speculative extra step, so $end points just past the quote
				$end--;
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal, search to the end, skipping over backslash escapes and
				// character classes
				for ( ; ; ) {
					// Skip until "/" (end of regexp), "[" (start of character class)
					// or "\" (escape), never scanning from beyond the end of the input
					do {
						$end += strcspn( $s, '/[\\', $end ) + 2;
					} while ( $end <= $length && $s[$end - 2] === '\\' );
					$end--;
					if ( $end - 1 >= $length || $s[$end - 1] === '/' ) {
						break;
					}
					// We found the start of a character class: skip until "]" or "\"
					do {
						$end += strcspn( $s, ']\\', $end ) + 2;
					} while ( $end <= $length && $s[$end - 2] === '\\' );
					$end--;
					if ( $end > $length ) {
						// Unterminated character class; stop instead of scanning from
						// an offset past the end of the input
						break;
					}
				}
				if ( $end > $length ) {
					// Unterminated regexp literal: truncate the token at the end of input
					$end = $length;
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError( $s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					$end += strspn( $s, '0123456789', $end + 1 ) + $decimal;
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError( $s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// An unterminated string literal leaves $end past the end of the input.
			// Clamp it so that the token length, $lineLength and the $s[$end - 1] lookup
			// below all stay within the bounds of $s.
			if ( $end > $length ) {
				$end = $length;
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP )
			{
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}

			$out .= $token;
			$lineLength += $end - $pos; // += strlen( $token )
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Output a newline after the token if required
			// This is checked before AND after switching state
			$newlineAdded = false;
			if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineBefore[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
				$newlineAdded = true;
			}

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}

			// Check for newline insertion again
			if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
			}
		}
		return $out;
	}

	/**
	 * Called by minify() when a malformed numeric literal is found. All three
	 * arguments are currently ignored.
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset in $fullJavascript passed by the caller
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false; minify() returns this value to its caller
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}