fc815e2a9ddbd03a5863f92e853c3a911bddba76
[lhc/web/wiklou.git] / includes / libs / JavaScriptMinifier.php
1 <?php
2 /**
3 * JavaScript Minifier
4 *
5 * This class is meant to safely minify javascript code, while leaving syntactically correct
6 * programs intact. Other libraries, such as JSMin require a certain coding style to work
7 * correctly. OTOH, libraries like jsminplus, that do parse the code correctly are rather
8 * slow, because they construct a complete parse tree before outputting the code minified.
9 * So this class is meant to allow arbitrary (but syntactically correct) input, while being
10 * fast enough to be used for on-the-fly minifying.
11 *
12 * Author: Paul Copperman <paul.copperman@gmail.com>
13 * License: choose any of Apache, MIT, GPL, LGPL
14 */
15
class JavaScriptMinifier {

	/* Class constants */
	/* Parsing states.
	 * The state machine is only necessary to decide whether to parse a slash as division
	 * operator or as regexp literal.
	 * States are named after the next expected item. We only distinguish states when the
	 * distinction is relevant for our purpose.
	 */
	const STATEMENT                = 0;
	const CONDITION                = 1;
	const PROPERTY_ASSIGNMENT      = 2;
	const EXPRESSION               = 3;
	const EXPRESSION_NO_NL         = 4; // only relevant for semicolon insertion
	const EXPRESSION_OP            = 5;
	const EXPRESSION_FUNC          = 6;
	const EXPRESSION_TERNARY       = 7; // used to determine the role of a colon
	const EXPRESSION_TERNARY_OP    = 8;
	const EXPRESSION_TERNARY_FUNC  = 9;
	const PAREN_EXPRESSION         = 10; // expression which is not on the top level
	const PAREN_EXPRESSION_OP      = 11;
	const PAREN_EXPRESSION_FUNC    = 12;
	const PROPERTY_EXPRESSION      = 13; // expression which is within an object literal
	const PROPERTY_EXPRESSION_OP   = 14;
	const PROPERTY_EXPRESSION_FUNC = 15;

	/* Token types */
	const TYPE_UN_OP       = 1; // unary operators
	const TYPE_INCR_OP     = 2; // ++ and --
	const TYPE_BIN_OP      = 3; // binary operators
	const TYPE_ADD_OP      = 4; // + and - which can be either unary or binary ops
	const TYPE_HOOK        = 5; // ?
	const TYPE_COLON       = 6; // :
	const TYPE_COMMA       = 7; // ,
	const TYPE_SEMICOLON   = 8; // ;
	const TYPE_BRACE_OPEN  = 9; // {
	const TYPE_BRACE_CLOSE = 10; // }
	const TYPE_PAREN_OPEN  = 11; // ( and [
	const TYPE_PAREN_CLOSE = 12; // ) and ]
	const TYPE_RETURN      = 13; // keywords: break, continue, return, throw
	const TYPE_IF          = 14; // keywords: catch, for, with, switch, while, if
	const TYPE_DO          = 15; // keywords: case, var, finally, else, do, try
	const TYPE_FUNC        = 16; // keywords: function
	const TYPE_LITERAL     = 17; // all literals, identifiers and unrecognised tokens

	// Sanity limit to avoid excessive memory usage: once the state stack holds this
	// many entries, no further states are pushed.
	const STACK_LIMIT = 1000;

	/* Static functions */

	/**
	 * Returns minified JavaScript code.
	 *
	 * The input is scanned once, token by token. Whitespace and comments are dropped,
	 * and a single space or newline is re-inserted only where it is needed to keep
	 * adjacent tokens apart or to preserve automatic semicolon insertion.
	 *
	 * NOTE: $maxLineLength isn't a strict maximum. Longer lines will be produced when
	 *       literals (e.g. quoted strings) longer than $maxLineLength are encountered
	 *       or when required to guard against semicolon insertion.
	 *
	 * @param string $s JavaScript code to minify
	 * @param bool $statementsOnOwnLine Whether to put each statement on its own line
	 * @param int $maxLineLength Maximum length of a single line, or -1 for no maximum.
	 * @return string|bool Minified code, or the result of parseError() (currently
	 *   false) when a malformed numeric literal is encountered
	 */
	public static function minify( $s, $statementsOnOwnLine = false, $maxLineLength = 1000 ) {
		// First we declare a few tables that contain our parsing rules

		// $opChars : characters, which can be combined without whitespace in between them
		$opChars = array(
			'!' => true,
			'"' => true,
			'%' => true,
			'&' => true,
			"'" => true,
			'(' => true,
			')' => true,
			'*' => true,
			'+' => true,
			',' => true,
			'-' => true,
			'.' => true,
			'/' => true,
			':' => true,
			';' => true,
			'<' => true,
			'=' => true,
			'>' => true,
			'?' => true,
			'[' => true,
			']' => true,
			'^' => true,
			'{' => true,
			'|' => true,
			'}' => true,
			'~' => true
		);

		// $tokenTypes : maps keywords and operators to their corresponding token type
		$tokenTypes = array(
			'!'          => self::TYPE_UN_OP,
			'~'          => self::TYPE_UN_OP,
			'delete'     => self::TYPE_UN_OP,
			'new'        => self::TYPE_UN_OP,
			'typeof'     => self::TYPE_UN_OP,
			'void'       => self::TYPE_UN_OP,
			'++'         => self::TYPE_INCR_OP,
			'--'         => self::TYPE_INCR_OP,
			'!='         => self::TYPE_BIN_OP,
			'!=='        => self::TYPE_BIN_OP,
			'%'          => self::TYPE_BIN_OP,
			'%='         => self::TYPE_BIN_OP,
			'&'          => self::TYPE_BIN_OP,
			'&&'         => self::TYPE_BIN_OP,
			'&='         => self::TYPE_BIN_OP,
			'*'          => self::TYPE_BIN_OP,
			'*='         => self::TYPE_BIN_OP,
			'+='         => self::TYPE_BIN_OP,
			'-='         => self::TYPE_BIN_OP,
			'.'          => self::TYPE_BIN_OP,
			'/'          => self::TYPE_BIN_OP,
			'/='         => self::TYPE_BIN_OP,
			'<'          => self::TYPE_BIN_OP,
			'<<'         => self::TYPE_BIN_OP,
			'<<='        => self::TYPE_BIN_OP,
			'<='         => self::TYPE_BIN_OP,
			'='          => self::TYPE_BIN_OP,
			'=='         => self::TYPE_BIN_OP,
			'==='        => self::TYPE_BIN_OP,
			'>'          => self::TYPE_BIN_OP,
			'>='         => self::TYPE_BIN_OP,
			'>>'         => self::TYPE_BIN_OP,
			'>>='        => self::TYPE_BIN_OP,
			'>>>'        => self::TYPE_BIN_OP,
			'>>>='       => self::TYPE_BIN_OP,
			'^'          => self::TYPE_BIN_OP,
			'^='         => self::TYPE_BIN_OP,
			'|'          => self::TYPE_BIN_OP,
			'|='         => self::TYPE_BIN_OP,
			'||'         => self::TYPE_BIN_OP,
			'in'         => self::TYPE_BIN_OP,
			'instanceof' => self::TYPE_BIN_OP,
			'+'          => self::TYPE_ADD_OP,
			'-'          => self::TYPE_ADD_OP,
			'?'          => self::TYPE_HOOK,
			':'          => self::TYPE_COLON,
			','          => self::TYPE_COMMA,
			';'          => self::TYPE_SEMICOLON,
			'{'          => self::TYPE_BRACE_OPEN,
			'}'          => self::TYPE_BRACE_CLOSE,
			'('          => self::TYPE_PAREN_OPEN,
			'['          => self::TYPE_PAREN_OPEN,
			')'          => self::TYPE_PAREN_CLOSE,
			']'          => self::TYPE_PAREN_CLOSE,
			'break'      => self::TYPE_RETURN,
			'continue'   => self::TYPE_RETURN,
			'return'     => self::TYPE_RETURN,
			'throw'      => self::TYPE_RETURN,
			'catch'      => self::TYPE_IF,
			'for'        => self::TYPE_IF,
			'if'         => self::TYPE_IF,
			'switch'     => self::TYPE_IF,
			'while'      => self::TYPE_IF,
			'with'       => self::TYPE_IF,
			'case'       => self::TYPE_DO,
			'do'         => self::TYPE_DO,
			'else'       => self::TYPE_DO,
			'finally'    => self::TYPE_DO,
			'try'        => self::TYPE_DO,
			'var'        => self::TYPE_DO,
			'function'   => self::TYPE_FUNC
		);

		// $goto : This is the main table for our state machine. For every state/token pair
		//         the following state is defined. When no rule exists for a given pair,
		//         the state is left unchanged.
		$goto = array(
			self::STATEMENT => array(
				self::TYPE_UN_OP      => self::EXPRESSION,
				self::TYPE_INCR_OP    => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_RETURN     => self::EXPRESSION_NO_NL,
				self::TYPE_IF         => self::CONDITION,
				self::TYPE_FUNC       => self::CONDITION,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_COLON      => self::PROPERTY_EXPRESSION,
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION,
				self::TYPE_ADD_OP     => self::EXPRESSION,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COLON      => self::STATEMENT,
				self::TYPE_COMMA      => self::EXPRESSION,
				self::TYPE_SEMICOLON  => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::EXPRESSION_TERNARY_FUNC,
				self::TYPE_LITERAL    => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_BIN_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_ADD_OP     => self::EXPRESSION_TERNARY,
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_COMMA      => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PAREN_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PAREN_EXPRESSION,
				self::TYPE_ADD_OP     => self::PAREN_EXPRESSION,
				self::TYPE_HOOK       => self::PAREN_EXPRESSION,
				self::TYPE_COLON      => self::PAREN_EXPRESSION,
				self::TYPE_COMMA      => self::PAREN_EXPRESSION,
				self::TYPE_SEMICOLON  => self::PAREN_EXPRESSION,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION,
				self::TYPE_FUNC       => self::PROPERTY_EXPRESSION_FUNC,
				self::TYPE_LITERAL    => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_BIN_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_ADD_OP     => self::PROPERTY_EXPRESSION,
				self::TYPE_HOOK       => self::PROPERTY_EXPRESSION,
				self::TYPE_COMMA      => self::PROPERTY_ASSIGNMENT,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT
			)
		);

		// $push : This table contains the rules for when to push a state onto the stack.
		//         The pushed state is the state to return to when the corresponding
		//         closing token is found
		$push = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN => self::STATEMENT,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::CONDITION => array(
				self::TYPE_PAREN_OPEN => self::STATEMENT
			),
			self::PROPERTY_ASSIGNMENT => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT
			),
			self::EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_NO_NL => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_OP
			),
			self::EXPRESSION_TERNARY => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_OP => array(
				self::TYPE_HOOK       => self::EXPRESSION_TERNARY,
				self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::EXPRESSION_TERNARY_FUNC => array(
				self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP
			),
			self::PAREN_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PAREN_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP,
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_OP => array(
				self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP
			),
			self::PROPERTY_EXPRESSION_FUNC => array(
				self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP
			)
		);

		// $pop : Rules for when to pop a state from the stack
		$pop = array(
			self::STATEMENT              => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_ASSIGNMENT    => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION             => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_NO_NL       => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_OP          => array( self::TYPE_BRACE_CLOSE => true ),
			self::EXPRESSION_TERNARY_OP  => array( self::TYPE_COLON       => true ),
			self::PAREN_EXPRESSION       => array( self::TYPE_PAREN_CLOSE => true ),
			self::PAREN_EXPRESSION_OP    => array( self::TYPE_PAREN_CLOSE => true ),
			self::PROPERTY_EXPRESSION    => array( self::TYPE_BRACE_CLOSE => true ),
			self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true )
		);

		// $semicolon : Rules for when a semicolon insertion is appropriate
		$semicolon = array(
			self::EXPRESSION_NO_NL => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_ADD_OP     => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_PAREN_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			),
			self::EXPRESSION_OP => array(
				self::TYPE_UN_OP      => true,
				self::TYPE_INCR_OP    => true,
				self::TYPE_BRACE_OPEN => true,
				self::TYPE_RETURN     => true,
				self::TYPE_IF         => true,
				self::TYPE_DO         => true,
				self::TYPE_FUNC       => true,
				self::TYPE_LITERAL    => true
			)
		);

		// Rules for when newlines should be inserted if
		// $statementsOnOwnLine is enabled.
		// $newlineBefore is checked before switching state,
		// $newlineAfter is checked after.
		// In both cases the newline is written after the token that was just output.
		$newlineBefore = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_CLOSE => true,
			),
		);
		$newlineAfter = array(
			self::STATEMENT => array(
				self::TYPE_BRACE_OPEN  => true,
				self::TYPE_PAREN_CLOSE => true,
				self::TYPE_SEMICOLON   => true,
			),
		);

		// $divStates : Contains all states that can be followed by a division operator
		$divStates = array(
			self::EXPRESSION_OP          => true,
			self::EXPRESSION_TERNARY_OP  => true,
			self::PAREN_EXPRESSION_OP    => true,
			self::PROPERTY_EXPRESSION_OP => true
		);

		// Here's where the minifying takes place: Loop through the input, looking for tokens
		// and output them to $out, taking actions to the above defined rules when appropriate.
		$out = '';
		$pos = 0;
		$length = strlen( $s );
		$lineLength = 0;
		$newlineFound = true;
		$state = self::STATEMENT;
		$stack = array();
		$last = ';'; // Pretend that we have already seen a semicolon
		while ( $pos < $length ) {
			// First, skip over any whitespace and multiline comments, recording whether we
			// found any newline character
			$skip = strspn( $s, " \t\n\r\xb\xc", $pos );
			if ( !$skip ) {
				$ch = $s[$pos];
				if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) {
					// Multiline comment. Search for the end token or EOT.
					$end = strpos( $s, '*/', $pos + 2 );
					$skip = $end === false ? $length - $pos : $end - $pos + 2;
				}
			}
			if ( $skip ) {
				// The semicolon insertion mechanism needs to know whether there was a newline
				// between two tokens, so record it now.
				if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) {
					$newlineFound = true;
				}
				$pos += $skip;
				continue;
			}
			// Handle C++-style comments and html comments, which are treated as single line
			// comments by the browser, regardless of whether the end tag is on the same line.
			// Handle --> the same way, but only if it's at the beginning of the line
			if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' )
				|| ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' )
				|| ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' )
			) {
				// Skip to the line end; the line break itself is handled as whitespace
				// on the next iteration.
				$pos += strcspn( $s, "\r\n", $pos );
				continue;
			}

			// Find out which kind of token we're handling. $end will point past the end of it.
			$end = $pos + 1;
			if ( $ch === "'" || $ch === '"' ) {
				// String literal: search to the closing quote, skipping over backslash
				// escapes. Each pass speculatively adds 2 so that, when the scan stopped
				// on a backslash, the next pass starts after the escaped character.
				$search = $ch . '\\';
				do {
					$end += strcspn( $s, $search, $end ) + 2;
				} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
				// Undo the speculative add, keeping only the closing quote
				$end--;
				if ( $end > $length ) {
					// Unterminated literal: the scan ran off the end of the input.
					// Clamp so that the token ends exactly at EOT.
					$end = $length;
				}
			// We have to distinguish between regexp literals and division operators
			// A division operator is only possible in certain states
			} elseif ( $ch === '/' && !isset( $divStates[$state] ) ) {
				// Regexp literal, search to the end, skipping over backslash escapes and
				// character classes
				for ( ; ; ) {
					// Scan up to the next '/', '[' or backslash escape
					do {
						$end += strcspn( $s, '/[\\', $end ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					$end--;
					if ( $end - 1 >= $length || $s[$end - 1] === '/' ) {
						// Found the closing slash, or ran out of input
						break;
					}
					// We stopped on '[': scan to the end of the character class, inside
					// which an unescaped '/' does not terminate the literal
					do {
						$end += strcspn( $s, ']\\', $end ) + 2;
					} while ( $end - 2 < $length && $s[$end - 2] === '\\' );
					$end--;
				}
				// Search past the regexp modifiers (gi)
				while ( $end < $length && ctype_alpha( $s[$end] ) ) {
					$end++;
				}
				if ( $end > $length ) {
					// Unterminated literal: clamp the token to the end of the input
					$end = $length;
				}
			} elseif (
				$ch === '0'
				&& ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' )
			) {
				// Hex numeric literal
				$end++; // x or X
				$len = strspn( $s, '0123456789ABCDEFabcdef', $end );
				if ( !$len ) {
					return self::parseError( $s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' );
				}
				$end += $len;
			} elseif (
				ctype_digit( $ch )
				|| ( $ch === '.' && $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) )
			) {
				// Decimal numeric literal: digits, optional fraction, optional exponent
				$end += strspn( $s, '0123456789', $end );
				$decimal = strspn( $s, '.', $end );
				if ( $decimal ) {
					if ( $decimal > 2 ) {
						return self::parseError( $s, $end, 'The number has too many decimal points' );
					}
					// Consume one decimal point together with the fraction digits that
					// follow it, so that e.g. "1.5e3" is handled as a single token.
					// A second consecutive dot (as in "1..toString()") is left for the
					// next token.
					$end += 1 + strspn( $s, '0123456789', $end + 1 );
				}
				$exponent = strspn( $s, 'eE', $end );
				if ( $exponent ) {
					if ( $exponent > 1 ) {
						return self::parseError( $s, $end, 'Number with several E' );
					}
					$end++;

					// + sign is optional; - sign is required.
					$end += strspn( $s, '-+', $end );
					$len = strspn( $s, '0123456789', $end );
					if ( !$len ) {
						return self::parseError( $s, $pos, 'No decimal digits after e, how many zeroes should be added?' );
					}
					$end += $len;
				}
			} elseif ( isset( $opChars[$ch] ) ) {
				// Punctuation character. Search for the longest matching operator.
				while (
					$end < $length
					&& isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] )
				) {
					$end++;
				}
			} else {
				// Identifier or reserved word. Search for the end by excluding whitespace and
				// punctuation.
				$end += strcspn( $s, " \t\n.;,=<>+-{}()[]?:*/%'\"!&|^~\xb\xc\r", $end );
			}

			// Now get the token type from our type array
			$token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token )
			$type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL;

			if ( $newlineFound && isset( $semicolon[$state][$type] ) ) {
				// This token triggers the semicolon insertion mechanism of javascript. While we
				// could add the ; token here ourselves, keeping the newline has a few advantages.
				$out .= "\n";
				$state = self::STATEMENT;
				$lineLength = 0;
			} elseif ( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength &&
				!isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP )
			{
				// This line would get too long if we added $token, so add a newline first.
				// Only do this if it won't trigger semicolon insertion and if it won't
				// put a postfix increment operator on its own line, which is illegal in js.
				$out .= "\n";
				$lineLength = 0;
			// Check, whether we have to separate the token from the last one with whitespace
			} elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) {
				$out .= ' ';
				$lineLength++;
			// Don't accidentally create ++, -- or // tokens
			} elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) {
				$out .= ' ';
				$lineLength++;
			}

			$out .= $token;
			$lineLength += $end - $pos; // += strlen( $token )
			$last = $s[$end - 1];
			$pos = $end;
			$newlineFound = false;

			// Output a newline after the token if required
			// This is checked before AND after switching state
			$newlineAdded = false;
			if ( $statementsOnOwnLine && isset( $newlineBefore[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
				$newlineAdded = true;
			}

			// Now that we have output our token, transition into the new state.
			if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) {
				$stack[] = $push[$state][$type];
			}
			if ( $stack && isset( $pop[$state][$type] ) ) {
				$state = array_pop( $stack );
			} elseif ( isset( $goto[$state][$type] ) ) {
				$state = $goto[$state][$type];
			}

			// Check for newline insertion again
			if ( $statementsOnOwnLine && !$newlineAdded && isset( $newlineAfter[$state][$type] ) ) {
				$out .= "\n";
				$lineLength = 0;
			}
		}
		return $out;
	}

	/**
	 * Called by minify() when a malformed numeric literal is found; its return value
	 * becomes the return value of minify().
	 *
	 * @param string $fullJavascript The complete input that was being minified
	 * @param int $position Byte offset in $fullJavascript at which the problem was detected
	 * @param string $errorMsg Human-readable description of the problem
	 * @return bool Always false
	 */
	public static function parseError( $fullJavascript, $position, $errorMsg ) {
		// TODO: Handle the error: trigger_error, throw exception, return false...
		return false;
	}
}