X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=blobdiff_plain;f=includes%2Flibs%2FJavaScriptMinifier.php;h=5ecfc7cc9baba23c42c6ef48195c239dc0b7c3bc;hp=ea4755efccb0e6af05189d0b72639ef6566fdcf5;hb=7065200b036d4bbf0c46f4b236d761a79b57215e;hpb=544122a6c2dc7e7e383af8ebabc2f2dd3977069a diff --git a/includes/libs/JavaScriptMinifier.php b/includes/libs/JavaScriptMinifier.php index ea4755efcc..5ecfc7cc9b 100644 --- a/includes/libs/JavaScriptMinifier.php +++ b/includes/libs/JavaScriptMinifier.php @@ -1,5 +1,4 @@ true, '"' => true, '%' => true, @@ -105,10 +106,10 @@ class JavaScriptMinifier { '|' => true, '}' => true, '~' => true - ); + ]; // $tokenTypes : maps keywords and operators to their corresponding token type - $tokenTypes = array( + $tokenTypes = [ '!' => self::TYPE_UN_OP, '~' => self::TYPE_UN_OP, 'delete' => self::TYPE_UN_OP, @@ -180,13 +181,13 @@ class JavaScriptMinifier { 'try' => self::TYPE_DO, 'var' => self::TYPE_DO, 'function' => self::TYPE_FUNC - ); + ]; // $goto : This is the main table for our state machine. For every state/token pair // the following state is defined. When no rule exists for a given pair, // the state is left unchanged. 
- $goto = array( - self::STATEMENT => array( + $goto = [ + self::STATEMENT => [ self::TYPE_UN_OP => self::EXPRESSION, self::TYPE_INCR_OP => self::EXPRESSION, self::TYPE_ADD_OP => self::EXPRESSION, @@ -195,29 +196,29 @@ class JavaScriptMinifier { self::TYPE_IF => self::CONDITION, self::TYPE_FUNC => self::CONDITION, self::TYPE_LITERAL => self::EXPRESSION_OP - ), - self::CONDITION => array( + ], + self::CONDITION => [ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION - ), - self::PROPERTY_ASSIGNMENT => array( + ], + self::PROPERTY_ASSIGNMENT => [ self::TYPE_COLON => self::PROPERTY_EXPRESSION, self::TYPE_BRACE_OPEN => self::STATEMENT - ), - self::EXPRESSION => array( + ], + self::EXPRESSION => [ self::TYPE_SEMICOLON => self::STATEMENT, self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION, self::TYPE_FUNC => self::EXPRESSION_FUNC, self::TYPE_LITERAL => self::EXPRESSION_OP - ), - self::EXPRESSION_NO_NL => array( + ], + self::EXPRESSION_NO_NL => [ self::TYPE_SEMICOLON => self::STATEMENT, self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION, self::TYPE_FUNC => self::EXPRESSION_FUNC, self::TYPE_LITERAL => self::EXPRESSION_OP - ), - self::EXPRESSION_OP => array( + ], + self::EXPRESSION_OP => [ self::TYPE_BIN_OP => self::EXPRESSION, self::TYPE_ADD_OP => self::EXPRESSION, self::TYPE_HOOK => self::EXPRESSION_TERNARY, @@ -225,33 +226,33 @@ class JavaScriptMinifier { self::TYPE_COMMA => self::EXPRESSION, self::TYPE_SEMICOLON => self::STATEMENT, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION - ), - self::EXPRESSION_FUNC => array( + ], + self::EXPRESSION_FUNC => [ self::TYPE_BRACE_OPEN => self::STATEMENT - ), - self::EXPRESSION_TERNARY => array( + ], + self::EXPRESSION_TERNARY => [ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION, self::TYPE_FUNC => self::EXPRESSION_TERNARY_FUNC, self::TYPE_LITERAL => self::EXPRESSION_TERNARY_OP - ), - 
self::EXPRESSION_TERNARY_OP => array( + ], + self::EXPRESSION_TERNARY_OP => [ self::TYPE_BIN_OP => self::EXPRESSION_TERNARY, self::TYPE_ADD_OP => self::EXPRESSION_TERNARY, self::TYPE_HOOK => self::EXPRESSION_TERNARY, self::TYPE_COMMA => self::EXPRESSION_TERNARY, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION - ), - self::EXPRESSION_TERNARY_FUNC => array( + ], + self::EXPRESSION_TERNARY_FUNC => [ self::TYPE_BRACE_OPEN => self::STATEMENT - ), - self::PAREN_EXPRESSION => array( + ], + self::PAREN_EXPRESSION => [ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION, self::TYPE_FUNC => self::PAREN_EXPRESSION_FUNC, self::TYPE_LITERAL => self::PAREN_EXPRESSION_OP - ), - self::PAREN_EXPRESSION_OP => array( + ], + self::PAREN_EXPRESSION_OP => [ self::TYPE_BIN_OP => self::PAREN_EXPRESSION, self::TYPE_ADD_OP => self::PAREN_EXPRESSION, self::TYPE_HOOK => self::PAREN_EXPRESSION, @@ -259,107 +260,107 @@ class JavaScriptMinifier { self::TYPE_COMMA => self::PAREN_EXPRESSION, self::TYPE_SEMICOLON => self::PAREN_EXPRESSION, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION - ), - self::PAREN_EXPRESSION_FUNC => array( + ], + self::PAREN_EXPRESSION_FUNC => [ self::TYPE_BRACE_OPEN => self::STATEMENT - ), - self::PROPERTY_EXPRESSION => array( + ], + self::PROPERTY_EXPRESSION => [ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION, self::TYPE_FUNC => self::PROPERTY_EXPRESSION_FUNC, self::TYPE_LITERAL => self::PROPERTY_EXPRESSION_OP - ), - self::PROPERTY_EXPRESSION_OP => array( + ], + self::PROPERTY_EXPRESSION_OP => [ self::TYPE_BIN_OP => self::PROPERTY_EXPRESSION, self::TYPE_ADD_OP => self::PROPERTY_EXPRESSION, self::TYPE_HOOK => self::PROPERTY_EXPRESSION, self::TYPE_COMMA => self::PROPERTY_ASSIGNMENT, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION - ), - self::PROPERTY_EXPRESSION_FUNC => array( + ], + self::PROPERTY_EXPRESSION_FUNC => [ self::TYPE_BRACE_OPEN => self::STATEMENT - ) - ); + ] + 
]; // $push : This table contains the rules for when to push a state onto the stack. // The pushed state is the state to return to when the corresponding // closing token is found - $push = array( - self::STATEMENT => array( + $push = [ + self::STATEMENT => [ self::TYPE_BRACE_OPEN => self::STATEMENT, self::TYPE_PAREN_OPEN => self::EXPRESSION_OP - ), - self::CONDITION => array( + ], + self::CONDITION => [ self::TYPE_PAREN_OPEN => self::STATEMENT - ), - self::PROPERTY_ASSIGNMENT => array( + ], + self::PROPERTY_ASSIGNMENT => [ self::TYPE_BRACE_OPEN => self::PROPERTY_ASSIGNMENT - ), - self::EXPRESSION => array( + ], + self::EXPRESSION => [ self::TYPE_BRACE_OPEN => self::EXPRESSION_OP, self::TYPE_PAREN_OPEN => self::EXPRESSION_OP - ), - self::EXPRESSION_NO_NL => array( + ], + self::EXPRESSION_NO_NL => [ self::TYPE_BRACE_OPEN => self::EXPRESSION_OP, self::TYPE_PAREN_OPEN => self::EXPRESSION_OP - ), - self::EXPRESSION_OP => array( + ], + self::EXPRESSION_OP => [ self::TYPE_HOOK => self::EXPRESSION, self::TYPE_PAREN_OPEN => self::EXPRESSION_OP - ), - self::EXPRESSION_FUNC => array( + ], + self::EXPRESSION_FUNC => [ self::TYPE_BRACE_OPEN => self::EXPRESSION_OP - ), - self::EXPRESSION_TERNARY => array( + ], + self::EXPRESSION_TERNARY => [ self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP, self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP - ), - self::EXPRESSION_TERNARY_OP => array( + ], + self::EXPRESSION_TERNARY_OP => [ self::TYPE_HOOK => self::EXPRESSION_TERNARY, self::TYPE_PAREN_OPEN => self::EXPRESSION_TERNARY_OP - ), - self::EXPRESSION_TERNARY_FUNC => array( + ], + self::EXPRESSION_TERNARY_FUNC => [ self::TYPE_BRACE_OPEN => self::EXPRESSION_TERNARY_OP - ), - self::PAREN_EXPRESSION => array( + ], + self::PAREN_EXPRESSION => [ self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP, self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP - ), - self::PAREN_EXPRESSION_OP => array( + ], + self::PAREN_EXPRESSION_OP => [ self::TYPE_PAREN_OPEN => self::PAREN_EXPRESSION_OP - ), - 
self::PAREN_EXPRESSION_FUNC => array( + ], + self::PAREN_EXPRESSION_FUNC => [ self::TYPE_BRACE_OPEN => self::PAREN_EXPRESSION_OP - ), - self::PROPERTY_EXPRESSION => array( + ], + self::PROPERTY_EXPRESSION => [ self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP, self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP - ), - self::PROPERTY_EXPRESSION_OP => array( + ], + self::PROPERTY_EXPRESSION_OP => [ self::TYPE_PAREN_OPEN => self::PROPERTY_EXPRESSION_OP - ), - self::PROPERTY_EXPRESSION_FUNC => array( + ], + self::PROPERTY_EXPRESSION_FUNC => [ self::TYPE_BRACE_OPEN => self::PROPERTY_EXPRESSION_OP - ) - ); + ] + ]; // $pop : Rules for when to pop a state from the stack - $pop = array( - self::STATEMENT => array( self::TYPE_BRACE_CLOSE => true ), - self::PROPERTY_ASSIGNMENT => array( self::TYPE_BRACE_CLOSE => true ), - self::EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ), - self::EXPRESSION_NO_NL => array( self::TYPE_BRACE_CLOSE => true ), - self::EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true ), - self::EXPRESSION_TERNARY_OP => array( self::TYPE_COLON => true ), - self::PAREN_EXPRESSION => array( self::TYPE_PAREN_CLOSE => true ), - self::PAREN_EXPRESSION_OP => array( self::TYPE_PAREN_CLOSE => true ), - self::PROPERTY_EXPRESSION => array( self::TYPE_BRACE_CLOSE => true ), - self::PROPERTY_EXPRESSION_OP => array( self::TYPE_BRACE_CLOSE => true ) - ); + $pop = [ + self::STATEMENT => [ self::TYPE_BRACE_CLOSE => true ], + self::PROPERTY_ASSIGNMENT => [ self::TYPE_BRACE_CLOSE => true ], + self::EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ], + self::EXPRESSION_NO_NL => [ self::TYPE_BRACE_CLOSE => true ], + self::EXPRESSION_OP => [ self::TYPE_BRACE_CLOSE => true ], + self::EXPRESSION_TERNARY_OP => [ self::TYPE_COLON => true ], + self::PAREN_EXPRESSION => [ self::TYPE_PAREN_CLOSE => true ], + self::PAREN_EXPRESSION_OP => [ self::TYPE_PAREN_CLOSE => true ], + self::PROPERTY_EXPRESSION => [ self::TYPE_BRACE_CLOSE => true ], + self::PROPERTY_EXPRESSION_OP => 
[ self::TYPE_BRACE_CLOSE => true ] + ]; // $semicolon : Rules for when a semicolon insertion is appropriate - $semicolon = array( - self::EXPRESSION_NO_NL => array( + $semicolon = [ + self::EXPRESSION_NO_NL => [ self::TYPE_UN_OP => true, self::TYPE_INCR_OP => true, self::TYPE_ADD_OP => true, @@ -370,8 +371,8 @@ class JavaScriptMinifier { self::TYPE_DO => true, self::TYPE_FUNC => true, self::TYPE_LITERAL => true - ), - self::EXPRESSION_OP => array( + ], + self::EXPRESSION_OP => [ self::TYPE_UN_OP => true, self::TYPE_INCR_OP => true, self::TYPE_BRACE_OPEN => true, @@ -380,16 +381,16 @@ class JavaScriptMinifier { self::TYPE_DO => true, self::TYPE_FUNC => true, self::TYPE_LITERAL => true - ) - ); + ] + ]; // $divStates : Contains all states that can be followed by a division operator - $divStates = array( + $divStates = [ self::EXPRESSION_OP => true, self::EXPRESSION_TERNARY_OP => true, self::PAREN_EXPRESSION_OP => true, self::PROPERTY_EXPRESSION_OP => true - ); + ]; // Here's where the minifying takes place: Loop through the input, looking for tokens // and output them to $out, taking actions to the above defined rules when appropriate. @@ -399,24 +400,24 @@ class JavaScriptMinifier { $lineLength = 0; $newlineFound = true; $state = self::STATEMENT; - $stack = array(); + $stack = []; $last = ';'; // Pretend that we have seen a semicolon yet - while( $pos < $length ) { + while ( $pos < $length ) { // First, skip over any whitespace and multiline comments, recording whether we // found any newline character $skip = strspn( $s, " \t\n\r\xb\xc", $pos ); - if( !$skip ) { + if ( !$skip ) { $ch = $s[$pos]; - if( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) { + if ( $ch === '/' && substr( $s, $pos, 2 ) === '/*' ) { // Multiline comment. Search for the end token or EOT. $end = strpos( $s, '*/', $pos + 2 ); $skip = $end === false ? 
$length - $pos : $end - $pos + 2; } } - if( $skip ) { + if ( $skip ) { // The semicolon insertion mechanism needs to know whether there was a newline // between two tokens, so record it now. - if( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) { + if ( !$newlineFound && strcspn( $s, "\r\n", $pos, $skip ) !== $skip ) { $newlineFound = true; } $pos += $skip; @@ -425,7 +426,7 @@ class JavaScriptMinifier { // Handle C++-style comments and html comments, which are treated as single line // comments by the browser, regardless of whether the end tag is on the same line. // Handle --> the same way, but only if it's at the beginning of the line - if( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' ) + if ( ( $ch === '/' && substr( $s, $pos, 2 ) === '//' ) || ( $ch === '<' && substr( $s, $pos, 4 ) === '<!--' ) || ( $ch === '-' && $newlineFound && substr( $s, $pos, 3 ) === '-->' ) ) { @@ -433,71 +434,113 @@ class JavaScriptMinifier { continue; } - // Find out which kind of token we're handling. $end will point past the end of it. + // Find out which kind of token we're handling. + // Note: $end must point past the end of the current token + // so that `substr($s, $pos, $end - $pos)` would be the entire token. + // In other words, $end will be the offset of the last relevant character + // in the stream + 1, or simply put: The offset of the first character + // of any next token in the stream. $end = $pos + 1; // Handle string literals - if( $ch === "'" || $ch === '"' ) { + if ( $ch === "'" || $ch === '"' ) { // Search to the end of the string literal, skipping over backslash escapes $search = $ch . '\\'; do{ + // Speculatively add 2 to the end so that if we see a backslash, + // the next iteration will start 2 characters further (one for the + // backslash, one for the escaped character). + // We'll correct this outside the loop.
+ $end += strcspn( $s, $search, $end ) + 2; - } while( $end - 2 < $length && $s[$end - 2] === '\\' ); + // If the last character in our search for a quote or a backslash + // matched a backslash and we haven't reached the end, keep searching... + } while ( $end - 2 < $length && $s[$end - 2] === '\\' ); + // Correction (1): Undo speculative add, keep only one (end of string literal) $end--; + if ( $end > $length ) { + // Correction (2): Loop wrongly assumed an end quote ended the search, + // but search ended because we've reached the end. Correct $end. + // TODO: This is invalid and should throw. + $end--; + } // We have to distinguish between regexp literals and division operators // A division operator is only possible in certain states - } elseif( $ch === '/' && !isset( $divStates[$state] ) ) { + } elseif ( $ch === '/' && !isset( $divStates[$state] ) ) { // Regexp literal - for( ; ; ) { + for ( ; ; ) { + // Search until we find "/" (end of regexp), "\" (backslash escapes), + // or "[" (start of character classes). do{ - // Skip until we find "/" (end of regexp), "\" (backslash escapes), - // or "[" (start of character classes). + // Speculatively add 2 to ensure next iteration skips + // over backslash and escaped character. + // We'll correct this outside the loop. $end += strcspn( $s, '/[\\', $end ) + 2; // If backslash escape, keep searching... - } while( $end - 2 < $length && $s[$end - 2] === '\\' ); + } while ( $end - 2 < $length && $s[$end - 2] === '\\' ); + // Correction (1): Undo speculative add, keep only one (end of regexp) $end--; - // If the end, stop here. - if( $end - 1 >= $length || $s[$end - 1] === '/' ) { + if ( $end > $length ) { + // Correction (2): Loop wrongly assumed end slash was seen + // String ended without end of regexp. Correct $end. + // TODO: This is invalid and should throw.
+ $end--; + break; + } + if ( $s[$end - 1] === '/' ) { break; } // (Implicit else), we must've found the start of a char class, // skip until we find "]" (end of char class), or "\" (backslash escape) do{ + // Speculatively add 2 for backslash escape. + // We'll subtract one outside the loop. $end += strcspn( $s, ']\\', $end ) + 2; // If backslash escape, keep searching... - } while( $end - 2 < $length && $s[$end - 2] === '\\' ); + } while ( $end - 2 < $length && $s[$end - 2] === '\\' ); + // Correction (1): Undo speculative add, keep only one (end of regexp) $end--; - }; + if ( $end > $length ) { + // Correction (2): Loop wrongly assumed "]" was seen + // String ended without ending char class or regexp. Correct $end. + // TODO: This is invalid and should throw. + $end--; + break; + } + } // Search past the regexp modifiers (gi) - while( $end < $length && ctype_alpha( $s[$end] ) ) { + while ( $end < $length && ctype_alpha( $s[$end] ) ) { $end++; } - } elseif( + } elseif ( $ch === '0' - && ($pos + 1 < $length) && ($s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' ) + && ( $pos + 1 < $length ) && ( $s[$pos + 1] === 'x' || $s[$pos + 1] === 'X' ) ) { // Hex numeric literal $end++; // x or X $len = strspn( $s, '0123456789ABCDEFabcdef', $end ); if ( !$len ) { - return self::parseError($s, $pos, 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' ); + return self::parseError( + $s, + $pos, + 'Expected a hexadecimal number but found ' . substr( $s, $pos, 5 ) . '...' + ); } $end += $len; - } elseif( + } elseif ( ctype_digit( $ch ) || ( $ch === '.'
&& $pos + 1 < $length && ctype_digit( $s[$pos + 1] ) ) ) { $end += strspn( $s, '0123456789', $end ); $decimal = strspn( $s, '.', $end ); - if ($decimal) { + if ( $decimal ) { if ( $decimal > 2 ) { - return self::parseError($s, $end, 'The number has too many decimal points' ); + return self::parseError( $s, $end, 'The number has too many decimal points' ); } $end += strspn( $s, '0123456789', $end + 1 ) + $decimal; } $exponent = strspn( $s, 'eE', $end ); - if( $exponent ) { + if ( $exponent ) { if ( $exponent > 1 ) { - return self::parseError($s, $end, 'Number with several E' ); + return self::parseError( $s, $end, 'Number with several E' ); } $end++; @@ -505,13 +548,17 @@ class JavaScriptMinifier { $end += strspn( $s, '-+', $end ); $len = strspn( $s, '0123456789', $end ); if ( !$len ) { - return self::parseError($s, $pos, 'No decimal digits after e, how many zeroes should be added?' ); + return self::parseError( + $s, + $pos, + 'No decimal digits after e, how many zeroes should be added?' + ); } $end += $len; } - } elseif( isset( $opChars[$ch] ) ) { + } elseif ( isset( $opChars[$ch] ) ) { // Punctuation character. Search for the longest matching operator. - while( + while ( $end < $length && isset( $tokenTypes[substr( $s, $pos, $end - $pos + 1 )] ) ) { @@ -527,26 +574,25 @@ class JavaScriptMinifier { $token = substr( $s, $pos, $end - $pos ); // so $end - $pos == strlen( $token ) $type = isset( $tokenTypes[$token] ) ? $tokenTypes[$token] : self::TYPE_LITERAL; - if( $newlineFound && isset( $semicolon[$state][$type] ) ) { + if ( $newlineFound && isset( $semicolon[$state][$type] ) ) { // This token triggers the semicolon insertion mechanism of javascript. While we // could add the ; token here ourselves, keeping the newline has a few advantages. 
$out .= "\n"; $state = self::STATEMENT; $lineLength = 0; - } elseif( $maxLineLength > 0 && $lineLength + $end - $pos > $maxLineLength && - !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) - { + } elseif ( $lineLength + $end - $pos > self::MAX_LINE_LENGTH && + !isset( $semicolon[$state][$type] ) && $type !== self::TYPE_INCR_OP ) { // This line would get too long if we added $token, so add a newline first. // Only do this if it won't trigger semicolon insertion and if it won't // put a postfix increment operator on its own line, which is illegal in js. $out .= "\n"; $lineLength = 0; // Check, whether we have to separate the token from the last one with whitespace - } elseif( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) { + } elseif ( !isset( $opChars[$last] ) && !isset( $opChars[$ch] ) ) { $out .= ' '; $lineLength++; // Don't accidentally create ++, -- or // tokens - } elseif( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) { + } elseif ( $last === $ch && ( $ch === '+' || $ch === '-' || $ch === '/' ) ) { $out .= ' '; $lineLength++; } @@ -566,19 +612,19 @@ class JavaScriptMinifier { $newlineFound = false; // Now that we have output our token, transition into the new state. - if( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) { + if ( isset( $push[$state][$type] ) && count( $stack ) < self::STACK_LIMIT ) { $stack[] = $push[$state][$type]; } - if( $stack && isset( $pop[$state][$type] ) ) { + if ( $stack && isset( $pop[$state][$type] ) ) { $state = array_pop( $stack ); - } elseif( isset( $goto[$state][$type] ) ) { + } elseif ( isset( $goto[$state][$type] ) ) { $state = $goto[$state][$type]; } } return $out; } - static function parseError($fullJavascript, $position, $errorMsg) { + static function parseError( $fullJavascript, $position, $errorMsg ) { // TODO: Handle the error: trigger_error, throw exception, return false... return false; }