X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=blobdiff_plain;f=languages%2Futils%2FCLDRPluralRuleEvaluator.php;h=7e7208aa9daab9817ac56f5a1be13404f2209042;hp=e420cb2d40c30e14da4e660178b8c70daf2aa77d;hb=5970f6c1d9f1cbbe65eb13dcca0ce7088476e6e3;hpb=449ee32451f5e4771086ba35a0b5ffe9104a36b0 diff --git a/languages/utils/CLDRPluralRuleEvaluator.php b/languages/utils/CLDRPluralRuleEvaluator.php index e420cb2d40..7e7208aa9d 100644 --- a/languages/utils/CLDRPluralRuleEvaluator.php +++ b/languages/utils/CLDRPluralRuleEvaluator.php @@ -1,4 +1,5 @@ add( $right ); + return $range; case '..': - return new CLDRPluralRuleEvaluator_Range( $left, $right ); + return new CLDRPluralRuleEvaluatorRange( $left, $right ); default: throw new CLDRPluralRuleError( "Invalid RPN token" ); } } } - -/** - * Evaluator helper class representing a range list. - */ -class CLDRPluralRuleEvaluator_Range { - /** - * The parts - * - * @var array - */ - public $parts = array(); - - /** - * Initialize a new instance of CLDRPluralRuleEvaluator_Range - * - * @param int $start The start of the range - * @param int|bool $end The end of the range, or false if the range is not bounded. - */ - function __construct( $start, $end = false ) { - if ( $end === false ) { - $this->parts[] = $start; - } else { - $this->parts[] = array( $start, $end ); - } - } - - /** - * Determine if the given number is inside the range. - * - * @param int $number The number to check - * @param bool $integerConstraint If true, also asserts the number is an integer; otherwise, number simply has to be inside the range. - * @return bool True if the number is inside the range; otherwise, false. - */ - function isNumberIn( $number, $integerConstraint = true ) { - foreach ( $this->parts as $part ) { - if ( is_array( $part ) ) { - if ( ( !$integerConstraint || floor( $number ) === (float)$number ) - && $number >= $part[0] && $number <= $part[1] - ) { - return true; - } - } else { - if ( $number == $part ) { - return true; - } - } - } - return false; - } - - /** - * Readable alias for isNumberIn( $number, false ), and the implementation - * of the "within" operator. - * - * @param int $number The number to check - * @return bool True if the number is inside the range; otherwise, false. - */ - function isNumberWithin( $number ) { - return $this->isNumberIn( $number, false ); - } - - /** - * Add another part to this range. - * - * @param CLDRPluralRuleEvaluator_Range|int $other The part to add, either - * a range object itself or a single number. - */ - function add( $other ) { - if ( $other instanceof self ) { - $this->parts = array_merge( $this->parts, $other->parts ); - } else { - $this->parts[] = $other; - } - } - - /** - * Returns the string representation of the rule evaluator range. - * The purpose of this method is to help debugging. - * - * @return string The string representation of the rule evaluator range - */ - function __toString() { - $s = 'Range('; - foreach ( $this->parts as $i => $part ) { - if ( $i ) { - $s .= ', '; - } - if ( is_array( $part ) ) { - $s .= $part[0] . '..' . $part[1]; - } else { - $s .= $part; - } - } - $s .= ')'; - return $s; - } - -} - -/** - * Helper class for converting rules to reverse polish notation (RPN). - */ -class CLDRPluralRuleConverter { - /** - * The input string - * - * @var string - */ - public $rule; - - /** - * The current position - * - * @var int - */ - public $pos; - - /** - * The past-the-end position - * - * @var int - */ - public $end; - - /** - * The operator stack - * - * @var array - */ - public $operators = array(); - - /** - * The operand stack - * - * @var array - */ - public $operands = array(); - - /** - * Precedence levels. Note that there's no need to worry about associativity - * for the level 4 operators, since they return boolean and don't accept - * boolean inputs. - */ - static $precedence = array( - 'or' => 2, - 'and' => 3, - 'is' => 4, - 'is-not' => 4, - 'in' => 4, - 'not-in' => 4, - 'within' => 4, - 'not-within' => 4, - 'mod' => 5, - ',' => 6, - '..' => 7, - ); - - /** - * A character list defining whitespace, for use in strspn() etc. - */ - const WHITESPACE_CLASS = " \t\r\n"; - - /** - * Same for digits. Note that the grammar given in UTS #35 doesn't allow - * negative numbers or decimal separators. - */ - const NUMBER_CLASS = '0123456789'; - - /** - * A character list of symbolic operands. - */ - const OPERAND_SYMBOLS = 'nivwft'; - - /** - * An anchored regular expression which matches a word at the current offset. - */ - const WORD_REGEX = '/[a-zA-Z@]+/A'; - - /** - * Convert a rule to RPN. This is the only public entry point. - * - * @param string $rule The rule to convert - * @return string The RPN representation of the rule - */ - public static function convert( $rule ) { - $parser = new self( $rule ); - return $parser->doConvert(); - } - - /** - * Private constructor. - */ - protected function __construct( $rule ) { - $this->rule = $rule; - $this->pos = 0; - $this->end = strlen( $rule ); - } - - /** - * Do the operation. - * - * @return string The RPN representation of the rule (e.g. "5 3 mod n is") - */ - protected function doConvert() { - $expectOperator = true; - - // Iterate through all tokens, saving the operators and operands to a - // stack per Dijkstra's shunting yard algorithm. - /** @var CLDRPluralRuleConverter_Operator $token */ - while ( false !== ( $token = $this->nextToken() ) ) { - // In this grammar, there are only binary operators, so every valid - // rule string will alternate between operator and operand tokens. - $expectOperator = !$expectOperator; - - if ( $token instanceof CLDRPluralRuleConverter_Expression ) { - // Operand - if ( $expectOperator ) { - $token->error( 'unexpected operand' ); - } - $this->operands[] = $token; - continue; - } else { - // Operator - if ( !$expectOperator ) { - $token->error( 'unexpected operator' ); - } - // Resolve higher precedence levels - $lastOp = end( $this->operators ); - while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) { - $this->doOperation( $lastOp, $this->operands ); - array_pop( $this->operators ); - $lastOp = end( $this->operators ); - } - $this->operators[] = $token; - } - } - - // Finish off the stack - while ( $op = array_pop( $this->operators ) ) { - $this->doOperation( $op, $this->operands ); - } - - // Make sure the result is sane. The first case is possible for an empty - // string input, the second should be unreachable. - if ( !count( $this->operands ) ) { - $this->error( 'condition expected' ); - } elseif ( count( $this->operands ) > 1 ) { - $this->error( 'missing operator or too many operands' ); - } - - $value = $this->operands[0]; - if ( $value->type !== 'boolean' ) { - $this->error( 'the result must have a boolean type' ); - } - - return $this->operands[0]->rpn; - } - - /** - * Fetch the next token from the input string. - * - * @return CLDRPluralRuleConverter_Fragment The next token - */ - protected function nextToken() { - if ( $this->pos >= $this->end ) { - return false; - } - - // Whitespace - $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos ); - $this->pos += $length; - - if ( $this->pos >= $this->end ) { - return false; - } - - // Number - $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos ); - if ( $length !== 0 ) { - $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos ); - $this->pos += $length; - return $token; - } - - // Two-character operators - $op2 = substr( $this->rule, $this->pos, 2 ); - if ( $op2 === '..' || $op2 === '!=' ) { - $token = $this->newOperator( $op2, $this->pos, 2 ); - $this->pos += 2; - return $token; - } - - // Single-character operators - $op1 = $this->rule[$this->pos]; - if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) { - $token = $this->newOperator( $op1, $this->pos, 1 ); - $this->pos ++; - return $token; - } - - // Word - if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) { - $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' ); - } - $word1 = strtolower( $m[0] ); - $word2 = ''; - $nextTokenPos = $this->pos + strlen( $word1 ); - if ( $word1 === 'not' || $word1 === 'is' ) { - // Look ahead one word - $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos ); - if ( $nextTokenPos < $this->end - && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) - ) { - $word2 = strtolower( $m[0] ); - $nextTokenPos += strlen( $word2 ); - } - } - - // Two-word operators like "is not" take precedence over single-word operators like "is" - if ( $word2 !== '' ) { - $bothWords = "{$word1}-{$word2}"; - if ( isset( self::$precedence[$bothWords] ) ) { - $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos ); - $this->pos = $nextTokenPos; - return $token; - } - } - - // Single-word operators - if ( isset( self::$precedence[$word1] ) ) { - $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) ); - $this->pos += strlen( $word1 ); - return $token; - } - - // The single-character operand symbols - if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) { - $token = $this->newNumber( $word1, $this->pos ); - $this->pos ++; - return $token; - } - - // Samples - if ( $word1 === '@integer' || $word1 === '@decimal' ) { - // Samples are like comments, they have no effect on rule evaluation. - // They run from the first sample indicator to the end of the string. - $this->pos = $this->end; - return false; - } - - $this->error( 'unrecognised word' ); - } - - /** - * For the binary operator $op, pop its operands off the stack and push - * a fragment with rpn and type members describing the result of that - * operation. - * - * @param CLDRPluralRuleConverter_Operator $op - */ - protected function doOperation( $op ) { - if ( count( $this->operands ) < 2 ) { - $op->error( 'missing operand' ); - } - $right = array_pop( $this->operands ); - $left = array_pop( $this->operands ); - $result = $op->operate( $left, $right ); - $this->operands[] = $result; - } - - /** - * Create a numerical expression object - * - * @param string $text - * @param int $pos - * @return CLDRPluralRuleConverter_Expression The numerical expression - */ - protected function newNumber( $text, $pos ) { - return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) ); - } - - /** - * Create a binary operator - * - * @param string $type - * @param int $pos - * @param int $length - * @return CLDRPluralRuleConverter_Operator The operator - */ - protected function newOperator( $type, $pos, $length ) { - return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length ); - } - - /** - * Throw an error - */ - protected function error( $message ) { - throw new CLDRPluralRuleError( $message ); - } -} - -/** - * Helper for CLDRPluralRuleConverter. - * The base class for operators and expressions, describing a region of the input string. - */ -class CLDRPluralRuleConverter_Fragment { - public $parser, $pos, $length, $end; - - function __construct( $parser, $pos, $length ) { - $this->parser = $parser; - $this->pos = $pos; - $this->length = $length; - $this->end = $pos + $length; - } - - public function error( $message ) { - $text = $this->getText(); - throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" ); - } - - public function getText() { - return substr( $this->parser->rule, $this->pos, $this->length ); - } -} - -/** - * Helper for CLDRPluralRuleConverter. - * An expression object, representing a region of the input string (for error - * messages), the RPN notation used to evaluate it, and the result type for - * validation. - */ -class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment { - /** @var string */ - public $type; - - /** @var string */ - public $rpn; - - function __construct( $parser, $type, $rpn, $pos, $length ) { - parent::__construct( $parser, $pos, $length ); - $this->type = $type; - $this->rpn = $rpn; - } - - public function isType( $type ) { - if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) { - return true; - } - if ( $type === $this->type ) { - return true; - } - return false; - } -} - -/** - * Helper for CLDRPluralRuleConverter. - * An operator object, representing a region of the input string (for error - * messages), and the binary operator at that location. - */ -class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment { - /** @var string The name */ - public $name; - - /** - * Each op type has three characters: left operand type, right operand type and result type - * - * b = boolean - * n = number - * r = range - * - * A number is a kind of range. - * - * @var array - */ - static $opTypes = array( - 'or' => 'bbb', - 'and' => 'bbb', - 'is' => 'nnb', - 'is-not' => 'nnb', - 'in' => 'nrb', - 'not-in' => 'nrb', - 'within' => 'nrb', - 'not-within' => 'nrb', - 'mod' => 'nnn', - ',' => 'rrr', - '..' => 'nnr', - ); - - /** - * Map converting from the abbrevation to the full form. - * - * @var array - */ - static $typeSpecMap = array( - 'b' => 'boolean', - 'n' => 'number', - 'r' => 'range', - ); - - /** - * Map for converting the new operators introduced in Rev 33 to the old forms - */ - static $aliasMap = array( - '%' => 'mod', - '!=' => 'not-in', - '=' => 'in' - ); - - /** - * Initialize a new instance of a CLDRPluralRuleConverter_Operator object - * - * @param CLDRPluralRuleConverter $parser The parser - * @param string $name The operator name - * @param int $pos The length - * @param int $length - */ - function __construct( $parser, $name, $pos, $length ) { - parent::__construct( $parser, $pos, $length ); - if ( isset( self::$aliasMap[$name] ) ) { - $name = self::$aliasMap[$name]; - } - $this->name = $name; - } - - /** - * Compute the operation - * - * @param CLDRPluralRuleConverter_Expression $left The left part of the expression - * @param CLDRPluralRuleConverter_Expression $right The right part of the expression - * @return CLDRPluralRuleConverter_Expression The result of the operation - */ - public function operate( $left, $right ) { - $typeSpec = self::$opTypes[$this->name]; - - $leftType = self::$typeSpecMap[$typeSpec[0]]; - $rightType = self::$typeSpecMap[$typeSpec[1]]; - $resultType = self::$typeSpecMap[$typeSpec[2]]; - - $start = min( $this->pos, $left->pos, $right->pos ); - $end = max( $this->end, $left->end, $right->end ); - $length = $end - $start; - - $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType, - "{$left->rpn} {$right->rpn} {$this->name}", - $start, $length ); - - if ( !$left->isType( $leftType ) ) { - $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" ); - } - - if ( !$right->isType( $rightType ) ) { - $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" ); - } - return $newExpr; - } -} - -/** - * The exception class for all the classes in this file. This will be thrown - * back to the caller if there is any validation error. - */ -class CLDRPluralRuleError extends MWException { - function __construct( $message ) { - parent::__construct( 'CLDR plural rule error: ' . $message ); - } -}