X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=languages%2Futils%2FCLDRPluralRuleEvaluator.php;h=1f4d0cde4ab7285357764ba4d7d22186ca59e4fb;hb=b637381cd1a030c5637fe0daa037a7160d66b29c;hp=c2aede0249ad273fa9ccbfceaf31950fc04169c6;hpb=4960dd9b775b4f7aae2e1f1906a1f72b859415df;p=lhc%2Fweb%2Fwiklou.git diff --git a/languages/utils/CLDRPluralRuleEvaluator.php b/languages/utils/CLDRPluralRuleEvaluator.php index c2aede0249..1f4d0cde4a 100644 --- a/languages/utils/CLDRPluralRuleEvaluator.php +++ b/languages/utils/CLDRPluralRuleEvaluator.php @@ -2,7 +2,8 @@ /** * Parse and evaluate a plural rule. * - * http://unicode.org/reports/tr35/#Language_Plural_Rules + * UTS #35 Revision 33 + * http://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules * * @author Niklas Laxstrom, Tim Starling * @@ -63,11 +64,41 @@ class CLDRPluralRuleEvaluator { * Evaluate a compiled set of rules returned by compile(). Do not allow * the user to edit the compiled form, or else PHP errors may result. * - * @param int The number to be evaluated against the rules + * @param string The number to be evaluated against the rules, in English, or it + * may be a type convertible to string. * @param array The associative array of plural rules in pluralform => rule format. * @return int The index of the plural form which passed the evaluation */ public static function evaluateCompiled( $number, array $rules ) { + // Calculate the values of the operand symbols + $number = strval( $number ); + if ( !preg_match( '/^ -? ( ([0-9]+) (?: \. ([0-9]+) )? )$/x', $number, $m ) ) { + wfDebug( __METHOD__.': invalid number input, returning "other"' ); + return count( $rules ); + } + if ( !isset( $m[3] ) ) { + $operandSymbols = array( + 'n' => intval( $m[1] ), + 'i' => intval( $m[1] ), + 'v' => 0, + 'w' => 0, + 'f' => 0, + 't' => 0 + ); + } else { + $absValStr = $m[1]; + $intStr = $m[2]; + $fracStr = $m[3]; + $operandSymbols = array( + 'n' => floatval( $absValStr ), + 'i' => intval( $intStr ), + 'v' => strlen( $fracStr ), + 'w' => strlen( rtrim( $fracStr, '0' ) ), + 'f' => intval( $fracStr ), + 't' => intval( rtrim( $fracStr, '0' ) ), + ); + } + // The compiled form is RPN, with tokens strictly delimited by // spaces, so this is a simple RPN evaluator. foreach ( $rules as $i => $rule ) { @@ -76,8 +107,8 @@ class CLDRPluralRuleEvaluator { $nine = ord( '9' ); foreach ( StringUtils::explode( ' ', $rule ) as $token ) { $ord = ord( $token ); - if ( $token === 'n' ) { - $stack[] = $number; + if ( isset( $operandSymbols[$token] ) ) { + $stack[] = $operandSymbols[$token]; } elseif ( $ord >= $zero && $ord <= $nine ) { $stack[] = intval( $token ); } else { @@ -91,8 +122,8 @@ class CLDRPluralRuleEvaluator { return $i; } } - // None of the provided rules match. The number belongs to caregory - // 'other' which comes last. + // None of the provided rules match. The number belongs to category + // 'other', which comes last. return count( $rules ); } @@ -251,35 +282,35 @@ class CLDRPluralRuleEvaluator_Range { */ class CLDRPluralRuleConverter { /** - * The rule + * The input string * * @var string */ public $rule; /** - * The position + * The current position * * @var int */ public $pos; /** - * The last position possible + * The past-the-end position * * @var int */ public $end; /** - * The operators + * The operator stack * * @var array */ public $operators = array(); /** - * The operands + * The operand stack * * @var array */ @@ -311,14 +342,19 @@ class CLDRPluralRuleConverter { /** * Same for digits. Note that the grammar given in UTS #35 doesn't allow - * negative numbers or decimals. + * negative numbers or decimal separators. */ const NUMBER_CLASS = '0123456789'; + /** + * A character list of symbolic operands. + */ + const OPERAND_SYMBOLS = 'nivwft'; + /** * An anchored regular expression which matches a word at the current offset. */ - const WORD_REGEX = '/[a-zA-Z]+/A'; + const WORD_REGEX = '/[a-zA-Z@]+/A'; /** * Convert a rule to RPN. This is the only public entry point. @@ -425,17 +461,19 @@ class CLDRPluralRuleConverter { return $token; } - // Comma - if ( $this->rule[$this->pos] === ',' ) { - $token = $this->newOperator( ',', $this->pos, 1 ); - $this->pos ++; + // Two-character operators + $op2 = substr( $this->rule, $this->pos, 2 ); + if ( $op2 === '..' || $op2 === '!=' ) { + $token = $this->newOperator( $op2, $this->pos, 2 ); + $this->pos += 2; return $token; } - // Dot dot - if ( substr( $this->rule, $this->pos, 2 ) === '..' ) { - $token = $this->newOperator( '..', $this->pos, 2 ); - $this->pos += 2; + // Single-character operators + $op1 = $this->rule[$this->pos]; + if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) { + $token = $this->newOperator( $op1, $this->pos, 1 ); + $this->pos ++; return $token; } @@ -474,13 +512,21 @@ class CLDRPluralRuleConverter { return $token; } - // The special numerical keyword "n" - if ( $word1 === 'n' ) { - $token = $this->newNumber( 'n', $this->pos ); + // The single-character operand symbols + if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) { + $token = $this->newNumber( $word1, $this->pos ); $this->pos ++; return $token; } + // Samples + if ( $word1 === '@integer' || $word1 === '@decimal' ) { + // Samples are like comments, they have no effect on rule evaluation. + // They run from the first sample indicator to the end of the string. + $this->pos = $this->end; + return false; + } + $this->error( 'unrecognised word' ); } @@ -624,6 +670,15 @@ class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment 'r' => 'range', ); + /** + * Map for converting the new operators introduced in Rev 33 to the old forms + */ + static $aliasMap = array( + '%' => 'mod', + '!=' => 'not-in', + '=' => 'in' + ); + /** * Initialize a new instance of a CLDRPluralRuleConverter_Operator object * @@ -634,6 +689,9 @@ class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment */ function __construct( $parser, $name, $pos, $length ) { parent::__construct( $parser, $pos, $length ); + if ( isset( self::$aliasMap[$name] ) ) { + $name = self::$aliasMap[$name]; + } $this->name = $name; }