/**
* Parse and evaluate a plural rule.
*
- * http://unicode.org/reports/tr35/#Language_Plural_Rules
+ * UTS #35 Revision 33
+ * http://www.unicode.org/reports/tr35/tr35-33/tr35-numbers.html#Language_Plural_Rules
*
* @author Niklas Laxstrom, Tim Starling
*
* @file
* @since 1.20
*/
-
class CLDRPluralRuleEvaluator {
/**
* Evaluate a number against a set of plural rules. If a rule passes,
* Convert a set of rules to a compiled form which is optimised for
* fast evaluation. The result will be an array of strings, and may be cached.
*
- * @param $rules The rules to compile
- * @return An array of compile rules.
+ * @param array $rules The rules to compile
+ * @return array An array of compiled rules.
*/
public static function compile( array $rules ) {
// We can't use array_map() for this because it generates a warning if
/**
* Evaluate a compiled set of rules returned by compile(). Do not allow
* the user to edit the compiled form, or else PHP errors may result.
+ *
+ * @param string $number The number to be evaluated against the rules, in English, or it
+ * may be a type convertible to string.
+ * @param array $rules The associative array of plural rules in pluralform => rule format.
+ * @return int The index of the plural form which passed the evaluation
*/
public static function evaluateCompiled( $number, array $rules ) {
+ // Calculate the values of the operand symbols
+ $number = strval( $number );
+ if ( !preg_match( '/^ -? ( ([0-9]+) (?: \. ([0-9]+) )? )$/x', $number, $m ) ) {
+ wfDebug( __METHOD__.': invalid number input, returning "other"' );
+ return count( $rules );
+ }
+ if ( !isset( $m[3] ) ) {
+ $operandSymbols = array(
+ 'n' => intval( $m[1] ),
+ 'i' => intval( $m[1] ),
+ 'v' => 0,
+ 'w' => 0,
+ 'f' => 0,
+ 't' => 0
+ );
+ } else {
+ $absValStr = $m[1];
+ $intStr = $m[2];
+ $fracStr = $m[3];
+ $operandSymbols = array(
+ 'n' => floatval( $absValStr ),
+ 'i' => intval( $intStr ),
+ 'v' => strlen( $fracStr ),
+ 'w' => strlen( rtrim( $fracStr, '0' ) ),
+ 'f' => intval( $fracStr ),
+ 't' => intval( rtrim( $fracStr, '0' ) ),
+ );
+ }
+
// The compiled form is RPN, with tokens strictly delimited by
// spaces, so this is a simple RPN evaluator.
foreach ( $rules as $i => $rule ) {
$nine = ord( '9' );
foreach ( StringUtils::explode( ' ', $rule ) as $token ) {
$ord = ord( $token );
- if ( $token === 'n' ) {
- $stack[] = $number;
+ if ( isset( $operandSymbols[$token] ) ) {
+ $stack[] = $operandSymbols[$token];
} elseif ( $ord >= $zero && $ord <= $nine ) {
$stack[] = intval( $token );
} else {
return $i;
}
}
- // None of the provided rules match. The number belongs to caregory
- // 'other' which comes last.
+ // None of the provided rules match. The number belongs to category
+ // 'other', which comes last.
return count( $rules );
}
/**
* Do a single operation
*
- * @param $token string The token string
- * @param $left The left operand. If it is an object, its state may be destroyed.
- * @param $right The right operand
+ * @param string $token The token string
+ * @param mixed $left The left operand. If it is an object, its state may be destroyed.
+ * @param mixed $right The right operand
* @throws CLDRPluralRuleError
- * @return mixed
+ * @return mixed The operation result
*/
private static function doOperation( $token, $left, $right ) {
if ( in_array( $token, array( 'in', 'not-in', 'within', 'not-within' ) ) ) {
return !$right->isNumberWithin( $left );
case 'mod':
if ( is_int( $left ) ) {
- return (int) fmod( $left, $right );
+ return (int)fmod( $left, $right );
}
return fmod( $left, $right );
case ',':
* Evaluator helper class representing a range list.
*/
class CLDRPluralRuleEvaluator_Range {
+ /**
+ * The parts
+ *
+ * @var array
+ */
public $parts = array();
+ /**
+ * Initialize a new instance of CLDRPluralRuleEvaluator_Range
+ *
+ * @param int $start The start of the range
+ * @param int|bool $end The end of the range, or false if the range is the single number $start.
+ */
function __construct( $start, $end = false ) {
if ( $end === false ) {
$this->parts[] = $start;
}
/**
- * Determine if the given number is inside the range. If $integerConstraint
- * is true, the number must additionally be an integer if it is to match
- * any interval part.
+ * Determine if the given number is inside the range.
+ *
+ * @param int|float $number The number to check
+ * @param bool $integerConstraint If true, also asserts the number is an integer; otherwise, number simply has to be inside the range.
+ * @return bool True if the number is inside the range; otherwise, false.
*/
function isNumberIn( $number, $integerConstraint = true ) {
foreach ( $this->parts as $part ) {
if ( is_array( $part ) ) {
if ( ( !$integerConstraint || floor( $number ) === (float)$number )
- && $number >= $part[0] && $number <= $part[1] )
- {
+ && $number >= $part[0] && $number <= $part[1]
+ ) {
return true;
}
} else {
/**
* Readable alias for isNumberIn( $number, false ), and the implementation
* of the "within" operator.
+ *
+ * @param int|float $number The number to check
+ * @return bool True if the number is inside the range; otherwise, false.
*/
function isNumberWithin( $number ) {
// Pass false for $integerConstraint so that the number is not required
// to be an integer in order to match an interval part.
return $this->isNumberIn( $number, false );
}
/**
- * Add another part to this range. The supplied new part may either be a
- * range object itself, or a single number.
+ * Add another part to this range.
+ *
+ * @param mixed $other The part to add, either a range object itself or a single number.
*/
function add( $other ) {
if ( $other instanceof self ) {
}
/**
- * For debugging
+ * Returns the string representation of the rule evaluator range.
+ * The purpose of this method is to help debugging.
+ *
+ * @return string The string representation of the rule evaluator range
*/
function __toString() {
$s = 'Range(';
* Helper class for converting rules to reverse polish notation (RPN).
*/
class CLDRPluralRuleConverter {
- public $rule, $pos, $end;
+ /**
+ * The input string
+ *
+ * @var string
+ */
+ public $rule;
+
+ /**
+ * The current position
+ *
+ * @var int
+ */
+ public $pos;
+
+ /**
+ * The past-the-end position
+ *
+ * @var int
+ */
+ public $end;
+
+ /**
+ * The operator stack
+ *
+ * @var array
+ */
public $operators = array();
+
+ /**
+ * The operand stack
+ *
+ * @var array
+ */
public $operands = array();
/**
/**
* Same for digits. Note that the grammar given in UTS #35 doesn't allow
- * negative numbers or decimals.
+ * negative numbers or decimal separators.
*/
const NUMBER_CLASS = '0123456789';
+ /**
+ * A character list of symbolic operands.
+ */
+ const OPERAND_SYMBOLS = 'nivwft';
+
/**
* An anchored regular expression which matches a word at the current offset.
*/
- const WORD_REGEX = '/[a-zA-Z]+/A';
+ const WORD_REGEX = '/[a-zA-Z@]+/A';
/**
* Convert a rule to RPN. This is the only public entry point.
+ *
+ * @param string $rule The rule to convert
+ * @return string The RPN representation of the rule
*/
public static function convert( $rule ) {
$parser = new self( $rule );
/**
* Do the operation.
+ *
+ * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
*/
protected function doConvert() {
$expectOperator = true;
}
/**
- * Fetch the next token from the input string. Return it as a
- * CLDRPluralRuleConverter_Fragment object.
+ * Fetch the next token from the input string.
+ *
+ * @return CLDRPluralRuleConverter_Fragment The next token
*/
protected function nextToken() {
if ( $this->pos >= $this->end ) {
return $token;
}
- // Comma
- if ( $this->rule[$this->pos] === ',' ) {
- $token = $this->newOperator( ',', $this->pos, 1 );
- $this->pos ++;
+ // Two-character operators
+ $op2 = substr( $this->rule, $this->pos, 2 );
+ if ( $op2 === '..' || $op2 === '!=' ) {
+ $token = $this->newOperator( $op2, $this->pos, 2 );
+ $this->pos += 2;
return $token;
}
- // Dot dot
- if ( substr( $this->rule, $this->pos, 2 ) === '..' ) {
- $token = $this->newOperator( '..', $this->pos, 2 );
- $this->pos += 2;
+ // Single-character operators
+ $op1 = $this->rule[$this->pos];
+ if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
+ $token = $this->newOperator( $op1, $this->pos, 1 );
+ $this->pos ++;
return $token;
}
// Look ahead one word
$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
if ( $nextTokenPos < $this->end
- && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) )
- {
+ && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
+ ) {
$word2 = strtolower( $m[0] );
$nextTokenPos += strlen( $word2 );
}
return $token;
}
- // The special numerical keyword "n"
- if ( $word1 === 'n' ) {
- $token = $this->newNumber( 'n', $this->pos );
+ // The single-character operand symbols
+ if ( strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
+ $token = $this->newNumber( $word1, $this->pos );
$this->pos ++;
return $token;
}
+ // Samples
+ if ( $word1 === '@integer' || $word1 === '@decimal' ) {
+ // Samples are like comments, they have no effect on rule evaluation.
+ // They run from the first sample indicator to the end of the string.
+ $this->pos = $this->end;
+ return false;
+ }
+
$this->error( 'unrecognised word' );
}
/**
* Create a numerical expression object
+ *
+ * @return CLDRPluralRuleConverter_Expression The numerical expression
*/
protected function newNumber( $text, $pos ) {
return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
/**
* Create a binary operator
+ *
+ * @return CLDRPluralRuleConverter_Operator The operator
*/
protected function newOperator( $type, $pos, $length ) {
return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
* messages), and the binary operator at that location.
*/
class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment {
+ /**
+ * The name
+ *
+ * @var string
+ */
public $name;
/**
* r = range
*
* A number is a kind of range.
+ *
+ * @var array
*/
static $opTypes = array(
'or' => 'bbb',
/**
* Map converting from the abbreviation to the full form.
+ *
+ * @var array
*/
static $typeSpecMap = array(
'b' => 'boolean',
'r' => 'range',
);
+ /**
+ * Map for converting the new operators introduced in Rev 33 to the old forms
+ */
+ static $aliasMap = array(
+ '%' => 'mod',
+ '!=' => 'not-in',
+ '=' => 'in'
+ );
+
+ /**
+ * Initialize a new instance of a CLDRPluralRuleConverter_Operator object
+ *
+ * @param CLDRPluralRuleConverter $parser The parser
+ * @param string $name The operator name
+ * @param int $pos The position
+ * @param int $length The length
+ */
function __construct( $parser, $name, $pos, $length ) {
parent::__construct( $parser, $pos, $length );
+ if ( isset( self::$aliasMap[$name] ) ) {
+ $name = self::$aliasMap[$name];
+ }
$this->name = $name;
}
+ /**
+ * Compute the operation
+ *
+ * @param CLDRPluralRuleConverter_Expression $left The left part of the expression
+ * @param CLDRPluralRuleConverter_Expression $right The right part of the expression
+ * @return CLDRPluralRuleConverter_Expression The result of the operation
+ */
public function operate( $left, $right ) {
$typeSpec = self::$opTypes[$this->name];