Fixed dependencies for jquery.collapsibleTabs
[lhc/web/wiklou.git] / languages / utils / CLDRPluralRuleEvaluator.php
1 <?php
2 /**
3 * Parse and evaluate a plural rule.
4 *
5 * http://unicode.org/reports/tr35/#Language_Plural_Rules
6 *
7 * @author Niklas Laxstrom, Tim Starling
8 *
9 * @copyright Copyright © 2010-2012, Niklas Laxström
10 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
11 * @file
12 * @since 1.20
13 */
14
15 class CLDRPluralRuleEvaluator {
16 /**
17 * Evaluate a number against a set of plural rules. If a rule passes,
18 * return the index of plural rule.
19 *
20 * @param int The number to be evaluated against the rules
21 * @param array The associative array of plural rules in pluralform => rule format.
22 * @return int The index of the plural form which passed the evaluation
23 */
24 public static function evaluate( $number, array $rules ) {
25 $rules = self::compile( $rules );
26 return self::evaluateCompiled( $number, $rules );
27 }
28
29 /**
30 * Convert a set of rules to a compiled form which is optimised for
31 * fast evaluation. The result will be an array of strings, and may be cached.
32 *
33 * @param $rules The rules to compile
34 * @return An array of compile rules.
35 */
36 public static function compile( array $rules ) {
37 // We can't use array_map() for this because it generates a warning if
38 // there is an exception.
39 foreach ( $rules as &$rule ) {
40 $rule = CLDRPluralRuleConverter::convert( $rule );
41 }
42 return $rules;
43 }
44
45 /**
46 * Evaluate a compiled set of rules returned by compile(). Do not allow
47 * the user to edit the compiled form, or else PHP errors may result.
48 */
49 public static function evaluateCompiled( $number, array $rules ) {
50 // The compiled form is RPN, with tokens strictly delimited by
51 // spaces, so this is a simple RPN evaluator.
52 foreach ( $rules as $i => $rule ) {
53 $stack = array();
54 $zero = ord( '0' );
55 $nine = ord( '9' );
56 foreach ( StringUtils::explode( ' ', $rule ) as $token ) {
57 $ord = ord( $token );
58 if ( $token === 'n' ) {
59 $stack[] = $number;
60 } elseif ( $ord >= $zero && $ord <= $nine ) {
61 $stack[] = intval( $token );
62 } else {
63 $right = array_pop( $stack );
64 $left = array_pop( $stack );
65 $result = self::doOperation( $token, $left, $right );
66 $stack[] = $result;
67 }
68 }
69 if ( $stack[0] ) {
70 return $i;
71 }
72 }
73 // None of the provided rules match. The number belongs to caregory
74 // 'other' which comes last.
75 return count( $rules );
76 }
77
78 /**
79 * Do a single operation
80 *
81 * @param $token string The token string
82 * @param $left The left operand. If it is an object, its state may be destroyed.
83 * @param $right The right operand
84 * @return mixed
85 */
86 private static function doOperation( $token, $left, $right ) {
87 if ( in_array( $token, array( 'in', 'not-in', 'within', 'not-within' ) ) ) {
88 if ( !($right instanceof CLDRPluralRuleEvaluator_Range ) ) {
89 $right = new CLDRPluralRuleEvaluator_Range( $right );
90 }
91 }
92 switch ( $token ) {
93 case 'or':
94 return $left || $right;
95 case 'and':
96 return $left && $right;
97 case 'is':
98 return $left == $right;
99 case 'is-not':
100 return $left != $right;
101 case 'in':
102 return $right->isNumberIn( $left );
103 case 'not-in':
104 return !$right->isNumberIn( $left );
105 case 'within':
106 return $right->isNumberWithin( $left );
107 case 'not-within':
108 return !$right->isNumberWithin( $left );
109 case 'mod':
110 if ( is_int( $left ) ) {
111 return (int) fmod( $left, $right );
112 }
113 return fmod( $left, $right );
114 case ',':
115 if ( $left instanceof CLDRPluralRuleEvaluator_Range ) {
116 $range = $left;
117 } else {
118 $range = new CLDRPluralRuleEvaluator_Range( $left );
119 }
120 $range->add( $right );
121 return $range;
122 case '..':
123 return new CLDRPluralRuleEvaluator_Range( $left, $right );
124 default:
125 throw new CLDRPluralRuleError( "Invalid RPN token" );
126 }
127 }
128 }
129
130 /**
131 * Evaluator helper class representing a range list.
132 */
133 class CLDRPluralRuleEvaluator_Range {
134 public $parts = array();
135
136 function __construct( $start, $end = false ) {
137 if ( $end === false ) {
138 $this->parts[] = $start;
139 } else {
140 $this->parts[] = array( $start, $end );
141 }
142 }
143
144 /**
145 * Determine if the given number is inside the range. If $integerConstraint
146 * is true, the number must additionally be an integer if it is to match
147 * any interval part.
148 */
149 function isNumberIn( $number, $integerConstraint = true ) {
150 foreach ( $this->parts as $part ) {
151 if ( is_array( $part ) ) {
152 if ( ( !$integerConstraint || floor( $number ) === (float)$number )
153 && $number >= $part[0] && $number <= $part[1] )
154 {
155 return true;
156 }
157 } else {
158 if ( $number == $part ) {
159 return true;
160 }
161 }
162 }
163 return false;
164 }
165
166 /**
167 * Readable alias for isNumberIn( $number, false ), and the implementation
168 * of the "within" operator.
169 */
170 function isNumberWithin( $number ) {
171 return $this->isNumberIn( $number, false );
172 }
173
174 /**
175 * Add another part to this range. The supplied new part may either be a
176 * range object itself, or a single number.
177 */
178 function add( $other ) {
179 if ( $other instanceof self ) {
180 $this->parts = array_merge( $this->parts, $other->parts );
181 } else {
182 $this->parts[] = $other;
183 }
184 }
185
186 /**
187 * For debugging
188 */
189 function __toString() {
190 $s = 'Range(';
191 foreach ( $this->parts as $i => $part ) {
192 if ( $i ) {
193 $s .= ', ';
194 }
195 if ( is_array( $part ) ) {
196 $s .= $part[0] . '..' . $part[1];
197 } else {
198 $s .= $part;
199 }
200 }
201 $s .= ')';
202 return $s;
203 }
204
205 }
206
207 /**
208 * Helper class for converting rules to reverse polish notation (RPN).
209 */
210 class CLDRPluralRuleConverter {
211 public $rule, $pos, $end;
212 public $operators = array();
213 public $operands = array();
214
215 /**
216 * Precedence levels. Note that there's no need to worry about associativity
217 * for the level 4 operators, since they return boolean and don't accept
218 * boolean inputs.
219 */
220 static $precedence = array(
221 'or' => 2,
222 'and' => 3,
223 'is' => 4,
224 'is-not' => 4,
225 'in' => 4,
226 'not-in' => 4,
227 'within' => 4,
228 'not-within' => 4,
229 'mod' => 5,
230 ',' => 6,
231 '..' => 7,
232 );
233
234 /**
235 * A character list defining whitespace, for use in strspn() etc.
236 */
237 const WHITESPACE_CLASS = " \t\r\n";
238
239 /**
240 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
241 * negative numbers or decimals.
242 */
243 const NUMBER_CLASS = '0123456789';
244
245 /**
246 * An anchored regular expression which matches a word at the current offset.
247 */
248 const WORD_REGEX = '/[a-zA-Z]+/A';
249
250 /**
251 * Convert a rule to RPN. This is the only public entry point.
252 */
253 public static function convert( $rule ) {
254 $parser = new self( $rule );
255 return $parser->doConvert();
256 }
257
258 /**
259 * Private constructor.
260 */
261 protected function __construct( $rule ) {
262 $this->rule = $rule;
263 $this->pos = 0;
264 $this->end = strlen( $rule );
265 }
266
267 /**
268 * Do the operation.
269 */
270 protected function doConvert() {
271 $expectOperator = true;
272
273 // Iterate through all tokens, saving the operators and operands to a
274 // stack per Dijkstra's shunting yard algorithm.
275 while ( false !== ( $token = $this->nextToken() ) ) {
276 // In this grammar, there are only binary operators, so every valid
277 // rule string will alternate between operator and operand tokens.
278 $expectOperator = !$expectOperator;
279
280 if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
281 // Operand
282 if ( $expectOperator ) {
283 $token->error( 'unexpected operand' );
284 }
285 $this->operands[] = $token;
286 continue;
287 } else {
288 // Operator
289 if ( !$expectOperator ) {
290 $token->error( 'unexpected operator' );
291 }
292 // Resolve higher precedence levels
293 $lastOp = end( $this->operators );
294 while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
295 $this->doOperation( $lastOp, $this->operands );
296 array_pop( $this->operators );
297 $lastOp = end( $this->operators );
298 }
299 $this->operators[] = $token;
300 }
301 }
302
303 // Finish off the stack
304 while ( $op = array_pop( $this->operators ) ) {
305 $this->doOperation( $op, $this->operands );
306 }
307
308 // Make sure the result is sane. The first case is possible for an empty
309 // string input, the second should be unreachable.
310 if ( !count( $this->operands ) ) {
311 $this->error( 'condition expected' );
312 } elseif ( count( $this->operands ) > 1 ) {
313 $this->error( 'missing operator or too many operands' );
314 }
315
316 $value = $this->operands[0];
317 if ( $value->type !== 'boolean' ) {
318 $this->error( 'the result must have a boolean type' );
319 }
320
321 return $this->operands[0]->rpn;
322 }
323
324 /**
325 * Fetch the next token from the input string. Return it as a
326 * CLDRPluralRuleConverter_Fragment object.
327 */
328 protected function nextToken() {
329 if ( $this->pos >= $this->end ) {
330 return false;
331 }
332
333 // Whitespace
334 $length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
335 $this->pos += $length;
336
337 if ( $this->pos >= $this->end ) {
338 return false;
339 }
340
341 // Number
342 $length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
343 if ( $length !== 0 ) {
344 $token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
345 $this->pos += $length;
346 return $token;
347 }
348
349 // Comma
350 if ( $this->rule[$this->pos] === ',' ) {
351 $token = $this->newOperator( ',', $this->pos, 1 );
352 $this->pos ++;
353 return $token;
354 }
355
356 // Dot dot
357 if ( substr( $this->rule, $this->pos, 2 ) === '..' ) {
358 $token = $this->newOperator( '..', $this->pos, 2 );
359 $this->pos += 2;
360 return $token;
361 }
362
363 // Word
364 if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
365 $this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
366 }
367 $word1 = strtolower( $m[0] );
368 $word2 = '';
369 $nextTokenPos = $this->pos + strlen( $word1 );
370 if ( $word1 === 'not' || $word1 === 'is' ) {
371 // Look ahead one word
372 $nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
373 if ( $nextTokenPos < $this->end
374 && preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos ) )
375 {
376 $word2 = strtolower( $m[0] );
377 $nextTokenPos += strlen( $word2 );
378 }
379 }
380
381 // Two-word operators like "is not" take precedence over single-word operators like "is"
382 if ( $word2 !== '' ) {
383 $bothWords = "{$word1}-{$word2}";
384 if ( isset( self::$precedence[$bothWords] ) ) {
385 $token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
386 $this->pos = $nextTokenPos;
387 return $token;
388 }
389 }
390
391 // Single-word operators
392 if ( isset( self::$precedence[$word1] ) ) {
393 $token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
394 $this->pos += strlen( $word1 );
395 return $token;
396 }
397
398 // The special numerical keyword "n"
399 if ( $word1 === 'n' ) {
400 $token = $this->newNumber( 'n', $this->pos );
401 $this->pos ++;
402 return $token;
403 }
404
405 $this->error( 'unrecognised word' );
406 }
407
408 /**
409 * For the binary operator $op, pop its operands off the stack and push
410 * a fragment with rpn and type members describing the result of that
411 * operation.
412 */
413 protected function doOperation( $op ) {
414 if ( count( $this->operands ) < 2 ) {
415 $op->error( 'missing operand' );
416 }
417 $right = array_pop( $this->operands );
418 $left = array_pop( $this->operands );
419 $result = $op->operate( $left, $right );
420 $this->operands[] = $result;
421 }
422
423 /**
424 * Create a numerical expression object
425 */
426 protected function newNumber( $text, $pos ) {
427 return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
428 }
429
430 /**
431 * Create a binary operator
432 */
433 protected function newOperator( $type, $pos, $length ) {
434 return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
435 }
436
437 /**
438 * Throw an error
439 */
440 protected function error( $message ) {
441 throw new CLDRPluralRuleError( $message );
442 }
443 }
444
445 /**
446 * Helper for CLDRPluralRuleConverter.
447 * The base class for operators and expressions, describing a region of the input string.
448 */
449 class CLDRPluralRuleConverter_Fragment {
450 public $parser, $pos, $length, $end;
451
452 function __construct( $parser, $pos, $length ) {
453 $this->parser = $parser;
454 $this->pos = $pos;
455 $this->length = $length;
456 $this->end = $pos + $length;
457 }
458
459 public function error( $message ) {
460 $text = $this->getText();
461 throw new CLDRPluralRuleError( "$message at position " . ( $this->pos + 1 ) . ": \"$text\"" );
462 }
463
464 public function getText() {
465 return substr( $this->parser->rule, $this->pos, $this->length );
466 }
467 }
468
469 /**
470 * Helper for CLDRPluralRuleConverter.
471 * An expression object, representing a region of the input string (for error
472 * messages), the RPN notation used to evaluate it, and the result type for
473 * validation.
474 */
475 class CLDRPluralRuleConverter_Expression extends CLDRPluralRuleConverter_Fragment {
476 public $type, $rpn;
477
478 function __construct( $parser, $type, $rpn, $pos, $length ) {
479 parent::__construct( $parser, $pos, $length );
480 $this->type = $type;
481 $this->rpn = $rpn;
482 }
483
484 public function isType( $type ) {
485 if ( $type === 'range' && ( $this->type === 'range' || $this->type === 'number' ) ) {
486 return true;
487 }
488 if ( $type === $this->type ) {
489 return true;
490 }
491 return false;
492 }
493 }
494
495 /**
496 * Helper for CLDRPluralRuleConverter.
497 * An operator object, representing a region of the input string (for error
498 * messages), and the binary operator at that location.
499 */
500 class CLDRPluralRuleConverter_Operator extends CLDRPluralRuleConverter_Fragment {
501 public $name;
502
503 /**
504 * Each op type has three characters: left operand type, right operand type and result type
505 *
506 * b = boolean
507 * n = number
508 * r = range
509 *
510 * A number is a kind of range.
511 */
512 static $opTypes = array(
513 'or' => 'bbb',
514 'and' => 'bbb',
515 'is' => 'nnb',
516 'is-not' => 'nnb',
517 'in' => 'nrb',
518 'not-in' => 'nrb',
519 'within' => 'nrb',
520 'not-within' => 'nrb',
521 'mod' => 'nnn',
522 ',' => 'rrr',
523 '..' => 'nnr',
524 );
525
526 /**
527 * Map converting from the abbrevation to the full form.
528 */
529 static $typeSpecMap = array(
530 'b' => 'boolean',
531 'n' => 'number',
532 'r' => 'range',
533 );
534
535 function __construct( $parser, $name, $pos, $length ) {
536 parent::__construct( $parser, $pos, $length );
537 $this->name = $name;
538 }
539
540 public function operate( $left, $right ) {
541 $typeSpec = self::$opTypes[$this->name];
542
543 $leftType = self::$typeSpecMap[$typeSpec[0]];
544 $rightType = self::$typeSpecMap[$typeSpec[1]];
545 $resultType = self::$typeSpecMap[$typeSpec[2]];
546
547 $start = min( $this->pos, $left->pos, $right->pos );
548 $end = max( $this->end, $left->end, $right->end );
549 $length = $end - $start;
550
551 $newExpr = new CLDRPluralRuleConverter_Expression( $this->parser, $resultType,
552 "{$left->rpn} {$right->rpn} {$this->name}",
553 $start, $length );
554
555 if ( !$left->isType( $leftType ) ) {
556 $newExpr->error( "invalid type for left operand: expected $leftType, got {$left->type}" );
557 }
558
559 if ( !$right->isType( $rightType ) ) {
560 $newExpr->error( "invalid type for right operand: expected $rightType, got {$right->type}" );
561 }
562 return $newExpr;
563 }
564 }
565
566 /**
567 * The exception class for all the classes in this file. This will be thrown
568 * back to the caller if there is any validation error.
569 */
570 class CLDRPluralRuleError extends MWException {
571 function __construct( $message ) {
572 parent::__construct( 'CLDR plural rule error: ' . $message );
573 }
574 }