Merge "jshint: Enable 'es3' option"
[lhc/web/wiklou.git] / languages / utils / CLDRPluralRuleConverter.php
1 <?php
2
3 /**
4 * @author Niklas Laxström, Tim Starling
5 *
6 * @copyright Copyright © 2010-2012, Niklas Laxström
7 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
8 *
9 * @file
10 * @since 1.20
11 */
12
/**
 * Helper class for converting CLDR plural rules to reverse polish notation (RPN).
 *
 * The rule string is split into tokens by nextToken(), and the tokens are
 * combined by doConvert() using Dijkstra's shunting yard algorithm. The only
 * public entry point is the static convert() method.
 */
class CLDRPluralRuleConverter {
	/**
	 * The input string
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current position (byte offset into $rule)
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The past-the-end position, i.e. strlen( $rule )
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The operator stack
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * The operand stack
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Precedence levels, keyed by operator name; a larger number binds more
	 * tightly. Note that there's no need to worry about associativity
	 * for the level 4 operators, since they return boolean and don't accept
	 * boolean inputs.
	 *
	 * @var array
	 */
	public static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
	 * negative numbers or decimal separators.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * A character list of symbolic operands.
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 * The "A" modifier anchors the match at the offset passed to preg_match().
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param string $rule The rule to convert
	 * @return string The RPN representation of the rule
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );
		return $parser->doConvert();
	}

	/**
	 * Protected constructor; instances are created through convert().
	 *
	 * @param string $rule The rule to convert
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the operation.
	 *
	 * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
	 * @throws CLDRPluralRuleError Via error(), when no condition is left on the
	 *  operand stack, more than one operand is left, or the result is not boolean
	 */
	protected function doConvert() {
		// This flag is flipped before each token is examined. After the flip it
		// is true when the token being examined has to be an operator, so the
		// initial value of true means the first token must be an operand.
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		/** @var CLDRPluralRuleConverter_Fragment|bool $token */
		while ( false !== ( $token = $this->nextToken() ) ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverter_Expression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			}

			// Operator
			if ( !$expectOperator ) {
				$token->error( 'unexpected operator' );
			}

			// Resolve higher precedence levels: anything already on the operator
			// stack that binds at least as tightly as the new operator is
			// applied first.
			// NOTE(review): the symbolic tokens "=", "!=" and "%" have no entry
			// in self::$precedence; presumably the operator class maps them to
			// their word forms before exposing ->name. Confirm.
			$lastOp = end( $this->operators );
			while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
				$this->doOperation( $lastOp );
				array_pop( $this->operators );
				$lastOp = end( $this->operators );
			}
			$this->operators[] = $token;
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		if ( !count( $this->operands ) ) {
			$this->error( 'condition expected' );
		} elseif ( count( $this->operands ) > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Fetch the next token from the input string.
	 *
	 * @return CLDRPluralRuleConverter_Fragment|bool The next token, or false
	 *  when the end of the rule (or the start of the samples) has been reached
	 * @throws CLDRPluralRuleError Via error(), on an unexpected character or an
	 *  unrecognised word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;
			return $token;
		}

		// Two-character operators
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;
			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;
			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word2 = '';
		$nextTokenPos = $this->pos + strlen( $word1 );
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;
				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, strlen( $word1 ) );
			$this->pos += strlen( $word1 );
			return $token;
		}

		// The single-character operand symbols. The length check is required:
		// strpos() on its own also matches multi-letter words such as "ni" or
		// "wft" that merely happen to be substrings of the symbol list, which
		// would then be consumed one byte at a time and reported with a
		// misleading error instead of "unrecognised word".
		if ( strlen( $word1 ) === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;
			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;
			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its operands off the stack and push
	 * a fragment with rpn and type members describing the result of that
	 * operation.
	 *
	 * @param CLDRPluralRuleConverter_Operator $op
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		// The stack is last-in first-out, so the right-hand operand comes off first.
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create a numerical expression object
	 *
	 * @param string $text The text of the number or operand symbol
	 * @param int $pos The offset of the text in the rule
	 * @return CLDRPluralRuleConverter_Expression The numerical expression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverter_Expression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator
	 *
	 * @param string $type The operator text, e.g. "mod", "is-not" or ".."
	 * @param int $pos The offset of the operator in the rule
	 * @param int $length The length of the operator text in the rule
	 * @return CLDRPluralRuleConverter_Operator The operator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverter_Operator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error
	 *
	 * @param string $message The error message
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}