SpecialChangeEmail: error if old email was entered in new email field
[lhc/web/wiklou.git] / languages / utils / CLDRPluralRuleConverter.php
1 <?php
2 /**
3 * @author Niklas Laxström, Tim Starling
4 *
5 * @copyright Copyright © 2010-2012, Niklas Laxström
6 * @license http://www.gnu.org/copyleft/gpl.html GNU General Public License 2.0 or later
7 *
8 * @file
9 * @since 1.20
10 */
11
/**
 * Helper class for converting rules to reverse polish notation (RPN).
 *
 * The rule string is split into tokens by nextToken() and the tokens are
 * combined with Dijkstra's shunting yard algorithm in doConvert(). Operands
 * are CLDRPluralRuleConverterExpression objects and operators are
 * CLDRPluralRuleConverterOperator objects; both are defined outside this file.
 */
class CLDRPluralRuleConverter {
	/**
	 * The input string
	 *
	 * @var string
	 */
	public $rule;

	/**
	 * The current position (byte offset into $rule)
	 *
	 * @var int
	 */
	public $pos;

	/**
	 * The past-the-end position (byte length of $rule)
	 *
	 * @var int
	 */
	public $end;

	/**
	 * The operator stack
	 *
	 * @var array
	 */
	public $operators = array();

	/**
	 * The operand stack
	 *
	 * @var array
	 */
	public $operands = array();

	/**
	 * Precedence levels. Note that there's no need to worry about associativity
	 * for the level 4 operators, since they return boolean and don't accept
	 * boolean inputs.
	 *
	 * The keys double as the list of recognised word operators: nextToken()
	 * uses isset() on this table to decide whether a word is an operator.
	 */
	private static $precedence = array(
		'or' => 2,
		'and' => 3,
		'is' => 4,
		'is-not' => 4,
		'in' => 4,
		'not-in' => 4,
		'within' => 4,
		'not-within' => 4,
		'mod' => 5,
		',' => 6,
		'..' => 7,
	);

	/**
	 * A character list defining whitespace, for use in strspn() etc.
	 */
	const WHITESPACE_CLASS = " \t\r\n";

	/**
	 * Same for digits. Note that the grammar given in UTS #35 doesn't allow
	 * negative numbers or decimal separators.
	 */
	const NUMBER_CLASS = '0123456789';

	/**
	 * A character list of symbolic operands.
	 */
	const OPERAND_SYMBOLS = 'nivwft';

	/**
	 * An anchored regular expression which matches a word at the current offset.
	 */
	const WORD_REGEX = '/[a-zA-Z@]+/A';

	/**
	 * Convert a rule to RPN. This is the only public entry point.
	 *
	 * @param string $rule The rule to convert
	 * @return string The RPN representation of the rule
	 * @throws CLDRPluralRuleError If the rule cannot be parsed
	 */
	public static function convert( $rule ) {
		$parser = new self( $rule );

		return $parser->doConvert();
	}

	/**
	 * Protected constructor; instances are created through convert().
	 *
	 * @param string $rule
	 */
	protected function __construct( $rule ) {
		$this->rule = $rule;
		$this->pos = 0;
		$this->end = strlen( $rule );
	}

	/**
	 * Do the operation.
	 *
	 * @return string The RPN representation of the rule (e.g. "5 3 mod n is")
	 * @throws CLDRPluralRuleError On an empty rule, on leftover operands, or
	 *  when the final expression is not of boolean type
	 */
	protected function doConvert() {
		// Starts out true so that the flip at the top of the loop makes the
		// first token an expected operand.
		$expectOperator = true;

		// Iterate through all tokens, saving the operators and operands to a
		// stack per Dijkstra's shunting yard algorithm.
		/** @var CLDRPluralRuleConverterOperator $token */
		while ( ( $token = $this->nextToken() ) !== false ) {
			// In this grammar, there are only binary operators, so every valid
			// rule string will alternate between operator and operand tokens.
			$expectOperator = !$expectOperator;

			if ( $token instanceof CLDRPluralRuleConverterExpression ) {
				// Operand
				if ( $expectOperator ) {
					$token->error( 'unexpected operand' );
				}
				$this->operands[] = $token;
				continue;
			}

			// Operator
			if ( !$expectOperator ) {
				$token->error( 'unexpected operator' );
			}
			// Resolve higher (or equal) precedence levels already on the stack
			// before pushing the new operator.
			$lastOp = end( $this->operators );
			while ( $lastOp && self::$precedence[$token->name] <= self::$precedence[$lastOp->name] ) {
				$this->doOperation( $lastOp );
				array_pop( $this->operators );
				$lastOp = end( $this->operators );
			}
			$this->operators[] = $token;
		}

		// Finish off the stack
		while ( $op = array_pop( $this->operators ) ) {
			$this->doOperation( $op );
		}

		// Make sure the result is sane. The first case is possible for an empty
		// string input, the second should be unreachable.
		$operandCount = count( $this->operands );
		if ( $operandCount === 0 ) {
			$this->error( 'condition expected' );
		} elseif ( $operandCount > 1 ) {
			$this->error( 'missing operator or too many operands' );
		}

		$value = $this->operands[0];
		if ( $value->type !== 'boolean' ) {
			$this->error( 'the result must have a boolean type' );
		}

		return $value->rpn;
	}

	/**
	 * Fetch the next token from the input string.
	 *
	 * Tokens are tried in this order: number, two-character operator,
	 * single-character operator, two-word operator, single-word operator,
	 * operand symbol, sample marker.
	 *
	 * @return CLDRPluralRuleConverterFragment|bool The next token, or false
	 *  when the end of the rule (or the start of the samples) is reached
	 * @throws CLDRPluralRuleError On an unexpected character or unknown word
	 */
	protected function nextToken() {
		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Whitespace
		$length = strspn( $this->rule, self::WHITESPACE_CLASS, $this->pos );
		$this->pos += $length;

		if ( $this->pos >= $this->end ) {
			return false;
		}

		// Number
		$length = strspn( $this->rule, self::NUMBER_CLASS, $this->pos );
		if ( $length !== 0 ) {
			$token = $this->newNumber( substr( $this->rule, $this->pos, $length ), $this->pos );
			$this->pos += $length;

			return $token;
		}

		// Two-character operators
		// NOTE(review): "!=" (and "=" / "%" below) are not keys of
		// self::$precedence; presumably CLDRPluralRuleConverterOperator maps
		// them to word operator names - confirm against that class.
		$op2 = substr( $this->rule, $this->pos, 2 );
		if ( $op2 === '..' || $op2 === '!=' ) {
			$token = $this->newOperator( $op2, $this->pos, 2 );
			$this->pos += 2;

			return $token;
		}

		// Single-character operators
		$op1 = $this->rule[$this->pos];
		if ( $op1 === ',' || $op1 === '=' || $op1 === '%' ) {
			$token = $this->newOperator( $op1, $this->pos, 1 );
			$this->pos++;

			return $token;
		}

		// Word
		if ( !preg_match( self::WORD_REGEX, $this->rule, $m, 0, $this->pos ) ) {
			$this->error( 'unexpected character "' . $this->rule[$this->pos] . '"' );
		}
		$word1 = strtolower( $m[0] );
		$word1Length = strlen( $word1 );
		$word2 = '';
		$nextTokenPos = $this->pos + $word1Length;
		if ( $word1 === 'not' || $word1 === 'is' ) {
			// Look ahead one word
			$nextTokenPos += strspn( $this->rule, self::WHITESPACE_CLASS, $nextTokenPos );
			if ( $nextTokenPos < $this->end
				&& preg_match( self::WORD_REGEX, $this->rule, $m, 0, $nextTokenPos )
			) {
				$word2 = strtolower( $m[0] );
				$nextTokenPos += strlen( $word2 );
			}
		}

		// Two-word operators like "is not" take precedence over single-word operators like "is"
		if ( $word2 !== '' ) {
			$bothWords = "{$word1}-{$word2}";
			if ( isset( self::$precedence[$bothWords] ) ) {
				$token = $this->newOperator( $bothWords, $this->pos, $nextTokenPos - $this->pos );
				$this->pos = $nextTokenPos;

				return $token;
			}
		}

		// Single-word operators
		if ( isset( self::$precedence[$word1] ) ) {
			$token = $this->newOperator( $word1, $this->pos, $word1Length );
			$this->pos += $word1Length;

			return $token;
		}

		// The single-character operand symbols. The length check is required:
		// strpos() on its own would also accept multi-character substrings of
		// OPERAND_SYMBOLS (e.g. "ni" or "vw") as a single operand while only
		// advancing the position by one byte.
		if ( $word1Length === 1 && strpos( self::OPERAND_SYMBOLS, $word1 ) !== false ) {
			$token = $this->newNumber( $word1, $this->pos );
			$this->pos++;

			return $token;
		}

		// Samples
		if ( $word1 === '@integer' || $word1 === '@decimal' ) {
			// Samples are like comments, they have no effect on rule evaluation.
			// They run from the first sample indicator to the end of the string.
			$this->pos = $this->end;

			return false;
		}

		$this->error( 'unrecognised word' );
	}

	/**
	 * For the binary operator $op, pop its operands off the stack and push
	 * a fragment with rpn and type members describing the result of that
	 * operation.
	 *
	 * The operator itself is not removed from the operator stack here; that
	 * is left to the caller.
	 *
	 * @param CLDRPluralRuleConverterOperator $op
	 */
	protected function doOperation( $op ) {
		if ( count( $this->operands ) < 2 ) {
			$op->error( 'missing operand' );
		}
		// The right-hand operand was pushed last, so it comes off first.
		$right = array_pop( $this->operands );
		$left = array_pop( $this->operands );
		$result = $op->operate( $left, $right );
		$this->operands[] = $result;
	}

	/**
	 * Create a numerical expression object
	 *
	 * @param string $text
	 * @param int $pos
	 * @return CLDRPluralRuleConverterExpression The numerical expression
	 */
	protected function newNumber( $text, $pos ) {
		return new CLDRPluralRuleConverterExpression( $this, 'number', $text, $pos, strlen( $text ) );
	}

	/**
	 * Create a binary operator
	 *
	 * @param string $type
	 * @param int $pos
	 * @param int $length
	 * @return CLDRPluralRuleConverterOperator The operator
	 */
	protected function newOperator( $type, $pos, $length ) {
		return new CLDRPluralRuleConverterOperator( $this, $type, $pos, $length );
	}

	/**
	 * Throw an error
	 *
	 * @param string $message
	 * @throws CLDRPluralRuleError Always
	 */
	protected function error( $message ) {
		throw new CLDRPluralRuleError( $message );
	}
}