Merge "Fix and make some types in PHPDoc and JSDoc tags more specific"
[lhc/web/wiklou.git] / languages / ConverterRule.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @ingroup Language
20 */
21
/**
 * Parser for manual language-conversion rules, i.e. the text written inside
 * -{ ... }- markup.
 *
 * Typical use: construct with the raw text between -{ and }-, call parse(),
 * then read the results through getDisplay(), getTitle(), getRulesAction()
 * and getConvTable().
 *
 * @ingroup Language
 * @author fdcn <fdcn64@gmail.com>, PhiLiP <philip.npc@gmail.com>
 */
class ConverterRule {
	/** @var string Original text in -{text}- */
	public $mText;

	/** @var LanguageConverter Converter the rule is parsed for */
	public $mConverter;

	/** @var string|bool Text to display in place of the markup; set by parse() */
	public $mRuleDisplay = '';

	/** @var string|bool Converted title set by the T flag, false if none */
	public $mRuleTitle = false;

	/** @var string The text of the rules (markup text without the flag prefix) */
	public $mRules = '';

	/** @var string 'none', 'add' or 'remove'; set by parse() */
	public $mRulesAction = 'none';

	/** @var array Effective flags, as [ flag => true ] */
	public $mFlags = [];

	/** @var array Variant codes given in flag position, as array keys */
	public $mVariantFlags = [];

	/** @var array Conversion table, as [ variant => [ from => to ] ] */
	public $mConvTable = [];

	/** @var array Bidirectional rules: [ variant => text ] */
	public $mBidtable = [];

	/** @var array Unidirectional rules: [ variant => [ from => to ] ] */
	public $mUnidtable = [];

	/**
	 * @param string $text The text between -{ and }-
	 * @param LanguageConverter $converter
	 */
	public function __construct( $text, $converter ) {
		$this->mText = $text;
		$this->mConverter = $converter;
	}

	/**
	 * Look up the bidirectional table for the first of the given variants
	 * that has a text.
	 *
	 * @param array|string $variants Variant language code(s), in order of preference
	 * @return string|false Text of the first matching variant, false if none matches
	 */
	public function getTextInBidtable( $variants ) {
		$variants = (array)$variants;
		if ( !$variants ) {
			return false;
		}
		foreach ( $variants as $variant ) {
			if ( isset( $this->mBidtable[$variant] ) ) {
				return $this->mBidtable[$variant];
			}
		}
		return false;
	}

	/**
	 * Parse flags with syntax -{FLAG| ... }-
	 *
	 * Splits $this->mText at the first '|', normalises the flag set and
	 * stores the results in mFlags, mVariantFlags and mRules.
	 *
	 * @private
	 */
	public function parseFlags() {
		$text = $this->mText;
		$flags = [];
		$variantFlags = [];

		$sepPos = strpos( $text, '|' );
		if ( $sepPos !== false ) {
			// Everything before the first '|' is a ';'-separated flag list;
			// names the converter does not know are silently dropped.
			$validFlags = $this->mConverter->mFlags;
			$f = StringUtils::explode( ';', substr( $text, 0, $sepPos ) );
			foreach ( $f as $ff ) {
				$ff = trim( $ff );
				if ( isset( $validFlags[$ff] ) ) {
					$flags[$validFlags[$ff]] = true;
				}
			}
			// strval() because substr() may return false for an empty remainder
			$text = strval( substr( $text, $sepPos + 1 ) );
		}

		if ( !$flags ) {
			$flags['S'] = true;
		} elseif ( isset( $flags['R'] ) ) {
			$flags = [ 'R' => true ];// remove other flags
		} elseif ( isset( $flags['N'] ) ) {
			$flags = [ 'N' => true ];// remove other flags
		} elseif ( isset( $flags['-'] ) ) {
			$flags = [ '-' => true ];// remove other flags
		} elseif ( count( $flags ) == 1 && isset( $flags['T'] ) ) {
			$flags['H'] = true;
		} elseif ( isset( $flags['H'] ) ) {
			// replace A flag, and remove other flags except T and D
			$temp = [ '+' => true, 'H' => true ];
			if ( isset( $flags['T'] ) ) {
				$temp['T'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				$temp['D'] = true;
			}
			$flags = $temp;
		} else {
			if ( isset( $flags['A'] ) ) {
				$flags['+'] = true;
				$flags['S'] = true;
			}
			if ( isset( $flags['D'] ) ) {
				unset( $flags['S'] );
			}
			// try to find flags like "zh-hans", "zh-hant"
			// allow syntaxes like "-{zh-hans;zh-hant|XXXX}-"
			$variantFlags = array_intersect( array_keys( $flags ), $this->mConverter->mVariants );
			if ( $variantFlags ) {
				// Variant codes become keys so parse() can test them with isset()
				$variantFlags = array_flip( $variantFlags );
				$flags = [];
			}
		}
		$this->mVariantFlags = $variantFlags;
		$this->mRules = $text;
		$this->mFlags = $flags;
	}

	/**
	 * Generate the bidirectional and unidirectional tables from mRules.
	 *
	 * Recognised items are "variant:text" (bidirectional) and
	 * "from=>variant:text" (unidirectional). Items without ':' are skipped;
	 * an item naming a variant with no entry in the converter's
	 * mVariantNames discards every rule parsed so far and stops.
	 *
	 * @private
	 */
	public function parseRules() {
		$rules = $this->mRules;
		$bidtable = [];
		$unidtable = [];
		$variants = $this->mConverter->mVariants;
		$varsep_pattern = $this->mConverter->getVarSeparatorPattern();

		// Split according to $varsep_pattern, but ignore semicolons from HTML entities
		$rules = preg_replace( '/(&[#a-zA-Z0-9]+);/', "$1\x01", $rules );
		$choice = preg_split( $varsep_pattern, $rules );
		$choice = str_replace( "\x01", ';', $choice );

		foreach ( $choice as $c ) {
			$v = explode( ':', $c, 2 );
			if ( count( $v ) != 2 ) {
				// syntax error, skip
				continue;
			}
			$to = trim( $v[1] );
			$v = trim( $v[0] );
			$u = explode( '=>', $v, 2 );
			// if $to is empty (which is also used as $from in bidtable),
			// strtr() could return a wrong result.
			if ( count( $u ) == 1 && $to !== '' && in_array( $v, $variants ) ) {
				$bidtable[$v] = $to;
			} elseif ( count( $u ) == 2 ) {
				$from = trim( $u[0] );
				$v = trim( $u[1] );
				// if $from is empty, strtr() could return a wrong result.
				// $unidtable[$v] only ever holds arrays, so appending is
				// always right; no "replace a non-array entry" case exists.
				if ( $from !== '' && in_array( $v, $variants ) ) {
					$unidtable[$v][$from] = $to;
				}
			}
			// syntax error, pass
			if ( !isset( $this->mConverter->mVariantNames[$v] ) ) {
				$bidtable = [];
				$unidtable = [];
				break;
			}
		}
		$this->mBidtable = $bidtable;
		$this->mUnidtable = $unidtable;
	}

	/**
	 * Build a description of the parsed rules from the variant names and
	 * the converter's description separators.
	 *
	 * @private
	 *
	 * @return string
	 */
	public function getRulesDesc() {
		$codesep = $this->mConverter->mDescCodeSep;
		$varsep = $this->mConverter->mDescVarSep;
		$text = '';
		foreach ( $this->mBidtable as $k => $v ) {
			$text .= $this->mConverter->mVariantNames[$k] . "$codesep$v$varsep";
		}
		foreach ( $this->mUnidtable as $k => $a ) {
			foreach ( $a as $from => $to ) {
				$text .= $from . '⇒' . $this->mConverter->mVariantNames[$k] .
					"$codesep$to$varsep";
			}
		}
		return $text;
	}

	/**
	 * Pick the text to display for a variant from the parsed rules.
	 *
	 * @private
	 *
	 * @param string $variant
	 *
	 * @return string|false The text to display, or false if the rules
	 *  provide nothing usable for this variant
	 */
	public function getRuleConvertedStr( $variant ) {
		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;

		if ( count( $bidtable ) + count( $unidtable ) == 0 ) {
			// No rules at all: the raw text is the display text
			return $this->mRules;
		}

		// display current variant in bidirectional array
		$disp = $this->getTextInBidtable( $variant );
		// or display current variant in fallbacks
		if ( $disp === false ) {
			$disp = $this->getTextInBidtable(
				$this->mConverter->getVariantFallbacks( $variant ) );
		}
		// or display current variant in unidirectional array
		if ( $disp === false && array_key_exists( $variant, $unidtable ) ) {
			$disp = array_values( $unidtable[$variant] )[0];
		}
		// or display first text under disable manual convert.
		// isset() guards against variant codes missing from mManualLevel.
		if ( $disp === false
			&& isset( $this->mConverter->mManualLevel[$variant] )
			&& $this->mConverter->mManualLevel[$variant] == 'disable'
		) {
			if ( count( $bidtable ) > 0 ) {
				$disp = array_values( $bidtable )[0];
			} else {
				$disp = array_values( array_values( $unidtable )[0] )[0];
			}
		}
		return $disp;
	}

	/**
	 * Similar to getRuleConvertedStr(), but this prefers to use original
	 * page title if $variant === $this->mConverter->mMainLanguageCode
	 * and may return false in this case (so this title conversion rule
	 * will be ignored and the original title is shown).
	 *
	 * @since 1.22
	 * @param string $variant The variant code to display page title in
	 * @return string|bool The converted title or false if just page name
	 */
	public function getRuleConvertedTitle( $variant ) {
		if ( $variant !== $this->mConverter->mMainLanguageCode ) {
			return $this->getRuleConvertedStr( $variant );
		}

		// If a string targeting exactly this variant is set,
		// use it. Otherwise, just return false, so the real
		// page name can be shown (and because variant === main,
		// there'll be no further automatic conversion).
		$disp = $this->getTextInBidtable( $variant );
		// Compare explicitly: a plain truthiness test would treat the
		// legitimate title "0" as "not set".
		if ( $disp !== false && $disp !== '' ) {
			return $disp;
		}
		if ( array_key_exists( $variant, $this->mUnidtable ) ) {
			$disp = array_values( $this->mUnidtable[$variant] )[0];
		}
		// Assigned above or still false.
		return $disp;
	}

	/**
	 * Generate conversion table for all text.
	 *
	 * Entries are added to mConvTable; the table is reset only when there
	 * are no rules at all.
	 *
	 * @private
	 */
	public function generateConvTable() {
		// Special case optimisation
		if ( !$this->mBidtable && !$this->mUnidtable ) {
			$this->mConvTable = [];
			return;
		}

		$bidtable = $this->mBidtable;
		$unidtable = $this->mUnidtable;
		$manLevel = $this->mConverter->mManualLevel;

		$vmarked = [];
		foreach ( $this->mConverter->mVariants as $v ) {
			/* for bidirectional array
				fill in the missing variants, if any,
				with fallbacks */
			if ( !isset( $bidtable[$v] ) ) {
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $v );
				$vf = $this->getTextInBidtable( $variantFallbacks );
				// Explicit comparison so that the text "0" is a valid fallback
				if ( $vf !== false && $vf !== '' ) {
					$bidtable[$v] = $vf;
				}
			}

			if ( isset( $bidtable[$v] ) ) {
				foreach ( $vmarked as $vo ) {
					// use syntax: -{A|zh:WordZh;zh-tw:WordTw}-
					// or -{H|zh:WordZh;zh-tw:WordTw}-
					// or -{-|zh:WordZh;zh-tw:WordTw}-
					// to introduce a custom mapping between
					// words WordZh and WordTw in the whole text
					if ( $manLevel[$v] == 'bidirectional' ) {
						$this->mConvTable[$v][$bidtable[$vo]] = $bidtable[$v];
					}
					if ( $manLevel[$vo] == 'bidirectional' ) {
						$this->mConvTable[$vo][$bidtable[$v]] = $bidtable[$vo];
					}
				}
				$vmarked[] = $v;
			}
			/* for unidirectional array fill to convert tables */
			if ( ( $manLevel[$v] == 'bidirectional' || $manLevel[$v] == 'unidirectional' )
				&& isset( $unidtable[$v] )
			) {
				if ( isset( $this->mConvTable[$v] ) ) {
					// '+' keeps the left operand on key clashes, so
					// unidirectional rules win over bidirectional ones
					$this->mConvTable[$v] = $unidtable[$v] + $this->mConvTable[$v];
				} else {
					$this->mConvTable[$v] = $unidtable[$v];
				}
			}
		}
	}

	/**
	 * Parse rules and flags.
	 *
	 * Fills mFlags, mRules, the rule tables, mRuleDisplay, mRuleTitle,
	 * mRulesAction and mConvTable.
	 *
	 * @param string|null $variant Variant language code; the converter's
	 *  preferred variant is used when omitted or empty
	 */
	public function parse( $variant = null ) {
		if ( !$variant ) {
			$variant = $this->mConverter->getPreferredVariant();
		}

		$this->parseFlags();
		$flags = $this->mFlags;

		// convert to specified variant
		// syntax: -{zh-hans;zh-hant[;...]|<text to convert>}-
		if ( $this->mVariantFlags ) {
			// check if current variant in flags
			if ( isset( $this->mVariantFlags[$variant] ) ) {
				// then convert <text to convert> to current language
				$this->mRules = $this->mConverter->autoConvert( $this->mRules,
					$variant );
			} else {
				// if current variant no in flags,
				// then we check its fallback variants.
				$variantFallbacks =
					$this->mConverter->getVariantFallbacks( $variant );
				if ( is_array( $variantFallbacks ) ) {
					foreach ( $variantFallbacks as $variantFallback ) {
						// if current variant's fallback exist in flags
						if ( isset( $this->mVariantFlags[$variantFallback] ) ) {
							// then convert <text to convert> to fallback language
							$this->mRules =
								$this->mConverter->autoConvert( $this->mRules,
									$variantFallback );
							break;
						}
					}
				}
			}
			// The text is final now; treat it as raw from here on
			$this->mFlags = $flags = [ 'R' => true ];
		}

		if ( !isset( $flags['R'] ) && !isset( $flags['N'] ) ) {
			// decode => HTML entities modified by Sanitizer::removeHTMLtags
			$this->mRules = str_replace( '=&gt;', '=>', $this->mRules );
			$this->parseRules();
		}
		$rules = $this->mRules;

		if ( !$this->mBidtable && !$this->mUnidtable ) {
			if ( isset( $flags['+'] ) || isset( $flags['-'] ) ) {
				// fill all variants if text in -{A/H/-|text}- is non-empty but without rules
				if ( $rules !== '' ) {
					foreach ( $this->mConverter->mVariants as $v ) {
						$this->mBidtable[$v] = $rules;
					}
				}
			} elseif ( !isset( $flags['N'] ) && !isset( $flags['T'] ) ) {
				$this->mFlags = $flags = [ 'R' => true ];
			}
		}

		// false means "no flag produced a display text" and is turned into
		// an error message below. Flags are applied in insertion order, so
		// a later flag overrides the display text of an earlier one.
		$this->mRuleDisplay = false;
		foreach ( $flags as $flag => $unused ) {
			switch ( $flag ) {
				case 'R':
					// if we don't do content convert, still strip the -{}- tags
					$this->mRuleDisplay = $rules;
					break;
				case 'N':
					// process N flag: output current variant name
					$ruleVar = trim( $rules );
					if ( isset( $this->mConverter->mVariantNames[$ruleVar] ) ) {
						$this->mRuleDisplay = $this->mConverter->mVariantNames[$ruleVar];
					} else {
						$this->mRuleDisplay = '';
					}
					break;
				case 'D':
					// process D flag: output rules description
					$this->mRuleDisplay = $this->getRulesDesc();
					break;
				case 'H':
					// process H,- flag or T only: output nothing
					$this->mRuleDisplay = '';
					break;
				case '-':
					$this->mRulesAction = 'remove';
					$this->mRuleDisplay = '';
					break;
				case '+':
					$this->mRulesAction = 'add';
					$this->mRuleDisplay = '';
					break;
				case 'S':
					$this->mRuleDisplay = $this->getRuleConvertedStr( $variant );
					break;
				case 'T':
					$this->mRuleTitle = $this->getRuleConvertedTitle( $variant );
					$this->mRuleDisplay = '';
					break;
				default:
					// ignore unknown flags (but see error case below)
			}
		}
		if ( $this->mRuleDisplay === false ) {
			$this->mRuleDisplay = '<span class="error">'
				. wfMessage( 'converter-manual-rule-error' )->inContentLanguage()->escaped()
				. '</span>';
		}

		$this->generateConvTable();
	}

	/**
	 * Checks if there are conversion rules.
	 * @return bool True if the rules text is not the empty string
	 */
	public function hasRules() {
		return $this->mRules !== '';
	}

	/**
	 * Get display text on markup -{...}-
	 * @return string
	 */
	public function getDisplay() {
		return $this->mRuleDisplay;
	}

	/**
	 * Get converted title.
	 * @return string|bool The title, or false if no title was set
	 */
	public function getTitle() {
		return $this->mRuleTitle;
	}

	/**
	 * Return how deal with conversion rules.
	 * @return string 'none', 'add' or 'remove'
	 */
	public function getRulesAction() {
		return $this->mRulesAction;
	}

	/**
	 * Get conversion table. (bidirectional and unidirectional
	 * conversion table)
	 * @return array
	 */
	public function getConvTable() {
		return $this->mConvTable;
	}

	/**
	 * Get conversion rules string.
	 * @return string
	 */
	public function getRules() {
		return $this->mRules;
	}

	/**
	 * Get conversion flags.
	 * @return array
	 */
	public function getFlags() {
		return $this->mFlags;
	}
}