Split some Language methods to LanguageNameUtils
[lhc/web/wiklou.git] / tests / phpunit / unit / includes / language / LanguageNameUtilsTestTrait.php
1 <?php
2
3 use MediaWiki\Languages\LanguageNameUtils;
4
// Short global aliases for the LanguageNameUtils option constants, so the data providers in the
// trait below can refer to them without the class prefix.
const AUTONYMS = LanguageNameUtils::AUTONYMS,
	ALL = LanguageNameUtils::ALL,
	DEFINED = LanguageNameUtils::DEFINED,
	SUPPORTED = LanguageNameUtils::SUPPORTED;
9
/**
 * For code shared between LanguageNameUtilsTest and LanguageTest.
 *
 * The using class supplies the abstract adapter methods declared below; every test in this trait
 * goes through those adapters rather than calling the code under test directly.
 */
trait LanguageNameUtilsTestTrait {
	/**
	 * Adapter for the isSupportedLanguage() implementation under test.
	 *
	 * @param string $code
	 * @return bool Compared with assertSame() against the expected value
	 */
	abstract protected function isSupportedLanguage( $code );

	/**
	 * @dataProvider provideIsSupportedLanguage
	 * @covers MediaWiki\Languages\LanguageNameUtils::__construct
	 * @covers MediaWiki\Languages\LanguageNameUtils::isSupportedLanguage
	 * @covers Language::isSupportedLanguage
	 *
	 * @param string $code
	 * @param bool $expected
	 */
	public function testIsSupportedLanguage( $code, $expected ) {
		$this->assertSame( $expected, $this->isSupportedLanguage( $code ) );
	}

	/**
	 * @return array[] Data sets of [ $code, $expected ]
	 */
	public static function provideIsSupportedLanguage() {
		return [
			'en' => [ 'en', true ],
			'fi' => [ 'fi', true ],
			'bunny' => [ 'bunny', false ],
			'qqq' => [ 'qqq', false ],
			'uppercase is not considered supported' => [ 'FI', false ],
		];
	}

	/**
	 * Adapter for the isValidCode() implementation under test.
	 *
	 * @param string $code
	 * @return bool Compared with assertSame() against the expected value
	 */
	abstract protected function isValidCode( $code );

	/**
	 * We don't test that the result is cached, because that should only be noticeable if the
	 * configuration changes in between calls, and 1) that should never happen in normal operation,
	 * 2) if you do it you deserve whatever you get, and 3) once the static Language method is
	 * dropped and the invalid title regex is moved to something injected instead of a static call,
	 * the cache will be undetectable.
	 *
	 * @todo Should we test changes to $wgLegalTitleChars here? Does anybody actually change that?
	 * Is it possible to change it usefully without breaking everything?
	 *
	 * @dataProvider provideIsValidCode
	 * @covers MediaWiki\Languages\LanguageNameUtils::isValidCode
	 * @covers Language::isValidCode
	 *
	 * @param string $code
	 * @param bool $expected
	 */
	public function testIsValidCode( $code, $expected ) {
		$this->assertSame( $expected, $this->isValidCode( $code ) );
	}

	/**
	 * @return array[] Data sets of [ $code, $expected ]
	 */
	public static function provideIsValidCode() {
		$ret = [
			'en' => [ 'en', true ],
			'en-GB' => [ 'en-GB', true ],
			'Funny chars' => [ "%!$()*,-.;=?@^_`~\x80\xA2\xFF+", true ],
			'Percent escape not allowed' => [ 'a%aF', false ],
			'Percent with only one following char is okay' => [ '%a', true ],
			'Percent with non-hex following chars is okay' => [ '%AG', true ],
			'Named char reference "a"' => [ 'a&a', false ],
			'Named char reference "A"' => [ 'a&A', false ],
			'Named char reference "0"' => [ 'a&0', false ],
			'Named char reference non-ASCII' => [ "a&\x92", false ],
			'Numeric char reference' => [ "a&#0", false ],
			'Hex char reference 0' => [ "a&#x0", false ],
			'Hex char reference A' => [ "a&#xA", false ],
			'Lone ampersand is valid for title but not lang code' => [ '&', false ],
			'Ampersand followed by just # is valid for title but not lang code' => [ '&#', false ],
			'Ampersand followed by # and non-x/digit is valid for title but not lang code' =>
				[ '&#a', false ],
		];
		// One extra data set per single-byte character that must make a code invalid. The string
		// contains a backslash followed by a NUL byte ("\\" then "\000").
		$disallowedChars = ":/\\\000&<>'\"";
		foreach ( str_split( $disallowedChars ) as $char ) {
			$ret["Disallowed character $char"] = [ "a{$char}a", false ];
		}
		return $ret;
	}

	/**
	 * Adapter for the isValidBuiltInCode() implementation under test.
	 *
	 * @param string $code
	 * @return bool Compared with assertSame() against the expected value
	 */
	abstract protected function isValidBuiltInCode( $code );

	/**
	 * @dataProvider provideIsValidBuiltInCode
	 * @covers MediaWiki\Languages\LanguageNameUtils::isValidBuiltInCode
	 * @covers Language::isValidBuiltInCode
	 *
	 * @param string $code
	 * @param bool $expected
	 */
	public function testIsValidBuiltInCode( $code, $expected ) {
		$this->assertSame( $expected, $this->isValidBuiltInCode( $code ) );
	}

	/**
	 * Also reused by provideIsKnownLanguageTag() and provideExceptionFromInvalidCode(), which
	 * pick out the data sets whose expected value is false.
	 *
	 * @return array[] Data sets of [ $code, $expected ]
	 */
	public static function provideIsValidBuiltInCode() {
		return [
			'Two letters, lowercase' => [ 'fr', true ],
			'Two letters, uppercase' => [ 'EN', false ],
			'Three letters' => [ 'tyv', true ],
			'With dash' => [ 'be-tarask', true ],
			'With extension (two dashes)' => [ 'be-x-old', true ],
			'Reject underscores' => [ 'be_tarask', false ],
			'One letter' => [ 'a', false ],
			'Only digits' => [ '00', true ],
			'Only dashes' => [ '--', true ],
			'Unreasonably long' => [ str_repeat( 'x', 100 ), true ],
			'qqq' => [ 'qqq', true ],
		];
	}

	/**
	 * Adapter for the isKnownLanguageTag() implementation under test.
	 *
	 * @param string $code
	 * @return bool Compared with assertSame() against the expected value
	 */
	abstract protected function isKnownLanguageTag( $code );

	/**
	 * @dataProvider provideIsKnownLanguageTag
	 * @covers MediaWiki\Languages\LanguageNameUtils::isKnownLanguageTag
	 * @covers Language::isKnownLanguageTag
	 *
	 * @param string $code
	 * @param bool $expected
	 */
	public function testIsKnownLanguageTag( $code, $expected ) {
		$this->assertSame( $expected, $this->isKnownLanguageTag( $code ) );
	}

	/**
	 * @return array[] Data sets of [ $code, $expected ]
	 */
	public static function provideIsKnownLanguageTag() {
		$invalidBuiltInCodes = array_filter( static::provideIsValidBuiltInCode(),
			static function ( $arr ) {
				// If isValidBuiltInCode() returns false, we want to also, but if it returns true,
				// we could still return false from isKnownLanguageTag(), so skip those.
				return !$arr[1];
			}
		);
		return array_merge( $invalidBuiltInCodes, [
			'Simple code' => [ 'fr', true ],
			'An MW legacy tag' => [ 'bat-smg', true ],
			'An internal standard MW name, for which a legacy tag is used externally' =>
				[ 'sgs', true ],
			'Non-existent two-letter code' => [ 'mw', false ],
			'Very invalid language code' => [ 'foo"<bar', false ],
		] );
	}

	/**
	 * Assertion helper implemented by the using test class.
	 *
	 * NOTE(review): presumably applies $options as configuration overrides and then checks the
	 * name lookup for $code against $expected; confirm against the implementing classes.
	 *
	 * @param array $options Settings for this check, e.g. 'ExtraLanguageNames' or
	 *   'UsePigLatinVariant'
	 * @param string $expected Expected language name, or '' if none is expected
	 * @param string $code
	 * @param mixed ...$otherArgs Optionally, $inLanguage and/or $include
	 */
	abstract protected function assertGetLanguageNames(
		array $options, $expected, $code, ...$otherArgs
	);

	/**
	 * Adapter for the getLanguageNames() / fetchLanguageNames() implementation under test.
	 *
	 * @param mixed ...$args Optionally, $inLanguage and/or $include
	 * @return array Language names indexed by code
	 */
	abstract protected function getLanguageNames( ...$args );

	/**
	 * Adapter for the getLanguageName() / fetchLanguageName() implementation under test.
	 *
	 * @param mixed ...$args $code, optionally followed by $inLanguage and/or $include
	 * @return string
	 */
	abstract protected function getLanguageName( ...$args );

	/**
	 * @dataProvider provideGetLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
	 * @covers Language::fetchLanguageNames
	 * @covers Language::fetchLanguageName
	 *
	 * @param string $expected
	 * @param string $code
	 * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
	 */
	public function testGetLanguageNames( $expected, $code, ...$otherArgs ) {
		$this->assertGetLanguageNames( [], $expected, $code, ...$otherArgs );
	}

	/**
	 * @return array[] Data sets of [ $expected, $code, ...$otherArgs ]
	 */
	public static function provideGetLanguageNames() {
		// @todo There are probably lots of interesting tests to add here.
		return [
			'Simple code' => [ 'Deutsch', 'de' ],
			'Simple code in a different language (doesn\'t work without hook)' =>
				[ 'Deutsch', 'de', 'fr' ],
			'Invalid code' => [ '', '&' ],
			'Pig Latin not enabled' => [ '', 'en-x-piglatin', AUTONYMS, ALL ],
			'qqq doesn\'t have a name' => [ '', 'qqq', AUTONYMS, ALL ],
			'An MW legacy tag is recognized' => [ 'žemaitėška', 'bat-smg' ],
			// @todo Is the next test's result desired?
			'An MW legacy tag is not supported' => [ '', 'bat-smg', AUTONYMS, SUPPORTED ],
			'An internal standard name, for which a legacy tag is used externally, is supported' =>
				[ 'žemaitėška', 'sgs', AUTONYMS, SUPPORTED ],
		];
	}

	/**
	 * @dataProvider provideGetLanguageNames_withHook
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
	 * @covers Language::fetchLanguageNames
	 * @covers Language::fetchLanguageName
	 *
	 * @param string $expected Expected return value of getLanguageName()
	 * @param string $code
	 * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
	 */
	public function testGetLanguageNames_withHook( $expected, $code, ...$otherArgs ) {
		// The handler replaces the whole name list for 'de', 'en' and 'fr', and leaves it
		// untouched for any other $inLanguage.
		$this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
			static function ( &$names, $inLanguage ) {
				switch ( $inLanguage ) {
					case 'de':
						$names = [
							'de' => 'Deutsch',
							'en' => 'Englisch',
							'fr' => 'Französisch',
						];
						break;

					case 'en':
						$names = [
							'de' => 'German',
							'en' => 'English',
							'fr' => 'French',
							'sqsqsqsq' => '!!?!',
							'bat-smg' => 'Samogitian',
						];
						break;

					case 'fr':
						$names = [
							'de' => 'allemand',
							'en' => 'anglais',
							// Deliberate mistake (no cedilla)
							'fr' => 'francais',
						];
						break;
				}
			}
		);

		// Really we could dispense with assertGetLanguageNames() and just call
		// testGetLanguageNames() here, but it looks weird to call a test method from another test
		// method.
		$this->assertGetLanguageNames( [], $expected, $code, ...$otherArgs );
	}

	/**
	 * @return array[] Data sets of [ $expected, $code, ...$otherArgs ]
	 */
	public static function provideGetLanguageNames_withHook() {
		return [
			'Simple code in a different language' => [ 'allemand', 'de', 'fr' ],
			'Invalid inLanguage defaults to English' => [ 'German', 'de', '&' ],
			'If inLanguage not provided, default to autonym' => [ 'Deutsch', 'de' ],
			'Hooks ignored for explicitly-requested autonym' => [ 'français', 'fr', 'fr' ],
			'Hooks don\'t make a language supported' => [ '', 'bat-smg', 'en', SUPPORTED ],
			'Hooks don\'t make a language defined' => [ '', 'sqsqsqsq', 'en', DEFINED ],
			'Hooks do make a language name returned with ALL' => [ '!!?!', 'sqsqsqsq', 'en', ALL ],
		];
	}

	/**
	 * @dataProvider provideGetLanguageNames_ExtraLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
	 * @covers Language::fetchLanguageNames
	 * @covers Language::fetchLanguageName
	 *
	 * @param string $expected Expected return value of getLanguageName()
	 * @param string $code
	 * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include.
	 */
	public function testGetLanguageNames_ExtraLanguageNames( $expected, $code, ...$otherArgs ) {
		// The hook and ExtraLanguageNames both provide a name for 'de', so the data sets can
		// tell which source wins in each situation.
		$this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
			static function ( &$names ) {
				$names['de'] = 'die deutsche Sprache';
			}
		);
		$this->assertGetLanguageNames(
			[ 'ExtraLanguageNames' => [ 'de' => 'deutsche Sprache', 'sqsqsqsq' => '!!?!' ] ],
			$expected, $code, ...$otherArgs
		);
	}

	/**
	 * @return array[] Data sets of [ $expected, $code, ...$otherArgs ]
	 */
	public static function provideGetLanguageNames_ExtraLanguageNames() {
		return [
			'Simple extra language name' => [ '!!?!', 'sqsqsqsq' ],
			'Extra language is defined' => [ '!!?!', 'sqsqsqsq', AUTONYMS, DEFINED ],
			'Extra language is not supported' => [ '', 'sqsqsqsq', AUTONYMS, SUPPORTED ],
			'Extra language overrides default' => [ 'deutsche Sprache', 'de' ],
			'Extra language overrides hook for explicitly requested autonym' =>
				[ 'deutsche Sprache', 'de', 'de' ],
			'Hook overrides extra language for non-autonym' =>
				[ 'die deutsche Sprache', 'de', 'fr' ],
		];
	}

	/**
	 * Test that getLanguageNames() defaults to DEFINED, and getLanguageName() defaults to ALL.
	 *
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
	 * @covers Language::fetchLanguageNames
	 * @covers Language::fetchLanguageName
	 */
	public function testGetLanguageNames_parameterDefault() {
		$this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
			static function ( &$names ) {
				$names = [ 'sqsqsqsq' => '!!?!' ];
			}
		);

		// We use 'en' here because the hook is not run if we're requesting autonyms, although in
		// this case (language that isn't defined by MediaWiki itself) that behavior seems wrong.
		// 'en' must be an argument of getLanguageNames(): as a third argument of
		// assertArrayNotHasKey() it would only be the failure message, and the lookup would
		// silently be done for autonyms.
		$this->assertArrayNotHasKey( 'sqsqsqsq', $this->getLanguageNames( 'en' ) );

		$this->assertSame( '!!?!', $this->getLanguageName( 'sqsqsqsq', 'en' ) );
	}

	/**
	 * Test that the returned array is ordered by its keys.
	 *
	 * @dataProvider provideGetLanguageNames_sorted
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
	 * @covers Language::fetchLanguageNames
	 *
	 * @param mixed ...$args To pass to method
	 */
	public function testGetLanguageNames_sorted( ...$args ) {
		$names = $this->getLanguageNames( ...$args );
		$sortedNames = $names;
		ksort( $sortedNames );
		// assertSame() on arrays also compares element order, which is the point of this test.
		$this->assertSame( $sortedNames, $names );
	}

	/**
	 * @return array[] Argument lists for getLanguageNames()
	 */
	public static function provideGetLanguageNames_sorted() {
		return [
			[],
			[ AUTONYMS ],
			[ AUTONYMS, 'mw' ],
			[ AUTONYMS, ALL ],
			[ AUTONYMS, SUPPORTED ],
			[ 'he', 'mw' ],
			[ 'he', ALL ],
			[ 'he', SUPPORTED ],
		];
	}

	/**
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
	 * @covers Language::fetchLanguageNames
	 */
	public function testGetLanguageNames_hookNotCalledForAutonyms() {
		$count = 0;
		$this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
			static function () use ( &$count ) {
				$count++;
			}
		);

		$this->getLanguageNames();
		$this->assertSame( 0, $count, 'Hook must not be called for autonyms' );

		// We test elsewhere that the hook works, but the following verifies that our test is
		// working and $count isn't being incremented above only because we're checking autonyms.
		$this->getLanguageNames( 'fr' );
		$this->assertSame( 1, $count, 'Hook must be called for non-autonyms' );
	}

	/**
	 * @dataProvider provideGetLanguageNames_pigLatin
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
	 * @covers Language::fetchLanguageNames
	 * @covers Language::fetchLanguageName
	 *
	 * @param string $expected
	 * @param mixed ...$otherArgs Optionally, pass $inLanguage and/or $include. The code is always
	 *   'en-x-piglatin' and must not be repeated here.
	 */
	public function testGetLanguageNames_pigLatin( $expected, ...$otherArgs ) {
		$this->setTemporaryHook( 'LanguageGetTranslatedLanguageNames',
			static function ( &$names, $inLanguage ) {
				switch ( $inLanguage ) {
					case 'fr':
						$names = [ 'en-x-piglatin' => 'latin de cochons' ];
						break;

					case 'en-x-piglatin':
						// Deliberately lowercase
						$names = [ 'en-x-piglatin' => 'igpay atinlay' ];
						break;
				}
			}
		);

		$this->assertGetLanguageNames(
			[ 'UsePigLatinVariant' => true ], $expected, 'en-x-piglatin', ...$otherArgs );
	}

	/**
	 * @return array[] Data sets of [ $expected, ...$otherArgs ]; $otherArgs is
	 *   [ $inLanguage, $include ], both optional
	 */
	public static function provideGetLanguageNames_pigLatin() {
		return [
			'Simple test' => [ 'Igpay Atinlay' ],
			'Not supported' => [ '', AUTONYMS, SUPPORTED ],
			'Foreign language' => [ 'latin de cochons', 'fr' ],
			// Only $inLanguage is given. A second 'en-x-piglatin' here would be taken as
			// $include, which is not one of the include modes.
			'Hook doesn\'t override explicit autonym' =>
				[ 'Igpay Atinlay', 'en-x-piglatin' ],
		];
	}

	/**
	 * Just for the sake of completeness, test that ExtraLanguageNames will not override the name
	 * for pig Latin. Nobody actually cares about this and if anything current behavior is probably
	 * wrong, but once we're testing the whole file we may as well be comprehensive.
	 *
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNames
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageNamesUncached
	 * @covers MediaWiki\Languages\LanguageNameUtils::getLanguageName
	 * @covers Language::fetchLanguageNames
	 * @covers Language::fetchLanguageName
	 */
	public function testGetLanguageNames_pigLatinAndExtraLanguageNames() {
		$this->assertGetLanguageNames(
			[
				'UsePigLatinVariant' => true,
				'ExtraLanguageNames' => [ 'en-x-piglatin' => 'igpay atinlay' ]
			],
			'Igpay Atinlay',
			'en-x-piglatin'
		);
	}

	/**
	 * Adapter for the getFileName() implementation under test. Static so that the closure built
	 * in provideExceptionFromInvalidCode() can reach it through static::.
	 *
	 * @param mixed ...$args $prefix, $code and optionally $suffix
	 * @return string
	 */
	abstract protected static function getFileName( ...$args );

	/**
	 * @dataProvider provideGetFileName
	 * @covers MediaWiki\Languages\LanguageNameUtils::getFileName
	 * @covers Language::getFileName
	 *
	 * @param string $expected
	 * @param mixed ...$args To pass to method
	 */
	public function testGetFileName( $expected, ...$args ) {
		$this->assertSame( $expected, $this->getFileName( ...$args ) );
	}

	/**
	 * @return array[] Data sets of [ $expected, ...$args ]
	 */
	public static function provideGetFileName() {
		return [
			'Simple case' => [ 'MessagesXx.php', 'Messages', 'xx' ],
			'With extension' => [ 'MessagesXx.ext', 'Messages', 'xx', '.ext' ],
			'Replacing dashes' => [ '!__?', '!', '--', '?' ],
			'Empty prefix and extension' => [ 'Xx', '', 'xx', '' ],
			'Uppercase only first letter' => [ 'Messages_a.php', 'Messages', '-a' ],
		];
	}

	/**
	 * Adapter for the getMessagesFileName() implementation under test.
	 *
	 * @param string $code
	 * @return string
	 */
	abstract protected function getMessagesFileName( $code );

	/**
	 * @dataProvider provideGetMessagesFileName
	 * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName
	 * @covers Language::getMessagesFileName
	 *
	 * @param string $code
	 * @param string $expected
	 */
	public function testGetMessagesFileName( $code, $expected ) {
		$this->assertSame( $expected, $this->getMessagesFileName( $code ) );
	}

	/**
	 * @return array[] Data sets of [ $code, $expected ]
	 */
	public static function provideGetMessagesFileName() {
		global $IP;
		return [
			'Simple case' => [ 'en', "$IP/languages/messages/MessagesEn.php" ],
			'Replacing dashes' => [ '--', "$IP/languages/messages/Messages__.php" ],
			'Uppercase only first letter' => [ '-a', "$IP/languages/messages/Messages_a.php" ],
		];
	}

	/**
	 * Test that the 'Language::getMessagesFileName' hook is run exactly once, receives the code
	 * and the default path, and can replace the returned path.
	 *
	 * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName
	 * @covers Language::getMessagesFileName
	 */
	public function testGetMessagesFileName_withHook() {
		$called = 0;

		// Not a static closure: the handler asserts through $this.
		$this->setTemporaryHook( 'Language::getMessagesFileName',
			function ( $code, &$file ) use ( &$called ) {
				global $IP;

				$called++;

				$this->assertSame( 'ab-cd', $code );
				$this->assertSame( "$IP/languages/messages/MessagesAb_cd.php", $file );
				$file = 'bye-bye';
			}
		);

		$this->assertSame( 'bye-bye', $this->getMessagesFileName( 'ab-cd' ) );
		$this->assertSame( 1, $called );
	}

	/**
	 * Adapter for the getJsonMessagesFileName() implementation under test.
	 *
	 * @param string $code
	 * @return string
	 */
	abstract protected function getJsonMessagesFileName( $code );

	/**
	 * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName
	 * @covers Language::getJsonMessagesFileName
	 */
	public function testGetJsonMessagesFileName() {
		global $IP;

		// Not so much to test here, one test seems to be enough
		$expected = "$IP/languages/i18n/en--123.json";
		$this->assertSame( $expected, $this->getJsonMessagesFileName( 'en--123' ) );
	}

	/**
	 * getFileName, getMessagesFileName, and getJsonMessagesFileName all throw if they get an
	 * invalid code. To save boilerplate, test them all in one method.
	 *
	 * @dataProvider provideExceptionFromInvalidCode
	 * @covers MediaWiki\Languages\LanguageNameUtils::getFileName
	 * @covers MediaWiki\Languages\LanguageNameUtils::getMessagesFileName
	 * @covers MediaWiki\Languages\LanguageNameUtils::getJsonMessagesFileName
	 * @covers Language::getFileName
	 * @covers Language::getMessagesFileName
	 * @covers Language::getJsonMessagesFileName
	 *
	 * @param callable $callback Will throw when passed $code
	 * @param string $code
	 */
	public function testExceptionFromInvalidCode( $callback, $code ) {
		// expectException()/expectExceptionMessage() replace setExpectedException(), which
		// PHPUnit removed in 6.0. Both must be set before the throwing call.
		$this->expectException( MWException::class );
		$this->expectExceptionMessage( "Invalid language code \"$code\"" );

		$callback( $code );
	}

	/**
	 * Builds three data sets (one per file name method) for every code that
	 * provideIsValidBuiltInCode() marks as invalid.
	 *
	 * @return array[] Data sets of [ $callback, $code ]
	 */
	public static function provideExceptionFromInvalidCode() {
		$ret = [];
		foreach ( static::provideIsValidBuiltInCode() as $desc => list( $code, $valid ) ) {
			if ( $valid ) {
				// Won't get an exception from this one
				continue;
			}

			// For getFileName, we define an anonymous function because of the extra first param
			$ret["getFileName: $desc"] = [
				function ( $code ) {
					return static::getFileName( 'Messages', $code );
				},
				$code
			];

			$ret["getMessagesFileName: $desc"] =
				[ [ static::class, 'getMessagesFileName' ], $code ];

			$ret["getJsonMessagesFileName: $desc"] =
				[ [ static::class, 'getJsonMessagesFileName' ], $code ];
		}
		return $ret;
	}
}