Title: Title::getSubpage should not lose the interwiki prefix
[lhc/web/wiklou.git] / tests / phpunit / languages / LanguageCodeTest.php
1 <?php
2
/**
 * Unit tests for the static helpers of LanguageCode: the deprecated code
 * mapping, the replacement of deprecated codes and the BCP 47 formatting.
 *
 * @covers LanguageCode
 * @group Language
 *
 * @author Thiemo Kreuz
 */
class LanguageCodeTest extends PHPUnit\Framework\TestCase {

	use MediaWikiCoversValidator;

	/**
	 * The class must be instantiable without any constructor arguments.
	 */
	public function testConstructor() {
		$instance = new LanguageCode();

		$this->assertInstanceOf( LanguageCode::class, $instance );
	}

	/**
	 * The deprecated code mapping must be an array of non-empty strings keyed
	 * by non-empty strings, and must not mention codes that are either
	 * MediaWiki specific or still valid.
	 */
	public function testGetDeprecatedCodeMapping() {
		$map = LanguageCode::getDeprecatedCodeMapping();

		// Plain is_array() check instead of assertInternalType(), which is
		// deprecated in PHPUnit 8 and removed in PHPUnit 9.
		$this->assertTrue( is_array( $map ), 'The mapping must be an array' );
		$this->assertContainsOnly( 'string', array_keys( $map ) );
		$this->assertArrayNotHasKey( '', $map );
		$this->assertContainsOnly( 'string', $map );
		$this->assertNotContains( '', $map );

		// Codes special to MediaWiki should never appear in a map of "deprecated" codes
		$this->assertArrayNotHasKey( 'qqq', $map, 'documentation' );
		$this->assertNotContains( 'qqq', $map, 'documentation' );
		$this->assertArrayNotHasKey( 'qqx', $map, 'debug code' );
		$this->assertNotContains( 'qqx', $map, 'debug code' );

		// Valid language codes that are currently not "deprecated"
		$this->assertArrayNotHasKey( 'bh', $map, 'family of Bihari languages' );
		$this->assertArrayNotHasKey( 'no', $map, 'family of Norwegian languages' );
		$this->assertArrayNotHasKey( 'simple', $map );
	}

	/**
	 * A deprecated code is replaced, a current code is returned unchanged,
	 * and null is passed through as null.
	 */
	public function testReplaceDeprecatedCodes() {
		// Strict assertions: a loose assertEquals( null, ... ) would also
		// accept an empty string.
		$this->assertSame( 'gsw', LanguageCode::replaceDeprecatedCodes( 'als' ) );
		$this->assertSame( 'gsw', LanguageCode::replaceDeprecatedCodes( 'gsw' ) );
		$this->assertNull( LanguageCode::replaceDeprecatedCodes( null ) );
	}

	/**
	 * Test @see LanguageCode::bcp47().
	 *
	 * BCP 47 explicitly states that language codes are case insensitive, so
	 * every data set is checked three times: as given, in all lower case and
	 * in all upper case. All three spellings must produce the same expected
	 * formatting.
	 *
	 * @see https://tools.ietf.org/html/bcp47
	 * @dataProvider provideLanguageCodes
	 *
	 * @param string $code Language code handed to LanguageCode::bcp47()
	 * @param string $expected Expected formatted code
	 */
	public function testBcp47( $code, $expected ) {
		$this->assertSame( $expected, LanguageCode::bcp47( $code ),
			"Applying BCP 47 standard to '$code'"
		);

		$code = strtolower( $code );
		$this->assertSame( $expected, LanguageCode::bcp47( $code ),
			"Applying BCP 47 standard to lower case '$code'"
		);

		$code = strtoupper( $code );
		$this->assertSame( $expected, LanguageCode::bcp47( $code ),
			"Applying BCP 47 standard to upper case '$code'"
		);
	}

	/**
	 * Data sets for testBcp47().
	 *
	 * Array format is ($code, $expected)
	 *
	 * @return array[]
	 */
	public static function provideLanguageCodes() {
		return [
			// Extracted from BCP 47 (list not exhaustive)
			# 2.1.1
			[ 'en-ca-x-ca', 'en-CA-x-ca' ],
			[ 'sgn-be-fr', 'sgn-BE-FR' ],
			[ 'az-latn-x-latn', 'az-Latn-x-latn' ],
			# 2.2
			[ 'sr-Latn-RS', 'sr-Latn-RS' ],
			[ 'az-arab-ir', 'az-Arab-IR' ],

			# 2.2.5
			[ 'sl-nedis', 'sl-nedis' ],
			[ 'de-ch-1996', 'de-CH-1996' ],

			# 2.2.6
			[
				'en-latn-gb-boont-r-extended-sequence-x-private',
				'en-Latn-GB-boont-r-extended-sequence-x-private'
			],

			// Examples from BCP 47 Appendix A
			# Simple language subtag:
			[ 'DE', 'de' ],
			[ 'fR', 'fr' ],
			[ 'ja', 'ja' ],

			# Language subtag plus script subtag:
			[ 'zh-hans', 'zh-Hans' ],
			[ 'sr-cyrl', 'sr-Cyrl' ],
			[ 'sr-latn', 'sr-Latn' ],

			# Extended language subtags and their primary language subtag
			# counterparts:
			[ 'zh-cmn-hans-cn', 'zh-cmn-Hans-CN' ],
			[ 'cmn-hans-cn', 'cmn-Hans-CN' ],
			[ 'zh-yue-hk', 'zh-yue-HK' ],
			[ 'yue-hk', 'yue-HK' ],

			# Language-Script-Region:
			[ 'zh-hans-cn', 'zh-Hans-CN' ],
			[ 'sr-latn-RS', 'sr-Latn-RS' ],

			# Language-Variant:
			[ 'sl-rozaj', 'sl-rozaj' ],
			[ 'sl-rozaj-biske', 'sl-rozaj-biske' ],
			[ 'sl-nedis', 'sl-nedis' ],

			# Language-Region-Variant:
			[ 'de-ch-1901', 'de-CH-1901' ],
			[ 'sl-it-nedis', 'sl-IT-nedis' ],

			# Language-Script-Region-Variant:
			[ 'hy-latn-it-arevela', 'hy-Latn-IT-arevela' ],

			# Language-Region:
			[ 'de-de', 'de-DE' ],
			[ 'en-us', 'en-US' ],
			[ 'es-419', 'es-419' ],

			# Private use subtags:
			[ 'de-ch-x-phonebk', 'de-CH-x-phonebk' ],
			[ 'az-arab-x-aze-derbend', 'az-Arab-x-aze-derbend' ],
			/**
			 * Note on the previous data set: BCP 47 itself spells this
			 * example "az-Arab-x-AZE-derbend", with the private use subtag
			 * AZE in upper case. The expectation above keeps that subtag in
			 * lower case instead. To mirror the spelling used in BCP 47 the
			 * data set would have to be:
			 * [ 'az-arab-x-aze-derbend', 'az-Arab-x-AZE-derbend' ],
			 */

			# Private use registry values:
			[ 'x-whatever', 'x-whatever' ],
			[ 'qaa-qaaa-qm-x-southern', 'qaa-Qaaa-QM-x-southern' ],
			[ 'de-qaaa', 'de-Qaaa' ],
			[ 'sr-latn-qm', 'sr-Latn-QM' ],
			[ 'sr-qaaa-rs', 'sr-Qaaa-RS' ],

			# Tags that use extensions
			[ 'en-us-u-islamcal', 'en-US-u-islamcal' ],
			[ 'zh-cn-a-myext-x-private', 'zh-CN-a-myext-x-private' ],
			[ 'en-a-myext-b-another', 'en-a-myext-b-another' ],

			# Invalid:
			// de-419-DE
			// a-DE
			// ar-a-aaa-b-bbb-a-ccc

			# Non-standard and deprecated language codes used by MediaWiki
			[ 'als', 'gsw' ],
			[ 'bat-smg', 'sgs' ],
			[ 'be-x-old', 'be-tarask' ],
			[ 'fiu-vro', 'vro' ],
			[ 'roa-rup', 'rup' ],
			[ 'zh-classical', 'lzh' ],
			[ 'zh-min-nan', 'nan' ],
			[ 'zh-yue', 'yue' ],
			[ 'cbk-zam', 'cbk' ],
			[ 'de-formal', 'de-x-formal' ],
			[ 'eml', 'egl' ],
			[ 'en-rtl', 'en-x-rtl' ],
			[ 'es-formal', 'es-x-formal' ],
			[ 'hu-formal', 'hu-x-formal' ],
			[ 'kk-Arab', 'kk-Arab' ],
			[ 'kk-Cyrl', 'kk-Cyrl' ],
			[ 'kk-Latn', 'kk-Latn' ],
			[ 'map-bms', 'jv-x-bms' ],
			[ 'mo', 'ro-Cyrl-MD' ],
			[ 'nrm', 'nrf' ],
			[ 'nl-informal', 'nl-x-informal' ],
			[ 'roa-tara', 'nap-x-tara' ],
			[ 'simple', 'en-simple' ],
			[ 'sr-ec', 'sr-Cyrl' ],
			[ 'sr-el', 'sr-Latn' ],
			[ 'zh-cn', 'zh-Hans-CN' ],
			[ 'zh-sg', 'zh-Hans-SG' ],
			[ 'zh-my', 'zh-Hans-MY' ],
			[ 'zh-tw', 'zh-Hant-TW' ],
			[ 'zh-hk', 'zh-Hant-HK' ],
			[ 'zh-mo', 'zh-Hant-MO' ],
			[ 'zh-hans', 'zh-Hans' ],
			[ 'zh-hant', 'zh-Hant' ],
		];
	}

}