Revert "Strip Unicode 6.3.0 directional formatting characters from title"
[lhc/web/wiklou.git] / tests / phpunit / includes / title / MediaWikiTitleCodecTest.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @author Daniel Kinzler
20 */
21
22 /**
23 * @covers MediaWikiTitleCodec
24 *
25 * @group Title
26 * @group Database
27 * ^--- needed because of global state (presumably the interwiki lookup that setUp() stubs through wgHooks; TODO confirm)
28 */
29 class MediaWikiTitleCodecTest extends MediaWikiTestCase {
30
/**
 * Sets the configuration globals used by these tests via setMwGlobals(),
 * registers an InterwikiLoadPrefix handler that recognises exactly two
 * prefixes ('localtestiw' and 'remotetestiw'), and switches both the user
 * language and the content language to 'en'.
 */
public function setUp() {
	parent::setUp();

	// NOTE: this is why global state is evil.
	// TODO: refactor access to the interwiki codes so it can be injected.
	$loadTestInterwiki = function ( $prefix, &$data ) {
		// For the two known prefixes the interwiki "URL" is the prefix itself.
		if ( in_array( $prefix, [ 'localtestiw', 'remotetestiw' ], true ) ) {
			$data = [ 'iw_url' => $prefix ];
		}

		// false is returned for every prefix, recognised or not.
		return false;
	};

	$globals = [
		'wgAllowUserJs' => false,
		'wgDefaultLanguageVariant' => false,
		'wgMetaNamespace' => 'Project',
		'wgLocalInterwikis' => [ 'localtestiw' ],
		'wgCapitalLinks' => true,
		'wgHooks' => [
			'InterwikiLoadPrefix' => [ $loadTestInterwiki ],
		],
	];

	$this->setMwGlobals( $globals );
	$this->setUserLang( 'en' );
	$this->setContentLang( 'en' );
}
59
/**
 * Builds a GenderCache mock (original constructor disabled) whose
 * getGenderOf() answers 'female' when the leading run of characters in the
 * user name - everything before the first hyphen, space or underscore -
 * ends in "a", and 'male' otherwise.
 *
 * @return GenderCache
 */
private function getGenderCache() {
	$guessGender = function ( $userName ) {
		if ( preg_match( '/^[^- _]+a( |_|$)/u', $userName ) ) {
			return 'female';
		}

		return 'male';
	};

	$mock = $this->getMockBuilder( GenderCache::class )
		->disableOriginalConstructor()
		->getMock();

	$mock->expects( $this->any() )
		->method( 'getGenderOf' )
		->will( $this->returnCallback( $guessGender ) );

	return $mock;
}
79
/**
 * Creates the codec under test for the given language code, wired to the
 * mock gender cache from getGenderCache().
 *
 * @param string $lang Language code handed to Language::factory()
 * @return MediaWikiTitleCodec
 */
protected function makeCodec( $lang ) {
	$language = Language::factory( $lang );

	// The language object can come from a cache, which does not respect
	// the test settings, so its namespaces are reset before use.
	$language->resetNamespaces();

	return new MediaWikiTitleCodec( $language, $this->getGenderCache() );
}
87
/**
 * Data sets for testFormat().
 *
 * Each entry is [ namespace, text, fragment, interwiki, language code,
 * expected formatted title, (optional) expected title after a
 * parse/format round trip ].
 *
 * @return array[]
 */
public static function provideFormat() {
	// "Foo" and "bar" separated by a soft hyphen (U+00AD), LRM/RLM
	// (U+200E, U+200F) and the bidi controls U+202A through U+202E.
	$withInvisibles = "Foo\xC2\xAD\xE2\x80\x8E\xE2\x80\x8F\xE2\x80\xAA\xE2\x80\xAB" .
		"\xE2\x80\xAC\xE2\x80\xAD\xE2\x80\xAEbar";

	$cases = [];

	$cases[] = [ NS_MAIN, 'Foo_Bar', '', '', 'en', 'Foo Bar' ];
	$cases[] = [
		NS_USER, 'Hansi_Maier', 'stuff_and_so_on', '', 'en',
		'User:Hansi Maier#stuff and so on'
	];
	$cases[] = [ false, 'Hansi_Maier', '', '', 'en', 'Hansi Maier' ];

	// Formatting leaves the text as it is; the round trip yields a
	// different, normalized title.
	$cases[] = [
		NS_USER_TALK, 'hansi__maier', '', '', 'en',
		'User talk:hansi maier',
		'User talk:Hansi maier'
	];

	// getGenderCache() provides a mock that considers first
	// names ending in "a" to be female.
	$cases[] = [ NS_USER, 'Lisa_Müller', '', '', 'de', 'Benutzerin:Lisa Müller' ];

	$cases[] = [ NS_MAIN, 'FooBar', '', 'remotetestiw', 'en', 'remotetestiw:FooBar' ];

	// Soft hyphen and Unicode bidi override characters survive formatting
	// but are stripped by the round trip through parseTitle().
	$cases[] = [ NS_MAIN, $withInvisibles, '', '', 'en', $withInvisibles, 'Foobar' ];

	return $cases;
}
114
/**
 * Formats a title from its parts, then parses the result and formats it
 * again to check the round trip.
 *
 * @dataProvider provideFormat
 *
 * @param int|bool $namespace
 * @param string $text
 * @param string $fragment
 * @param string $interwiki
 * @param string $lang Language code for the codec
 * @param string $expected Expected output of the first formatTitle() call
 * @param string|null $normalized Expected output after the round trip;
 *  null means "same as $expected"
 */
public function testFormat( $namespace, $text, $fragment, $interwiki, $lang, $expected,
	$normalized = null
) {
	$codec = $this->makeCodec( $lang );

	$formatted = $codec->formatTitle( $namespace, $text, $fragment, $interwiki );
	$this->assertEquals( $expected, $formatted, 'formatted' );

	// Round trip: parse what was just formatted, then format it once more.
	$reparsed = $codec->parseTitle( $formatted, NS_MAIN );
	$roundTripped = $codec->formatTitle(
		$reparsed->getNamespace(),
		$reparsed->getText(),
		$reparsed->getFragment(),
		$reparsed->getInterwiki()
	);

	$expectedAfterRoundTrip = $normalized === null ? $expected : $normalized;
	$this->assertEquals( $expectedAfterRoundTrip, $roundTripped, 'normalized after round trip' );
}
141
/**
 * Data sets for testGetText().
 *
 * Each entry is [ namespace, DB key, fragment, language code, expected text ].
 * The expected text contains neither the namespace name nor the fragment.
 *
 * @return array[]
 */
public static function provideGetText() {
	$cases = [];

	$cases[] = [ NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ];
	$cases[] = [ NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'Hansi Maier' ];

	return $cases;
}
148
/**
 * Checks getText() for a TitleValue built from the given parts.
 *
 * @dataProvider provideGetText
 *
 * @param int $namespace
 * @param string $dbkey
 * @param string $fragment
 * @param string $lang Language code for the codec
 * @param string $expected
 */
public function testGetText( $namespace, $dbkey, $fragment, $lang, $expected ) {
	$codec = $this->makeCodec( $lang );
	$text = $codec->getText( new TitleValue( $namespace, $dbkey, $fragment ) );

	$this->assertEquals( $expected, $text );
}
160
/**
 * Data sets for testGetPrefixedText().
 *
 * Each entry is [ namespace, DB key, fragment, language code,
 * expected prefixed text ].
 *
 * @return array[]
 */
public static function provideGetPrefixedText() {
	$cases = [];

	$cases[] = [ NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ];
	$cases[] = [ NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en', 'User:Hansi Maier' ];

	// No capitalization or normalization is applied while formatting!
	$cases[] = [ NS_USER_TALK, 'hansi__maier', '', 'en', 'User talk:hansi maier' ];

	// getGenderCache() provides a mock that considers first
	// names ending in "a" to be female.
	$cases[] = [ NS_USER, 'Lisa_Müller', '', 'de', 'Benutzerin:Lisa Müller' ];

	// A namespace index this high yields an empty namespace name in
	// front of the colon.
	$cases[] = [ 1000000, 'Invalid_namespace', '', 'en', ':Invalid namespace' ];

	return $cases;
}
175
/**
 * Checks getPrefixedText() for a TitleValue built from the given parts.
 *
 * @dataProvider provideGetPrefixedText
 *
 * @param int $namespace
 * @param string $dbkey
 * @param string $fragment
 * @param string $lang Language code for the codec
 * @param string $expected
 */
public function testGetPrefixedText( $namespace, $dbkey, $fragment, $lang, $expected ) {
	$subject = new TitleValue( $namespace, $dbkey, $fragment );
	$prefixedText = $this->makeCodec( $lang )->getPrefixedText( $subject );

	$this->assertEquals( $expected, $prefixedText );
}
187
/**
 * Data sets for testGetPrefixedDBkey().
 *
 * Each entry is [ namespace, DB key, fragment, interwiki, language code,
 * expected prefixed DB key ].
 *
 * @return array[]
 */
public static function provideGetPrefixedDBkey() {
	$cases = [];

	$cases[] = [ NS_MAIN, 'Foo_Bar', '', '', 'en', 'Foo_Bar' ];
	$cases[] = [ NS_USER, 'Hansi_Maier', 'stuff_and_so_on', '', 'en', 'User:Hansi_Maier' ];

	// No capitalization or normalization is applied while formatting!
	$cases[] = [ NS_USER_TALK, 'hansi__maier', '', '', 'en', 'User_talk:hansi__maier' ];

	// getGenderCache() provides a mock that considers first
	// names ending in "a" to be female.
	$cases[] = [ NS_USER, 'Lisa_Müller', '', '', 'de', 'Benutzerin:Lisa_Müller' ];

	// The interwiki prefix is put in front of the DB key.
	$cases[] = [ NS_MAIN, 'Remote_page', '', 'remotetestiw', 'en', 'remotetestiw:Remote_page' ];

	// non-existent namespace
	$cases[] = [ 10000000, 'Foobar', '', '', 'en', ':Foobar' ];

	return $cases;
}
206
/**
 * Checks getPrefixedDBkey() for a TitleValue built from the given parts.
 *
 * @dataProvider provideGetPrefixedDBkey
 *
 * @param int $namespace
 * @param string $dbkey
 * @param string $fragment
 * @param string $interwiki
 * @param string $lang Language code for the codec
 * @param string $expected
 */
public function testGetPrefixedDBkey( $namespace, $dbkey, $fragment,
	$interwiki, $lang, $expected
) {
	$codec = $this->makeCodec( $lang );
	$subject = new TitleValue( $namespace, $dbkey, $fragment, $interwiki );

	$this->assertEquals( $expected, $codec->getPrefixedDBkey( $subject ) );
}
220
/**
 * Data sets for testGetFullText().
 *
 * Each entry is [ namespace, DB key, fragment, language code,
 * expected full text ]. A non-empty fragment shows up after a "#".
 *
 * @return array[]
 */
public static function provideGetFullText() {
	$cases = [];

	$cases[] = [ NS_MAIN, 'Foo_Bar', '', 'en', 'Foo Bar' ];
	$cases[] = [
		NS_USER, 'Hansi_Maier', 'stuff_and_so_on', 'en',
		'User:Hansi Maier#stuff and so on'
	];

	// No capitalization or normalization is applied while formatting!
	$cases[] = [ NS_USER_TALK, 'hansi__maier', '', 'en', 'User talk:hansi maier' ];

	return $cases;
}
230
/**
 * Checks getFullText() for a TitleValue built from the given parts.
 *
 * @dataProvider provideGetFullText
 *
 * @param int $namespace
 * @param string $dbkey
 * @param string $fragment
 * @param string $lang Language code for the codec
 * @param string $expected
 */
public function testGetFullText( $namespace, $dbkey, $fragment, $lang, $expected ) {
	$codec = $this->makeCodec( $lang );
	$fullText = $codec->getFullText( new TitleValue( $namespace, $dbkey, $fragment ) );

	$this->assertEquals( $expected, $fullText );
}
242
/**
 * Data sets for testParseTitle().
 *
 * Each entry is [ input text, default namespace, language code,
 * (optional) expectation ]. The expectation is either a TitleValue or a
 * plain DB key string; testParseTitle() turns a string into a TitleValue
 * in NS_MAIN and derives the expectation from the input text when it is
 * left out.
 *
 * @return array[]
 */
public static function provideParseTitle() {
	// TODO: test capitalization and trimming
	// TODO: test unicode normalization

	$cases = [];

	$cases[] = [ ' : Hansi_Maier _ ', NS_MAIN, 'en',
		new TitleValue( NS_MAIN, 'Hansi_Maier', '' ) ];
	// IPv6 shorthand in the user namespace comes back fully expanded.
	$cases[] = [ 'User:::1', NS_MAIN, 'de',
		new TitleValue( NS_USER, '0:0:0:0:0:0:0:1', '' ) ];
	$cases[] = [ ' lisa Müller', NS_USER, 'de',
		new TitleValue( NS_USER, 'Lisa_Müller', '' ) ];
	// The gendered prefix "benutzerin" is parsed as the user namespace.
	$cases[] = [ 'benutzerin:lisa Müller#stuff', NS_MAIN, 'de',
		new TitleValue( NS_USER, 'Lisa_Müller', 'stuff' ) ];

	// Explicit prefix, default namespace, and a leading colon.
	$cases[] = [ ':Category:Quux', NS_MAIN, 'en',
		new TitleValue( NS_CATEGORY, 'Quux', '' ) ];
	$cases[] = [ 'Category:Quux', NS_MAIN, 'en',
		new TitleValue( NS_CATEGORY, 'Quux', '' ) ];
	$cases[] = [ 'Category:Quux', NS_CATEGORY, 'en',
		new TitleValue( NS_CATEGORY, 'Quux', '' ) ];
	$cases[] = [ 'Quux', NS_CATEGORY, 'en',
		new TitleValue( NS_CATEGORY, 'Quux', '' ) ];
	$cases[] = [ ':Quux', NS_CATEGORY, 'en',
		new TitleValue( NS_MAIN, 'Quux', '' ) ];

	// Spaces and underscores in the input.
	$cases[] = [ 'a b c', NS_MAIN, 'en',
		new TitleValue( NS_MAIN, 'A_b_c' ) ];
	$cases[] = [ ' a b c ', NS_MAIN, 'en',
		new TitleValue( NS_MAIN, 'A_b_c' ) ];
	$cases[] = [ ' _ Foo __ Bar_ _', NS_MAIN, 'en',
		new TitleValue( NS_MAIN, 'Foo_Bar' ) ];

	// NOTE: cases copied from TitleTest::testSecureAndSplit. Keep in sync.
	// These carry no explicit expectation.
	$unchangedTexts = [ 'Sandbox', 'A "B"', 'A \'B\'', '.com', '~', '"', '\'' ];
	foreach ( $unchangedTexts as $text ) {
		$cases[] = [ $text, NS_MAIN, 'en' ];
	}

	$cases[] = [ 'Talk:Sandbox', NS_MAIN, 'en',
		new TitleValue( NS_TALK, 'Sandbox' ) ];
	$cases[] = [ 'Talk:Foo:Sandbox', NS_MAIN, 'en',
		new TitleValue( NS_TALK, 'Foo:Sandbox' ) ];
	$cases[] = [ 'File:Example.svg', NS_MAIN, 'en',
		new TitleValue( NS_FILE, 'Example.svg' ) ];
	$cases[] = [ 'File_talk:Example.svg', NS_MAIN, 'en',
		new TitleValue( NS_FILE_TALK, 'Example.svg' ) ];
	$cases[] = [ 'Foo/.../Sandbox', NS_MAIN, 'en',
		'Foo/.../Sandbox' ];
	$cases[] = [ 'Sandbox/...', NS_MAIN, 'en',
		'Sandbox/...' ];
	$cases[] = [ 'A~~', NS_MAIN, 'en',
		'A~~' ];

	// Length is 256 total, but only title part matters
	$cases[] = [ 'Category:' . str_repeat( 'x', 248 ), NS_MAIN, 'en',
		new TitleValue( NS_CATEGORY,
			'X' . str_repeat( 'x', 247 ) ) ];
	$cases[] = [ str_repeat( 'x', 252 ), NS_MAIN, 'en',
		'X' . str_repeat( 'x', 251 ) ];

	return $cases;
}
309
/**
 * Parses $text and compares the result with the expected TitleValue.
 *
 * @dataProvider provideParseTitle
 *
 * @param string $text Input handed to parseTitle()
 * @param int $ns Default namespace handed to parseTitle()
 * @param string $lang Language code for the codec
 * @param TitleValue|string|null $title Expected result. A string is taken
 *  as a DB key in NS_MAIN; null means the trimmed input with spaces turned
 *  into underscores, again in NS_MAIN.
 */
public function testParseTitle( $text, $ns, $lang, $title = null ) {
	$expected = $title === null
		? str_replace( ' ', '_', trim( $text ) )
		: $title;

	if ( is_string( $expected ) ) {
		$expected = new TitleValue( NS_MAIN, $expected, '' );
	}

	$parsed = $this->makeCodec( $lang )->parseTitle( $text, $ns );

	$this->assertEquals( $expected, $parsed );
}
327
/**
 * Data sets for testParseTitle_invalid(): one input text per entry, each
 * expected to be rejected with a MalformedTitleException.
 *
 * @return array[]
 */
public static function provideParseTitle_invalid() {
	// TODO: test unicode errors

	$invalidTexts = [
		'#',
		'::',
		'::xx',
		'::##',
		' :: x',

		'Talk:File:Foo.jpg',
		'Talk:localtestiw:Foo',
		'::1', // only valid in user namespace
		'User::x', // leading ":" in a user name is only valid for IPv6 addresses

		// NOTE: cases copied from TitleTest::testSecureAndSplit. Keep in sync.
		'',
		':',
		'__ __',
		' __ ',
		// Bad characters forbidden regardless of wgLegalTitleChars
		'A [ B',
		'A ] B',
		'A { B',
		'A } B',
		'A < B',
		'A > B',
		'A | B',
		// URL encoding
		'A%20B',
		'A%23B',
		'A%2523B',
		// XML/HTML character entity references
		// Note: Commented out because they are not marked invalid by the PHP test as
		// Title::newFromText runs Sanitizer::decodeCharReferencesAndNormalize first.
		// 'A &eacute; B',
		// 'A &#233; B',
		// 'A &#x00E9; B',
		// Subject of NS_TALK does not roundtrip to NS_MAIN
		'Talk:File:Example.svg',
		// Directory navigation
		'.',
		'..',
		'./Sandbox',
		'../Sandbox',
		'Foo/./Sandbox',
		'Foo/../Sandbox',
		'Sandbox/.',
		'Sandbox/..',
		// Tilde
		'A ~~~ Name',
		'A ~~~~ Signature',
		'A ~~~~~ Timestamp',
		str_repeat( 'x', 256 ),
		// Namespace prefix without actual title
		'Talk:',
		'Category: ',
		'Category: #bar',
	];

	// Wrap every text in its own single-argument data set, keeping the order.
	return array_map(
		function ( $text ) {
			return [ $text ];
		},
		$invalidTexts
	);
}
388
/**
 * Expects parseTitle() to throw a MalformedTitleException for $text when
 * parsed with the 'en' codec and NS_MAIN as default namespace.
 *
 * @dataProvider provideParseTitle_invalid
 *
 * @param string $text Input expected to be rejected
 */
public function testParseTitle_invalid( $text ) {
	// The expectation has to be registered before the call that throws.
	$this->setExpectedException( MalformedTitleException::class );

	$this->makeCodec( 'en' )->parseTitle( $text, NS_MAIN );
}
398
/**
 * Data sets for testGetNamespaceName().
 *
 * Each entry is [ namespace, title text, language code,
 * expected namespace name ].
 *
 * @return array[]
 */
public static function provideGetNamespaceName() {
	$cases = [];

	$cases[] = [ NS_MAIN, 'Foo', 'en', '' ];
	$cases[] = [ NS_USER, 'Foo', 'en', 'User' ];
	$cases[] = [ NS_USER, 'Hansi Maier', 'de', 'Benutzer' ];

	// getGenderCache() provides a mock that considers first
	// names ending in "a" to be female.
	$cases[] = [ NS_USER, 'Lisa Müller', 'de', 'Benutzerin' ];

	return $cases;
}
410
/**
 * Checks the namespace name the codec reports for a namespace/text pair.
 *
 * @dataProvider provideGetNamespaceName
 *
 * @param int $namespace
 * @param string $text Title text, passed along to getNamespaceName()
 * @param string $lang Language code for the codec
 * @param string $expected
 */
public function testGetNamespaceName( $namespace, $text, $lang, $expected ) {
	$actualName = $this->makeCodec( $lang )->getNamespaceName( $namespace, $text );

	$this->assertEquals( $expected, $actualName );
}
420 }