Remove hard deprecation of PasswordPolicyChecks::checkPopularPasswordBlacklist
[lhc/web/wiklou.git] / tests / phpunit / includes / LinkFilterTest.php
1 <?php
2
3 use Wikimedia\Rdbms\LikeMatch;
4
/**
 * Tests for LinkFilter: building LIKE patterns from search patterns, building
 * index strings from URLs, and building query conditions.
 *
 * @covers LinkFilter
 * @group Database
 */
class LinkFilterTest extends MediaWikiLangTestCase {

	/**
	 * Pin $wgUrlProtocols to a fixed list so the data sets below do not depend
	 * on the configuration of the wiki the tests run on.
	 */
	protected function setUp() {
		parent::setUp();

		$this->setMwGlobals( 'wgUrlProtocols', [
			'http://',
			'https://',
			'ftp://',
			'irc://',
			'ircs://',
			'gopher://',
			'telnet://',
			'nntp://',
			'worldwind://',
			'mailto:',
			'news:',
			'svn://',
			'git://',
			'mms://',
			'//',
		] );
	}

	/**
	 * Turn a LIKE array into an anchored regular expression, so that the
	 * pattern can be matched against index strings with preg_match().
	 *
	 * A LikeMatch item whose string form is '%' becomes '.*' and one whose
	 * string form is '_' becomes '.'. A LikeMatch item with any other string
	 * form contributes nothing to the regex. Every other item is treated as
	 * literal text and quoted for the '!' delimiter.
	 *
	 * @param array $like Array as created by LinkFilter::makeLikeArray()
	 * @return string Regex, delimited by '!' and anchored with ^ and $
	 */
	public function createRegexFromLIKE( $like ) {
		$regex = '!^';

		foreach ( $like as $item ) {
			if ( !( $item instanceof LikeMatch ) ) {
				// Literal part of the pattern
				$regex .= preg_quote( $item, '!' );
				continue;
			}

			$token = $item->toString();
			if ( $token === '%' ) {
				$regex .= '.*';
			} elseif ( $token === '_' ) {
				$regex .= '.';
			}
		}

		$regex .= '$!';

		return $regex;
	}

	/**
	 * Data sets for testMakeLikeArrayWithValidPatterns().
	 *
	 * Each set is [ protocol, search pattern, URL, options ]; the options
	 * element is optional and is described on the test method.
	 *
	 * @return array
	 */
	public static function provideValidPatterns() {
		return [
			// Protocol, Search pattern, URL which matches the pattern
			[ 'http://', '*.test.com', 'http://www.test.com' ],
			[ 'http://', 'test.com:8080/dir/file', 'http://name:pass@test.com:8080/dir/file' ],
			[ 'https://', '*.com', 'https://s.s.test..com:88/dir/file?a=1&b=2' ],
			[ 'https://', '*.com', 'https://name:pass@secure.com/index.html' ],
			[ 'http://', 'name:pass@test.com', 'http://test.com' ],
			[ 'http://', 'test.com', 'http://name:pass@test.com' ],
			[ 'http://', '*.test.com', 'http://a.b.c.test.com/dir/dir/file?a=6' ],
			[ null, 'http://*.test.com', 'http://www.test.com' ],
			[ 'http://', '.test.com', 'http://.test.com' ],
			[ 'http://', '*..test.com', 'http://foo..test.com' ],
			[ 'mailto:', 'name@mail.test123.com', 'mailto:name@mail.test123.com' ],
			[ 'mailto:', '*@mail.test123.com', 'mailto:name@mail.test123.com' ],
			[ '',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
			],
			[ '', 'http://name:pass@*.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ '', 'http://name:wrongpass@*.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ 'http://', 'name:pass@*.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ '', 'http://name:pass@www.test.com:12345',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg' ],
			[ 'ftp://', 'user:pass@ftp.test.com:1233/home/user/file;type=efw',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ],
			[ null, 'ftp://otheruser:otherpass@ftp.test.com:1233/home/user/file;type=',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ],
			[ null, 'ftp://@ftp.test.com:1233/home/user/file;type=',
				'ftp://user:pass@ftp.test.com:1233/home/user/file;type=efw' ],
			[ null, 'ftp://ftp.test.com/',
				'ftp://user:pass@ftp.test.com/home/user/file;type=efw' ],
			[ null, 'ftp://*.test.com:222/',
				'ftp://user:pass@ftp.test.com:222/home' ],
			[ 'irc://', '*.myserver:6667/', 'irc://test.myserver:6667/' ],
			[ 'irc://', 'name:pass@*.myserver/', 'irc://test.myserver:6667/' ],
			[ 'irc://', 'name:pass@*.myserver/', 'irc://other:@test.myserver:6667/' ],
			[ '', 'irc://test/name,string,abc?msg=t', 'irc://test/name,string,abc?msg=test' ],
			[ '', 'https://gerrit.wikimedia.org/r/#/q/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z' ],
			[ '', 'https://gerrit.wikimedia.org',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z' ],
			[ 'mailto:', '*.test.com', 'mailto:name@pop3.test.com' ],
			[ 'mailto:', 'test.com', 'mailto:name@test.com' ],
			[ 'news:', 'test.1234afc@news.test.com', 'news:test.1234afc@news.test.com' ],
			[ 'news:', '*.test.com', 'news:test.1234afc@news.test.com' ],
			[ '', 'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com',
				'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com' ],
			[ '', 'news:*.aol.com',
				'news:4df8kh$iagfewewf(at)newsbf02aaa.news.aol.com' ],
			[ '', 'git://github.com/prwef/abc-def.git', 'git://github.com/prwef/abc-def.git' ],
			[ 'git://', 'github.com/', 'git://github.com/prwef/abc-def.git' ],
			[ 'git://', '*.github.com/', 'git://a.b.c.d.e.f.github.com/prwef/abc-def.git' ],
			[ '', 'gopher://*.test.com/', 'gopher://gopher.test.com/0/v2/vstat' ],
			[ 'telnet://', '*.test.com', 'telnet://shell.test.com/~home/' ],
			[ '', 'http://test.com', 'http://test.com/index?arg=1' ],
			[ 'http://', '*.test.com', 'http://www.test.com/index?arg=1' ],
			[ '',
				'http://xx23124:__ffdfdef__@www.test.com:12345/dir',
				'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
			],
			[ 'http://', '127.0.0.1', 'http://127.000.000.001' ],
			[ 'http://', '127.0.0.*', 'http://127.000.000.010' ],
			[ 'http://', '127.0.*', 'http://127.000.123.010' ],
			[ 'http://', '127.*', 'http://127.127.127.127' ],
			[ 'http://', '[0:0:0:0:0:0:0:0001]', 'http://[::1]' ],
			[ 'http://', '[2001:db8:0:0:*]', 'http://[2001:0DB8::]' ],
			[ 'http://', '[2001:db8:0:0:*]', 'http://[2001:0DB8::123]' ],
			[ 'http://', '[2001:db8:0:0:*]', 'http://[2001:0DB8::123:456]' ],
			[ 'http://', 'xn--f-vgaa.example.com', 'http://fóó.example.com', [ 'idn' => true ] ],
			[ 'http://', 'xn--f-vgaa.example.com', 'http://f%c3%b3%C3%B3.example.com', [ 'idn' => true ] ],
			[ 'http://', 'fóó.example.com', 'http://xn--f-vgaa.example.com', [ 'idn' => true ] ],
			[ 'http://', 'f%c3%b3%C3%B3.example.com', 'http://xn--f-vgaa.example.com', [ 'idn' => true ] ],
			[ 'http://', 'f%c3%b3%C3%B3.example.com', 'http://fóó.example.com' ],
			[ 'http://', 'fóó.example.com', 'http://f%c3%b3%C3%B3.example.com' ],

			[ 'http://', 'example.com./foo', 'http://example.com/foo' ],
			[ 'http://', 'example.com/foo', 'http://example.com./foo' ],
			[ 'http://', '127.0.0.1./foo', 'http://127.0.0.1/foo' ],
			[ 'http://', '127.0.0.1/foo', 'http://127.0.0.1./foo' ],

			// Tests for false positives
			[ 'http://', 'test.com', 'http://www.test.com', [ 'found' => false ] ],
			[ 'http://', 'www1.test.com', 'http://www.test.com', [ 'found' => false ] ],
			[ 'http://', '*.test.com', 'http://www.test.t.com', [ 'found' => false ] ],
			[ 'http://', 'test.com', 'http://xtest.com', [ 'found' => false ] ],
			[ 'http://', '*.test.com', 'http://xtest.com', [ 'found' => false ] ],
			[ 'http://', '.test.com', 'http://test.com', [ 'found' => false ] ],
			[ 'http://', '.test.com', 'http://www.test.com', [ 'found' => false ] ],
			[ 'http://', '*..test.com', 'http://test.com', [ 'found' => false ] ],
			[ 'http://', '*..test.com', 'http://www.test.com', [ 'found' => false ] ],
			[ '', 'http://test.com:8080', 'http://www.test.com:8080', [ 'found' => false ] ],
			[ '', 'https://test.com', 'http://test.com', [ 'found' => false ] ],
			[ '', 'http://test.com', 'https://test.com', [ 'found' => false ] ],
			[ 'http://', 'http://test.com', 'http://test.com', [ 'found' => false ] ],
			[ null, 'http://www.test.com', 'http://www.test.com:80', [ 'found' => false ] ],
			[ null, 'http://www.test.com:80', 'http://www.test.com', [ 'found' => false ] ],
			[ null, 'http://*.test.com:80', 'http://www.test.com', [ 'found' => false ] ],
			[ '', 'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/q/status:open,n,z', [ 'found' => false ] ],
			[ '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
				'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z', [ 'found' => false ] ],
			[ 'mailto:', '@test.com', '@abc.test.com', [ 'found' => false ] ],
			[ 'mailto:', 'mail@test.com', 'mail2@test.com', [ 'found' => false ] ],
			[ '', 'mailto:mail@test.com', 'mail2@test.com', [ 'found' => false ] ],
			[ '', 'mailto:@test.com', '@abc.test.com', [ 'found' => false ] ],
			[ 'ftp://', '*.co', 'ftp://www.co.uk', [ 'found' => false ] ],
			[ 'ftp://', '*.co', 'ftp://www.co.m', [ 'found' => false ] ],
			[ 'ftp://', '*.co/dir/', 'ftp://www.co/dir2/', [ 'found' => false ] ],
			[ 'ftp://', 'www.co/dir/', 'ftp://www.co/dir2/', [ 'found' => false ] ],
			[ 'ftp://', 'test.com/dir/', 'ftp://test.com/', [ 'found' => false ] ],
			[ '', 'http://test.com:8080/dir/', 'http://test.com:808/dir/', [ 'found' => false ] ],
			[ '', 'http://test.com/dir/index.html', 'http://test.com/dir/index.php', [ 'found' => false ] ],
			[ 'http://', '127.0.0.*', 'http://127.0.1.0', [ 'found' => false ] ],
			[ 'http://', '[2001:db8::*]', 'http://[2001:0DB8::123:456]', [ 'found' => false ] ],

			// These are false positives too and ideally shouldn't match, but that
			// would require using regexes and RLIKE instead of LIKE
			// [ null, 'http://*.test.com', 'http://www.test.com:80', [ 'found' => false ] ],
			// [ '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
			// 	'https://gerrit.wikimedia.org/XXX/r/#/q/status:open,n,z', [ 'found' => false ] ],
		];
	}

	/**
	 * Tests whether the LIKE clause produced by LinkFilter::makeLikeArray($pattern, $protocol)
	 * will find one of the URL indexes produced by LinkFilter::makeIndexes($url)
	 *
	 * The LIKE array is converted to a regex with createRegexFromLIKE() and
	 * matched against every index string, so no database query is issued by
	 * this method itself.
	 *
	 * @dataProvider provideValidPatterns
	 *
	 * @param string|null $protocol Protocol, e.g. 'http://' or 'mailto:'
	 * @param string $pattern Search pattern to feed to LinkFilter::makeLikeArray
	 * @param string $url URL to feed to LinkFilter::makeIndexes
	 * @param array $options
	 *  - found: (bool) Should the URL be found? (defaults true)
	 *  - idn: (bool) Does this test require the idn conversion (default false)
	 */
	public function testMakeLikeArrayWithValidPatterns( $protocol, $pattern, $url, $options = [] ) {
		$options += [ 'found' => true, 'idn' => false ];
		if ( $options['idn'] && !LinkFilter::supportsIDN() ) {
			$this->markTestSkipped( 'LinkFilter IDN support is not available' );
		}

		$indexes = LinkFilter::makeIndexes( $url );
		$likeArray = LinkFilter::makeLikeArray( $pattern, $protocol );

		$this->assertNotFalse(
			$likeArray,
			"LinkFilter::makeLikeArray('$pattern', '$protocol') returned false on a valid pattern"
		);

		$regex = $this->createRegexFromLIKE( $likeArray );
		$debugmsg = "Regex: '" . $regex . "'\n";
		$debugmsg .= count( $indexes ) . " index(es) created by LinkFilter::makeIndexes():\n";

		$matches = 0;

		foreach ( $indexes as $index ) {
			$result = preg_match( $regex, $index );
			// preg_match() returns false on an engine error; without this check
			// such an error would be counted as "no match" and could let a
			// 'found' => false case pass for the wrong reason.
			$this->assertNotFalse( $result, "preg_match() failed for regex '$regex' on '$index'" );
			$matches += $result;
			$debugmsg .= "\t'$index'\n";
		}

		if ( $options['found'] ) {
			$this->assertGreaterThan(
				0,
				$matches,
				"Search pattern '$protocol$pattern' does not find url '$url' \n$debugmsg"
			);
		} else {
			$this->assertSame(
				0,
				$matches,
				"Search pattern '$protocol$pattern' should not find url '$url' \n$debugmsg"
			);
		}
	}

	/**
	 * Data sets for testMakeLikeArrayWithInvalidPatterns(): search patterns
	 * that LinkFilter::makeLikeArray() is expected to reject.
	 *
	 * @return array
	 */
	public static function provideInvalidPatterns() {
		return [
			[ '' ],
			[ '*' ],
			[ 'http://*' ],
			[ 'http://*/' ],
			[ 'http://*/dir/file' ],
			[ 'test.*.com' ],
			[ 'http://test.*.com' ],
			[ 'http://*.test.*' ],
			[ 'http://*test.com' ],
			[ 'https://*' ],
			[ '*://test.com' ],
			[ 'mailto:name:pass@t*est.com' ],
			[ 'http://*:888/' ],
			[ '*http://' ],
			[ 'test.com/*/index' ],
			[ 'test.com/dir/index?arg=*' ],
		];
	}

	/**
	 * Tests whether LinkFilter::makeLikeArray($pattern) will reject invalid search patterns
	 *
	 * @dataProvider provideInvalidPatterns
	 *
	 * @param string $pattern Invalid search pattern
	 */
	public function testMakeLikeArrayWithInvalidPatterns( $pattern ) {
		$this->assertFalse(
			LinkFilter::makeLikeArray( $pattern ),
			"'$pattern' is not a valid pattern and should be rejected"
		);
	}

	/**
	 * Tests that LinkFilter::makeIndexes($url) returns exactly the expected
	 * list of index strings.
	 *
	 * @dataProvider provideMakeIndexes
	 * @covers LinkFilter::makeIndexes
	 *
	 * @param string $url URL to feed to LinkFilter::makeIndexes
	 * @param string[] $expected Expected index strings
	 */
	public function testMakeIndexes( $url, $expected ) {
		// Set global so file:// tests can work
		$this->setMwGlobals( [
			'wgUrlProtocols' => [
				'http://',
				'https://',
				'mailto:',
				'//',
				'file://', // Non-default
			],
		] );

		$index = LinkFilter::makeIndexes( $url );
		$this->assertEquals( $expected, $index, "LinkFilter::makeIndexes(\"$url\")" );
	}

	/**
	 * Data sets for testMakeIndexes(): [ URL, expected list of index strings ].
	 *
	 * @return array
	 */
	public static function provideMakeIndexes() {
		return [
			// Testcase for T30627
			[
				'https://example.org/test.cgi?id=12345',
				[ 'https://org.example./test.cgi?id=12345' ]
			],
			[
				// mailtos are handled special
				'mailto:wiki@wikimedia.org',
				[ 'mailto:org.wikimedia.@wiki' ]
			],
			[
				// mailtos are handled special
				'mailto:wiki',
				[ 'mailto:@wiki' ]
			],

			// file URL cases per T30627...
			[
				// three slashes: local filesystem path Unix-style
				'file:///whatever/you/like.txt',
				[ 'file://./whatever/you/like.txt' ]
			],
			[
				// three slashes: local filesystem path Windows-style
				'file:///c:/whatever/you/like.txt',
				[ 'file://./c:/whatever/you/like.txt' ]
			],
			[
				// two slashes: UNC filesystem path Windows-style
				'file://intranet/whatever/you/like.txt',
				[ 'file://intranet./whatever/you/like.txt' ]
			],
			// Multiple-slash cases that can sorta work on Mozilla
			// if you hack it just right are kinda pathological,
			// and unreliable cross-platform or on IE which means they're
			// unlikely to appear on intranets.
			// Those will survive the algorithm but with results that
			// are less consistent.

			// protocol-relative URL cases per T31854...
			[
				'//example.org/test.cgi?id=12345',
				[
					'http://org.example./test.cgi?id=12345',
					'https://org.example./test.cgi?id=12345'
				]
			],

			// IP addresses
			[
				'http://192.0.2.0/foo',
				[ 'http://V4.192.0.2.0./foo' ]
			],
			[
				'http://192.0.0002.0/foo',
				[ 'http://V4.192.0.2.0./foo' ]
			],
			[
				'http://[2001:db8::1]/foo',
				[ 'http://V6.2001.DB8.0.0.0.0.0.1./foo' ]
			],

			// Explicit specification of the DNS root
			[
				'http://example.com./foo',
				[ 'http://com.example./foo' ]
			],
			[
				'http://192.0.2.0./foo',
				[ 'http://V4.192.0.2.0./foo' ]
			],

			// Weird edge case
			[
				'http://.example.com/foo',
				[ 'http://com.example../foo' ]
			],
		];
	}

	/**
	 * Tests that LinkFilter::getQueryConditions($query, $options) returns the
	 * expected condition array, or false for a rejected query.
	 *
	 * @dataProvider provideGetQueryConditions
	 * @covers LinkFilter::getQueryConditions
	 *
	 * @param string $query Search pattern to feed to LinkFilter::getQueryConditions
	 * @param array $options Options to feed to LinkFilter::getQueryConditions
	 * @param array|bool $expected Expected return value
	 */
	public function testGetQueryConditions( $query, $options, $expected ) {
		$conds = LinkFilter::getQueryConditions( $query, $options );
		$this->assertEquals( $expected, $conds );
	}

	/**
	 * Data sets for testGetQueryConditions():
	 * [ query, options, expected conditions (or false) ], keyed by description.
	 *
	 * @return array
	 */
	public static function provideGetQueryConditions() {
		return [
			'Basic example' => [
				'example.com',
				[],
				[
					'el_index_60 LIKE \'http://com.example./%\' ESCAPE \'`\' ',
					'el_index LIKE \'http://com.example./%\' ESCAPE \'`\' ',
				],
			],
			'Basic example with path' => [
				'example.com/foobar',
				[],
				[
					'el_index_60 LIKE \'http://com.example./foobar%\' ESCAPE \'`\' ',
					'el_index LIKE \'http://com.example./foobar%\' ESCAPE \'`\' ',
				],
			],
			'Wildcard domain' => [
				'*.example.com',
				[],
				[
					'el_index_60 LIKE \'http://com.example.%\' ESCAPE \'`\' ',
					'el_index LIKE \'http://com.example.%\' ESCAPE \'`\' ',
				],
			],
			'Wildcard domain with path' => [
				'*.example.com/foobar',
				[],
				[
					'el_index_60 LIKE \'http://com.example.%\' ESCAPE \'`\' ',
					'el_index LIKE \'http://com.example.%/foobar%\' ESCAPE \'`\' ',
				],
			],
			'Wildcard domain with path, oneWildcard=true' => [
				'*.example.com/foobar',
				[ 'oneWildcard' => true ],
				[
					'el_index_60 LIKE \'http://com.example.%\' ESCAPE \'`\' ',
					'el_index LIKE \'http://com.example.%\' ESCAPE \'`\' ',
				],
			],
			'Constant prefix' => [
				'example.com/blah/blah/blah/blah/blah/blah/blah/blah/blah/blah?foo=',
				[],
				[
					'el_index_60' => 'http://com.example./blah/blah/blah/blah/blah/blah/blah/blah/',
					'el_index LIKE ' .
						'\'http://com.example./blah/blah/blah/blah/blah/blah/blah/blah/blah/blah?foo=%\' ' .
						'ESCAPE \'`\' ',
				],
			],
			'Bad protocol' => [
				'test/',
				[ 'protocol' => 'invalid://' ],
				false
			],
			'Various options' => [
				'example.com',
				[ 'protocol' => 'https://', 'prefix' => 'xx' ],
				[
					'xx_index_60 LIKE \'https://com.example./%\' ESCAPE \'`\' ',
					'xx_index LIKE \'https://com.example./%\' ESCAPE \'`\' ',
				],
			],
		];
	}

}