[ 'http://', 'test.com', 'http://name:pass@test.com' ],
[ 'http://', '*.test.com', 'http://a.b.c.test.com/dir/dir/file?a=6' ],
[ null, 'http://*.test.com', 'http://www.test.com' ],
+ [ 'http://', '.test.com', 'http://.test.com' ],
+ [ 'http://', '*..test.com', 'http://foo..test.com' ],
[ 'mailto:', 'name@mail.test123.com', 'mailto:name@mail.test123.com' ],
+ [ 'mailto:', '*@mail.test123.com', 'mailto:name@mail.test123.com' ],
[ '',
'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg',
'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
'http://xx23124:__ffdfdef__@www.test.com:12345/dir' ,
'http://name:pass@www.test.com:12345/dir/dir/file.xyz.php#__se__?arg1=_&arg2[]=4rtg'
],
+ [ 'http://', '127.0.0.1', 'http://127.000.000.001' ],
+ [ 'http://', '127.0.0.*', 'http://127.000.000.010' ],
+ [ 'http://', '127.0.*', 'http://127.000.123.010' ],
+ [ 'http://', '127.*', 'http://127.127.127.127' ],
+ [ 'http://', '[0:0:0:0:0:0:0:0001]', 'http://[::1]' ],
+ [ 'http://', '[2001:db8:0:0:*]', 'http://[2001:0DB8::]' ],
+ [ 'http://', '[2001:db8:0:0:*]', 'http://[2001:0DB8::123]' ],
+ [ 'http://', '[2001:db8:0:0:*]', 'http://[2001:0DB8::123:456]' ],
+ [ 'http://', 'xn--f-vgaa.example.com', 'http://fóó.example.com', [ 'idn' => true ] ],
+ [ 'http://', 'xn--f-vgaa.example.com', 'http://f%c3%b3%C3%B3.example.com', [ 'idn' => true ] ],
+ [ 'http://', 'fóó.example.com', 'http://xn--f-vgaa.example.com', [ 'idn' => true ] ],
+ [ 'http://', 'f%c3%b3%C3%B3.example.com', 'http://xn--f-vgaa.example.com', [ 'idn' => true ] ],
+ [ 'http://', 'f%c3%b3%C3%B3.example.com', 'http://fóó.example.com' ],
+ [ 'http://', 'fóó.example.com', 'http://f%c3%b3%C3%B3.example.com' ],
+
+ [ 'http://', 'example.com./foo', 'http://example.com/foo' ],
+ [ 'http://', 'example.com/foo', 'http://example.com./foo' ],
+ [ 'http://', '127.0.0.1./foo', 'http://127.0.0.1/foo' ],
+ [ 'http://', '127.0.0.1/foo', 'http://127.0.0.1./foo' ],
// Tests for false positives
- [ 'http://', 'test.com', 'http://www.test.com', false ],
- [ 'http://', 'www1.test.com', 'http://www.test.com', false ],
- [ 'http://', '*.test.com', 'http://www.test.t.com', false ],
- [ '', 'http://test.com:8080', 'http://www.test.com:8080', false ],
- [ '', 'https://test.com', 'http://test.com', false ],
- [ '', 'http://test.com', 'https://test.com', false ],
- [ 'http://', 'http://test.com', 'http://test.com', false ],
- [ null, 'http://www.test.com', 'http://www.test.com:80', false ],
- [ null, 'http://www.test.com:80', 'http://www.test.com', false ],
- [ null, 'http://*.test.com:80', 'http://www.test.com', false ],
+ [ 'http://', 'test.com', 'http://www.test.com', [ 'found' => false ] ],
+ [ 'http://', 'www1.test.com', 'http://www.test.com', [ 'found' => false ] ],
+ [ 'http://', '*.test.com', 'http://www.test.t.com', [ 'found' => false ] ],
+ [ 'http://', 'test.com', 'http://xtest.com', [ 'found' => false ] ],
+ [ 'http://', '*.test.com', 'http://xtest.com', [ 'found' => false ] ],
+ [ 'http://', '.test.com', 'http://test.com', [ 'found' => false ] ],
+ [ 'http://', '.test.com', 'http://www.test.com', [ 'found' => false ] ],
+ [ 'http://', '*..test.com', 'http://test.com', [ 'found' => false ] ],
+ [ 'http://', '*..test.com', 'http://www.test.com', [ 'found' => false ] ],
+ [ '', 'http://test.com:8080', 'http://www.test.com:8080', [ 'found' => false ] ],
+ [ '', 'https://test.com', 'http://test.com', [ 'found' => false ] ],
+ [ '', 'http://test.com', 'https://test.com', [ 'found' => false ] ],
+ [ 'http://', 'http://test.com', 'http://test.com', [ 'found' => false ] ],
+ [ null, 'http://www.test.com', 'http://www.test.com:80', [ 'found' => false ] ],
+ [ null, 'http://www.test.com:80', 'http://www.test.com', [ 'found' => false ] ],
+ [ null, 'http://*.test.com:80', 'http://www.test.com', [ 'found' => false ] ],
[ '', 'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z',
- 'https://gerrit.wikimedia.org/r/#/q/status:open,n,z', false ],
+ 'https://gerrit.wikimedia.org/r/#/q/status:open,n,z', [ 'found' => false ] ],
[ '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
- 'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z', false ],
- [ 'mailto:', '@test.com', '@abc.test.com', false ],
- [ 'mailto:', 'mail@test.com', 'mail2@test.com', false ],
- [ '', 'mailto:mail@test.com', 'mail2@test.com', false ],
- [ '', 'mailto:@test.com', '@abc.test.com', false ],
- [ 'ftp://', '*.co', 'ftp://www.co.uk', false ],
- [ 'ftp://', '*.co', 'ftp://www.co.m', false ],
- [ 'ftp://', '*.co/dir/', 'ftp://www.co/dir2/', false ],
- [ 'ftp://', 'www.co/dir/', 'ftp://www.co/dir2/', false ],
- [ 'ftp://', 'test.com/dir/', 'ftp://test.com/', false ],
- [ '', 'http://test.com:8080/dir/', 'http://test.com:808/dir/', false ],
- [ '', 'http://test.com/dir/index.html', 'http://test.com/dir/index.php', false ],
+ 'https://gerrit.wikimedia.org/r/#/XXX/status:open,n,z', [ 'found' => false ] ],
+ [ 'mailto:', '@test.com', '@abc.test.com', [ 'found' => false ] ],
+ [ 'mailto:', 'mail@test.com', 'mail2@test.com', [ 'found' => false ] ],
+ [ '', 'mailto:mail@test.com', 'mail2@test.com', [ 'found' => false ] ],
+ [ '', 'mailto:@test.com', '@abc.test.com', [ 'found' => false ] ],
+ [ 'ftp://', '*.co', 'ftp://www.co.uk', [ 'found' => false ] ],
+ [ 'ftp://', '*.co', 'ftp://www.co.m', [ 'found' => false ] ],
+ [ 'ftp://', '*.co/dir/', 'ftp://www.co/dir2/', [ 'found' => false ] ],
+ [ 'ftp://', 'www.co/dir/', 'ftp://www.co/dir2/', [ 'found' => false ] ],
+ [ 'ftp://', 'test.com/dir/', 'ftp://test.com/', [ 'found' => false ] ],
+ [ '', 'http://test.com:8080/dir/', 'http://test.com:808/dir/', [ 'found' => false ] ],
+ [ '', 'http://test.com/dir/index.html', 'http://test.com/dir/index.php', [ 'found' => false ] ],
+ [ 'http://', '127.0.0.*', 'http://127.0.1.0', [ 'found' => false ] ],
+ [ 'http://', '[2001:db8::*]', 'http://[2001:0DB8::123:456]', [ 'found' => false ] ],
// These are false positives too and ideally shouldn't match, but that
// would require using regexes and RLIKE instead of LIKE
- // [ null, 'http://*.test.com', 'http://www.test.com:80', false ],
+ // [ null, 'http://*.test.com', 'http://www.test.com:80', [ 'found' => false ] ],
// [ '', 'https://*.wikimedia.org/r/#/q/status:open,n,z',
- // 'https://gerrit.wikimedia.org/XXX/r/#/q/status:open,n,z', false ],
+ // 'https://gerrit.wikimedia.org/XXX/r/#/q/status:open,n,z', [ 'found' => false ] ],
];
}
* testMakeLikeArrayWithValidPatterns()
*
* Tests whether the LIKE clause produced by LinkFilter::makeLikeArray($pattern, $protocol)
- * will find one of the URL indexes produced by wfMakeUrlIndexes($url)
+ * will find one of the URL indexes produced by LinkFilter::makeIndexes($url)
*
* @dataProvider provideValidPatterns
*
* @param string $protocol Protocol, e.g. 'http://' or 'mailto:'
* @param string $pattern Search pattern to feed to LinkFilter::makeLikeArray
- * @param string $url URL to feed to wfMakeUrlIndexes
- * @param bool $shouldBeFound Should the URL be found? (defaults true)
+ * @param string $url URL to feed to LinkFilter::makeIndexes
+ * @param array $options
+ * - found: (bool) Should the URL be found? (defaults true)
+ * - idn: (bool) Does this test require the idn conversion (defaults false)
*/
- function testMakeLikeArrayWithValidPatterns( $protocol, $pattern, $url, $shouldBeFound = true ) {
- $indexes = wfMakeUrlIndexes( $url );
+ function testMakeLikeArrayWithValidPatterns( $protocol, $pattern, $url, $options = [] ) {
+ $options += [ 'found' => true, 'idn' => false ];
+ if ( !empty( $options['idn'] ) && !LinkFilter::supportsIDN() ) {
+ $this->markTestSkipped( 'LinkFilter IDN support is not available' );
+ }
+
+ $indexes = LinkFilter::makeIndexes( $url );
$likeArray = LinkFilter::makeLikeArray( $pattern, $protocol );
$this->assertTrue( $likeArray !== false,
$regex = $this->createRegexFromLIKE( $likeArray );
$debugmsg = "Regex: '" . $regex . "'\n";
- $debugmsg .= count( $indexes ) . " index(es) created by wfMakeUrlIndexes():\n";
+ $debugmsg .= count( $indexes ) . " index(es) created by LinkFilter::makeIndexes():\n";
$matches = 0;
$debugmsg .= "\t'$index'\n";
}
- if ( $shouldBeFound ) {
+ if ( !empty( $options['found'] ) ) {
$this->assertTrue(
$matches > 0,
"Search pattern '$protocol$pattern' does not find url '$url' \n$debugmsg"
);
}
+ /**
+  * Checks that LinkFilter::makeIndexes() returns the expected index strings for a URL.
+  *
+  * @dataProvider provideMakeIndexes
+  * @covers LinkFilter::makeIndexes
+  *
+  * @param string $url URL to feed to LinkFilter::makeIndexes
+  * @param string[] $expected Index strings LinkFilter::makeIndexes is expected to return
+  */
+ public function testMakeIndexes( $url, $expected ) {
+ 	// Set global so file:// tests can work
+ 	$this->setMwGlobals( [
+ 		'wgUrlProtocols' => [
+ 			'http://',
+ 			'https://',
+ 			'mailto:',
+ 			'//',
+ 			'file://', // Non-default
+ 		],
+ 	] );
+
+ 	$actual = LinkFilter::makeIndexes( $url );
+ 	$this->assertEquals( $expected, $actual, "LinkFilter::makeIndexes(\"$url\")" );
+ }
+
+ /**
+  * Data provider for testMakeIndexes().
+  *
+  * Datasets are named so that a failing case can be identified from the
+  * test output without counting array positions.
+  *
+  * @return array[] Map of dataset name to [ URL, expected list of index strings ]
+  */
+ public static function provideMakeIndexes() {
+ 	return [
+ 		// Testcase for T30627
+ 		'HTTPS URL with query string (T30627)' => [
+ 			'https://example.org/test.cgi?id=12345',
+ 			[ 'https://org.example./test.cgi?id=12345' ]
+ 		],
+ 		// mailtos are handled specially
+ 		'mailto with domain' => [
+ 			'mailto:wiki@wikimedia.org',
+ 			[ 'mailto:org.wikimedia.@wiki' ]
+ 		],
+ 		'mailto without domain' => [
+ 			'mailto:wiki',
+ 			[ 'mailto:@wiki' ]
+ 		],
+
+ 		// file URL cases per T30627...
+ 		// three slashes: local filesystem path Unix-style
+ 		'file URL, three slashes, Unix-style local path' => [
+ 			'file:///whatever/you/like.txt',
+ 			[ 'file://./whatever/you/like.txt' ]
+ 		],
+ 		// three slashes: local filesystem path Windows-style
+ 		'file URL, three slashes, Windows-style local path' => [
+ 			'file:///c:/whatever/you/like.txt',
+ 			[ 'file://./c:/whatever/you/like.txt' ]
+ 		],
+ 		// two slashes: UNC filesystem path Windows-style
+ 		'file URL, two slashes, Windows-style UNC path' => [
+ 			'file://intranet/whatever/you/like.txt',
+ 			[ 'file://intranet./whatever/you/like.txt' ]
+ 		],
+ 		// Multiple-slash cases that can sorta work on Mozilla
+ 		// if you hack it just right are kinda pathological,
+ 		// and unreliable cross-platform or on IE which means they're
+ 		// unlikely to appear on intranets.
+ 		// Those will survive the algorithm but with results that
+ 		// are less consistent.
+
+ 		// protocol-relative URL cases per T31854...
+ 		'Protocol-relative URL (T31854)' => [
+ 			'//example.org/test.cgi?id=12345',
+ 			[
+ 				'http://org.example./test.cgi?id=12345',
+ 				'https://org.example./test.cgi?id=12345'
+ 			]
+ 		],
+
+ 		// IP addresses
+ 		'IPv4 address' => [
+ 			'http://192.0.2.0/foo',
+ 			[ 'http://V4.192.0.2.0./foo' ]
+ 		],
+ 		'IPv4 address with leading zeros' => [
+ 			'http://192.0.0002.0/foo',
+ 			[ 'http://V4.192.0.2.0./foo' ]
+ 		],
+ 		'IPv6 address' => [
+ 			'http://[2001:db8::1]/foo',
+ 			[ 'http://V6.2001.DB8.0.0.0.0.0.1./foo' ]
+ 		],
+
+ 		// Explicit specification of the DNS root
+ 		'Domain with explicit DNS root' => [
+ 			'http://example.com./foo',
+ 			[ 'http://com.example./foo' ]
+ 		],
+ 		'IPv4 address with explicit DNS root' => [
+ 			'http://192.0.2.0./foo',
+ 			[ 'http://V4.192.0.2.0./foo' ]
+ 		],
+
+ 		// Weird edge case
+ 		'Domain with leading dot' => [
+ 			'http://.example.com/foo',
+ 			[ 'http://com.example../foo' ]
+ 		],
+ 	];
+ }
+
+ /**
+  * Compares the conditions built by LinkFilter::getQueryConditions() with the expected value.
+  *
+  * @dataProvider provideGetQueryConditions
+  * @covers LinkFilter::getQueryConditions
+  *
+  * @param string $query Search pattern passed to LinkFilter::getQueryConditions
+  * @param array $options Options passed to LinkFilter::getQueryConditions
+  * @param array|bool $expected Expected return value (false in the 'Bad protocol' dataset)
+  */
+ public function testGetQueryConditions( $query, $options, $expected ) {
+ 	$this->assertEquals(
+ 		$expected,
+ 		LinkFilter::getQueryConditions( $query, $options )
+ 	);
+ }
+
+ /**
+  * Data provider for testGetQueryConditions().
+  *
+  * The expected SQL fragments are written as double-quoted literals so the
+  * single quotes they contain need no escaping; each fragment ends with a
+  * trailing space after the ESCAPE clause.
+  *
+  * @return array[] Map of dataset name to [ query, options, expected conditions or false ]
+  */
+ public static function provideGetQueryConditions() {
+ 	return [
+ 		'Basic example' => [
+ 			'example.com',
+ 			[],
+ 			[
+ 				"el_index_60 LIKE 'http://com.example./%' ESCAPE '`' ",
+ 				"el_index LIKE 'http://com.example./%' ESCAPE '`' ",
+ 			],
+ 		],
+ 		'Basic example with path' => [
+ 			'example.com/foobar',
+ 			[],
+ 			[
+ 				"el_index_60 LIKE 'http://com.example./foobar%' ESCAPE '`' ",
+ 				"el_index LIKE 'http://com.example./foobar%' ESCAPE '`' ",
+ 			],
+ 		],
+ 		'Wildcard domain' => [
+ 			'*.example.com',
+ 			[],
+ 			[
+ 				"el_index_60 LIKE 'http://com.example.%' ESCAPE '`' ",
+ 				"el_index LIKE 'http://com.example.%' ESCAPE '`' ",
+ 			],
+ 		],
+ 		'Wildcard domain with path' => [
+ 			'*.example.com/foobar',
+ 			[],
+ 			[
+ 				"el_index_60 LIKE 'http://com.example.%' ESCAPE '`' ",
+ 				"el_index LIKE 'http://com.example.%/foobar%' ESCAPE '`' ",
+ 			],
+ 		],
+ 		'Wildcard domain with path, oneWildcard=true' => [
+ 			'*.example.com/foobar',
+ 			[ 'oneWildcard' => true ],
+ 			[
+ 				"el_index_60 LIKE 'http://com.example.%' ESCAPE '`' ",
+ 				"el_index LIKE 'http://com.example.%' ESCAPE '`' ",
+ 			],
+ 		],
+ 		'Constant prefix' => [
+ 			'example.com/blah/blah/blah/blah/blah/blah/blah/blah/blah/blah?foo=',
+ 			[],
+ 			[
+ 				// The first condition is an exact-match key => value pair, not a LIKE string
+ 				'el_index_60' => 'http://com.example./blah/blah/blah/blah/blah/blah/blah/blah/',
+ 				"el_index LIKE " .
+ 					"'http://com.example./blah/blah/blah/blah/blah/blah/blah/blah/blah/blah?foo=%' " .
+ 					"ESCAPE '`' ",
+ 			],
+ 		],
+ 		'Bad protocol' => [
+ 			'test/',
+ 			[ 'protocol' => 'invalid://' ],
+ 			false
+ 		],
+ 		'Various options' => [
+ 			'example.com',
+ 			[ 'protocol' => 'https://', 'prefix' => 'xx' ],
+ 			[
+ 				"xx_index_60 LIKE 'https://com.example./%' ESCAPE '`' ",
+ 				"xx_index LIKE 'https://com.example./%' ESCAPE '`' ",
+ 			],
+ 		],
+ 	];
+ }
+
}