From: David Causse Date: Thu, 22 Jun 2017 12:48:14 +0000 (+0200) Subject: Fix highlighting for phrase queries X-Git-Tag: 1.31.0-rc.0~2867^2 X-Git-Url: http://git.heureux-cyclage.org/?a=commitdiff_plain;h=f230f5dcc7d663b7bc8278f26311041243e62b22;p=lhc%2Fweb%2Fwiklou.git Fix highlighting for phrase queries I think the bug was introduced during a cleanup in Iaabc10c. I don't think that " should be part of the legalSearchChars at query time, it seems to break the regex. The strategy here is to distinguish legalSearchChars used at query time vs the ones used at index time by introducing: SearchEngine::legalSearchCharsForUpdate() Bug: T167798 Change-Id: I61dc53665e26d3c6c48caed78dd3bbde9a33def7 --- diff --git a/includes/deferred/SearchUpdate.php b/includes/deferred/SearchUpdate.php index b9a259b1a4..c94ae2a772 100644 --- a/includes/deferred/SearchUpdate.php +++ b/includes/deferred/SearchUpdate.php @@ -124,7 +124,7 @@ class SearchUpdate implements DeferrableUpdate { # Language-specific strip/conversion $text = $wgContLang->normalizeForSearch( $text ); $se = $se ?: MediaWikiServices::getInstance()->newSearchEngine(); - $lc = $se->legalSearchChars() . '&#;'; + $lc = $se->legalSearchCharsForUpdate() . '&#;'; $text = preg_replace( "/<\\/?\\s*[A-Za-z][^>]*?>/", ' ', $wgContLang->lc( " " . $text . " " ) ); # Strip HTML markup @@ -207,7 +207,7 @@ class SearchUpdate implements DeferrableUpdate { $ns = $this->title->getNamespace(); $title = $this->title->getText(); - $lc = $search->legalSearchChars() . '&#;'; + $lc = $search->legalSearchCharsForUpdate() . '&#;'; $t = $wgContLang->normalizeForSearch( $title ); $t = preg_replace( "/[^{$lc}]+/", ' ', $t ); $t = $wgContLang->lc( $t ); diff --git a/includes/search/SearchEngine.php b/includes/search/SearchEngine.php index 4473bb2927..9673aa3c96 100644 --- a/includes/search/SearchEngine.php +++ b/includes/search/SearchEngine.php @@ -206,7 +206,7 @@ abstract class SearchEngine { } /** - * Get chars legal for search. 
+ * Get chars legal for search (at query time). * NOTE: usage as static is deprecated and preserved only as BC measure * @return string */ @@ -214,6 +214,16 @@ abstract class SearchEngine { return "A-Za-z_'.0-9\\x80-\\xFF\\-"; } + /** + * Get chars legal for search (at index time). + * + * @since 1.30 + * @return string + */ + public function legalSearchCharsForUpdate() { + return static::legalSearchChars(); + } + /** * Set the maximum number of results to return * and how many to skip before returning the first. diff --git a/includes/search/SearchMySQL.php b/includes/search/SearchMySQL.php index 36cbbaa856..2c7feeb752 100644 --- a/includes/search/SearchMySQL.php +++ b/includes/search/SearchMySQL.php @@ -149,8 +149,8 @@ class SearchMySQL extends SearchDatabase { return $regex; } - public static function legalSearchChars() { - return "\"*" . parent::legalSearchChars(); + public function legalSearchCharsForUpdate() { + return "\"*" . parent::legalSearchCharsForUpdate(); } /** diff --git a/includes/search/SearchOracle.php b/includes/search/SearchOracle.php index c5a5ef11a7..2e6cb84ca6 100644 --- a/includes/search/SearchOracle.php +++ b/includes/search/SearchOracle.php @@ -266,7 +266,7 @@ class SearchOracle extends SearchDatabase { [] ); } - public static function legalSearchChars() { - return "\"" . parent::legalSearchChars(); + public function legalSearchCharsForUpdate() { + return "\"" . parent::legalSearchCharsForUpdate(); } } diff --git a/includes/search/SearchSqlite.php b/includes/search/SearchSqlite.php index b40e1aaf38..5a8995d745 100644 --- a/includes/search/SearchSqlite.php +++ b/includes/search/SearchSqlite.php @@ -141,8 +141,8 @@ class SearchSqlite extends SearchDatabase { return $regex; } - public static function legalSearchChars() { - return "\"*" . parent::legalSearchChars(); + public function legalSearchCharsForUpdate() { + return "\"*" . 
parent::legalSearchCharsForUpdate(); } /** diff --git a/tests/phpunit/includes/search/SearchEngineTest.php b/tests/phpunit/includes/search/SearchEngineTest.php index c74c893901..6e00e53166 100644 --- a/tests/phpunit/includes/search/SearchEngineTest.php +++ b/tests/phpunit/includes/search/SearchEngineTest.php @@ -124,6 +124,24 @@ class SearchEngineTest extends MediaWikiLangTestCase { "Plain search failed" ); } + public function testPhraseSearch() { + $res = $this->search->searchText( '"smithee is one who smiths"' ); + $this->assertEquals( + [ 'Smithee' ], + $this->fetchIds( $res ), + "Phrase search failed" ); + $res = $this->search->searchText( '"smithee is one who smiths"' ); + $match = $res->next(); + $terms = [ 'smithee', 'is', 'one', 'who', 'smiths' ]; + $snippet = ""; + foreach ( $terms as $term ) { + $snippet .= " <span class='searchmatch'>" . $term . "</span>"; + } + $this->assertRegexp( '/' . preg_quote( $snippet, '/' ) . '/', + $match->getTextSnippet( $res->termMatches() ), + "Phrase search failed to highlight" ); + } + public function testTextPowerSearch() { $this->search->setNamespaces( [ 0, 1, 4 ] ); $this->assertEquals(