* @ingroup Search
*/
+use MediaWiki\MediaWikiServices;
+
/**
* Highlight bits of wikitext
*
class SearchHighlighter {
protected $mCleanWikitext = true;
+ /**
+ * @warning If you pass false to this constructor, then
+ * the caller is responsible for HTML escaping.
+ * @param bool $cleanupWikitext
+ */
public function __construct( $cleanupWikitext = true ) {
	// Remember the caller's choice; when false is passed, the caller
	// becomes responsible for HTML escaping (see the @warning above).
	$this->mCleanWikitext = $cleanupWikitext;
}
/**
- * Default implementation of wikitext highlighting
+ * Wikitext highlighting when $wgAdvancedSearchHighlighting = true
*
* @param string $text
- * @param array $terms Terms to highlight (unescaped)
+ * @param string[] $terms Terms to highlight (not html escaped but
+ * regex escaped via SearchDatabase::regexTerm())
* @param int $contextlines
* @param int $contextchars
* @return string
*/
public function highlightText( $text, $terms, $contextlines, $contextchars ) {
- global $wgContLang, $wgSearchHighlightBoundaries;
+ global $wgSearchHighlightBoundaries;
if ( $text == '' ) {
return '';
if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
$epat = '';
foreach ( $matches as $key => $val ) {
- if ( $key > 0 && $val[1] != - 1 ) {
+ if ( $key > 0 && $val[1] != -1 ) {
if ( $key == 2 ) {
// see if this is an image link
- $ns = substr( $val[0], 2, - 1 );
- if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
+ $ns = substr( $val[0], 2, -1 );
+ if (
+ MediaWikiServices::getInstance()->getContentLanguage()->
+ getNsIndex( $ns ) != NS_FILE
+ ) {
break;
}
}
$anyterm = implode( '|', $terms );
$phrase = implode( "$wgSearchHighlightBoundaries+", $terms );
-
// @todo FIXME: A hack to scale contextchars, a correct solution
// would be to have contextchars actually be char and not byte
// length, and do proper utf-8 substrings and lengths everywhere,
// $snippets = array_map( 'htmlspecialchars', $extended );
$snippets = $extended;
- $last = - 1;
+ $last = -1;
$extract = '';
foreach ( $snippets as $index => $line ) {
- if ( $last == - 1 ) {
+ if ( $last == -1 ) {
$extract .= $line; // first line
} elseif ( $last + 1 == $index
&& $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
/**
* Split text into lines and add it to extracts array
*
- * @param array $extracts Index -> $line
- * @param int $count
+ * @param array &$extracts Index -> $line
+ * @param int &$count
* @param string $text
*/
function splitAndAdd( &$extracts, &$count, $text ) {
* @return string
*/
function caseCallback( $matches ) {
- global $wgContLang;
if ( strlen( $matches[0] ) > 1 ) {
- return '[' . $wgContLang->lc( $matches[0] ) . $wgContLang->uc( $matches[0] ) . ']';
+ $contLang = MediaWikiServices::getInstance()->getContentLanguage();
+ return '[' . $contLang->lc( $matches[0] ) .
+ $contLang->uc( $matches[0] ) . ']';
} else {
return $matches[0];
}
* @param string $text
* @param int $start
* @param int $end
- * @param int $posStart (out) actual start position
- * @param int $posEnd (out) actual end position
+ * @param int|null &$posStart (out) actual start position
+ * @param int|null &$posEnd (out) actual end position
* @return string
*/
function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
*
* @param string $pattern Regexp for matching lines
* @param array $extracts Extracts to search
- * @param int $linesleft Number of extracts to make
- * @param int $contextchars Length of snippet
- * @param array $out Map for highlighted snippets
- * @param array $offsets Map of starting points of snippets
+ * @param int &$linesleft Number of extracts to make
+ * @param int &$contextchars Length of snippet
+ * @param array &$out Map for highlighted snippets
+ * @param array &$offsets Map of starting points of snippets
* @protected
*/
function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
$text = preg_replace( "/('''|<\/?[iIuUbB]>)/", "", $text );
$text = preg_replace( "/''/", "", $text );
+ // Note, the previous /<\/?[^>]+>/ is insufficient
+ // for XSS safety as the HTML tag can span multiple
+ // search results (T144845).
+ $text = Sanitizer::escapeHtmlAllowEntities( $text );
return $text;
}
if ( $colon === false ) {
return $matches[2]; // replace with caption
}
- global $wgContLang;
$ns = substr( $matches[1], 0, $colon );
- $index = $wgContLang->getNsIndex( $ns );
+ $index = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $ns );
if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) {
return $matches[0]; // return the whole thing
} else {
* Simple & fast snippet extraction, but gives completely irrelevant
* snippets
*
+ * Used when $wgAdvancedSearchHighlighting is false.
+ *
* @param string $text
- * @param array $terms
+ * @param string[] $terms Escaped for regex by SearchDatabase::regexTerm()
* @param int $contextlines
* @param int $contextchars
* @return string
*/
public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
- global $wgContLang;
-
$lines = explode( "\n", $text );
$terms = implode( '|', $terms );
$lineno = 0;
$extract = "";
+ $contLang = MediaWikiServices::getInstance()->getContentLanguage();
foreach ( $lines as $line ) {
- if ( 0 == $contextlines ) {
+ if ( $contextlines == 0 ) {
break;
}
++$lineno;
}
--$contextlines;
// truncate function changes ... to relevant i18n message.
- $pre = $wgContLang->truncate( $m[1], - $contextchars, '...', false );
+ $pre = $contLang->truncateForVisual( $m[1], - $contextchars, '...', false );
if ( count( $m ) < 3 ) {
$post = '';
} else {
- $post = $wgContLang->truncate( $m[3], $contextchars, '...', false );
+ $post = $contLang->truncateForVisual( $m[3], $contextchars, '...', false );
}
$found = $m[2];