X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2Fsearch%2FSearchHighlighter.php;h=2bd19558a229fd98170268fb89f00aad90335ca4;hb=a9007e8baf802f0f57d095e3bb4ad201c98c0cb3;hp=98c05b56955052383d8f03cd95b5d327563c5af9;hpb=f4baf67270f1b593d7a4153eabd14c6c1011b310;p=lhc%2Fweb%2Fwiklou.git

diff --git a/includes/search/SearchHighlighter.php b/includes/search/SearchHighlighter.php
index 98c05b5695..6135395aeb 100644
--- a/includes/search/SearchHighlighter.php
+++ b/includes/search/SearchHighlighter.php
@@ -29,15 +29,20 @@
 class SearchHighlighter {
 	protected $mCleanWikitext = true;
 
+	/**
+	 * @warning If you pass false to this constructor, then
+	 *  the caller is responsible for HTML escaping.
+	 */
 	function __construct( $cleanupWikitext = true ) {
 		$this->mCleanWikitext = $cleanupWikitext;
 	}
 
 	/**
-	 * Default implementation of wikitext highlighting
+	 * Wikitext highlighting, used when $wgAdvancedSearchHighlighting is true
 	 *
 	 * @param string $text
-	 * @param array $terms Terms to highlight (unescaped)
+	 * @param array $terms Terms to highlight (not HTML-escaped, but
+	 *   regex-escaped via SearchDatabase::regexTerm())
 	 * @param int $contextlines
 	 * @param int $contextchars
 	 * @return string
@@ -52,10 +57,10 @@ class SearchHighlighter {
 		// spli text into text + templates/links/tables
 		$spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
 		// first capture group is for detecting nested templates/links/tables/references
-		$endPatterns = array(
+		$endPatterns = [
 			1 => '/(\{\{)|(\}\})/', // template
 			2 => '/(\[\[)|(\]\])/', // image
-			3 => "/(\n\\{\\|)|(\n\\|\\})/" ); // table
+			3 => "/(\n\\{\\|)|(\n\\|\\})/" ]; // table
 
 		// @todo FIXME: This should prolly be a hook or something
 		// instead of hardcoding a class name from the Cite extension
@@ -64,8 +69,8 @@ class SearchHighlighter {
 			$endPatterns[4] = '/(<ref>)|(<\/ref>)/';
 		}
 		$spat .= '/';
-		$textExt = array(); // text extracts
-		$otherExt = array(); // other extracts
+		$textExt = []; // text extracts
+		$otherExt = []; // other extracts
 		$start = 0;
 		$textLen = strlen( $text );
 		$count = 0; // sequence number to maintain ordering
@@ -74,10 +79,10 @@ class SearchHighlighter {
 			if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
 				$epat = '';
 				foreach ( $matches as $key => $val ) {
-					if ( $key > 0 && $val[1] != - 1 ) {
+					if ( $key > 0 && $val[1] != -1 ) {
 						if ( $key == 2 ) {
 							// see if this is an image link
-							$ns = substr( $val[0], 2, - 1 );
+							$ns = substr( $val[0], 2, -1 );
 							if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
 								break;
 							}
@@ -136,7 +141,7 @@ class SearchHighlighter {
 			if ( preg_match( '/[\x80-\xff]/', $term ) ) {
 				$terms[$index] = preg_replace_callback(
 					'/./us',
-					array( $this, 'caseCallback' ),
+					[ $this, 'caseCallback' ],
 					$terms[$index]
 				);
 			} else {
@@ -145,7 +150,6 @@ class SearchHighlighter {
 		}
 		$anyterm = implode( '|', $terms );
 		$phrase = implode( "$wgSearchHighlightBoundaries+", $terms );
-
 		// @todo FIXME: A hack to scale contextchars, a correct solution
 		// would be to have contextchars actually be char and not byte
 		// length, and do proper utf-8 substrings and lengths everywhere,
@@ -161,8 +165,8 @@ class SearchHighlighter {
 
 		$left = $contextlines;
 
-		$snippets = array();
-		$offsets = array();
+		$snippets = [];
+		$offsets = [];
 
 		// show beginning only if it contains all words
 		$first = 0;
@@ -202,7 +206,7 @@ class SearchHighlighter {
 		}
 
 		// add extra chars to each snippet to make snippets constant size
-		$extended = array();
+		$extended = [];
 		if ( count( $snippets ) == 0 ) {
 			// couldn't find the target words, just show beginning of article
 			if ( array_key_exists( $first, $all ) ) {
@@ -214,7 +218,7 @@ class SearchHighlighter {
 			// if begin of the article contains the whole phrase, show only that !!
 			if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
 				&& $offsets[$first] < $contextchars * 2 ) {
-				$snippets = array( $first => $snippets[$first] );
+				$snippets = [ $first => $snippets[$first] ];
 			}
 
 			// calc by how much to extend existing snippets
@@ -252,10 +256,10 @@ class SearchHighlighter {
 
 		// $snippets = array_map( 'htmlspecialchars', $extended );
 		$snippets = $extended;
-		$last = - 1;
+		$last = -1;
 		$extract = '';
 		foreach ( $snippets as $index => $line ) {
-			if ( $last == - 1 ) {
+			if ( $last == -1 ) {
 				$extract .= $line; // first line
 			} elseif ( $last + 1 == $index
 				&& $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
@@ -271,7 +275,7 @@ class SearchHighlighter {
 			$extract .= '<b> ... </b>';
 		}
 
-		$processed = array();
+		$processed = [];
 		foreach ( $terms as $term ) {
 			if ( !isset( $processed[$term] ) ) {
 				$pat3 = "/$patPre(" . $term . ")$patPost/ui"; // highlight word
@@ -287,8 +291,8 @@ class SearchHighlighter {
 	/**
 	 * Split text into lines and add it to extracts array
 	 *
-	 * @param array $extracts Index -> $line
-	 * @param int $count
+	 * @param array &$extracts Index -> $line
+	 * @param int &$count
 	 * @param string $text
 	 */
 	function splitAndAdd( &$extracts, &$count, $text ) {
@@ -322,8 +326,8 @@ class SearchHighlighter {
 	 * @param string $text
 	 * @param int $start
 	 * @param int $end
-	 * @param int $posStart (out) actual start position
-	 * @param int $posEnd (out) actual end position
+	 * @param int &$posStart (out) actual start position
+	 * @param int &$posEnd (out) actual end position
 	 * @return string
 	 */
 	function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
@@ -362,7 +366,7 @@ class SearchHighlighter {
 		$tolerance = 10;
 		$s = max( 0, $point - $tolerance );
 		$l = min( strlen( $text ), $point + $tolerance ) - $s;
-		$m = array();
+		$m = [];
 
 		if ( preg_match(
 			'/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/',
@@ -393,10 +397,10 @@ class SearchHighlighter {
 	 *
 	 * @param string $pattern Regexp for matching lines
 	 * @param array $extracts Extracts to search
-	 * @param int $linesleft Number of extracts to make
-	 * @param int $contextchars Length of snippet
-	 * @param array $out Map for highlighted snippets
-	 * @param array $offsets Map of starting points of snippets
+	 * @param int &$linesleft Number of extracts to make
+	 * @param int &$contextchars Length of snippet
+	 * @param array &$out Map for highlighted snippets
+	 * @param array &$offsets Map of starting points of snippets
 	 * @protected
 	 */
 	function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
@@ -408,7 +412,7 @@ class SearchHighlighter {
 				continue; // this line already highlighted
 			}
 
-			$m = array();
+			$m = [];
 			if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
 				continue;
 			}
@@ -448,7 +452,7 @@ class SearchHighlighter {
 		$text = preg_replace( "/\\[\\[([^|]+?)\\]\\]/", "\\1", $text );
 		$text = preg_replace_callback(
 			"/\\[\\[([^|]+\\|)(.*?)\\]\\]/",
-			array( $this, 'linkReplace' ),
+			[ $this, 'linkReplace' ],
 			$text
 		);
 		$text = preg_replace( "/<\/?[^>]+>/", "", $text );
@@ -456,6 +460,10 @@ class SearchHighlighter {
 		$text = preg_replace( "/('''|<\/?[iIuUbB]>)/", "", $text );
 		$text = preg_replace( "/''/", "", $text );
 
+		// Note: the tag-stripping regex /<\/?[^>]+>/ above is
+		// insufficient for XSS safety, because an HTML tag can
+		// span multiple search results (T144845).
+		$text = Sanitizer::escapeHtmlAllowEntities( $text );
 		return $text;
 	}
 
@@ -485,8 +493,10 @@ class SearchHighlighter {
 	 * Simple & fast snippet extraction, but gives completely unrelevant
 	 * snippets
 	 *
+	 * Used when $wgAdvancedSearchHighlighting is false.
+	 *
 	 * @param string $text
-	 * @param array $terms
+	 * @param array $terms Escaped for regex by SearchDatabase::regexTerm()
 	 * @param int $contextlines
 	 * @param int $contextchars
 	 * @return string
@@ -508,7 +518,7 @@ class SearchHighlighter {
 				break;
 			}
 			++$lineno;
-			$m = array();
+			$m = [];
 			if ( !preg_match( $pat1, $line, $m ) ) {
 				continue;
 			}
@@ -543,7 +553,7 @@ class SearchHighlighter {
 	 * @return string
 	 */
 	public function highlightNone( $text, $contextlines, $contextchars ) {
-		$match = array();
+		$match = [];
 		$text = ltrim( $text ) . "\n"; // make sure the preg_match may find the last line
 		$text = str_replace( "\n\n", "\n", $text ); // remove empty lines
 		preg_match( "/^(.*\n){0,$contextlines}/", $text, $match );