X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2Fsearch%2FSearchHighlighter.php;h=2bd19558a229fd98170268fb89f00aad90335ca4;hb=a9007e8baf802f0f57d095e3bb4ad201c98c0cb3;hp=98c05b56955052383d8f03cd95b5d327563c5af9;hpb=f4baf67270f1b593d7a4153eabd14c6c1011b310;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/search/SearchHighlighter.php b/includes/search/SearchHighlighter.php index 98c05b5695..6135395aeb 100644 --- a/includes/search/SearchHighlighter.php +++ b/includes/search/SearchHighlighter.php @@ -29,15 +29,20 @@ class SearchHighlighter { protected $mCleanWikitext = true; + /** + * @warning If you pass false to this constructor, then + * the caller is responsible for HTML escaping. + */ function __construct( $cleanupWikitext = true ) { $this->mCleanWikitext = $cleanupWikitext; } /** - * Default implementation of wikitext highlighting + * Wikitext highlighting when $wgAdvancedSearchHighlighting = true * * @param string $text - * @param array $terms Terms to highlight (unescaped) + * @param array $terms Terms to highlight (not html escaped but + * regex escaped via SearchDatabase::regexTerm()) * @param int $contextlines * @param int $contextchars * @return string @@ -52,10 +57,10 @@ class SearchHighlighter { // spli text into text + templates/links/tables $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)"; // first capture group is for detecting nested templates/links/tables/references - $endPatterns = array( + $endPatterns = [ 1 => '/(\{\{)|(\}\})/', // template 2 => '/(\[\[)|(\]\])/', // image - 3 => "/(\n\\{\\|)|(\n\\|\\})/" ); // table + 3 => "/(\n\\{\\|)|(\n\\|\\})/" ]; // table // @todo FIXME: This should prolly be a hook or something // instead of hardcoding a class name from the Cite extension @@ -64,8 +69,8 @@ class SearchHighlighter { $endPatterns[4] = '/()|(<\/ref>)/'; } $spat .= '/'; - $textExt = array(); // text extracts - $otherExt = array(); // other extracts + $textExt = []; // text extracts + $otherExt = []; // other extracts $start = 0; $textLen = strlen( $text ); $count = 0; // sequence number to maintain ordering @@ -74,10 +79,10 @@ class SearchHighlighter { if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) { $epat = ''; foreach ( $matches as $key => $val ) { - if ( $key > 0 && $val[1] != - 1 ) { + if ( $key > 0 && $val[1] != -1 ) { if ( $key == 2 ) { // see if this is an image link - $ns = substr( $val[0], 2, - 1 ); + $ns = substr( $val[0], 2, -1 ); if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) { break; } @@ -136,7 +141,7 @@ class SearchHighlighter { if ( preg_match( '/[\x80-\xff]/', $term ) ) { $terms[$index] = preg_replace_callback( '/./us', - array( $this, 'caseCallback' ), + [ $this, 'caseCallback' ], $terms[$index] ); } else { @@ -145,7 +150,6 @@ class SearchHighlighter { } $anyterm = implode( '|', $terms ); $phrase = implode( "$wgSearchHighlightBoundaries+", $terms ); - // @todo FIXME: A hack to scale contextchars, a correct solution // would be to have contextchars actually be char and not byte // length, and do proper utf-8 substrings and lengths everywhere, @@ -161,8 +165,8 @@ class SearchHighlighter { $left = $contextlines; - $snippets = array(); - $offsets = array(); + $snippets = []; + $offsets = []; // show beginning only if it contains all words $first = 0; @@ -202,7 +206,7 @@ class SearchHighlighter { } // add extra chars to each snippet to make snippets constant size - $extended = array(); + $extended = []; if ( count( $snippets ) == 0 ) { // couldn't find the target words, just show beginning of article if ( array_key_exists( $first, $all ) ) { @@ -214,7 +218,7 @@ class SearchHighlighter { // if begin of the article contains the whole phrase, show only that !! if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] ) && $offsets[$first] < $contextchars * 2 ) { - $snippets = array( $first => $snippets[$first] ); + $snippets = [ $first => $snippets[$first] ]; } // calc by how much to extend existing snippets @@ -252,10 +256,10 @@ class SearchHighlighter { // $snippets = array_map( 'htmlspecialchars', $extended ); $snippets = $extended; - $last = - 1; + $last = -1; $extract = ''; foreach ( $snippets as $index => $line ) { - if ( $last == - 1 ) { + if ( $last == -1 ) { $extract .= $line; // first line } elseif ( $last + 1 == $index && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] ) @@ -271,7 +275,7 @@ class SearchHighlighter { $extract .= ' ... '; } - $processed = array(); + $processed = []; foreach ( $terms as $term ) { if ( !isset( $processed[$term] ) ) { $pat3 = "/$patPre(" . $term . ")$patPost/ui"; // highlight word @@ -287,8 +291,8 @@ class SearchHighlighter { /** * Split text into lines and add it to extracts array * - * @param array $extracts Index -> $line - * @param int $count + * @param array &$extracts Index -> $line + * @param int &$count * @param string $text */ function splitAndAdd( &$extracts, &$count, $text ) { @@ -322,8 +326,8 @@ class SearchHighlighter { * @param string $text * @param int $start * @param int $end - * @param int $posStart (out) actual start position - * @param int $posEnd (out) actual end position + * @param int &$posStart (out) actual start position + * @param int &$posEnd (out) actual end position * @return string */ function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) { @@ -362,7 +366,7 @@ class SearchHighlighter { $tolerance = 10; $s = max( 0, $point - $tolerance ); $l = min( strlen( $text ), $point + $tolerance ) - $s; - $m = array(); + $m = []; if ( preg_match( '/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/', @@ -393,10 +397,10 @@ class SearchHighlighter { * * @param string $pattern Regexp for matching lines * @param array $extracts Extracts to search - * @param int $linesleft Number of extracts to make - * @param int $contextchars Length of snippet - * @param array $out Map for highlighted snippets - * @param array $offsets Map of starting points of snippets + * @param int &$linesleft Number of extracts to make + * @param int &$contextchars Length of snippet + * @param array &$out Map for highlighted snippets + * @param array &$offsets Map of starting points of snippets * @protected */ function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) { @@ -408,7 +412,7 @@ class SearchHighlighter { continue; // this line already highlighted } - $m = array(); + $m = []; if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) { continue; } @@ -448,7 +452,7 @@ class SearchHighlighter { $text = preg_replace( "/\\[\\[([^|]+?)\\]\\]/", "\\1", $text ); $text = preg_replace_callback( "/\\[\\[([^|]+\\|)(.*?)\\]\\]/", - array( $this, 'linkReplace' ), + [ $this, 'linkReplace' ], $text ); $text = preg_replace( "/<\/?[^>]+>/", "", $text ); @@ -456,6 +460,10 @@ class SearchHighlighter { $text = preg_replace( "/('''|<\/?[iIuUbB]>)/", "", $text ); $text = preg_replace( "/''/", "", $text ); + // Note, the previous /<\/?[^>]+>/ is insufficient + // for XSS safety as the HTML tag can span multiple + // search results (T144845). + $text = Sanitizer::escapeHtmlAllowEntities( $text ); return $text; } @@ -485,8 +493,10 @@ class SearchHighlighter { * Simple & fast snippet extraction, but gives completely unrelevant * snippets * + * Used when $wgAdvancedSearchHighlighting is false. + * * @param string $text - * @param array $terms + * @param array $terms Escaped for regex by SearchDatabase::regexTerm() * @param int $contextlines * @param int $contextchars * @return string @@ -508,7 +518,7 @@ class SearchHighlighter { break; } ++$lineno; - $m = array(); + $m = []; if ( !preg_match( $pat1, $line, $m ) ) { continue; } @@ -543,7 +553,7 @@ class SearchHighlighter { * @return string */ public function highlightNone( $text, $contextlines, $contextchars ) { - $match = array(); + $match = []; $text = ltrim( $text ) . "\n"; // make sure the preg_match may find the last line $text = str_replace( "\n\n", "\n", $text ); // remove empty lines preg_match( "/^(.*\n){0,$contextlines}/", $text, $match );