Add missing & to @param documentation to match function call

[lhc/web/wiklou.git] / includes / search / SearchHighlighter.php
diff --git a/includes/search/SearchHighlighter.php b/includes/search/SearchHighlighter.php

index 5087e8d..6135395 100644 (file)
--- a/includes/search/SearchHighlighter.php
+++ b/includes/search/SearchHighlighter.php
@@ -29,15 +29,20 @@
  class SearchHighlighter {
         protected $mCleanWikitext = true;
  
+       /**
+        * @warning If you pass false to this constructor, then
+        *  the caller is responsible for HTML escaping.
+        */
         function __construct( $cleanupWikitext = true ) {
                 $this->mCleanWikitext = $cleanupWikitext;
         }
  
         /**
-        * Default implementation of wikitext highlighting
+        * Wikitext highlighting when $wgAdvancedSearchHighlighting = true
          *
          * @param string $text
-        * @param array $terms Terms to highlight (unescaped)
+        * @param array $terms Terms to highlight (not html escaped but
+        *   regex escaped via SearchDatabase::regexTerm())
          * @param int $contextlines
          * @param int $contextchars
          * @return string
@@ -52,10 +57,10 @@ class SearchHighlighter {
                 // spli text into text + templates/links/tables
                 $spat = "/(\\{\\{)|(\\[\\[[^\\]:]+:)|(\n\\{\\|)";
                 // first capture group is for detecting nested templates/links/tables/references
-               $endPatterns = array(
+               $endPatterns = [
                         1 => '/(\{\{)|(\}\})/', // template
                         2 => '/(\[\[)|(\]\])/', // image
-                       3 => "/(\n\\{\\|)|(\n\\|\\})/" ); // table
+                       3 => "/(\n\\{\\|)|(\n\\|\\})/" ]; // table
  
                 // @todo FIXME: This should prolly be a hook or something
                 // instead of hardcoding a class name from the Cite extension
@@ -64,8 +69,8 @@ class SearchHighlighter {
                         $endPatterns[4] = '/(<ref>)|(<\/ref>)/';
                 }
                 $spat .= '/';
-               $textExt = array(); // text extracts
-               $otherExt = array(); // other extracts
+               $textExt = []; // text extracts
+               $otherExt = []; // other extracts
                 $start = 0;
                 $textLen = strlen( $text );
                 $count = 0; // sequence number to maintain ordering
@@ -74,10 +79,10 @@ class SearchHighlighter {
                         if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
                                 $epat = '';
                                 foreach ( $matches as $key => $val ) {
-                                       if ( $key > 0 && $val[1] != - 1 ) {
+                                       if ( $key > 0 && $val[1] != -1 ) {
                                                 if ( $key == 2 ) {
                                                         // see if this is an image link
-                                                       $ns = substr( $val[0], 2, - 1 );
+                                                       $ns = substr( $val[0], 2, -1 );
                                                         if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
                                                                 break;
                                                         }
@@ -136,7 +141,7 @@ class SearchHighlighter {
                         if ( preg_match( '/[\x80-\xff]/', $term ) ) {
                                 $terms[$index] = preg_replace_callback(
                                         '/./us',
-                                       array( $this, 'caseCallback' ),
+                                       [ $this, 'caseCallback' ],
                                         $terms[$index]
                                 );
                         } else {
@@ -145,7 +150,6 @@ class SearchHighlighter {
                 }
                 $anyterm = implode( '|', $terms );
                 $phrase = implode( "$wgSearchHighlightBoundaries+", $terms );
-
                 // @todo FIXME: A hack to scale contextchars, a correct solution
                 // would be to have contextchars actually be char and not byte
                 // length, and do proper utf-8 substrings and lengths everywhere,
@@ -161,8 +165,8 @@ class SearchHighlighter {
  
                 $left = $contextlines;
  
-               $snippets = array();
-               $offsets = array();
+               $snippets = [];
+               $offsets = [];
  
                 // show beginning only if it contains all words
                 $first = 0;
@@ -202,7 +206,7 @@ class SearchHighlighter {
                 }
  
                 // add extra chars to each snippet to make snippets constant size
-               $extended = array();
+               $extended = [];
                 if ( count( $snippets ) == 0 ) {
                         // couldn't find the target words, just show beginning of article
                         if ( array_key_exists( $first, $all ) ) {
@@ -214,11 +218,11 @@ class SearchHighlighter {
                         // if begin of the article contains the whole phrase, show only that !!
                         if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
                                 && $offsets[$first] < $contextchars * 2 ) {
-                               $snippets = array( $first => $snippets[$first] );
+                               $snippets = [ $first => $snippets[$first] ];
                         }
  
                         // calc by how much to extend existing snippets
-                       $targetchars = intval( ( $contextchars * $contextlines ) / count ( $snippets ) );
+                       $targetchars = intval( ( $contextchars * $contextlines ) / count( $snippets ) );
                 }
  
                 foreach ( $snippets as $index => $line ) {
@@ -252,10 +256,10 @@ class SearchHighlighter {
  
                 // $snippets = array_map( 'htmlspecialchars', $extended );
                 $snippets = $extended;
-               $last = - 1;
+               $last = -1;
                 $extract = '';
                 foreach ( $snippets as $index => $line ) {
-                       if ( $last == - 1 ) {
+                       if ( $last == -1 ) {
                                 $extract .= $line; // first line
                         } elseif ( $last + 1 == $index
                                 && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
@@ -271,7 +275,7 @@ class SearchHighlighter {
                         $extract .= '<b> ... </b>';
                 }
  
-               $processed = array();
+               $processed = [];
                 foreach ( $terms as $term ) {
                         if ( !isset( $processed[$term] ) ) {
                                 $pat3 = "/$patPre(" . $term . ")$patPost/ui"; // highlight word
@@ -287,8 +291,8 @@ class SearchHighlighter {
         /**
          * Split text into lines and add it to extracts array
          *
-        * @param array $extracts Index -> $line
-        * @param int $count
+        * @param array &$extracts Index -> $line
+        * @param int &$count
          * @param string $text
          */
         function splitAndAdd( &$extracts, &$count, $text ) {
@@ -322,8 +326,8 @@ class SearchHighlighter {
          * @param string $text
          * @param int $start
          * @param int $end
-        * @param int $posStart (out) actual start position
-        * @param int $posEnd (out) actual end position
+        * @param int &$posStart (out) actual start position
+        * @param int &$posEnd (out) actual end position
          * @return string
          */
         function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
@@ -362,7 +366,7 @@ class SearchHighlighter {
                 $tolerance = 10;
                 $s = max( 0, $point - $tolerance );
                 $l = min( strlen( $text ), $point + $tolerance ) - $s;
-               $m = array();
+               $m = [];
  
                 if ( preg_match(
                         '/[ ,.!?~!@#$%^&*\(\)+=\-\\\|\[\]"\'<>]/',
@@ -393,10 +397,10 @@ class SearchHighlighter {
          *
          * @param string $pattern Regexp for matching lines
          * @param array $extracts Extracts to search
-        * @param int $linesleft Number of extracts to make
-        * @param int $contextchars Length of snippet
-        * @param array $out Map for highlighted snippets
-        * @param array $offsets Map of starting points of snippets
+        * @param int &$linesleft Number of extracts to make
+        * @param int &$contextchars Length of snippet
+        * @param array &$out Map for highlighted snippets
+        * @param array &$offsets Map of starting points of snippets
          * @protected
          */
         function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
@@ -408,7 +412,7 @@ class SearchHighlighter {
                                 continue; // this line already highlighted
                         }
  
-                       $m = array();
+                       $m = [];
                         if ( !preg_match( $pattern, $line, $m, PREG_OFFSET_CAPTURE ) ) {
                                 continue;
                         }
@@ -448,7 +452,7 @@ class SearchHighlighter {
                 $text = preg_replace( "/\\[\\[([^|]+?)\\]\\]/", "\\1", $text );
                 $text = preg_replace_callback(
                         "/\\[\\[([^|]+\\|)(.*?)\\]\\]/",
-                       array( $this, 'linkReplace' ),
+                       [ $this, 'linkReplace' ],
                         $text
                 );
                 $text = preg_replace( "/<\/?[^>]+>/", "", $text );
@@ -456,6 +460,10 @@ class SearchHighlighter {
                 $text = preg_replace( "/('''|<\/?[iIuUbB]>)/", "", $text );
                 $text = preg_replace( "/''/", "", $text );
  
+               // Note, the previous /<\/?[^>]+>/ is insufficient
+               // for XSS safety as the HTML tag can span multiple
+               // search results (T144845).
+               $text = Sanitizer::escapeHtmlAllowEntities( $text );
                 return $text;
         }
  
@@ -485,8 +493,10 @@ class SearchHighlighter {
          * Simple & fast snippet extraction, but gives completely unrelevant
          * snippets
          *
+        * Used when $wgAdvancedSearchHighlighting is false.
+        *
          * @param string $text
-        * @param array $terms
+        * @param array $terms Escaped for regex by SearchDatabase::regexTerm()
          * @param int $contextlines
          * @param int $contextchars
          * @return string
@@ -508,7 +518,7 @@ class SearchHighlighter {
                                 break;
                         }
                         ++$lineno;
-                       $m = array();
+                       $m = [];
                         if ( !preg_match( $pat1, $line, $m ) ) {
                                 continue;
                         }
@@ -543,11 +553,13 @@ class SearchHighlighter {
          * @return string
          */
         public function highlightNone( $text, $contextlines, $contextchars ) {
-               $match = array();
+               $match = [];
                 $text = ltrim( $text ) . "\n"; // make sure the preg_match may find the last line
                 $text = str_replace( "\n\n", "\n", $text ); // remove empty lines
                 preg_match( "/^(.*\n){0,$contextlines}/", $text, $match );
-               $text = htmlspecialchars( substr( trim( $match[0] ), 0, $contextlines * $contextchars ) ); // trim and limit to max number of chars
+
+               // Trim and limit to max number of chars
+               $text = htmlspecialchars( substr( trim( $match[0] ), 0, $contextlines * $contextchars ) );
                 return str_replace( "\n", '<br>', $text );
         }
  }