Merge "maintenance: Script to rename titles for Unicode uppercasing changes"

[lhc/web/wiklou.git] / includes / search / SearchHighlighter.php
diff --git a/includes/search/SearchHighlighter.php b/includes/search/SearchHighlighter.php

index 2bd1955..6c01f79 100644 (file)
--- a/includes/search/SearchHighlighter.php
+++ b/includes/search/SearchHighlighter.php
@@ -21,6 +21,8 @@
   * @ingroup Search
   */
  
+use MediaWiki\MediaWikiServices;
+
  /**
   * Highlight bits of wikitext
   *
@@ -29,21 +31,27 @@
  class SearchHighlighter {
         protected $mCleanWikitext = true;
  
+       /**
+        * @warning If you pass false to this constructor, then
+        *  the caller is responsible for HTML escaping.
+        * @param bool $cleanupWikitext
+        */
         function __construct( $cleanupWikitext = true ) {
                 $this->mCleanWikitext = $cleanupWikitext;
         }
  
         /**
-        * Default implementation of wikitext highlighting
+        * Wikitext highlighting when $wgAdvancedSearchHighlighting = true
          *
          * @param string $text
-        * @param array $terms Terms to highlight (unescaped)
+        * @param string[] $terms Terms to highlight (not html escaped but
+        *   regex escaped via SearchDatabase::regexTerm())
          * @param int $contextlines
          * @param int $contextchars
          * @return string
          */
         public function highlightText( $text, $terms, $contextlines, $contextchars ) {
-               global $wgContLang, $wgSearchHighlightBoundaries;
+               global $wgSearchHighlightBoundaries;
  
                 if ( $text == '' ) {
                         return '';
@@ -74,11 +82,14 @@ class SearchHighlighter {
                         if ( preg_match( $spat, $text, $matches, PREG_OFFSET_CAPTURE, $start ) ) {
                                 $epat = '';
                                 foreach ( $matches as $key => $val ) {
-                                       if ( $key > 0 && $val[1] != - 1 ) {
+                                       if ( $key > 0 && $val[1] != -1 ) {
                                                 if ( $key == 2 ) {
                                                         // see if this is an image link
-                                                       $ns = substr( $val[0], 2, - 1 );
-                                                       if ( $wgContLang->getNsIndex( $ns ) != NS_FILE ) {
+                                                       $ns = substr( $val[0], 2, -1 );
+                                                       if (
+                                                               MediaWikiServices::getInstance()->getContentLanguage()->
+                                                               getNsIndex( $ns ) != NS_FILE
+                                                       ) {
                                                                 break;
                                                         }
  
@@ -145,7 +156,6 @@ class SearchHighlighter {
                 }
                 $anyterm = implode( '|', $terms );
                 $phrase = implode( "$wgSearchHighlightBoundaries+", $terms );
-
                 // @todo FIXME: A hack to scale contextchars, a correct solution
                 // would be to have contextchars actually be char and not byte
                 // length, and do proper utf-8 substrings and lengths everywhere,
@@ -252,10 +262,10 @@ class SearchHighlighter {
  
                 // $snippets = array_map( 'htmlspecialchars', $extended );
                 $snippets = $extended;
-               $last = - 1;
+               $last = -1;
                 $extract = '';
                 foreach ( $snippets as $index => $line ) {
-                       if ( $last == - 1 ) {
+                       if ( $last == -1 ) {
                                 $extract .= $line; // first line
                         } elseif ( $last + 1 == $index
                                 && $offsets[$last] + strlen( $snippets[$last] ) >= strlen( $all[$last] )
@@ -287,8 +297,8 @@ class SearchHighlighter {
         /**
          * Split text into lines and add it to extracts array
          *
-        * @param array $extracts Index -> $line
-        * @param int $count
+        * @param array &$extracts Index -> $line
+        * @param int &$count
          * @param string $text
          */
         function splitAndAdd( &$extracts, &$count, $text ) {
@@ -308,9 +318,10 @@ class SearchHighlighter {
          * @return string
          */
         function caseCallback( $matches ) {
-               global $wgContLang;
                 if ( strlen( $matches[0] ) > 1 ) {
-                       return '[' . $wgContLang->lc( $matches[0] ) . $wgContLang->uc( $matches[0] ) . ']';
+                       $contLang = MediaWikiServices::getInstance()->getContentLanguage();
+                       return '[' . $contLang->lc( $matches[0] ) .
+                               $contLang->uc( $matches[0] ) . ']';
                 } else {
                         return $matches[0];
                 }
@@ -322,8 +333,8 @@ class SearchHighlighter {
          * @param string $text
          * @param int $start
          * @param int $end
-        * @param int $posStart (out) actual start position
-        * @param int $posEnd (out) actual end position
+        * @param int|null &$posStart (out) actual start position
+        * @param int|null &$posEnd (out) actual end position
          * @return string
          */
         function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
@@ -393,10 +404,10 @@ class SearchHighlighter {
          *
          * @param string $pattern Regexp for matching lines
          * @param array $extracts Extracts to search
-        * @param int $linesleft Number of extracts to make
-        * @param int $contextchars Length of snippet
-        * @param array $out Map for highlighted snippets
-        * @param array $offsets Map of starting points of snippets
+        * @param int &$linesleft Number of extracts to make
+        * @param int &$contextchars Length of snippet
+        * @param array &$out Map for highlighted snippets
+        * @param array &$offsets Map of starting points of snippets
          * @protected
          */
         function process( $pattern, $extracts, &$linesleft, &$contextchars, &$out, &$offsets ) {
@@ -456,6 +467,10 @@ class SearchHighlighter {
                 $text = preg_replace( "/('''|<\/?[iIuUbB]>)/", "", $text );
                 $text = preg_replace( "/''/", "", $text );
  
+               // Note, the previous /<\/?[^>]+>/ is insufficient
+               // for XSS safety as the HTML tag can span multiple
+               // search results (T144845).
+               $text = Sanitizer::escapeHtmlAllowEntities( $text );
                 return $text;
         }
  
@@ -471,9 +486,8 @@ class SearchHighlighter {
                 if ( $colon === false ) {
                         return $matches[2]; // replace with caption
                 }
-               global $wgContLang;
                 $ns = substr( $matches[1], 0, $colon );
-               $index = $wgContLang->getNsIndex( $ns );
+               $index = MediaWikiServices::getInstance()->getContentLanguage()->getNsIndex( $ns );
                 if ( $index !== false && ( $index == NS_FILE || $index == NS_CATEGORY ) ) {
                         return $matches[0]; // return the whole thing
                 } else {
@@ -485,15 +499,15 @@ class SearchHighlighter {
          * Simple & fast snippet extraction, but gives completely irrelevant
          * snippets
          *
+        * Used when $wgAdvancedSearchHighlighting is false.
+        *
          * @param string $text
-        * @param array $terms
+        * @param string[] $terms Escaped for regex by SearchDatabase::regexTerm()
          * @param int $contextlines
          * @param int $contextchars
          * @return string
          */
         public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
-               global $wgContLang;
-
                 $lines = explode( "\n", $text );
  
                 $terms = implode( '|', $terms );
@@ -503,8 +517,9 @@ class SearchHighlighter {
                 $lineno = 0;
  
                 $extract = "";
+               $contLang = MediaWikiServices::getInstance()->getContentLanguage();
                 foreach ( $lines as $line ) {
-                       if ( 0 == $contextlines ) {
+                       if ( $contextlines == 0 ) {
                                 break;
                         }
                         ++$lineno;
@@ -514,12 +529,12 @@ class SearchHighlighter {
                         }
                         --$contextlines;
                         // truncate function changes ... to relevant i18n message.
-                       $pre = $wgContLang->truncate( $m[1], - $contextchars, '...', false );
+                       $pre = $contLang->truncateForVisual( $m[1], - $contextchars, '...', false );
  
                         if ( count( $m ) < 3 ) {
                                 $post = '';
                         } else {
-                               $post = $wgContLang->truncate( $m[3], $contextchars, '...', false );
+                               $post = $contLang->truncateForVisual( $m[3], $contextchars, '...', false );
                         }
  
                         $found = $m[2];