FU r106752: use "media-" instead of "images-" in container names. Long live books...
[lhc/web/wiklou.git] / includes / search / SearchEngine.php
index a62104f..b3f5da9 100644 (file)
@@ -1,11 +1,15 @@
 <?php
 /**
- * @defgroup Search Search
+ * Basic search engine
  *
  * @file
  * @ingroup Search
  */
 
+/**
+ * @defgroup Search Search
+ */
+
 /**
  * Contain a class for special pages
  * @ingroup Search
@@ -18,6 +22,22 @@ class SearchEngine {
        var $namespaces = array( NS_MAIN );
        var $showRedirects = false;
 
+       /// Feature values
+       protected $features = array();
+
+       /**
+        * @var DatabaseBase
+        */
+       protected $db;
+
+       function __construct($db = null) {
+               if ( $db ) {
+                       $this->db = $db;
+               } else {
+                       $this->db = wfGetDB( DB_SLAVE );
+               }
+       }
+
        /**
         * Perform a full text search query and return a result set.
         * If title searches are not supported or disabled, return null.
@@ -42,9 +62,39 @@ class SearchEngine {
                return null;
        }
 
-       /** If this search backend can list/unlist redirects */
+       /**
+        * If this search backend can list/unlist redirects
+        * @deprecated since 1.18 Call supports( 'list-redirects' );
+        */
        function acceptListRedirects() {
-               return true;
+               wfDeprecated( __METHOD__, '1.18' );
+               return $this->supports( 'list-redirects' );
+       }
+
+       /**
+        * @since 1.18
+        * @param $feature String
+        * @return Boolean
+        */
+       public function supports( $feature ) {
+               switch( $feature ) {
+               case 'list-redirects':
+                       return true;
+               case 'title-suffix-filter':
+               default:
+                       return false;
+               }
+       }
+
+       /**
+        * Way to pass custom data for engines
+        * @since 1.18
+        * @param $feature String
+        * @param $data Mixed
+        * @return void
+        */
+       public function setFeatureData( $feature, $data ) {
+               $this->features[$feature] = $data;
        }
 
        /**
@@ -83,11 +133,11 @@ class SearchEngine {
                wfRunHooks( 'SearchGetNearMatchComplete', array( $searchterm, &$title ) );
                return $title;
        }
-       
+
        /**
-        * Do a near match (see SearchEngine::getNearMatch) and wrap it into a 
+        * Do a near match (see SearchEngine::getNearMatch) and wrap it into a
         * SearchResultSet.
-        * 
+        *
         * @param $searchterm string
         * @return SearchResultSet
         */
@@ -99,14 +149,15 @@ class SearchEngine {
         * Really find the title match.
         */
        private static function getNearMatchInternal( $searchterm ) {
-               global $wgContLang;
+               global $wgContLang, $wgEnableSearchContributorsByIP;
 
                $allSearchTerms = array( $searchterm );
 
                if ( $wgContLang->hasVariants() ) {
-                       $allSearchTerms = array_merge( $allSearchTerms, $wgContLang->convertLinkToAllVariants( $searchterm ) );
+                       $allSearchTerms = array_merge( $allSearchTerms, $wgContLang->autoConvertToAllVariants( $searchterm ) );
                }
 
+               $titleResult = null;
                if ( !wfRunHooks( 'SearchGetNearMatchBefore', array( $allSearchTerms, &$titleResult ) ) ) {
                        return $titleResult;
                }
@@ -115,16 +166,17 @@ class SearchEngine {
 
                        # Exact match? No need to look further.
                        $title = Title::newFromText( $term );
-                       if ( is_null( $title ) )
+                       if ( is_null( $title ) ){
                                return null;
+                       }
 
-                       if ( $title->getNamespace() == NS_SPECIAL || $title->isExternal() || $title->exists() ) {
+                       if ( $title->isSpecialPage() || $title->isExternal() || $title->exists() ) {
                                return $title;
                        }
 
                        # See if it still otherwise has content is some sane sense
-                       $article = MediaWiki::articleFromTitle( $title );
-                       if ( $article->hasViewableContent() ) {
+                       $page = WikiPage::factory( $title );
+                       if ( $page->hasViewableContent() ) {
                                return $title;
                        }
 
@@ -164,10 +216,13 @@ class SearchEngine {
 
                $title = Title::newFromText( $searchterm );
 
+
                # Entering an IP address goes to the contributions page
-               if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
-                       || User::isIP( trim( $searchterm ) ) ) {
-                       return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
+               if ( $wgEnableSearchContributorsByIP ) {
+                       if ( ( $title->getNamespace() == NS_USER && User::isIP( $title->getText() ) )
+                               || User::isIP( trim( $searchterm ) ) ) {
+                               return SpecialPage::getTitleFor( 'Contributions', $title->getDBkey() );
+                       }
                }
 
 
@@ -246,7 +301,7 @@ class SearchEngine {
                if ( strncmp( $query, $allkeyword, strlen( $allkeyword ) ) == 0 ) {
                        $this->namespaces = null;
                        $parsed = substr( $query, strlen( $allkeyword ) );
-               } else if ( strpos( $query, ':' ) !== false ) {
+               } elseif ( strpos( $query, ':' ) !== false ) {
                        $prefix = substr( $query, 0, strpos( $query, ':' ) );
                        $index = $wgContLang->getNsIndex( $prefix );
                        if ( $index !== false ) {
@@ -292,7 +347,7 @@ class SearchEngine {
                // get search everything preference, that can be set to be read for logged-in users
                $searcheverything = false;
                if ( ( $wgSearchEverythingOnlyLoggedIn && $user->isLoggedIn() )
-                   || !$wgSearchEverythingOnlyLoggedIn )
+                       || !$wgSearchEverythingOnlyLoggedIn )
                        $searcheverything = $user->getOption( 'searcheverything' );
 
                // searcheverything overrides other options
@@ -308,14 +363,11 @@ class SearchEngine {
        }
 
        /**
-        * Find snippet highlight settings for a given user
+        * Find snippet highlight settings for all users
         *
-        * @param $user User
         * @return Array contextlines, contextchars
         */
-       public static function userHighlightPrefs( &$user ) {
-               // $contextlines = $user->getOption( 'contextlines',  5 );
-               // $contextchars = $user->getOption( 'contextchars', 50 );
+       public static function userHighlightPrefs() {
                $contextlines = 2; // Hardcode this. Old defaults sucked. :)
                $contextchars = 75; // same as above.... :P
                return array( $contextlines, $contextchars );
@@ -378,10 +430,11 @@ class SearchEngine {
         */
        public static function create() {
                global $wgSearchType;
-               $dbr = wfGetDB( DB_SLAVE );
+               $dbr = null;
                if ( $wgSearchType ) {
                        $class = $wgSearchType;
                } else {
+                       $dbr = wfGetDB( DB_SLAVE );
                        $class = $dbr->getSearchEngine();
                }
                $search = new $class( $dbr );
@@ -420,13 +473,15 @@ class SearchEngine {
         * @return String
         */
        public static function getOpenSearchTemplate() {
-               global $wgOpenSearchTemplate, $wgServer, $wgScriptPath;
-               if ( $wgOpenSearchTemplate )    {
+               global $wgOpenSearchTemplate, $wgCanonicalServer;
+               if ( $wgOpenSearchTemplate ) {
                        return $wgOpenSearchTemplate;
                } else {
                        $ns = implode( '|', SearchEngine::defaultNamespaces() );
-                       if ( !$ns ) $ns = "0";
-                       return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace=' . $ns;
+                       if ( !$ns ) {
+                               $ns = "0";
+                       }
+                       return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
                }
        }
 
@@ -436,11 +491,11 @@ class SearchEngine {
         * @return String
         */
        public static function getMWSuggestTemplate() {
-               global $wgMWSuggestTemplate, $wgServer, $wgScriptPath;
+               global $wgMWSuggestTemplate, $wgServer;
                if ( $wgMWSuggestTemplate )
                        return $wgMWSuggestTemplate;
                else
-                       return $wgServer . $wgScriptPath . '/api.php?action=opensearch&search={searchTerms}&namespace={namespaces}&suggest';
+                       return $wgServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace={namespaces}&suggest';
        }
 }
 
@@ -561,6 +616,9 @@ class SearchResultSet {
  * This class is used for different SQL-based search engines shipped with MediaWiki
  */
 class SqlSearchResultSet extends SearchResultSet {
+
+       protected $mResultSet;
+
        function __construct( $resultSet, $terms ) {
                $this->mResultSet = $resultSet;
                $this->mTerms = $terms;
@@ -584,7 +642,7 @@ class SqlSearchResultSet extends SearchResultSet {
                $row = $this->mResultSet->fetchObject();
                if ( $row === false )
                        return false;
-                       
+
                return SearchResult::newFromRow( $row );
        }
 
@@ -605,19 +663,33 @@ class SearchResultTooMany {
 
 
 /**
- * @todo Fixme: This class is horribly factored. It would probably be better to
+ * @todo FIXME: This class is horribly factored. It would probably be better to
  * have a useful base class to which you pass some standard information, then
  * let the fancy self-highlighters extend that.
  * @ingroup Search
  */
 class SearchResult {
+
+       /**
+        * @var Revision
+        */
        var $mRevision = null;
        var $mImage = null;
 
+       /**
+        * @var Title
+        */
+       var $mTitle;
+
+       /**
+        * @var String
+        */
+       var $mText;
+
        /**
         * Return a new SearchResult and initializes it with a title.
-        * 
-        * @param $title Title 
+        *
+        * @param $title Title
         * @return SearchResult
         */
        public static function newFromTitle( $title ) {
@@ -627,7 +699,7 @@ class SearchResult {
        }
        /**
         * Return a new SearchResult and initializes it with a row.
-        * 
+        *
         * @param $row object
         * @return SearchResult
         */
@@ -636,28 +708,28 @@ class SearchResult {
                $result->initFromRow( $row );
                return $result;
        }
-       
+
        public function __construct( $row = null ) {
                if ( !is_null( $row ) ) {
                        // Backwards compatibility with pre-1.17 callers
                        $this->initFromRow( $row );
                }
        }
-       
+
        /**
         * Initialize from a database row. Makes a Title and passes that to
         * initFromTitle.
-        * 
+        *
         * @param $row object
         */
        protected function initFromRow( $row ) {
                $this->initFromTitle( Title::makeTitle( $row->page_namespace, $row->page_title ) );
        }
-       
+
        /**
         * Initialize from a Title and if possible initializes a corresponding
         * Revision and File.
-        * 
+        *
         * @param $title Title
         */
        protected function initFromTitle( $title ) {
@@ -774,7 +846,7 @@ class SearchResult {
        function getTimestamp() {
                if ( $this->mRevision )
                        return $this->mRevision->getTimestamp();
-               else if ( $this->mImage )
+               elseif ( $this->mImage )
                        return $this->mImage->getTimestamp();
                return '';
        }
@@ -843,7 +915,7 @@ class SearchNearMatchResultSet extends SearchResultSet {
 class SearchHighlighter {
        var $mCleanWikitext = true;
 
-       function SearchHighlighter( $cleanupWikitext = true ) {
+       function __construct( $cleanupWikitext = true ) {
                $this->mCleanWikitext = $cleanupWikitext;
        }
 
@@ -872,7 +944,7 @@ class SearchHighlighter {
                        2 => '/(\[\[)|(\]\])/', // image
                        3 => "/(\n\\{\\|)|(\n\\|\\})/" ); // table
 
-               // FIXME: this should prolly be a hook or something
+               // @todo FIXME: This should probably be a hook or something
                if ( function_exists( 'wfCite' ) ) {
                        $spat .= '|(<ref>)'; // references via cite extension
                        $endPatterns[4] = '/(<ref>)|(<\/ref>)/';
@@ -958,7 +1030,7 @@ class SearchHighlighter {
                $anyterm = implode( '|', $terms );
                $phrase = implode( "$wgSearchHighlightBoundaries+", $terms );
 
-               // FIXME: a hack to scale contextchars, a correct solution
+               // @todo FIXME: A hack to scale contextchars, a correct solution
                // would be to have contextchars actually be char and not byte
                // length, and do proper utf-8 substrings and lengths everywhere,
                // but PHP is making that very hard and unclean to implement :(
@@ -1027,7 +1099,7 @@ class SearchHighlighter {
                } else {
                        // if begin of the article contains the whole phrase, show only that !!
                        if ( array_key_exists( $first, $snippets ) && preg_match( $pat1, $snippets[$first] )
-                           && $offsets[$first] < $contextchars * 2 ) {
+                               && $offsets[$first] < $contextchars * 2 ) {
                                $snippets = array ( $first => $snippets[$first] );
                        }
 
@@ -1048,10 +1120,10 @@ class SearchHighlighter {
                                // add more lines
                                $add = $index + 1;
                                while ( $len < $targetchars - 20
-                                      && array_key_exists( $add, $all )
-                                      && !array_key_exists( $add, $snippets ) ) {
-                                   $offsets[$add] = 0;
-                                   $tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
+                                          && array_key_exists( $add, $all )
+                                          && !array_key_exists( $add, $snippets ) ) {
+                                       $offsets[$add] = 0;
+                                       $tt = "\n" . $this->extract( $all[$add], 0, $targetchars - $len, $offsets[$add] );
                                        $extended[$add] = $tt;
                                        $len += strlen( $tt );
                                        $add++;
@@ -1081,7 +1153,7 @@ class SearchHighlighter {
                        if ( ! isset( $processed[$term] ) ) {
                                $pat3 = "/$patPre(" . $term . ")$patPost/ui"; // highlight word
                                $extract = preg_replace( $pat3,
-                                       "\\1<span class='searchmatch'>\\2</span>\\3", $extract );
+                                       "\\1<span class='searchmatch'>\\2</span>\\3", $extract );
                                $processed[$term] = true;
                        }
                }
@@ -1116,8 +1188,9 @@ class SearchHighlighter {
                global $wgContLang;
                if ( strlen( $matches[0] ) > 1 ) {
                        return '[' . $wgContLang->lc( $matches[0] ) . $wgContLang->uc( $matches[0] ) . ']';
-               } else
+               } else {
                        return $matches[0];
+               }
        }
 
        /**
@@ -1131,22 +1204,27 @@ class SearchHighlighter {
         * @return String
         */
        function extract( $text, $start, $end, &$posStart = null, &$posEnd = null ) {
-               if ( $start != 0 )
+               if ( $start != 0 ) {
                        $start = $this->position( $text, $start, 1 );
-               if ( $end >= strlen( $text ) )
+               }
+               if ( $end >= strlen( $text ) ) {
                        $end = strlen( $text );
-               else
+               } else {
                        $end = $this->position( $text, $end );
+               }
 
-               if ( !is_null( $posStart ) )
+               if ( !is_null( $posStart ) ) {
                        $posStart = $start;
-               if ( !is_null( $posEnd ) )
+               }
+               if ( !is_null( $posEnd ) ) {
                        $posEnd = $end;
+               }
 
-               if ( $end > $start )
+               if ( $end > $start )  {
                        return substr( $text, $start, $end - $start );
-               else
+               } else {
                        return '';
+               }
        }
 
        /**
@@ -1271,66 +1349,67 @@ class SearchHighlighter {
        }
 
        /**
-     * Simple & fast snippet extraction, but gives completely unrelevant
-     * snippets
-     *
-     * @param $text String
-     * @param $terms Array
-     * @param $contextlines Integer
-     * @param $contextchars Integer
-     * @return String
-     */
-    public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
-        global $wgContLang;
-        $fname = __METHOD__;
+        * Simple & fast snippet extraction, but gives completely irrelevant
+        * snippets
+        *
+        * @param $text String
+        * @param $terms Array
+        * @param $contextlines Integer
+        * @param $contextchars Integer
+        * @return String
+        */
+       public function highlightSimple( $text, $terms, $contextlines, $contextchars ) {
+               global $wgContLang;
+               $fname = __METHOD__;
 
-        $lines = explode( "\n", $text );
+               $lines = explode( "\n", $text );
 
-        $terms = implode( '|', $terms );
-        $max = intval( $contextchars ) + 1;
-        $pat1 = "/(.*)($terms)(.{0,$max})/i";
+               $terms = implode( '|', $terms );
+               $max = intval( $contextchars ) + 1;
+               $pat1 = "/(.*)($terms)(.{0,$max})/i";
 
-        $lineno = 0;
+               $lineno = 0;
 
-        $extract = "";
-        wfProfileIn( "$fname-extract" );
-        foreach ( $lines as $line ) {
-            if ( 0 == $contextlines ) {
-                break;
-            }
-            ++$lineno;
-            $m = array();
-            if ( ! preg_match( $pat1, $line, $m ) ) {
-                continue;
-            }
-            --$contextlines;
-            $pre = $wgContLang->truncate( $m[1], - $contextchars );
+               $extract = "";
+               wfProfileIn( "$fname-extract" );
+               foreach ( $lines as $line ) {
+                       if ( 0 == $contextlines ) {
+                               break;
+                       }
+                       ++$lineno;
+                       $m = array();
+                       if ( ! preg_match( $pat1, $line, $m ) ) {
+                               continue;
+                       }
+                       --$contextlines;
+                       // truncate function changes ... to relevant i18n message.
+                       $pre = $wgContLang->truncate( $m[1], - $contextchars, '...', false );
 
-            if ( count( $m ) < 3 ) {
-                $post = '';
-            } else {
-                $post = $wgContLang->truncate( $m[3], $contextchars );
-            }
+                       if ( count( $m ) < 3 ) {
+                               $post = '';
+                       } else {
+                               $post = $wgContLang->truncate( $m[3], $contextchars, '...', false );
+                       }
 
-            $found = $m[2];
+                       $found = $m[2];
 
-            $line = htmlspecialchars( $pre . $found . $post );
-            $pat2 = '/(' . $terms . ")/i";
-            $line = preg_replace( $pat2,
-              "<span class='searchmatch'>\\1</span>", $line );
+                       $line = htmlspecialchars( $pre . $found . $post );
+                       $pat2 = '/(' . $terms . ")/i";
+                       $line = preg_replace( $pat2,
+                         "<span class='searchmatch'>\\1</span>", $line );
 
-            $extract .= "${line}\n";
-        }
-        wfProfileOut( "$fname-extract" );
+                       $extract .= "${line}\n";
+               }
+               wfProfileOut( "$fname-extract" );
 
-        return $extract;
-    }
+               return $extract;
+       }
 
 }
 
 /**
  * Dummy class to be used when non-supported Database engine is present.
- * @todo Fixme: dummy class should probably try something at least mildly useful,
+ * @todo FIXME: Dummy class should probably try something at least mildly useful,
  * such as a LIKE search through titles.
  * @ingroup Search
  */