Handle redirects during prefix search exact match
authorNik Everett <neverett@wikimedia.org>
Fri, 21 Nov 2014 19:47:32 +0000 (14:47 -0500)
committerNik Everett <neverett@wikimedia.org>
Fri, 21 Nov 2014 19:47:32 +0000 (14:47 -0500)
Prefix search attempts to find exact matches to the user's query that aren't
returned by plugins.  In some cases, like when the exact match is a redirect
and the target of the redirect is also in the search results, it would result
in multiple results in prefix search going the same place.  This looks really
silly when the top hit is "Barack obama" (a redirect) and the next one is
"Barack Obama" (the target of the redirect).

This handles a bunch of cases to do with redirects in the matches and when
the exact match is a redirect.

Bug: 736731
Change-Id: I49fe1ccec84bd5d1f44c6b91b260abf50f2cc3a1

includes/PrefixSearch.php
tests/phpunit/MediaWikiTestCase.php
tests/phpunit/includes/PrefixSearchTest.php

index 839fedc..955313b 100644 (file)
@@ -155,34 +155,110 @@ abstract class PrefixSearch {
                $srchres = array();
                if ( wfRunHooks( 'PrefixSearchBackend', array( $namespaces, $search, $limit, &$srchres ) ) ) {
                        return $this->titles( $this->defaultSearchBackend( $namespaces, $search, $limit ) );
+               }
+               return $this->strings( $this->handleResultFromHook( $srchres, $namespaces, $search, $limit ) );
+       }
+
+       /**
+        * Default search backend does proper prefix searching, but custom backends
+        * may sort based on other algorythms that may cause the exact title match
+        * to not be in the results or be lower down the list.
+        * @param array $srchres results from the hook
+        * @return array munged results from the hook
+        */
+       private function handleResultFromHook( $srchres, $namespaces, $search, $limit ) {
+               // Pick namespace (based on PrefixSearch::defaultSearchBackend)
+               $ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : $namespaces[0];
+               $t = Title::newFromText( $search, $ns );
+               if ( !$t || !$t->exists() ) {
+                       // No exact match so just return the search results
+                       return $srchres;
+               }
+               $string = $t->getPrefixedText();
+               $key = array_search( $string, $srchres );
+               if ( $key !== false ) {
+                       // Exact match was in the results so just move it to the front
+                       return $this->pullFront( $key, $srchres );
+               }
+               // Exact match not in the search results so check for some redirect handling cases
+               if ( $t->isRedirect() ) {
+                       $target = $this->getRedirectTarget( $t );
+                       $key = array_search( $target, $srchres );
+                       if ( $key !== false ) {
+                               // Exact match is a redirect to one of the returned matches so pull the
+                               // returned match to the front.  This might look odd but the alternative
+                               // is to put the redirect in front and drop the match.  The name of the
+                               // found match is often more descriptive/better formed than the name of
+                               // the redirec AND by definition they share a prefix.  Hopefully this
+                               // choice is less confusing and more helpful.  But it might now be.  But
+                               // it is the choice we're going with for now.
+                               return $this->pullFront( $key, $srchres );
+                       }
+                       $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
+                       if ( isset( $redirectTargetsToRedirect[ $target ] ) ) {
+                               // The exact match and something in the results list are both redirects
+                               // to the same thing!  In this case we'll pull the returned match to the
+                               // top following the same logic above.  Again, it might not be a perfect
+                               // choice but it'll do.
+                               return $this->pullFront( $redirectTargetsToRedirect[ $target ], $srchres );
+                       }
                } else {
-                       // Default search backend does proper prefix searching, but custom backends
-                       // may sort based on other algorythms that may cause the exact title match
-                       // to not be in the results or be lower down the list.
-
-                       // Pick namespace (based on PrefixSearch::defaultSearchBackend)
-                       $ns = in_array( NS_MAIN, $namespaces ) ? NS_MAIN : $namespaces[0];
-                       $t = Title::newFromText( $search, $ns );
-                       if ( $t ) {
-                               // If text is a valid title and is in the search results
-                               $string = $t->getPrefixedText();
-                               $key = array_search( $string, $srchres );
-                               if ( $key !== false ) {
-                                       // Move it to the front
-                                       $cut = array_splice( $srchres, $key, 1 );
-                                       array_unshift( $srchres, $cut[0] );
-                               } elseif ( $t->exists() ) {
-                                       // Add it in front
-                                       array_unshift( $srchres, $string );
-
-                                       if ( count( $srchres ) > $limit ) {
-                                               array_pop( $srchres );
-                                       }
-                               }
+                       $redirectTargetsToRedirect = $this->redirectTargetsToRedirect( $srchres );
+                       if ( isset( $redirectTargetsToRedirect[ $string ] ) ) {
+                               // The exact match is the target of a redirect already in the results list so remove
+                               // the redirect from the results list and push the exact match to the front
+                               array_splice( $srchres, $redirectTargetsToRedirect[ $string ], 1 );
+                               array_unshift( $srchres, $string );
+                               return $srchres;
                        }
                }
 
-               return $this->strings( $srchres );
+               // Exact match is totally unique from the other results so just add it to the front
+               array_unshift( $srchres, $string );
+               // And roll one off the end if the results are too long
+               if ( count( $srchres ) > $limit ) {
+                       array_pop( $srchres );
+               }
+               return $srchres;
+       }
+
+       /**
+        * @param Array(string) $titles as strings
+        * @return Array(string => int) redirect target prefixedText to index of title in titles
+        *   that is a redirect to it.
+        */
+       private function redirectTargetsToRedirect( $titles ) {
+               $result = array();
+               foreach ( $titles as $key => $titleText ) {
+                       $title = Title::newFromText( $titleText );
+                       if ( !$title || !$title->isRedirect() ) {
+                               continue;
+                       }
+                       $target = $this->getRedirectTarget( $title );
+                       if ( !$target ) {
+                               continue;
+                       }
+                       $result[ $target ] = $key;
+               }
+               return $result;
+       }
+
+       /**
+        * @param int $key key to pull to the front
+        * @return array $array with the item at $key pulled to the front
+        */
+       private function pullFront( $key, $array ) {
+               $cut = array_splice( $array, $key, 1 );
+               array_unshift( $array, $cut[0] );
+               return $array;
+       }
+
+       private function getRedirectTarget( $title ) {
+               $page = WikiPage::factory( $title );
+               if ( !$page->exists() ) {
+                       return null;
+               }
+               return $page->getRedirectTarget()->getPrefixedText();
        }
 
        /**
index 05275c8..2cd549e 100644 (file)
@@ -447,7 +447,8 @@ abstract class MediaWikiTestCase extends PHPUnit_Framework_TestCase {
                $comment = __METHOD__ . ': Sample page for unit test.';
 
                // Avoid memory leak...?
-               LinkCache::singleton()->clear();
+               // LinkCache::singleton()->clear();
+               // Maybe.  But doing this absolutely breaks $title->isRedirect() when called during unit tests....
 
                $page = WikiPage::factory( $title );
                $page->doEditContent( ContentHandler::makeContent( $text, $title ), $comment, 0, false, $user );
index a33f6a6..411d406 100644 (file)
@@ -41,6 +41,13 @@ class PrefixSearchTest extends MediaWikiLangTestCase {
                $this->insertPage( 'Example Foo' );
                $this->insertPage( 'Example Foo/Bar' );
                $this->insertPage( 'Example/Baz' );
+               $this->insertPage( 'Redirect test', '#REDIRECT [[Redirect Test]]' );
+               $this->insertPage( 'Redirect Test');
+               $this->insertPage( 'Redirect Test Worse Result');
+               $this->insertPage( 'Redirect test2', '#REDIRECT [[Redirect Test2]]' );
+               $this->insertPage( 'Redirect TEST2', '#REDIRECT [[Redirect Test2]]' );
+               $this->insertPage( 'Redirect Test2');
+               $this->insertPage( 'Redirect Test2 Worse Result');
 
                $this->insertPage( 'Talk:Sandbox' );
                $this->insertPage( 'Talk:Example' );
@@ -196,6 +203,55 @@ class PrefixSearchTest extends MediaWikiLangTestCase {
                                        'External',
                                ),
                        ) ),
+                       array( array(
+                               "Exact match shouldn't override already found match if " .
+                                       "exact is redirect and found isn't",
+                               'provision' => array(
+                                       // Target of the exact match is low in the list
+                                       'Redirect Test Worse Result',
+                                       'Redirect Test',
+                               ),
+                               'query' => 'redirect test',
+                               'results' => array(
+                                       // Redirect target is pulled up and exact match isn't added
+                                       'Redirect Test',
+                                       'Redirect Test Worse Result',
+                               ),
+                       ) ),
+                       array( array(
+                               "Exact match shouldn't override already found match if " .
+                                       "both exact match and found match are redirect",
+                               'provision' => array(
+                                       // Another redirect to the same target as the exact match
+                                       // is low in the list
+                                       'Redirect Test2 Worse Result',
+                                       'Redirect test2',
+                               ),
+                               'query' => 'redirect TEST2',
+                               'results' => array(
+                                       // Found redirect is pulled to the top and exact match isn't
+                                       // added
+                                       'Redirect test2',
+                                       'Redirect Test2 Worse Result',
+                               ),
+                       ) ),
+                       array( array(
+                               "Exact match should override any already found matches that " .
+                                       "are redirects to it",
+                               'provision' => array(
+                                       // Another redirect to the same target as the exact match
+                                       // is low in the list
+                                       'Redirect Test Worse Result',
+                                       'Redirect test',
+                               ),
+                               'query' => 'Redirect Test',
+                               'results' => array(
+                                       // Found redirect is pulled to the top and exact match isn't
+                                       // added
+                                       'Redirect Test',
+                                       'Redirect Test Worse Result',
+                               ),
+                       ) ),
                );
        }