API: Improve list=random
authorBrad Jorsch <bjorsch@wikimedia.org>
Tue, 7 Jul 2015 16:28:42 +0000 (12:28 -0400)
committerLegoktm <legoktm.wikipedia@gmail.com>
Thu, 10 Sep 2015 00:12:45 +0000 (00:12 +0000)
Changes are:
* Raise limits to be in line with other modules.
* Deprecate boolean rnredirect in favor of rnfilterredir that allows for
  including both redirects and non-redirects.
* Allow continuation, for applications that want to traverse the entire
  "random" sequence without repeats.

Bug: T99926
Change-Id: Id5a8b0f1591c73044524ac508d2add8ab1b3a22e

RELEASE-NOTES-1.26
includes/api/ApiQueryRandom.php
includes/api/i18n/en.json
includes/api/i18n/qqq.json

index a532171..12b973a 100644 (file)
@@ -105,6 +105,11 @@ production.
   sometimes being numerically-indexed objects with formatversion=2.
 * When errors about users being blocked are returned, they now include
   information about the relevant block.
+* (T99926) list=random has higher limits, in line with other API modules.
+* list=random's rnredirect parameter is deprecated in favor of a new
+  rnfilterredir parameter that also allows for listing both redirects and
+  non-redirects.
+* list=random now supports continuation.
 
 === Action API internal changes in 1.26 ===
 * New metadata item ApiResult::META_KVP_MERGE to allow for merging the KVP key
index a2c2844..8e7031c 100644 (file)
@@ -31,8 +31,6 @@
  * @ingroup API
  */
 class ApiQueryRandom extends ApiQueryGeneratorBase {
-       private $pageIDs;
-
        public function __construct( ApiQuery $query, $moduleName ) {
                parent::__construct( $query, $moduleName, 'rn' );
        }
@@ -46,102 +44,131 @@ class ApiQueryRandom extends ApiQueryGeneratorBase {
        }
 
        /**
-        * @param string $randstr
-        * @param int $limit
-        * @param int $namespace
-        * @param ApiPageSet $resultPageSet
-        * @param bool $redirect
-        * @return void
+        * Actually perform the query and add pages to the result.
+        * @param ApiPageSet|null $resultPageSet
+        * @param int $limit Number of pages to fetch
+        * @param string|null $start Starting page_random
+        * @param int|null $startId Starting page_id
+        * @param string|null $end Ending page_random
+        * @return array (int, string|null) Number of pages left to query and continuation string
         */
-       protected function prepareQuery( $randstr, $limit, $namespace, &$resultPageSet, $redirect ) {
+       protected function runQuery( $resultPageSet, $limit, $start, $startId, $end ) {
+               $params = $this->extractRequestParams();
+
                $this->resetQueryParams();
                $this->addTables( 'page' );
-               $this->addOption( 'LIMIT', $limit );
-               $this->addWhereFld( 'page_namespace', $namespace );
-               $this->addWhereRange( 'page_random', 'newer', $randstr, null );
-               $this->addWhereFld( 'page_is_redirect', $redirect );
+               $this->addFields( array( 'page_id', 'page_random' ) );
                if ( is_null( $resultPageSet ) ) {
-                       $this->addFields( array( 'page_id', 'page_title', 'page_namespace' ) );
+                       $this->addFields( array( 'page_title', 'page_namespace' ) );
                } else {
                        $this->addFields( $resultPageSet->getPageTableFields() );
                }
-       }
+               $this->addWhereFld( 'page_namespace', $params['namespace'] );
+               if ( $params['redirect'] || $params['filterredir'] === 'redirects' ) {
+                       $this->addWhereFld( 'page_is_redirect', 1 );
+               } elseif ( $params['filterredir'] === 'nonredirects' ) {
+                       $this->addWhereFld( 'page_is_redirect', 0 );
+               } elseif ( is_null( $resultPageSet ) ) {
+                       $this->addFields( array( 'page_is_redirect' ) );
+               }
+               $this->addOption( 'LIMIT', $limit + 1 );
+
+               if ( $start !== null ) {
+                       $start = $this->getDB()->addQuotes( $start );
+                       if ( $startId !== null ) {
+                               $startId = (int)$startId;
+                               $this->addWhere( "page_random = $start AND page_id >= $startId OR page_random > $start" );
+                       } else {
+                               $this->addWhere( "page_random >= $start" );
+                       }
+               }
+               if ( $end !== null ) {
+                       $this->addWhere( 'page_random < ' . $this->getDB()->addQuotes( $end ) );
+               }
+               $this->addOption( 'ORDER BY', array( 'page_random', 'page_id' ) );
+
+               $result = $this->getResult();
+               $path = array( 'query', $this->getModuleName() );
 
-       /**
-        * @param ApiPageSet $resultPageSet
-        * @return int
-        */
-       protected function runQuery( $resultPageSet = null ) {
                $res = $this->select( __METHOD__ );
                $count = 0;
                foreach ( $res as $row ) {
-                       $count++;
+                       if ( $count++ >= $limit ) {
+                               return array( 0, "{$row->page_random}|{$row->page_id}" );
+                       }
                        if ( is_null( $resultPageSet ) ) {
-                               // Prevent duplicates
-                               if ( !in_array( $row->page_id, $this->pageIDs ) ) {
-                                       $fit = $this->getResult()->addValue(
-                                               array( 'query', $this->getModuleName() ),
-                                               null, $this->extractRowInfo( $row ) );
-                                       if ( !$fit ) {
-                                               // We can't really query-continue a random list.
-                                               // Return an insanely high value so
-                                               // $count < $limit is false
-                                               return 1E9;
-                                       }
-                                       $this->pageIDs[] = $row->page_id;
+                               $title = Title::makeTitle( $row->page_namespace, $row->page_title );
+                               $page = array(
+                                       'id' => (int)$row->page_id,
+                               );
+                               ApiQueryBase::addTitleInfo( $page, $title );
+                               if ( isset( $row->page_is_redirect ) ) {
+                                       $page['redirect'] = (bool)$row->page_is_redirect;
+                               }
+                               $fit = $result->addValue( $path, null, $page );
+                               if ( !$fit ) {
+                                       return array( 0, "{$row->page_random}|{$row->page_id}" );
                                }
                        } else {
                                $resultPageSet->processDbRow( $row );
                        }
                }
 
-               return $count;
+               return array( $limit - $count, null );
        }
 
        /**
-        * @param ApiPageSet $resultPageSet
-        * @return void
+        * @param ApiPageSet|null $resultPageSet
         */
        public function run( $resultPageSet = null ) {
                $params = $this->extractRequestParams();
-               $result = $this->getResult();
-               $this->pageIDs = array();
-
-               $this->prepareQuery(
-                       wfRandom(),
-                       $params['limit'],
-                       $params['namespace'],
-                       $resultPageSet,
-                       $params['redirect']
-               );
-               $count = $this->runQuery( $resultPageSet );
-               if ( $count < $params['limit'] ) {
-                       /* We got too few pages, we probably picked a high value
-                        * for page_random. We'll just take the lowest ones, see
-                        * also the comment in Title::getRandomTitle()
-                        */
-                       $this->prepareQuery(
-                               0,
-                               $params['limit'] - $count,
-                               $params['namespace'],
-                               $resultPageSet,
-                               $params['redirect']
-                       );
-                       $this->runQuery( $resultPageSet );
+
+               // Since 'filterredir" will always be set in $params, we have to dig
+               // into the WebRequest to see if it was actually passed.
+               $request = $this->getMain()->getRequest();
+               if ( $request->getCheck( $this->encodeParamName( 'filterredir' ) ) ) {
+                       $this->requireMaxOneParameter( $params, 'filterredir', 'redirect' );
                }
 
-               if ( is_null( $resultPageSet ) ) {
-                       $result->addIndexedTagName( array( 'query', $this->getModuleName() ), 'page' );
+               if ( $params['redirect'] ) {
+                       $this->logFeatureUsage( "list=random&rnredirect=" );
+               }
+
+               if ( isset( $params['continue'] ) ) {
+                       $cont = explode( '|', $params['continue'] );
+                       $this->dieContinueUsageIf( count( $cont ) != 4 );
+                       $rand = $cont[0];
+                       $start = $cont[1];
+                       $startId = (int)$cont[2];
+                       $end = $cont[3] ? $rand : null;
+                       $this->dieContinueUsageIf( !preg_match( '/^0\.\d+$/', $rand ) );
+                       $this->dieContinueUsageIf( !preg_match( '/^0\.\d+$/', $start ) );
+                       $this->dieContinueUsageIf( $cont[2] !== (string)$startId );
+                       $this->dieContinueUsageIf( $cont[3] !== '0' && $cont[3] !== '1' );
+               } else {
+                       $rand = wfRandom();
+                       $start = $rand;
+                       $startId = null;
+                       $end = null;
                }
-       }
 
-       private function extractRowInfo( $row ) {
-               $title = Title::makeTitle( $row->page_namespace, $row->page_title );
-               $vals = array();
-               $vals['id'] = intval( $row->page_id );
-               ApiQueryBase::addTitleInfo( $vals, $title );
+               list( $left, $continue ) = $this->runQuery( $resultPageSet, $params['limit'], $start, $startId, $end );
+               if ( $end === null && $continue === null ) {
+                       // Wrap around. We do this even if $left === 0 for continuation
+                       // (saving a DB query in this rare case probably isn't worth the
+                       // added code complexity it would require).
+                       $end = $rand;
+                       list( $left, $continue ) = $this->runQuery( $resultPageSet, $left, null, null, $end );
+               }
+
+               if ( $continue !== null ) {
+                       $endFlag = $end === null ? 0 : 1;
+                       $this->setContinueEnumParameter( 'continue', "$rand|$continue|$endFlag" );
+               }
 
-               return $vals;
+               if ( is_null( $resultPageSet ) ) {
+                       $this->getResult()->addIndexedTagName( array( 'query', $this->getModuleName() ), 'page' );
+               }
        }
 
        public function getCacheMode( $params ) {
@@ -154,14 +181,24 @@ class ApiQueryRandom extends ApiQueryGeneratorBase {
                                ApiBase::PARAM_TYPE => 'namespace',
                                ApiBase::PARAM_ISMULTI => true
                        ),
+                       'filterredir' => array(
+                               ApiBase::PARAM_TYPE => array( 'all', 'redirects', 'nonredirects' ),
+                               ApiBase::PARAM_DFLT => 'nonredirects', // for BC
+                       ),
+                       'redirect' => array(
+                               ApiBase::PARAM_DEPRECATED => true,
+                               ApiBase::PARAM_DFLT => false,
+                       ),
                        'limit' => array(
                                ApiBase::PARAM_TYPE => 'limit',
                                ApiBase::PARAM_DFLT => 1,
                                ApiBase::PARAM_MIN => 1,
-                               ApiBase::PARAM_MAX => 10,
-                               ApiBase::PARAM_MAX2 => 20
+                               ApiBase::PARAM_MAX => ApiBase::LIMIT_BIG1,
+                               ApiBase::PARAM_MAX2 => ApiBase::LIMIT_BIG2
+                       ),
+                       'continue' => array(
+                               ApiBase::PARAM_HELP_MSG => 'api-help-param-continue'
                        ),
-                       'redirect' => false,
                );
        }
 
index 39d44d2..43df9ea 100644 (file)
        "apihelp-query+querypage-param-limit": "Number of results to return.",
        "apihelp-query+querypage-example-ancientpages": "Return results from [[Special:Ancientpages]].",
 
-       "apihelp-query+random-description": "Get a set of random pages.\n\nPages are listed in a fixed sequence, only the starting point is random. This means that if, for example, <samp>Main Page</samp> is the first random page in the list, <samp>List of fictional monkeys</samp> will <em>always</em> be second, <samp>List of people on stamps of Vanuatu</samp> third, etc.\n\nIf the number of pages in the namespace is lower than <var>$1limit</var>, fewer pages will be returned. The same page will not be returned twice.",
+       "apihelp-query+random-description": "Get a set of random pages.\n\nPages are listed in a fixed sequence, only the starting point is random. This means that if, for example, <samp>Main Page</samp> is the first random page in the list, <samp>List of fictional monkeys</samp> will <em>always</em> be second, <samp>List of people on stamps of Vanuatu</samp> third, etc.",
        "apihelp-query+random-param-namespace": "Return pages in these namespaces only.",
        "apihelp-query+random-param-limit": "Limit how many random pages will be returned.",
-       "apihelp-query+random-param-redirect": "Load a random redirect instead of a random page.",
+       "apihelp-query+random-param-redirect": "Use <kbd>$1filterredir=redirects</kbd> instead.",
+       "apihelp-query+random-param-filterredir": "How to filter for redirects.",
        "apihelp-query+random-example-simple": "Return two random pages from the main namespace.",
        "apihelp-query+random-example-generator": "Return page info about two random pages from the main namespace.",
 
index cd8d135..51811f8 100644 (file)
        "apihelp-query+random-description": "{{doc-apihelp-description|query+random}}",
        "apihelp-query+random-param-namespace": "{{doc-apihelp-param|query+random|namespace}}",
        "apihelp-query+random-param-limit": "{{doc-apihelp-param|query+random|limit}}",
+       "apihelp-query+random-param-filterredir": "{{apihelp-param|query+random|filterredir}}",
        "apihelp-query+random-param-redirect": "{{doc-apihelp-param|query+random|redirect}}",
        "apihelp-query+random-example-simple": "{{doc-apihelp-example|query+random}}",
        "apihelp-query+random-example-generator": "{{doc-apihelp-example|query+random}}",