Improve some queries ordering by rev_timestamp with actor migration READ_NEW
authorBrad Jorsch <bjorsch@wikimedia.org>
Wed, 19 Sep 2018 20:25:45 +0000 (16:25 -0400)
committerAnomie <bjorsch@wikimedia.org>
Fri, 26 Oct 2018 16:44:16 +0000 (16:44 +0000)
The revision_actor_temp table has denormalized copies of rev_page and
rev_timestamp to support the actor indexes equivalent to
`actor_timestamp` and `page_actor_timestamp`. But to get these to be
used, we need to have the WHERE and ORDER BY use the denormalized fields
instead of the main revision-table fields.

We can take generally advantage of the fact that "ORDER BY field" uses
aliased field names before the names in the actual table, but WHERE
conditions do need to explicitly vary.

Bug: T204669
Change-Id: I992aa50f02c35d76e91a5a28e05c870f8a32f858

includes/api/ApiQueryAllRevisions.php
includes/api/ApiQueryRevisions.php
includes/specials/pagers/ContribsPager.php
includes/user/User.php

index 6a26eff..922d2c3 100644 (file)
@@ -40,6 +40,8 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase {
         * @return void
         */
        protected function run( ApiPageSet $resultPageSet = null ) {
+               global $wgActorTableSchemaMigrationStage;
+
                $db = $this->getDB();
                $params = $this->extractRequestParams( false );
                $revisionStore = MediaWikiServices::getInstance()->getRevisionStore();
@@ -48,6 +50,19 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase {
 
                $this->requireMaxOneParameter( $params, 'user', 'excludeuser' );
 
+               $tsField = 'rev_timestamp';
+               $idField = 'rev_id';
+               $pageField = 'rev_page';
+               if ( $params['user'] !== null &&
+                       ( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_READ_NEW )
+               ) {
+                       // The query is probably best done using the actor_timestamp index on
+                       // revision_actor_temp. Use the denormalized fields from that table.
+                       $tsField = 'revactor_timestamp';
+                       $idField = 'revactor_rev';
+                       $pageField = 'revactor_page';
+               }
+
                // Namespace check is likely to be desired, but can't be done
                // efficiently in SQL.
                $miser_ns = null;
@@ -70,34 +85,59 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase {
                        $revQuery = $revisionStore->getQueryInfo(
                                $this->fetchContent ? [ 'page', 'text' ] : [ 'page' ]
                        );
-                       $this->addTables( $revQuery['tables'] );
-                       $this->addFields( $revQuery['fields'] );
-                       $this->addJoinConds( $revQuery['joins'] );
-
-                       // Review this depeneding on the outcome of T113901
-                       $this->addOption( 'STRAIGHT_JOIN' );
                } else {
                        $this->limit = $this->getParameter( 'limit' ) ?: 10;
-                       $this->addTables( 'revision' );
-                       $this->addFields( [ 'rev_timestamp', 'rev_id' ] );
+                       $revQuery = [
+                               'tables' => [ 'revision' ],
+                               'fields' => [ 'rev_timestamp', 'rev_id' ],
+                               'joins' => [],
+                       ];
+
                        if ( $params['generatetitles'] ) {
-                               $this->addFields( [ 'rev_page' ] );
+                               $revQuery['fields'][] = 'rev_page';
                        }
 
-                       if ( $needPageTable ) {
-                               $this->addTables( 'page' );
-                               $this->addJoinConds(
-                                       [ 'page' => [ 'INNER JOIN', [ 'rev_page = page_id' ] ] ]
-                               );
-                               $this->addFieldsIf( [ 'page_namespace' ], (bool)$miser_ns );
+                       if ( $params['user'] !== null || $params['excludeuser'] !== null ) {
+                               $actorQuery = ActorMigration::newMigration()->getJoin( 'rev_user' );
+                               $revQuery['tables'] += $actorQuery['tables'];
+                               $revQuery['joins'] += $actorQuery['joins'];
+                       }
 
-                               // Review this depeneding on the outcome of T113901
-                               $this->addOption( 'STRAIGHT_JOIN' );
+                       if ( $needPageTable ) {
+                               $revQuery['tables'][] = 'page';
+                               $revQuery['joins']['page'] = [ 'INNER JOIN', [ "$pageField = page_id" ] ];
+                               if ( (bool)$miser_ns ) {
+                                       $revQuery['fields'][] = 'page_namespace';
+                               }
                        }
                }
 
+               // If we're going to be using actor_timestamp, we need to swap the order of `revision`
+               // and `revision_actor_temp` in the query (for the straight join) and adjust some field aliases.
+               if ( $idField !== 'rev_id' && isset( $revQuery['tables']['temp_rev_user'] ) ) {
+                       $aliasFields = [ 'rev_id' => $idField, 'rev_timestamp' => $tsField, 'rev_page' => $pageField ];
+                       $revQuery['fields'] = array_merge(
+                               $aliasFields,
+                               array_diff( $revQuery['fields'], array_keys( $aliasFields ) )
+                       );
+                       unset( $revQuery['tables']['temp_rev_user'] );
+                       $revQuery['tables'] = array_merge(
+                               [ 'temp_rev_user' => 'revision_actor_temp' ],
+                               $revQuery['tables']
+                       );
+                       $revQuery['joins']['revision'] = $revQuery['joins']['temp_rev_user'];
+                       unset( $revQuery['joins']['temp_rev_user'] );
+               }
+
+               $this->addTables( $revQuery['tables'] );
+               $this->addFields( $revQuery['fields'] );
+               $this->addJoinConds( $revQuery['joins'] );
+
+               // Seems to be needed to avoid a planner bug (T113901)
+               $this->addOption( 'STRAIGHT_JOIN' );
+
                $dir = $params['dir'];
-               $this->addTimestampWhereRange( 'rev_timestamp', $dir, $params['start'], $params['end'] );
+               $this->addTimestampWhereRange( $tsField, $dir, $params['start'], $params['end'] );
 
                if ( $this->fld_tags ) {
                        $this->addTables( 'tag_summary' );
@@ -110,14 +150,10 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase {
                if ( $params['user'] !== null ) {
                        $actorQuery = ActorMigration::newMigration()
                                ->getWhere( $db, 'rev_user', User::newFromName( $params['user'], false ) );
-                       $this->addTables( $actorQuery['tables'] );
-                       $this->addJoinConds( $actorQuery['joins'] );
                        $this->addWhere( $actorQuery['conds'] );
                } elseif ( $params['excludeuser'] !== null ) {
                        $actorQuery = ActorMigration::newMigration()
                                ->getWhere( $db, 'rev_user', User::newFromName( $params['excludeuser'], false ) );
-                       $this->addTables( $actorQuery['tables'] );
-                       $this->addJoinConds( $actorQuery['joins'] );
                        $this->addWhere( 'NOT(' . $actorQuery['conds'] . ')' );
                }
 
@@ -142,17 +178,17 @@ class ApiQueryAllRevisions extends ApiQueryRevisionsBase {
                        $ts = $db->addQuotes( $db->timestamp( $cont[0] ) );
                        $rev_id = (int)$cont[1];
                        $this->dieContinueUsageIf( strval( $rev_id ) !== $cont[1] );
-                       $this->addWhere( "rev_timestamp $op $ts OR " .
-                               "(rev_timestamp = $ts AND " .
-                               "rev_id $op= $rev_id)" );
+                       $this->addWhere( "$tsField $op $ts OR " .
+                               "($tsField = $ts AND " .
+                               "$idField $op= $rev_id)" );
                }
 
                $this->addOption( 'LIMIT', $this->limit + 1 );
 
                $sort = ( $dir == 'newer' ? '' : ' DESC' );
                $orderby = [];
-               // Targeting index rev_timestamp, user_timestamp, or usertext_timestamp
-               // But 'user' is always constant for the latter two, so it doesn't matter here.
+               // Targeting index rev_timestamp, user_timestamp, usertext_timestamp, or actor_timestamp.
+               // But 'user' is always constant for the latter three, so it doesn't matter here.
                $orderby[] = "rev_timestamp $sort";
                $orderby[] = "rev_id $sort";
                $this->addOption( 'ORDER BY', $orderby );
index b8a180f..ac7ee0a 100644 (file)
@@ -84,7 +84,7 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase {
        }
 
        protected function run( ApiPageSet $resultPageSet = null ) {
-               global $wgChangeTagsSchemaMigrationStage;
+               global $wgActorTableSchemaMigrationStage, $wgChangeTagsSchemaMigrationStage;
 
                $params = $this->extractRequestParams( false );
                $revisionStore = MediaWikiServices::getInstance()->getRevisionStore();
@@ -133,6 +133,19 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase {
 
                $db = $this->getDB();
 
+               $idField = 'rev_id';
+               $tsField = 'rev_timestamp';
+               $pageField = 'rev_page';
+               if ( $params['user'] !== null &&
+                       ( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_READ_NEW )
+               ) {
+                       // We're going to want to use the page_actor_timestamp index (on revision_actor_temp)
+                       // so use that table's denormalized fields.
+                       $idField = 'revactor_rev';
+                       $tsField = 'revactor_timestamp';
+                       $pageField = 'revactor_page';
+               }
+
                if ( $resultPageSet === null ) {
                        $this->parseParameters( $params );
                        $this->token = $params['token'];
@@ -147,6 +160,15 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase {
                                $opts[] = 'user';
                        }
                        $revQuery = $revisionStore->getQueryInfo( $opts );
+
+                       if ( $idField !== 'rev_id' ) {
+                               $aliasFields = [ 'rev_id' => $idField, 'rev_timestamp' => $tsField, 'rev_page' => $pageField ];
+                               $revQuery['fields'] = array_merge(
+                                       $aliasFields,
+                                       array_diff( $revQuery['fields'], array_keys( $aliasFields ) )
+                               );
+                       }
+
                        $this->addTables( $revQuery['tables'] );
                        $this->addFields( $revQuery['fields'] );
                        $this->addJoinConds( $revQuery['joins'] );
@@ -157,7 +179,9 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase {
                        $this->addJoinConds(
                                [ 'page' => [ 'INNER JOIN', [ 'page_id = rev_page' ] ] ]
                        );
-                       $this->addFields( [ 'rev_id', 'rev_timestamp', 'rev_page' ] );
+                       $this->addFields( [
+                               'rev_id' => $idField, 'rev_timestamp' => $tsField, 'rev_page' => $pageField
+                       ] );
                }
 
                if ( $this->fld_tags ) {
@@ -207,6 +231,7 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase {
                if ( $enumRevMode ) {
                        // Indexes targeted:
                        //  page_timestamp if we don't have rvuser
+                       //  page_actor_timestamp (on revision_actor_temp) if we have rvuser in READ_NEW mode
                        //  page_user_timestamp if we have a logged-in rvuser
                        //  page_timestamp or usertext_timestamp if we have an IP rvuser
 
@@ -222,9 +247,9 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase {
                                $continueTimestamp = $db->addQuotes( $db->timestamp( $cont[0] ) );
                                $continueId = (int)$cont[1];
                                $this->dieContinueUsageIf( $continueId != $cont[1] );
-                               $this->addWhere( "rev_timestamp $op $continueTimestamp OR " .
-                                       "(rev_timestamp = $continueTimestamp AND " .
-                                       "rev_id $op= $continueId)"
+                               $this->addWhere( "$tsField $op $continueTimestamp OR " .
+                                       "($tsField = $continueTimestamp AND " .
+                                       "$idField $op= $continueId)"
                                );
                        }
 
@@ -274,24 +299,24 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase {
                                        $op = ( $params['dir'] === 'newer' ? '>' : '<' );
                                        $ts = $db->addQuotes( $db->timestampOrNull( $params['start'] ) );
                                        if ( $params['startid'] !== null ) {
-                                               $this->addWhere( "rev_timestamp $op $ts OR "
-                                                       . "rev_timestamp = $ts AND rev_id $op= " . intval( $params['startid'] ) );
+                                               $this->addWhere( "$tsField $op $ts OR "
+                                                       . "$tsField = $ts AND $idField $op= " . intval( $params['startid'] ) );
                                        } else {
-                                               $this->addWhere( "rev_timestamp $op= $ts" );
+                                               $this->addWhere( "$tsField $op= $ts" );
                                        }
                                }
                                if ( $params['end'] !== null ) {
                                        $op = ( $params['dir'] === 'newer' ? '<' : '>' ); // Yes, opposite of the above
                                        $ts = $db->addQuotes( $db->timestampOrNull( $params['end'] ) );
                                        if ( $params['endid'] !== null ) {
-                                               $this->addWhere( "rev_timestamp $op $ts OR "
-                                                       . "rev_timestamp = $ts AND rev_id $op= " . intval( $params['endid'] ) );
+                                               $this->addWhere( "$tsField $op $ts OR "
+                                                       . "$tsField = $ts AND $idField $op= " . intval( $params['endid'] ) );
                                        } else {
-                                               $this->addWhere( "rev_timestamp $op= $ts" );
+                                               $this->addWhere( "$tsField $op= $ts" );
                                        }
                                }
                        } else {
-                               $this->addTimestampWhereRange( 'rev_timestamp', $params['dir'],
+                               $this->addTimestampWhereRange( $tsField, $params['dir'],
                                        $params['start'], $params['end'] );
                        }
 
@@ -300,7 +325,7 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase {
 
                        // There is only one ID, use it
                        $ids = array_keys( $pageSet->getGoodTitles() );
-                       $this->addWhereFld( 'rev_page', reset( $ids ) );
+                       $this->addWhereFld( $pageField, reset( $ids ) );
 
                        if ( $params['user'] !== null ) {
                                $actorQuery = ActorMigration::newMigration()
index 9900340..5dec30c 100644 (file)
@@ -224,6 +224,12 @@ class ContribsPager extends RangeChronologicalPager {
                                if ( isset( $conds['orconds']['actor'] ) ) {
                                        // @todo: This will need changing when revision_comment_temp goes away
                                        $queryInfo['options']['USE INDEX']['temp_rev_user'] = 'actor_timestamp';
+                                       // Alias 'rev_timestamp' => 'revactor_timestamp' so "ORDER BY rev_timestamp" is interpreted to
+                                       // use revactor_timestamp instead.
+                                       $queryInfo['fields'] = array_merge(
+                                               array_diff( $queryInfo['fields'], [ 'rev_timestamp' ] ),
+                                               [ 'rev_timestamp' => 'revactor_timestamp' ]
+                                       );
                                } else {
                                        $queryInfo['options']['USE INDEX']['revision'] =
                                                isset( $conds['orconds']['userid'] ) ? 'user_timestamp' : 'usertext_timestamp';
index 9b08737..6066c87 100644 (file)
@@ -4942,12 +4942,14 @@ class User implements IDBAccessObject, UserIdentity {
                }
                $dbr = wfGetDB( DB_REPLICA );
                $actorWhere = ActorMigration::newMigration()->getWhere( $dbr, 'rev_user', $this );
+               $tsField = isset( $actorWhere['tables']['temp_rev_user'] )
+                       ? 'revactor_timestamp' : 'rev_timestamp';
                $time = $dbr->selectField(
                        [ 'revision' ] + $actorWhere['tables'],
-                       'rev_timestamp',
+                       $tsField,
                        [ $actorWhere['conds'] ],
                        __METHOD__,
-                       [ 'ORDER BY' => 'rev_timestamp ASC' ],
+                       [ 'ORDER BY' => "$tsField ASC" ],
                        $actorWhere['joins']
                );
                if ( !$time ) {