ContribsPager: Fix slow queries
authorBrad Jorsch <bjorsch@wikimedia.org>
Tue, 23 Apr 2019 16:13:08 +0000 (12:13 -0400)
committerBrad Jorsch <bjorsch@wikimedia.org>
Wed, 1 May 2019 01:07:32 +0000 (21:07 -0400)
When ContribsPager is using an auxiliary table like ip_changes or
revision_actor_temp for the main action of the query, we already had
code in place to let it use the auxiliary table's denormalized timestamp
field for the ordering. What we didn't have was code to let it also use
the auxiliary table's denormalized timestamp field for *continuation*.

With the schema defined in tables.sql, the simplest thing to do would be
to be to add a redundant JOIN condition between rev_timestamp and the
denormalized timestamp field which would be enough to allow
MySQL/MariaDB to propagate the continuation conditional on rev_timestamp
to the denormalized timestamp field.

Unfortunately many Wikimedia wikis have rev_timestamp defined
differently from table.sql (P8433), and that difference is enough to
break that propagation. So we need to take a more difficult route,
restructuring the code tell IndexPager to explicitly use the
denormalized fields for ordering and continuation.

On the plus side, since we're doing that anyway we can get rid of the
code mentioned in the first paragraph.

Bug: T221380
Change-Id: Iad6c0c2f1ac5e1c610de15fe6e85a637c287bcd8

includes/specials/pagers/ContribsPager.php
tests/phpunit/includes/specials/ContribsPagerTest.php

index 44ecb6f..a187a44 100644 (file)
@@ -103,6 +103,21 @@ class ContribsPager extends RangeChronologicalPager {
        private $templateParser;
 
        public function __construct( IContextSource $context, array $options ) {
+               // Set ->target and ->contribs before calling parent::__construct() so
+               // parent can call $this->getIndexField() and get the right result. Set
+               // the rest too just to keep things simple.
+               $this->target = $options['target'] ?? '';
+               $this->contribs = $options['contribs'] ?? 'users';
+               $this->namespace = $options['namespace'] ?? '';
+               $this->tagFilter = $options['tagfilter'] ?? false;
+               $this->nsInvert = $options['nsInvert'] ?? false;
+               $this->associated = $options['associated'] ?? false;
+
+               $this->deletedOnly = !empty( $options['deletedOnly'] );
+               $this->topOnly = !empty( $options['topOnly'] );
+               $this->newOnly = !empty( $options['newOnly'] );
+               $this->hideMinor = !empty( $options['hideMinor'] );
+
                parent::__construct( $context );
 
                $msgs = [
@@ -116,18 +131,6 @@ class ContribsPager extends RangeChronologicalPager {
                        $this->messages[$msg] = $this->msg( $msg )->escaped();
                }
 
-               $this->target = $options['target'] ?? '';
-               $this->contribs = $options['contribs'] ?? 'users';
-               $this->namespace = $options['namespace'] ?? '';
-               $this->tagFilter = $options['tagfilter'] ?? false;
-               $this->nsInvert = $options['nsInvert'] ?? false;
-               $this->associated = $options['associated'] ?? false;
-
-               $this->deletedOnly = !empty( $options['deletedOnly'] );
-               $this->topOnly = !empty( $options['topOnly'] );
-               $this->newOnly = !empty( $options['newOnly'] );
-               $this->hideMinor = !empty( $options['hideMinor'] );
-
                // Date filtering: use timestamp if available
                $startTimestamp = '';
                $endTimestamp = '';
@@ -235,6 +238,35 @@ class ContribsPager extends RangeChronologicalPager {
                return new FakeResultWrapper( $result );
        }
 
+       /**
+        * Return the table targeted for ordering and continuation
+        *
+        * See T200259 and T221380.
+        *
+        * @warning Keep this in sync with self::getQueryInfo()!
+        *
+        * @return string
+        */
+       private function getTargetTable() {
+               if ( $this->contribs == 'newbie' ) {
+                       return 'revision';
+               }
+
+               $user = User::newFromName( $this->target, false );
+               $ipRangeConds = $user->isAnon() ? $this->getIpRangeConds( $this->mDb, $this->target ) : null;
+               if ( $ipRangeConds ) {
+                       return 'ip_changes';
+               } else {
+                       $conds = ActorMigration::newMigration()->getWhere( $this->mDb, 'rev_user', $user );
+                       if ( isset( $conds['orconds']['actor'] ) ) {
+                               // @todo: This will need changing when revision_actor_temp goes away
+                               return 'revision_actor_temp';
+                       }
+               }
+
+               return 'revision';
+       }
+
        function getQueryInfo() {
                $revQuery = Revision::getQueryInfo( [ 'page', 'user' ] );
                $queryInfo = [
@@ -245,6 +277,8 @@ class ContribsPager extends RangeChronologicalPager {
                        'join_conds' => $revQuery['joins'],
                ];
 
+               // WARNING: Keep this in sync with getTargetTable()!
+
                if ( $this->contribs == 'newbie' ) {
                        $max = $this->mDb->selectField( 'user', 'max(user_id)', '', __METHOD__ );
                        $queryInfo['conds'][] = $revQuery['fields']['rev_user'] . ' >' . (int)( $max - $max / 100 );
@@ -273,22 +307,6 @@ class ContribsPager extends RangeChronologicalPager {
                        $ipRangeConds = $user->isAnon() ? $this->getIpRangeConds( $this->mDb, $this->target ) : null;
                        if ( $ipRangeConds ) {
                                $queryInfo['tables'][] = 'ip_changes';
-                               /**
-                                * These aliases make `ORDER BY rev_timestamp, rev_id` from {@see getIndexField} and
-                                * {@see getExtraSortFields} use the replicated `ipc_rev_timestamp` and `ipc_rev_id`
-                                * columns from the `ip_changes` table, for more efficient queries.
-                                * @see https://phabricator.wikimedia.org/T200259#4832318
-                                */
-                               $queryInfo['fields'] = array_merge(
-                                       [
-                                               'rev_timestamp' => 'ipc_rev_timestamp',
-                                               'rev_id' => 'ipc_rev_id',
-                                       ],
-                                       array_diff( $queryInfo['fields'], [
-                                               'rev_timestamp',
-                                               'rev_id',
-                                       ] )
-                               );
                                $queryInfo['join_conds']['ip_changes'] = [
                                        'LEFT JOIN', [ 'ipc_rev_id = rev_id' ]
                                ];
@@ -299,15 +317,8 @@ class ContribsPager extends RangeChronologicalPager {
                                $queryInfo['conds'][] = $conds['conds'];
                                // Force the appropriate index to avoid bad query plans (T189026)
                                if ( isset( $conds['orconds']['actor'] ) ) {
-                                       // @todo: This will need changing when revision_comment_temp goes away
+                                       // @todo: This will need changing when revision_actor_temp goes away
                                        $queryInfo['options']['USE INDEX']['temp_rev_user'] = 'actor_timestamp';
-                                       // Alias 'rev_timestamp' => 'revactor_timestamp' and 'rev_id' => 'revactor_rev' so
-                                       // "ORDER BY rev_timestamp, rev_id" is interpreted to use denormalized revision_actor_temp
-                                       // fields instead.
-                                       $queryInfo['fields'] = array_merge(
-                                               array_diff( $queryInfo['fields'], [ 'rev_timestamp', 'rev_id' ] ),
-                                               [ 'rev_timestamp' => 'revactor_timestamp', 'rev_id' => 'revactor_rev' ]
-                                       );
                                } else {
                                        $queryInfo['options']['USE INDEX']['revision'] =
                                                isset( $conds['orconds']['userid'] ) ? 'user_timestamp' : 'usertext_timestamp';
@@ -342,10 +353,10 @@ class ContribsPager extends RangeChronologicalPager {
                                ' != ' . Revision::SUPPRESSED_USER;
                }
 
-               // For IPv6, we use ipc_rev_timestamp on ip_changes as the index field,
-               // which will be referenced when parsing the results of a query.
-               if ( self::isQueryableRange( $this->target ) ) {
-                       $queryInfo['fields'][] = 'ipc_rev_timestamp';
+               // $this->getIndexField() must be in the result rows, as reallyDoQuery() tries to access it.
+               $indexField = $this->getIndexField();
+               if ( $indexField !== 'rev_timestamp' ) {
+                       $queryInfo['fields'][] = $indexField;
                }
 
                ChangeTags::modifyDisplayQuery(
@@ -431,8 +442,24 @@ class ContribsPager extends RangeChronologicalPager {
         * @return string
         */
        public function getIndexField() {
-               // Note this is run via parent::__construct() *before* $this->target is set!
-               return 'rev_timestamp';
+               // The returned column is used for sorting and continuation, so we need to
+               // make sure to use the right denormalized column depending on which table is
+               // being targeted by the query to avoid bad query plans.
+               // See T200259, T204669, T220991, and T221380.
+               $target = $this->getTargetTable();
+               switch ( $target ) {
+                       case 'revision':
+                               return 'rev_timestamp';
+                       case 'ip_changes':
+                               return 'ipc_rev_timestamp';
+                       case 'revision_actor_temp':
+                               return 'revactor_timestamp';
+                       default:
+                               wfWarn(
+                                       __METHOD__ . ": Unknown value '$target' from " . static::class . '::getTargetTable()', 0
+                               );
+                               return 'rev_timestamp';
+               }
        }
 
        /**
@@ -474,8 +501,24 @@ class ContribsPager extends RangeChronologicalPager {
         * @return string[]
         */
        protected function getExtraSortFields() {
-               // Note this is run via parent::__construct() *before* $this->target is set!
-               return [ 'rev_id' ];
+               // The returned columns are used for sorting, so we need to make sure
+               // to use the right denormalized column depending on which table is
+               // being targeted by the query to avoid bad query plans.
+               // See T200259, T204669, T220991, and T221380.
+               $target = $this->getTargetTable();
+               switch ( $target ) {
+                       case 'revision':
+                               return [ 'rev_id' ];
+                       case 'ip_changes':
+                               return [ 'ipc_rev_id' ];
+                       case 'revision_actor_temp':
+                               return [ 'revactor_rev' ];
+                       default:
+                               wfWarn(
+                                       __METHOD__ . ": Unknown value '$target' from " . static::class . '::getTargetTable()', 0
+                               );
+                               return [ 'rev_id' ];
+               }
        }
 
        protected function doBatchLookups() {
index dc02922..e881611 100644 (file)
@@ -158,9 +158,7 @@ class ContribsPagerTest extends MediaWikiTestCase {
 
                $this->assertContains( 'ip_changes', $queryInfo[0] );
                $this->assertArrayHasKey( 'ip_changes', $queryInfo[5] );
-               $this->assertSame( 'ipc_rev_timestamp', $queryInfo[1]['rev_timestamp'] );
-               $this->assertSame( 'ipc_rev_id', $queryInfo[1]['rev_id'] );
-               $this->assertSame( [ 'rev_timestamp DESC', 'rev_id DESC' ], $queryInfo[4]['ORDER BY'] );
+               $this->assertSame( [ 'ipc_rev_timestamp DESC', 'ipc_rev_id DESC' ], $queryInfo[4]['ORDER BY'] );
        }
 
 }