API: Use rev_user when possible for list=usercontribs
authorBrad Jorsch <bjorsch@wikimedia.org>
Mon, 28 Mar 2016 20:20:24 +0000 (16:20 -0400)
committerAnomie <bjorsch@wikimedia.org>
Fri, 15 Jul 2016 19:31:23 +0000 (19:31 +0000)
When all ucusers are registered users, we can do it somewhat more
efficiently by querying by rev_user instead of rev_user_text. This
should particularly make a difference on the WMF cluster where the
revision table is clustered by rev_user on the relevant slaves.

This also kills the forcing of the usertext_timestamp index in the
query. MySQL should be smart enough these days to not need that.

Bug: T131065
Change-Id: Ie8317ea15937f98133d4c1b69315c35aa2e046fa

includes/api/ApiQueryUserContributions.php

index 0688970..60e91e8 100644 (file)
@@ -35,7 +35,8 @@ class ApiQueryContributions extends ApiQueryBase {
                parent::__construct( $query, $moduleName, 'uc' );
        }
 
-       private $params, $prefixMode, $userprefix, $multiUserMode, $usernames, $parentLens;
+       private $params, $prefixMode, $userprefix, $multiUserMode, $idMode, $usernames, $userids,
+               $parentLens;
        private $fld_ids = false, $fld_title = false, $fld_timestamp = false,
                $fld_comment = false, $fld_parsedcomment = false, $fld_flags = false,
                $fld_patrolled = false, $fld_tags = false, $fld_size = false, $fld_sizediff = false;
@@ -64,11 +65,14 @@ class ApiQueryContributions extends ApiQueryBase {
                // TODO: if the query is going only against the revision table, should this be done?
                $this->selectNamedDB( 'contributions', DB_SLAVE, 'contributions' );
 
+               $this->idMode = false;
                if ( isset( $this->params['userprefix'] ) ) {
                        $this->prefixMode = true;
                        $this->multiUserMode = true;
                        $this->userprefix = $this->params['userprefix'];
                } else {
+                       $anyIPs = false;
+                       $this->userids = [];
                        $this->usernames = [];
                        if ( !is_array( $this->params['user'] ) ) {
                                $this->params['user'] = [ $this->params['user'] ];
@@ -77,10 +81,32 @@ class ApiQueryContributions extends ApiQueryBase {
                                $this->dieUsage( 'User parameter may not be empty.', 'param_user' );
                        }
                        foreach ( $this->params['user'] as $u ) {
-                               $this->prepareUsername( $u );
+                               if ( is_null( $u ) || $u === '' ) {
+                                       $this->dieUsage( 'User parameter may not be empty', 'param_user' );
+                               }
+
+                               if ( User::isIP( $u ) ) {
+                                       $anyIPs = true;
+                                       $this->usernames[] = $u;
+                               } else {
+                                       $name = User::getCanonicalName( $u, 'valid' );
+                                       if ( $name === false ) {
+                                               $this->dieUsage( "User name {$u} is not valid", 'param_user' );
+                                       }
+                                       $this->usernames[] = $name;
+                               }
                        }
                        $this->prefixMode = false;
                        $this->multiUserMode = ( count( $this->params['user'] ) > 1 );
+
+                       if ( !$anyIPs ) {
+                               $dbr = $this->getDB();
+                               $res = $dbr->select( 'user', 'user_id', [ 'user_name' => $this->usernames ], __METHOD__ );
+                               foreach ( $res as $row ) {
+                                       $this->userids[] = $row->user_id;
+                               }
+                               $this->idMode = count( $this->userids ) === count( $this->usernames );
+                       }
                }
 
                $this->prepareQuery();
@@ -126,27 +152,6 @@ class ApiQueryContributions extends ApiQueryBase {
                );
        }
 
-       /**
-        * Validate the 'user' parameter and set the value to compare
-        * against `revision`.`rev_user_text`
-        *
-        * @param string $user
-        */
-       private function prepareUsername( $user ) {
-               if ( !is_null( $user ) && $user !== '' ) {
-                       $name = User::isIP( $user )
-                               ? $user
-                               : User::getCanonicalName( $user, 'valid' );
-                       if ( $name === false ) {
-                               $this->dieUsage( "User name {$user} is not valid", 'param_user' );
-                       } else {
-                               $this->usernames[] = $name;
-                       }
-               } else {
-                       $this->dieUsage( 'User parameter may not be empty', 'param_user' );
-               }
-       }
-
        /**
         * Prepares the query and returns the limit of rows requested
         */
@@ -163,7 +168,17 @@ class ApiQueryContributions extends ApiQueryBase {
                        $continue = explode( '|', $this->params['continue'] );
                        $db = $this->getDB();
                        if ( $this->multiUserMode ) {
-                               $this->dieContinueUsageIf( count( $continue ) != 3 );
+                               $this->dieContinueUsageIf( count( $continue ) != 4 );
+                               $modeFlag = array_shift( $continue );
+                               $this->dieContinueUsageIf( !in_array( $modeFlag, [ 'id', 'name' ] ) );
+                               if ( $this->idMode && $modeFlag === 'name' ) {
+                                       // The users were created since this query started, but we
+                                       // can't go back and change modes now. So just keep on with
+                                       // name mode.
+                                       $this->idMode = false;
+                               }
+                               $this->dieContinueUsageIf( ( $modeFlag === 'id' ) !== $this->idMode );
+                               $userField = $this->idMode ? 'rev_user' : 'rev_user_text';
                                $encUser = $db->addQuotes( array_shift( $continue ) );
                        } else {
                                $this->dieContinueUsageIf( count( $continue ) != 2 );
@@ -174,8 +189,8 @@ class ApiQueryContributions extends ApiQueryBase {
                        $op = ( $this->params['dir'] == 'older' ? '<' : '>' );
                        if ( $this->multiUserMode ) {
                                $this->addWhere(
-                                       "rev_user_text $op $encUser OR " .
-                                       "(rev_user_text = $encUser AND " .
+                                       "$userField $op $encUser OR " .
+                                       "($userField = $encUser AND " .
                                        "(rev_timestamp $op $encTS OR " .
                                        "(rev_timestamp = $encTS AND " .
                                        "rev_id $op= $encId)))"
@@ -206,14 +221,17 @@ class ApiQueryContributions extends ApiQueryBase {
                if ( $this->prefixMode ) {
                        $this->addWhere( 'rev_user_text' .
                                $this->getDB()->buildLike( $this->userprefix, $this->getDB()->anyString() ) );
+               } elseif ( $this->idMode ) {
+                       $this->addWhereFld( 'rev_user', $this->userids );
                } else {
                        $this->addWhereFld( 'rev_user_text', $this->usernames );
                }
                // ... and in the specified timeframe.
-               // Ensure the same sort order for rev_user_text and rev_timestamp
+               // Ensure the same sort order for rev_user/rev_user_text and rev_timestamp
                // so our query is indexed
                if ( $this->multiUserMode ) {
-                       $this->addWhereRange( 'rev_user_text', $this->params['dir'], null, null );
+                       $this->addWhereRange( $this->idMode ? 'rev_user' : 'rev_user_text',
+                               $this->params['dir'], null, null );
                }
                $this->addTimestampWhereRange( 'rev_timestamp',
                        $this->params['dir'], $this->params['start'], $this->params['end'] );
@@ -247,7 +265,6 @@ class ApiQueryContributions extends ApiQueryBase {
                        $this->addWhereIf( 'rev_parent_id = 0', isset( $show['new'] ) );
                }
                $this->addOption( 'LIMIT', $this->params['limit'] + 1 );
-               $index = [ 'revision' => 'usertext_timestamp' ];
 
                // Mandatory fields: timestamp allows request continuation
                // ns+title checks if the user has access rights for this page
@@ -430,7 +447,11 @@ class ApiQueryContributions extends ApiQueryBase {
 
        private function continueStr( $row ) {
                if ( $this->multiUserMode ) {
-                       return "$row->rev_user_text|$row->rev_timestamp|$row->rev_id";
+                       if ( $this->idMode ) {
+                               return "id|$row->rev_user|$row->rev_timestamp|$row->rev_id";
+                       } else {
+                               return "name|$row->rev_user_text|$row->rev_timestamp|$row->rev_id";
+                       }
                } else {
                        return "$row->rev_timestamp|$row->rev_id";
                }