Speed up populateIpChanges maintenance script.
author: MusikAnimal <musikanimal@gmail.com>
Sat, 16 Sep 2017 21:05:07 +0000 (17:05 -0400)
committer: Reedy <reedy@wikimedia.org>
Mon, 18 Sep 2017 15:52:31 +0000 (15:52 +0000)
Use BETWEEN in the populateIpChanges maintenance script, which makes it
more efficient when copying revisions with a high rev_id. Also add a
'max-rev-id' option to prevent the script from looping through IP
changes that have already been copied since the core patch was deployed.

Bug: T175962
Change-Id: I1df10c9b7237ad5002f76f9d354c36ce879d9d9f

maintenance/populateIpChanges.php

index ffb8c43..c173270 100644 (file)
@@ -46,6 +46,12 @@ then be available when querying for IP ranges at Special:Contributions.
 TEXT
                );
                $this->addOption( 'rev-id', 'The rev_id to start copying from. Default: 0', false, true );
+               $this->addOption(
+                       'max-rev-id',
+                       'The rev_id to stop at. Default: result of MAX(rev_id)',
+                       false,
+                       true
+               );
                $this->addOption(
                        'throttle',
                        'Wait this many milliseconds after copying each batch of revisions. Default: 0',
@@ -57,20 +63,25 @@ TEXT
 
        public function doDBUpdates() {
                $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+               $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
                $dbw = $this->getDB( DB_MASTER );
                $throttle = intval( $this->getOption( 'throttle', 0 ) );
+               $maxRevId = intval( $this->getOption( 'max-rev-id', 0 ) );
                $start = $this->getOption( 'rev-id', 0 );
-               $end = $dbw->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
+               $end = $maxRevId > 0
+                       ? $maxRevId
+                       : $dbw->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ );
                $blockStart = $start;
                $revCount = 0;
 
                $this->output( "Copying IP revisions to ip_changes, from rev_id $start to rev_id $end\n" );
 
                while ( $blockStart <= $end ) {
-                       $rows = $dbw->select(
+                       $blockEnd = min( $blockStart + 200, $end );
+                       $rows = $dbr->select(
                                'revision',
                                [ 'rev_id', 'rev_timestamp', 'rev_user_text' ],
-                               [ "rev_id >= $blockStart", 'rev_user' => 0 ],
+                               [ "rev_id BETWEEN $blockStart AND $blockEnd", 'rev_user' => 0 ],
                                __METHOD__,
                                [ 'ORDER BY' => 'rev_id ASC', 'LIMIT' => $this->mBatchSize ]
                        );
@@ -80,7 +91,7 @@ TEXT
                        }
 
                        $this->output( "...checking $this->mBatchSize revisions for IP edits that need copying, " .
-                               "starting with rev_id $blockStart\n" );
+                               "between rev_ids $blockStart and $blockEnd\n" );
 
                        $insertRows = [];
                        foreach ( $rows as $row ) {