From: MusikAnimal Date: Sat, 16 Sep 2017 21:05:07 +0000 (-0400) Subject: Speed up populateIpChanges maintenance script. X-Git-Tag: 1.31.0-rc.0~2069 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=2eac9d7ef4953fdfe5f710fb3dc98c360c80c5ae Speed up populateIpChanges maintenance script. Use BETWEEN in populateIpChanges maintenance script, which will make it more efficient when copying revisions with a high rev_id. Also adding a 'max-rev-id' option to prevent the script from looping through IP changes that have already been copied since the core patch was deployed. Bug: T175962 Change-Id: I1df10c9b7237ad5002f76f9d354c36ce879d9d9f --- diff --git a/maintenance/populateIpChanges.php b/maintenance/populateIpChanges.php index ffb8c43b30..c173270d21 100644 --- a/maintenance/populateIpChanges.php +++ b/maintenance/populateIpChanges.php @@ -46,6 +46,12 @@ then be available when querying for IP ranges at Special:Contributions. TEXT ); $this->addOption( 'rev-id', 'The rev_id to start copying from. Default: 0', false, true ); + $this->addOption( + 'max-rev-id', + 'The rev_id to stop at. Default: result of MAX(rev_id)', + false, + true + ); $this->addOption( 'throttle', 'Wait this many milliseconds after copying each batch of revisions. Default: 0', @@ -57,20 +63,25 @@ TEXT public function doDBUpdates() { $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] ); $dbw = $this->getDB( DB_MASTER ); $throttle = intval( $this->getOption( 'throttle', 0 ) ); + $maxRevId = intval( $this->getOption( 'max-rev-id', 0 ) ); $start = $this->getOption( 'rev-id', 0 ); - $end = $dbw->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ ); + $end = $maxRevId > 0 + ? $maxRevId + : $dbw->selectField( 'revision', 'MAX(rev_id)', false, __METHOD__ ); $blockStart = $start; $revCount = 0; $this->output( "Copying IP revisions to ip_changes, from rev_id $start to rev_id $end\n" ); while ( $blockStart <= $end ) { - $rows = $dbw->select( + $blockEnd = min( $blockStart + 200, $end ); + $rows = $dbr->select( 'revision', [ 'rev_id', 'rev_timestamp', 'rev_user_text' ], - [ "rev_id >= $blockStart", 'rev_user' => 0 ], + [ "rev_id BETWEEN $blockStart AND $blockEnd", 'rev_user' => 0 ], __METHOD__, [ 'ORDER BY' => 'rev_id ASC', 'LIMIT' => $this->mBatchSize ] ); @@ -80,7 +91,7 @@ TEXT } $this->output( "...checking $this->mBatchSize revisions for IP edits that need copying, " . - "starting with rev_id $blockStart\n" ); + "between rev_ids $blockStart and $blockEnd\n" ); $insertRows = []; foreach ( $rows as $row ) {