X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=maintenance%2FrunBatchedQuery.php;h=b0a2b9242834440a0596e9a54205cd10093c107c;hb=87a0b17127195742e6c64472dbfdac65b4d254d7;hp=a5e7a2fa88dd7e3df6cca67987543d9819ad5cfd;hpb=e3f6c10d87732c0c8a9bbd7bb57b6c964b92e29a;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/runBatchedQuery.php b/maintenance/runBatchedQuery.php index a5e7a2fa88..b0a2b92428 100644 --- a/maintenance/runBatchedQuery.php +++ b/maintenance/runBatchedQuery.php @@ -25,6 +25,8 @@ require_once __DIR__ . '/Maintenance.php'; +use Wikimedia\Rdbms\IDatabase; + /** * Maintenance script to run a database query in batches and wait for replica DBs. * @@ -34,26 +36,74 @@ class BatchedQueryRunner extends Maintenance { public function __construct() { parent::__construct(); $this->addDescription( - "Run a query repeatedly until it affects 0 rows, and wait for replica DBs in between.\n" . - "NOTE: You need to set a LIMIT clause yourself." ); + "Run an update query on all rows of a table. " . + "Waits for replicas at appropriate intervals." ); + $this->addOption( 'table', 'The table name', true, true ); + $this->addOption( 'set', 'The SET clause', true, true ); + $this->addOption( 'where', 'The WHERE clause', false, true ); + $this->addOption( 'key', 'A column name, the values of which are unique', true, true ); + $this->addOption( 'batch-size', 'The batch size (default 1000)', false, true ); + $this->addOption( 'db', 'The database name, or omit to use the current wiki.', false, true ); } public function execute() { - if ( !$this->hasArg() ) { - $this->error( "No query specified. Specify the query as a command line parameter.", true ); + $table = $this->getOption( 'table' ); + $key = $this->getOption( 'key' ); + $set = $this->getOption( 'set' ); + $where = $this->getOption( 'where', null ); + $where = $where === null ? [] : [ $where ]; + $batchSize = $this->getOption( 'batch-size', 1000 ); + + $dbName = $this->getOption( 'db', null ); + if ( $dbName === null ) { + $dbw = $this->getDB( DB_MASTER ); + } else { + $lbf = MediaWiki\MediaWikiServices::getInstance()->getDBLoadBalancerFactory(); + $lb = $lbf->getMainLB( $dbName ); + $dbw = $lb->getConnection( DB_MASTER, [], $dbName ); } - $query = $this->getArg(); + $selectConds = $where; + $prevEnd = false; + $n = 1; - $dbw = $this->getDB( DB_MASTER ); do { $this->output( "Batch $n: " ); $n++; + + // Note that the update conditions do not rely on atomicity of the + // SELECT query in order to guarantee that all rows are updated. The + // results of the SELECT are merely a partitioning hint. Simultaneous + // updates merely result in the wrong number of rows being updated + // in a batch. + + $res = $dbw->select( $table, $key, $selectConds, __METHOD__, + [ 'ORDER BY' => $key, 'LIMIT' => $batchSize ] ); + if ( $res->numRows() ) { + $res->seek( $res->numRows() - 1 ); + $row = $res->fetchObject(); + $end = $dbw->addQuotes( $row->$key ); + $selectConds = array_merge( $where, [ "$key > $end" ] ); + $updateConds = array_merge( $where, [ "$key <= $end" ] ); + } else { + $updateConds = $where; + } + if ( $prevEnd !== false ) { + $updateConds = array_merge( [ "$key > $prevEnd" ], $updateConds ); + } + + $query = "UPDATE " . $dbw->tableName( $table ) . + " SET " . $set . + " WHERE " . $dbw->makeList( $updateConds, IDatabase::LIST_AND ); + $dbw->query( $query, __METHOD__ ); + + $prevEnd = $end; + $affected = $dbw->affectedRows(); - $this->output( "$affected rows\n" ); + $this->output( "$affected rows affected\n" ); wfWaitForSlaves(); - } while ( $affected > 0 ); + } while ( $res->numRows() ); } public function getDbType() {