require_once __DIR__ . '/Maintenance.php';
+use Wikimedia\Rdbms\IDatabase;
+
/**
* Maintenance script that will find all rows in the categorylinks table
* whose collation is out-of-date.
* @ingroup Maintenance
*/
class UpdateCollation extends Maintenance {
- const BATCH_SIZE = 10000; // Number of rows to process in one batch
- const SYNC_INTERVAL = 20; // Wait for slaves after this many batches
+ const BATCH_SIZE = 100; // Number of rows to process in one batch
+ const SYNC_INTERVAL = 5; // Wait for replica DBs after this many batches
- public $sizeHistogram = array();
+ public $sizeHistogram = [];
public function __construct() {
parent::__construct();
global $wgCategoryCollation;
- $this->mDescription = <<<TEXT
+ $this->addDescription( <<<TEXT
This script will find all rows in the categorylinks table whose collation is
out-of-date (cl_collation != '$wgCategoryCollation') and repopulate cl_sortkey
using the page title and cl_sortkey_prefix. If all collations are
up-to-date, it will do nothing.
-TEXT;
+TEXT
+ );
$this->addOption( 'force', 'Run on all rows, even if the collation is ' .
- 'supposed to be up-to-date.' );
+ 'supposed to be up-to-date.', false, false, 'f' );
$this->addOption( 'previous-collation', 'Set the previous value of ' .
'$wgCategoryCollation here to speed up this script, especially if your ' .
'categorylinks table is large. This will only update rows with that ' .
global $wgCategoryCollation;
$dbw = $this->getDB( DB_MASTER );
+ $dbr = $this->getDB( DB_REPLICA );
$force = $this->getOption( 'force' );
$dryRun = $this->getOption( 'dry-run' );
$verboseStats = $this->getOption( 'verbose-stats' );
// but this will raise an exception, breaking all category pages
$collation->getFirstLetter( 'MediaWiki' );
- $options = array(
+ // Observed locally (on a rather old version of MySQL): the query gets
+ // filesorted when the WHERE has an equality condition on cl_collation
+ // (an inequality condition is fine) and cl_collation is also the first
+ // item in the ORDER BY. So omit it from the ORDER BY in the equality case.
+ if ( $this->hasOption( 'previous-collation' ) ) {
+ $orderBy = 'cl_to, cl_type, cl_from';
+ } else {
+ $orderBy = 'cl_collation, cl_to, cl_type, cl_from';
+ }
+ $options = [
'LIMIT' => self::BATCH_SIZE,
- 'ORDER BY' => 'cl_from, cl_to',
- 'STRAIGHT_JOIN',
- );
+ 'ORDER BY' => $orderBy,
+ 'STRAIGHT_JOIN' // per T58041
+ ];
- if ( $force || $dryRun ) {
- $collationConds = array();
+ if ( $force ) {
+ $collationConds = [];
} else {
if ( $this->hasOption( 'previous-collation' ) ) {
$collationConds['cl_collation'] = $this->getOption( 'previous-collation' );
} else {
- $collationConds = array( 0 =>
+ $collationConds = [ 0 =>
'cl_collation != ' . $dbw->addQuotes( $collationName )
- );
+ ];
}
- $count = $dbw->estimateRowCount(
+ $count = $dbr->estimateRowCount(
'categorylinks',
'*',
$collationConds,
);
// Improve estimate if feasible
if ( $count < 1000000 ) {
- $count = $dbw->selectField(
+ $count = $dbr->selectField(
'categorylinks',
'COUNT(*)',
$collationConds,
return;
}
- $this->output( "Fixing collation for $count rows.\n" );
+ if ( $dryRun ) {
+ $this->output( "$count rows would be updated.\n" );
+ } else {
+ $this->output( "Fixing collation for $count rows.\n" );
+ }
+ wfWaitForSlaves();
}
-
$count = 0;
$batchCount = 0;
- $batchConds = array();
+ $batchConds = [];
do {
$this->output( "Selecting next " . self::BATCH_SIZE . " rows..." );
+
+ // cl_type must be selected as a number for proper paging, because
+ // range conditions on MySQL enum columns need a numeric literal
+ // (see getBatchCondition()).
+ if ( $dbw->getType() === 'mysql' ) {
+ $clType = 'cl_type+0 AS "cl_type_numeric"';
+ } else {
+ $clType = 'cl_type';
+ }
$res = $dbw->select(
- array( 'categorylinks', 'page' ),
- array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
- 'cl_sortkey', 'page_namespace', 'page_title'
- ),
- array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ),
+ [ 'categorylinks', 'page' ],
+ [ 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
+ 'cl_sortkey', $clType,
+ 'page_namespace', 'page_title'
+ ],
+ array_merge( $collationConds, $batchConds, [ 'cl_from = page_id' ] ),
__METHOD__,
$options
);
if ( !$dryRun ) {
$dbw->update(
'categorylinks',
- array(
+ [
'cl_sortkey' => $newSortKey,
'cl_sortkey_prefix' => $prefix,
'cl_collation' => $collationName,
'cl_type' => $type,
'cl_timestamp = cl_timestamp',
- ),
- array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ),
+ ],
+ [ 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ],
__METHOD__
);
}
if ( $row ) {
- $batchConds = array( $this->getBatchCondition( $row, $dbw ) );
+ $batchConds = [ $this->getBatchCondition( $row, $dbw ) ];
}
}
if ( !$dryRun ) {
$this->output( "$count done.\n" );
if ( !$dryRun && ++$batchCount % self::SYNC_INTERVAL == 0 ) {
- $this->output( "Waiting for slaves ... " );
+ $this->output( "Waiting for replica DBs ... " );
wfWaitForSlaves();
$this->output( "done\n" );
}
/**
* Return an SQL expression selecting rows which sort above the given row,
- * assuming an ordering of cl_from, cl_to
+ * assuming an ordering of cl_collation, cl_to, cl_type, cl_from (or
+ * cl_to, cl_type, cl_from when the previous-collation option is set)
* @param stdClass $row
- * @param DatabaseBase $dbw
+ * @param IDatabase $dbw
* @return string
*/
function getBatchCondition( $row, $dbw ) {
- $fields = array( 'cl_from', 'cl_to' );
+ if ( $this->hasOption( 'previous-collation' ) ) {
+ $fields = [ 'cl_to', 'cl_type', 'cl_from' ];
+ } else {
+ $fields = [ 'cl_collation', 'cl_to', 'cl_type', 'cl_from' ];
+ }
$first = true;
$cond = false;
$prefix = false;
foreach ( $fields as $field ) {
- $encValue = $dbw->addQuotes( $row->$field );
+ if ( $dbw->getType() === 'mysql' && $field === 'cl_type' ) {
+ // Range conditions with enums are weird in mysql
+ // This must be a numeric literal, or it won't work.
+ $encValue = intval( $row->cl_type_numeric );
+ } else {
+ $encValue = $dbw->addQuotes( $row->$field );
+ }
$inequality = "$field > $encValue";
$equality = "$field = $encValue";
if ( $first ) {
}
$numBins = 20;
$coarseHistogram = array_fill( 0, $numBins, 0 );
- $coarseBoundaries = array();
+ $coarseBoundaries = [];
$boundary = 0;
for ( $i = 0; $i < $numBins - 1; $i++ ) {
$boundary += $maxLength / $numBins;
}
}
-$maintClass = "UpdateCollation";
+$maintClass = UpdateCollation::class;
require_once RUN_MAINTENANCE_IF_MAIN;