* @ingroup Maintenance
*/
class UpdateCollation extends Maintenance {
- const BATCH_SIZE = 10000; // Number of rows to process in one batch
+ const BATCH_SIZE = 100; // Number of rows to process in one batch
const SYNC_INTERVAL = 20; // Wait for slaves after this many batches
public $sizeHistogram = [];
// but this will raise an exception, breaking all category pages
$collation->getFirstLetter( 'MediaWiki' );
+ // Locally at least, (my local is a rather old version of mysql)
+ // mysql seems to filesort if there is both an equality
+ // (but not for an inequality) condition on cl_collation in the
+ // WHERE and it is also the first item in the ORDER BY.
+ if ( $this->hasOption( 'previous-collation' ) ) {
+ $orderBy = 'cl_to, cl_type, cl_from';
+ } else {
+ $orderBy = 'cl_collation, cl_to, cl_type, cl_from';
+ }
$options = [
'LIMIT' => self::BATCH_SIZE,
- 'ORDER BY' => 'cl_from, cl_to',
- 'STRAIGHT_JOIN',
+ 'ORDER BY' => $orderBy,
];
if ( $force || $dryRun ) {
}
$this->output( "Fixing collation for $count rows.\n" );
}
-
$count = 0;
$batchCount = 0;
$batchConds = [];
do {
$this->output( "Selecting next " . self::BATCH_SIZE . " rows..." );
+
+ // cl_type must be selected as a number for proper paging because
+ // enums suck.
+ if ( $dbw->getType() === 'mysql' ) {
+ $clType = 'cl_type+0 AS "cl_type_numeric"';
+ } else {
+ $clType = 'cl_type';
+ }
$res = $dbw->select(
[ 'categorylinks', 'page' ],
[ 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
- 'cl_sortkey', 'page_namespace', 'page_title'
+ 'cl_sortkey', $clType,
+ 'page_namespace', 'page_title'
],
array_merge( $collationConds, $batchConds, [ 'cl_from = page_id' ] ),
__METHOD__,
/**
* Return an SQL expression selecting rows which sort above the given row,
- * assuming an ordering of cl_from, cl_to
+ * assuming an ordering of cl_collation, cl_to, cl_type, cl_from
* @param stdClass $row
* @param DatabaseBase $dbw
* @return string
*/
function getBatchCondition( $row, $dbw ) {
- $fields = [ 'cl_from', 'cl_to' ];
+ if ( $this->hasOption( 'previous-collation' ) ) {
+ $fields = [ 'cl_to', 'cl_type', 'cl_from' ];
+ } else {
+ $fields = [ 'cl_collation', 'cl_to', 'cl_type', 'cl_from' ];
+ }
$first = true;
$cond = false;
$prefix = false;
foreach ( $fields as $field ) {
- $encValue = $dbw->addQuotes( $row->$field );
+ if ( $dbw->getType() === 'mysql' && $field === 'cl_type' ) {
+ // Range conditions with enums are weird in mysql
+ // This must be a numeric literal, or it won't work.
+ $encValue = intval( $row->cl_type_numeric );
+ } else {
+ $encValue = $dbw->addQuotes( $row->$field );
+ }
$inequality = "$field > $encValue";
$equality = "$field = $encValue";
if ( $first ) {