<?php
/**
- * Script will find all rows in the categorylinks table whose collation is
- * out-of-date (cl_collation != $wgCategoryCollation) and repopulate cl_sortkey
+ * Find all rows in the categorylinks table whose collation is out-of-date
+ * (cl_collation != $wgCategoryCollation) and repopulate cl_sortkey
* using the page title and cl_sortkey_prefix.
*
* This program is free software; you can redistribute it and/or modify
#$optionsWithArgs = array( 'begin', 'max-slave-lag' );
-require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+require_once( __DIR__ . '/Maintenance.php' );
+/**
+ * Maintenance script that finds all rows in the categorylinks table whose
+ * collation is out-of-date and repopulates their cl_sortkey.
+ *
+ * @ingroup Maintenance
+ */
class UpdateCollation extends Maintenance {
- const BATCH_SIZE = 50; // Number of rows to process in one batch
+ const BATCH_SIZE = 10000; // Number of rows to process in one batch
const SYNC_INTERVAL = 20; // Wait for slaves after this many batches
- var $sizeHistogram = array();
+ public $sizeHistogram = array();
public function __construct() {
parent::__construct();
'collation, though, so it may miss out-of-date rows with a different, ' .
'even older collation.', false, true );
$this->addOption( 'target-collation', 'Set this to the new collation type to ' .
- 'use instead of $wgCategoryCollation. Usually you should not use this, ' .
- 'you should just update $wgCategoryCollation in LocalSettings.php.',
+ 'use instead of $wgCategoryCollation. Usually you should not use this, ' .
+ 'you should just update $wgCategoryCollation in LocalSettings.php.',
false, true );
$this->addOption( 'dry-run', 'Don\'t actually change the collations, just ' .
'compile statistics.' );
$collation = Collation::singleton();
}
- $options = array( 'LIMIT' => self::BATCH_SIZE, 'STRAIGHT_JOIN' );
+ $options = array(
+ 'LIMIT' => self::BATCH_SIZE,
+ 'ORDER BY' => 'cl_to, cl_type, cl_from',
+ 'STRAIGHT_JOIN',
+ );
if ( $force || $dryRun ) {
- $options['ORDER BY'] = 'cl_from, cl_to';
$collationConds = array();
} else {
if ( $this->hasOption( 'previous-collation' ) ) {
);
}
- if ( !$wgMiserMode ) {
+ $count = $dbw->estimateRowCount(
+ 'categorylinks',
+ '*',
+ $collationConds,
+ __METHOD__
+ );
+ // Improve estimate if feasible
+ if ( $count < 1000000 ) {
$count = $dbw->selectField(
'categorylinks',
'COUNT(*)',
$collationConds,
__METHOD__
);
- } else {
- $count = $dbw->estimateRowCount(
- 'categorylinks',
- '*',
- $collationConds,
- __METHOD__
- );
}
if ( $count == 0 ) {
$this->output( "Collations up-to-date.\n" );
$res = $dbw->select(
array( 'categorylinks', 'page' ),
array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
- 'cl_sortkey', 'page_namespace', 'page_title'
+ 'cl_sortkey', 'cl_type', 'page_namespace', 'page_title'
),
array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ),
__METHOD__,
$dbw->commit( __METHOD__ );
}
- if ( ( $force || $dryRun ) && $row ) {
- $encFrom = $dbw->addQuotes( $row->cl_from );
- $encTo = $dbw->addQuotes( $row->cl_to );
- $batchConds = array(
- "(cl_from = $encFrom AND cl_to > $encTo) " .
- " OR cl_from > $encFrom" );
+ if ( $row ) {
+ $batchConds = array( $this->getBatchCondition( $row ) );
}
$count += $res->numRows();
}
}
+ /**
+  * Build the SQL condition used to continue after the given row.
+  *
+  * The result matches every row that sorts strictly after $row under
+  * ORDER BY cl_to, cl_type, cl_from. It is a chain of alternatives: the
+  * first column is greater, or the first column is equal and the second
+  * is greater, and so on through the last column.
+  *
+  * NOTE(review): every comparison is made against a quoted literal. If the
+  * backend orders cl_type differently from how it compares it to a string
+  * (e.g. an ENUM column), this condition may not agree with the ORDER BY;
+  * confirm against the schema.
+  *
+  * @param object $row Result row exposing cl_to, cl_type and cl_from
+  * @return string SQL expression suitable for use in a WHERE clause
+  */
+ function getBatchCondition( $row ) {
+ 	$db = $this->getDB( DB_MASTER );
+ 	// "column = value" terms for the columns already visited
+ 	$equalTerms = array();
+ 	// One alternative per column, joined with OR at the end
+ 	$alternatives = array();
+ 	foreach ( array( 'cl_to', 'cl_type', 'cl_from' ) as $column ) {
+ 		$quoted = $db->addQuotes( $row->$column );
+ 		$greater = "$column > $quoted";
+ 		if ( $equalTerms ) {
+ 			// Ties on all earlier columns, strictly greater on this one
+ 			$alternatives[] = '(' . implode( ' AND ', $equalTerms ) . " AND $greater)";
+ 		} else {
+ 			// The leading column needs no tie-breaking prefix
+ 			$alternatives[] = $greater;
+ 		}
+ 		$equalTerms[] = "$column = $quoted";
+ 	}
+ 	return implode( ' OR ', $alternatives );
+ }
+
function updateSortKeySizeHistogram( $key ) {
$length = strlen( $key );
if ( !isset( $this->sizeHistogram[$length] ) ) {