Merge "Make refreshLinksJob explicitly check the cache rev ID"
[lhc/web/wiklou.git] / maintenance / updateCollation.php
index 37c9948..6d9a616 100644 (file)
@@ -33,10 +33,10 @@ require_once __DIR__ . '/Maintenance.php';
  * @ingroup Maintenance
  */
 class UpdateCollation extends Maintenance {
-       const BATCH_SIZE = 10000; // Number of rows to process in one batch
+       const BATCH_SIZE = 100; // Number of rows to process in one batch
        const SYNC_INTERVAL = 20; // Wait for slaves after this many batches
 
-       public $sizeHistogram = array();
+       public $sizeHistogram = [];
 
        public function __construct() {
                parent::__construct();
@@ -51,7 +51,7 @@ TEXT
                );
 
                $this->addOption( 'force', 'Run on all rows, even if the collation is ' .
-                       'supposed to be up-to-date.' );
+                       'supposed to be up-to-date.', false, false, 'f' );
                $this->addOption( 'previous-collation', 'Set the previous value of ' .
                        '$wgCategoryCollation here to speed up this script, especially if your ' .
                        'categorylinks table is large. This will only update rows with that ' .
@@ -85,21 +85,29 @@ TEXT
                // but this will raise an exception, breaking all category pages
                $collation->getFirstLetter( 'MediaWiki' );
 
-               $options = array(
+               // Locally at least (my local is a rather old version of MySQL),
+               // MySQL seems to filesort when there is an equality condition
+               // (but not an inequality condition) on cl_collation in the
+               // WHERE and cl_collation is also the first item in the ORDER BY,
+               // so cl_collation is left out of the ORDER BY when
+               // previous-collation is given.
+               if ( $this->hasOption( 'previous-collation' ) ) {
+                       $orderBy = 'cl_to, cl_type, cl_from';
+               } else {
+                       $orderBy = 'cl_collation, cl_to, cl_type, cl_from';
+               }
+               $options = [
                        'LIMIT' => self::BATCH_SIZE,
-                       'ORDER BY' => 'cl_from, cl_to',
-                       'STRAIGHT_JOIN',
-               );
+                       'ORDER BY' => $orderBy,
+               ];
 
                if ( $force || $dryRun ) {
-                       $collationConds = array();
+                       $collationConds = [];
                } else {
                        if ( $this->hasOption( 'previous-collation' ) ) {
                                $collationConds['cl_collation'] = $this->getOption( 'previous-collation' );
                        } else {
-                               $collationConds = array( 0 =>
+                               $collationConds = [ 0 =>
                                        'cl_collation != ' . $dbw->addQuotes( $collationName )
-                               );
+                               ];
                        }
 
                        $count = $dbw->estimateRowCount(
@@ -124,18 +132,26 @@ TEXT
                        }
                        $this->output( "Fixing collation for $count rows.\n" );
                }
-
                $count = 0;
                $batchCount = 0;
-               $batchConds = array();
+               $batchConds = [];
                do {
                        $this->output( "Selecting next " . self::BATCH_SIZE . " rows..." );
+
+                       // cl_type must be selected as a number for proper paging: on
+                       // MySQL it is an enum, and getBatchCondition() needs its numeric
+                       // value to build a range condition that works.
+                       if ( $dbw->getType() === 'mysql' ) {
+                               $clType = 'cl_type+0 AS "cl_type_numeric"';
+                       } else {
+                               $clType = 'cl_type';
+                       }
                        $res = $dbw->select(
-                               array( 'categorylinks', 'page' ),
-                               array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
-                                       'cl_sortkey', 'page_namespace', 'page_title'
-                               ),
-                               array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ),
+                               [ 'categorylinks', 'page' ],
+                               [ 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
+                                       'cl_sortkey', $clType,
+                                       'page_namespace', 'page_title'
+                               ],
+                               array_merge( $collationConds, $batchConds, [ 'cl_from = page_id' ] ),
                                __METHOD__,
                                $options
                        );
@@ -178,19 +194,19 @@ TEXT
                                if ( !$dryRun ) {
                                        $dbw->update(
                                                'categorylinks',
-                                               array(
+                                               [
                                                        'cl_sortkey' => $newSortKey,
                                                        'cl_sortkey_prefix' => $prefix,
                                                        'cl_collation' => $collationName,
                                                        'cl_type' => $type,
                                                        'cl_timestamp = cl_timestamp',
-                                               ),
-                                               array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ),
+                                               ],
+                                               [ 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ],
                                                __METHOD__
                                        );
                                }
                                if ( $row ) {
-                                       $batchConds = array( $this->getBatchCondition( $row, $dbw ) );
+                                       $batchConds = [ $this->getBatchCondition( $row, $dbw ) ];
                                }
                        }
                        if ( !$dryRun ) {
@@ -217,18 +233,28 @@ TEXT
 
        /**
         * Return an SQL expression selecting rows which sort above the given row,
-        * assuming an ordering of cl_from, cl_to
+        * assuming an ordering of cl_collation, cl_to, cl_type, cl_from
+        * (or cl_to, cl_type, cl_from when previous-collation is given)
         * @param stdClass $row
         * @param DatabaseBase $dbw
         * @return string
         */
        function getBatchCondition( $row, $dbw ) {
-               $fields = array( 'cl_from', 'cl_to' );
+               if ( $this->hasOption( 'previous-collation' ) ) {
+                       $fields = [ 'cl_to', 'cl_type', 'cl_from' ];
+               } else {
+                       $fields = [ 'cl_collation', 'cl_to', 'cl_type', 'cl_from' ];
+               }
                $first = true;
                $cond = false;
                $prefix = false;
                foreach ( $fields as $field ) {
-                       $encValue = $dbw->addQuotes( $row->$field );
+                       if ( $dbw->getType() === 'mysql' && $field === 'cl_type' ) {
+                               // Range conditions with enums are weird in mysql
+                               // This must be a numeric literal, or it won't work.
+                               $encValue = intval( $row->cl_type_numeric );
+                       } else {
+                               $encValue = $dbw->addQuotes( $row->$field );
+                       }
                        $inequality = "$field > $encValue";
                        $equality = "$field = $encValue";
                        if ( $first ) {
@@ -259,7 +285,7 @@ TEXT
                }
                $numBins = 20;
                $coarseHistogram = array_fill( 0, $numBins, 0 );
-               $coarseBoundaries = array();
+               $coarseBoundaries = [];
                $boundary = 0;
                for ( $i = 0; $i < $numBins - 1; $i++ ) {
                        $boundary += $maxLength / $numBins;