Merge "Default $wgContentHandlerUseDB to true"
[lhc/web/wiklou.git] / maintenance / updateCollation.php
index 2363665..0e2791c 100644 (file)
@@ -1,7 +1,7 @@
 <?php
 /**
- * Script will find all rows in the categorylinks table whose collation is
- * out-of-date (cl_collation != $wgCategoryCollation) and repopulate cl_sortkey
+ * Find all rows in the categorylinks table whose collation is out-of-date
+ * (cl_collation != $wgCategoryCollation) and repopulate cl_sortkey
  * using the page title and cl_sortkey_prefix.
  *
  * This program is free software; you can redistribute it and/or modify
 
 #$optionsWithArgs = array( 'begin', 'max-slave-lag' );
 
-require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+require_once( __DIR__ . '/Maintenance.php' );
 
+/**
+ * Maintenance script that will find all rows in the categorylinks table
+ * whose collation is out-of-date.
+ *
+ * @ingroup Maintenance
+ */
 class UpdateCollation extends Maintenance {
-       const BATCH_SIZE = 50; // Number of rows to process in one batch
+       const BATCH_SIZE = 10000; // Number of rows to process in one batch
        const SYNC_INTERVAL = 20; // Wait for slaves after this many batches
 
-       var $sizeHistogram = array();
+       public $sizeHistogram = array();
 
        public function __construct() {
                parent::__construct();
@@ -53,8 +59,8 @@ TEXT;
                        'collation, though, so it may miss out-of-date rows with a different, ' .
                        'even older collation.', false, true );
                $this->addOption( 'target-collation', 'Set this to the new collation type to ' .
-                       'use instead of $wgCategoryCollation. Usually you should not use this, ' . 
-                       'you should just update $wgCategoryCollation in LocalSettings.php.', 
+                       'use instead of $wgCategoryCollation. Usually you should not use this, ' .
+                       'you should just update $wgCategoryCollation in LocalSettings.php.',
                        false, true );
                $this->addOption( 'dry-run', 'Don\'t actually change the collations, just ' .
                        'compile statistics.' );
@@ -76,10 +82,13 @@ TEXT;
                        $collation = Collation::singleton();
                }
 
-               $options = array( 'LIMIT' => self::BATCH_SIZE, 'STRAIGHT_JOIN' );
+               $options = array(
+                       'LIMIT' => self::BATCH_SIZE,
+                       'ORDER BY' => 'cl_to, cl_type, cl_from',
+                       'STRAIGHT_JOIN',
+               );
 
                if ( $force || $dryRun ) {
-                       $options['ORDER BY'] = 'cl_from, cl_to';
                        $collationConds = array();
                } else {
                        if ( $this->hasOption( 'previous-collation' ) ) {
@@ -90,20 +99,20 @@ TEXT;
                                );
                        }
 
-                       if ( !$wgMiserMode ) {
+                       $count = $dbw->estimateRowCount(
+                               'categorylinks',
+                               '*',
+                               $collationConds,
+                               __METHOD__
+                       );
+                       // Improve estimate if feasible
+                       if ( $count < 1000000 ) {
                                $count = $dbw->selectField(
                                        'categorylinks',
                                        'COUNT(*)',
                                        $collationConds,
                                        __METHOD__
                                );
-                       } else {
-                               $count = $dbw->estimateRowCount(
-                                       'categorylinks',
-                                       '*',
-                                       $collationConds,
-                                       __METHOD__
-                               );
                        }
                        if ( $count == 0 ) {
                                $this->output( "Collations up-to-date.\n" );
@@ -120,7 +129,7 @@ TEXT;
                        $res = $dbw->select(
                                array( 'categorylinks', 'page' ),
                                array( 'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
-                                       'cl_sortkey', 'page_namespace', 'page_title'
+                                       'cl_sortkey', 'cl_type', 'page_namespace', 'page_title'
                                ),
                                array_merge( $collationConds, $batchConds, array( 'cl_from = page_id' ) ),
                                __METHOD__,
@@ -180,12 +189,8 @@ TEXT;
                                $dbw->commit( __METHOD__ );
                        }
 
-                       if ( ( $force || $dryRun ) && $row ) {
-                               $encFrom = $dbw->addQuotes( $row->cl_from );
-                               $encTo = $dbw->addQuotes( $row->cl_to );
-                               $batchConds = array(
-                                       "(cl_from = $encFrom AND cl_to > $encTo) " .
-                                       " OR cl_from > $encFrom" );
+                       if ( $row ) {
+                               $batchConds = array( $this->getBatchCondition( $row ) );
                        }
 
                        $count += $res->numRows();
@@ -206,6 +211,32 @@ TEXT;
                }
        }
 
+       /**
+        * Return an SQL expression selecting rows which sort above the given row,
+        * assuming an ordering of cl_to, cl_type, cl_from
+        */
+       function getBatchCondition( $row ) {
+               $dbw = $this->getDB( DB_MASTER );
+               $fields = array( 'cl_to', 'cl_type', 'cl_from' );
+               $first = true;
+               $cond = false;
+               $prefix = false;
+               foreach ( $fields as $field ) {
+                       $encValue = $dbw->addQuotes( $row->$field );
+                       $inequality = "$field > $encValue";
+                       $equality = "$field = $encValue";
+                       if ( $first ) {
+                               $cond = $inequality;
+                               $prefix = $equality;
+                               $first = false;
+                       } else {
+                               $cond .= " OR ($prefix AND $inequality)";
+                               $prefix .= " AND $equality";
+                       }
+               }
+               return $cond;
+       }
+
        function updateSortKeySizeHistogram( $key ) {
                $length = strlen( $key );
                if ( !isset( $this->sizeHistogram[$length] ) ) {