Use exact counts in updateCollation.php
[lhc/web/wiklou.git] / maintenance / updateCollation.php
1 <?php
2 /**
3 * @file
4 * @ingroup Maintenance
5 * @author Aryeh Gregor (Simetrical)
6 */
7
8 #$optionsWithArgs = array( 'begin', 'max-slave-lag' );
9
10 require_once( dirname( __FILE__ ) . '/Maintenance.php' );
11
/**
 * Maintenance script that rewrites categorylinks rows whose cl_collation
 * differs from $wgCollationVersion: cl_sortkey, cl_sortkey_prefix,
 * cl_collation and cl_type are all regenerated from the linked page's title.
 */
class UpdateCollation extends Maintenance {
	/** Maximum number of rows selected, updated and committed per pass. */
	const BATCH_SIZE = 1000;

	/**
	 * Set up the script and its description text.
	 */
	public function __construct() {
		# The heredoc below interpolates the configured collation version.
		global $wgCollationVersion;

		parent::__construct();

		$this->mDescription = <<<TEXT
This script will find all rows in the categorylinks table whose collation is
out-of-date (cl_collation != $wgCollationVersion) and repopulate cl_sortkey
using the page title and cl_sortkey_prefix. If everything's collation is
up-to-date, it will do nothing.
TEXT;

		#$this->addOption( 'force', 'Run on all rows, even if the collation is supposed to be up-to-date.' );
	}

	/**
	 * Report how many rows are out of date, then fix them in batches of
	 * BATCH_SIZE, one transaction per batch, printing a running total after
	 * each commit. Returns early when no row needs updating.
	 */
	public function execute() {
		global $wgCollationVersion, $wgContLang;

		$dbw = wfGetDB( DB_MASTER );

		# Condition shared by the count query and every batch query.
		$staleCond = 'cl_collation != ' . $dbw->addQuotes( $wgCollationVersion );

		$pending = $dbw->selectField( 'categorylinks', 'COUNT(*)', $staleCond, __METHOD__ );
		if ( $pending == 0 ) {
			$this->output( "Collations up-to-date.\n" );
			return;
		}
		$this->output( "Fixing collation for $pending rows.\n" );

		$fields = array(
			'cl_from', 'cl_to', 'cl_sortkey_prefix', 'cl_collation',
			'cl_sortkey', 'page_namespace', 'page_title'
		);
		$done = 0;

		do {
			# Rows fixed by an earlier pass no longer match $staleCond, so
			# each pass picks up the next batch without needing an offset.
			$batch = $dbw->select(
				array( 'categorylinks', 'page' ),
				$fields,
				array( $staleCond, 'cl_from = page_id' ),
				__METHOD__,
				array( 'LIMIT' => self::BATCH_SIZE )
			);

			$dbw->begin();
			foreach ( $batch as $row ) {
				$title = Title::newFromRow( $row );

				if ( $row->cl_collation != 0 ) {
					$prefix = $row->cl_sortkey_prefix;
				} elseif ( $row->cl_sortkey == $title->getText()
					|| $row->cl_sortkey == $title->getPrefixedText()
				) {
					# Old-style row whose sortkey is just the page title:
					# there is no custom prefix to carry over.
					$prefix = '';
				} else {
					# Old-style row with a custom sortkey: keep it as the prefix.
					$prefix = $row->cl_sortkey;
				}

				# cl_type will be wrong for lots of pages if cl_collation is 0,
				# so it is recomputed for every row while we are here.
				switch ( $title->getNamespace() ) {
					case NS_CATEGORY:
						$type = 'subcat';
						break;
					case NS_FILE:
						$type = 'file';
						break;
					default:
						$type = 'page';
				}

				$sortkey = $wgContLang->convertToSortkey(
					$title->getCategorySortkey( $prefix )
				);

				$dbw->update(
					'categorylinks',
					array(
						'cl_sortkey' => $sortkey,
						'cl_sortkey_prefix' => $prefix,
						'cl_collation' => $wgCollationVersion,
						'cl_type' => $type,
					),
					array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ),
					__METHOD__
				);
			}
			$dbw->commit();

			$fetched = $batch->numRows();
			$done += $fetched;
			$this->output( "$done done.\n" );

			# A short batch means no stale rows are left to select.
		} while ( $fetched == self::BATCH_SIZE );
	}
}
106
107 $maintClass = "UpdateCollation"; # Name of the class defined in this file; presumably read by the script required on the next line -- TODO confirm
108 require_once( DO_MAINTENANCE ); # DO_MAINTENANCE is not defined in this file; presumably provided by Maintenance.php -- TODO confirm