Add non-identity collation, with migration script
author: Aryeh Gregor <simetrical@users.mediawiki.org>
Fri, 23 Jul 2010 20:58:11 +0000 (20:58 +0000)
committer: Aryeh Gregor <simetrical@users.mediawiki.org>
Fri, 23 Jul 2010 20:58:11 +0000 (20:58 +0000)
It seemed to work correctly, with the newly-created page "bob" sorting
as "BOB", but then I nuked all my cl_sortkey by running the migration
script before refreshLinks.php had finished running, so I'll have to
wait a while to see if it works properly with a non-messed-up database.
It's possible there's something wrong with the display of section
letters in the categories, but otherwise I think this is working right.

includes/CategoryPage.php
includes/DefaultSettings.php
languages/Language.php
maintenance/updateCollation.php [new file with mode: 0644]

index cf58034..5762331 100644 (file)
@@ -172,12 +172,18 @@ class CategoryViewer {
        * else use sortkey...
        */
        function getSubcategorySortChar( $title, $sortkey ) {
-               global $wgContLang;
+               global $wgContLang, $wgExperimentalCategorySort;
 
                if ( $title->getPrefixedText() == $sortkey ) {
-                       $firstChar = $wgContLang->firstChar( $title->getDBkey() );
+                       $word = $title->getDBkey();
                } else {
-                       $firstChar = $wgContLang->firstChar( $sortkey );
+                       $word = $sortkey;
+               }
+
+               if ( $wgExperimentalCategorySort ) {
+                       $firstChar = $wgContLang->firstLetterForLists( $word );
+               } else {
+                       $firstChar = $wgContLang->firstChar( $word );
                }
 
                return $wgContLang->convert( $firstChar );
@@ -202,7 +208,7 @@ class CategoryViewer {
         * Add a miscellaneous page
         */
        function addPage( $title, $sortkey, $pageLength, $isRedirect = false ) {
-               global $wgContLang;
+               global $wgContLang, $wgExperimentalCategorySort;
                $this->articles[] = $isRedirect
                        ? '<span class="redirect-in-category">' .
                                $this->getSkin()->link(
@@ -213,7 +219,12 @@ class CategoryViewer {
                                        array( 'known', 'noclasses' )
                                ) . '</span>'
                        : $this->getSkin()->makeSizeLinkObj( $pageLength, $title );
-               $this->articles_start_char[] = $wgContLang->convert( $wgContLang->firstChar( $sortkey ) );
+
+               if ( $wgExperimentalCategorySort ) {
+                       $this->articles_start_char[] = $wgContLang->convert( $wgContLang->firstLetterForLists( $sortkey ) );
+               } else {
+                       $this->articles_start_char[] = $wgContLang->convert( $wgContLang->firstChar( $sortkey ) );
+               }
        }
 
        function finaliseCategoryState() {
@@ -259,7 +270,7 @@ class CategoryViewer {
                        foreach ( array( 'page', 'subcat', 'file' ) as $type ) {
                                $res = $dbr->select(
                                        $tables,
-                                       $fields,
+                                       array_merge( $fields, array( 'cl_raw_sortkey' ) ),
                                        $conds + array( 'cl_type' => $type ) + ( $type == 'page' ? array( $pageCondition ) : array() ),
                                        __METHOD__,
                                        $opts + ( $type == 'page' ? array( 'LIMIT' => $this->limit + 1 ) : array() ),
@@ -278,11 +289,11 @@ class CategoryViewer {
 
                                        if ( $title->getNamespace() == NS_CATEGORY ) {
                                                $cat = Category::newFromRow( $row, $title );
-                                               $this->addSubcategoryObject( $cat, $row->cl_sortkey, $row->page_len );
+                                               $this->addSubcategoryObject( $cat, $row->cl_raw_sortkey, $row->page_len );
                                        } elseif ( $this->showGallery && $title->getNamespace() == NS_FILE ) {
-                                               $this->addImage( $title, $row->cl_sortkey, $row->page_len, $row->page_is_redirect );
+                                               $this->addImage( $title, $row->cl_raw_sortkey, $row->page_len, $row->page_is_redirect );
                                        } else {
-                                               $this->addPage( $title, $row->cl_sortkey, $row->page_len, $row->page_is_redirect );
+                                               $this->addPage( $title, $row->cl_raw_sortkey, $row->page_len, $row->page_is_redirect );
                                        }
                                }
                        }
index 42dc3df..5db1691 100644 (file)
@@ -4474,7 +4474,7 @@ $wgExperimentalCategorySort = false;
  * for all rows where cl_collation < $wgCollationVersion and regenerates
  * cl_sortkey based on cl_raw_sortkey.
  */
-$wgCollationVersion = 0;
+$wgCollationVersion = 1;
 
 /** @} */ # End categories }
 
index 95d1426..29c1cee 100644 (file)
@@ -2945,8 +2945,8 @@ class Language {
         * @return string Binary sortkey
         */
        public function convertToSortkey( $string ) {
-               # Stub function for now
-               return $string;
+               # Fake function for now
+               return strtoupper( $string );
        }
 
        /**
@@ -2986,6 +2986,6 @@ class Language {
         * @return string UTF-8 string corresponding to the first letter of input
         */
        public function firstLetterForLists( $string ) {
-               return mb_substr( $string, 0, 1 );
+               return strtoupper( mb_substr( $string, 0, 1 ) );
        }
 }
diff --git a/maintenance/updateCollation.php b/maintenance/updateCollation.php
new file mode 100644 (file)
index 0000000..93c845d
--- /dev/null
@@ -0,0 +1,74 @@
+<?php
+/**
+ * @file 
+ * @ingroup Maintenance
+ * @author Aryeh Gregor (Simetrical)
+ */
+
+#$optionsWithArgs = array( 'begin', 'max-slave-lag' );
+
+require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+
+/** Regenerates cl_sortkey from cl_raw_sortkey for rows with an outdated cl_collation. */
+class UpdateCollation extends Maintenance {
+       const BATCH_SIZE = 1000; # Rows selected and updated per transaction
+
+       public function __construct() {
+               parent::__construct();
+
+               global $wgCollationVersion;
+               $this->mDescription = <<<TEXT
+This script will find all rows in the categorylinks table whose collation is 
+out-of-date (cl_collation < $wgCollationVersion) and repopulate cl_sortkey 
+using cl_raw_sortkey.  If everything's collation is up-to-date, it will do 
+nothing.
+TEXT;
+
+               #$this->addOption( 'force', 'Run on all rows, even if the collation is supposed to be up-to-date.' );
+       }
+
+       /**
+        * Migrate out-of-date rows in batches of BATCH_SIZE, one transaction
+        * per batch, reporting the number of rows actually processed so far.
+        */
+       public function execute() {
+               global $wgCollationVersion, $wgContLang;
+
+               $dbw = wfGetDB( DB_MASTER );
+               $outdated = 'cl_collation < ' . $dbw->addQuotes( $wgCollationVersion );
+               $fields = array( 'cl_from', 'cl_to', 'cl_raw_sortkey' );
+
+               $count = $dbw->estimateRowCount( 'categorylinks', $fields, $outdated, __METHOD__ );
+               $this->output( "Fixing around $count rows (estimate might be wrong).\n" );
+
+               $count = 0;
+               do {
+                       # Updated rows no longer match the condition, so each pass
+                       # picks up the next batch without needing an offset.
+                       $res = $dbw->select( 'categorylinks', $fields, $outdated,
+                               __METHOD__, array( 'LIMIT' => self::BATCH_SIZE ) );
+                       $batchRows = $res->numRows();
+                       $dbw->begin();
+                       foreach ( $res as $row ) {
+                               # TODO: Handle the case where cl_raw_sortkey is null.
+                               $dbw->update(
+                                       'categorylinks',
+                                       array(
+                                               'cl_sortkey' => $wgContLang->convertToSortkey( $row->cl_raw_sortkey ),
+                                               'cl_collation' => $wgCollationVersion
+                                       ),
+                                       array( 'cl_from' => $row->cl_from, 'cl_to' => $row->cl_to ),
+                                       __METHOD__
+                               );
+                       }
+                       $dbw->commit();
+
+                       # Count the rows really handled; the last batch may be short or empty.
+                       $count += $batchRows;
+                       $this->output( "$count done.\n" );
+               } while ( $batchRows >= self::BATCH_SIZE );
+       }
+}
+
+$maintClass = "UpdateCollation";
+require_once( DO_MAINTENANCE );