Copied cleanupDupes.inc into FiveUpgrade.inc since it's only used there
[lhc/web/wiklou.git] / maintenance / FiveUpgrade.inc
index 6723bd9..fda34b9 100644 (file)
@@ -4,7 +4,6 @@
  * @ingroup Maintenance
  */
 
-require_once( 'cleanupDupes.inc' );
 require_once( 'updaters.inc' );
 
 define( 'MW_UPGRADE_COPY',     false );
@@ -350,7 +349,7 @@ class FiveUpgrade {
                }
 
                $this->log( "Checking cur table for unique title index and applying if necessary" );
-               checkDupes( true );
+               $this->checkDupes();
 
                $this->log( "...converting from cur/old to page/revision/text DB structure." );
 
@@ -1155,6 +1154,101 @@ ENDS;
                $this->copyTable( 'querycache', $tabledef, $fields );
        }
 
+       /**
+        * Check whether the "cur" table may contain duplicate (namespace, title) rows and,
+        * if so, copy the surplus rows into the "old" table and delete them from "cur".
+        * Returns early if index 'name_title' is unique or 'name_title_dup_prevention' exists.
+        * Progress is echoed to output. This was in cleanupDupes.inc before.
+        */
+       function checkDupes() {
+               $dbw = wfGetDB( DB_MASTER );
+               if ( $dbw->indexExists( 'cur', 'name_title' ) &&
+                       $dbw->indexUnique( 'cur', 'name_title' ) ) {
+                       echo wfWikiID() . ": cur table has the current unique index; no duplicate entries.\n";
+                       return;
+               } elseif ( $dbw->indexExists( 'cur', 'name_title_dup_prevention' ) ) {
+                       echo wfWikiID() . ": cur table has a temporary name_title_dup_prevention unique index; no duplicate entries.\n";
+                       return;
+               }
+
+               echo wfWikiID() . ": cur table has the old non-unique index and may have duplicate entries.\n";
+
+               $dbw = wfGetDB( DB_MASTER ); // NOTE(review): looks redundant; $dbw was already set by the same call above
+               $cur = $dbw->tableName( 'cur' );
+               $old = $dbw->tableName( 'old' );
+               $dbw->query( "LOCK TABLES $cur WRITE, $old WRITE" ); // held until UNLOCK TABLES at the end of this method
+               echo "Checking for duplicate cur table entries... (this may take a while on a large wiki)\n";
+               $res = $dbw->query( <<<END
+SELECT cur_namespace,cur_title,count(*) as c,min(cur_id) as id
+  FROM $cur
+ GROUP BY cur_namespace,cur_title
+HAVING c > 1
+END
+               ); // one row per duplicated (namespace, title): row count c and lowest cur_id
+               $n = $dbw->numRows( $res );
+               echo "Found $n titles with duplicate entries.\n";
+               if ( $n > 0 ) {
+                       echo "Correcting...\n";
+                       while ( $row = $dbw->fetchObject( $res ) ) {
+                               $ns = intval( $row->cur_namespace ); // integer-cast for the raw SQL below
+                               $title = $dbw->addQuotes( $row->cur_title ); // passed through addQuotes() for the raw SQL below
+
+                               # Get the first responding ID; that'll be the one we keep unless the timestamp check below picks the minimum ID.
+                               $id = $dbw->selectField( 'cur', 'cur_id', array(
+                                       'cur_namespace' => $row->cur_namespace,
+                                       'cur_title'     => $row->cur_title ) );
+
+                               echo "$ns:$row->cur_title (canonical ID $id)\n";
+                               if ( $id != $row->id ) { // first-returned row is not the minimum-ID row
+                                       echo "  ** minimum ID $row->id; ";
+                                       $timeMin = $dbw->selectField( 'cur', 'cur_timestamp', array(
+                                               'cur_id' => $row->id ) );
+                                       $timeFirst = $dbw->selectField( 'cur', 'cur_timestamp', array(
+                                               'cur_id' => $id ) );
+                                       if ( $timeMin == $timeFirst ) {
+                                               echo "timestamps match at $timeFirst; ok\n";
+                                       } else {
+                                               echo "timestamps don't match! min: $timeMin, first: $timeFirst; ";
+                                               if ( $timeMin > $timeFirst ) { // minimum-ID row has the greater timestamp: keep it instead
+                                                       $id = $row->id;
+                                                       echo "keeping minimum: $id\n";
+                                               } else {
+                                                       echo "keeping first: $id\n";
+                                               }
+                                       }
+                               }
+
+                               $dbw->query( <<<END
+INSERT
+  INTO $old
+      (old_namespace, old_title,      old_text,
+       old_comment,   old_user,       old_user_text,
+       old_timestamp, old_minor_edit, old_flags,
+       inverse_timestamp)
+SELECT cur_namespace, cur_title,      cur_text,
+       cur_comment,   cur_user,       cur_user_text,
+       cur_timestamp, cur_minor_edit, '',
+       inverse_timestamp
+  FROM $cur
+ WHERE cur_namespace=$ns
+   AND cur_title=$title
+   AND cur_id != $id
+END
+                               ); // copy all other rows for this title into old, with old_flags set to ''
+                               $dbw->query( <<<END
+DELETE
+  FROM $cur
+ WHERE cur_namespace=$ns
+   AND cur_title=$title
+   AND cur_id != $id
+END
+                                       ); // then delete those same rows from cur
+                       }
+               }
+               $dbw->query( 'UNLOCK TABLES' ); // release the locks taken above
+               echo "Done.\n";
+       }
+
        /**
         * Rename all our temporary tables into final place.
         * We've left things in place so a read-only wiki can continue running