MergeHistory: Update revactor_page too
authorBrad Jorsch <bjorsch@wikimedia.org>
Tue, 10 Sep 2019 15:24:24 +0000 (11:24 -0400)
committerMobrovac <mobrovac@wikimedia.org>
Tue, 17 Sep 2019 07:17:38 +0000 (07:17 +0000)
When using MergeHistory, we need to update the denormalized
revision_actor_temp.revactor_page to match the update we do for
revision.rev_page.

Also, we need a maintenance script to clean up the rows that were broken
by our failure to do that before.

Bug: T232464
Change-Id: Ib819a9d9fc978d75d7cc7e53f361483b69ab8020

autoload.php
includes/MergeHistory.php
maintenance/cleanupRevActorPage.php [new file with mode: 0644]

index f95e001..48d5b30 100644 (file)
@@ -272,6 +272,7 @@ $wgAutoloadLocalClasses = [
        'CleanupInvalidDbKeys' => __DIR__ . '/maintenance/cleanupInvalidDbKeys.php',
        'CleanupPreferences' => __DIR__ . '/maintenance/cleanupPreferences.php',
        'CleanupRemovedModules' => __DIR__ . '/maintenance/cleanupRemovedModules.php',
+       'CleanupRevActorPage' => __DIR__ . '/maintenance/cleanupRevActorPage.php',
        'CleanupSpam' => __DIR__ . '/maintenance/cleanupSpam.php',
        'CleanupUploadStash' => __DIR__ . '/maintenance/cleanupUploadStash.php',
        'CleanupUsersWithNoId' => __DIR__ . '/maintenance/cleanupUsersWithNoId.php',
index 4045a54..1a950c5 100644 (file)
@@ -273,6 +273,18 @@ class MergeHistory {
                        return $status;
                }
 
+               // Update denormalized revactor_page too
+               $this->dbw->update(
+                       'revision_actor_temp',
+                       [ 'revactor_page' => $this->dest->getArticleID() ],
+                       [
+                               'revactor_page' => $this->source->getArticleID(),
+                               // Slightly hacky, but should work given the values assigned in this class
+                               str_replace( 'rev_timestamp', 'revactor_timestamp', $this->timeWhere )
+                       ],
+                       __METHOD__
+               );
+
                // Make the source page a redirect if no revisions are left
                $haveRevisions = $this->dbw->lockForUpdate(
                        'revision',
diff --git a/maintenance/cleanupRevActorPage.php b/maintenance/cleanupRevActorPage.php
new file mode 100644 (file)
index 0000000..ac655fc
--- /dev/null
@@ -0,0 +1,77 @@
+<?php
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script that cleans up cases where rev_page and revactor_page
+ * became desynced, e.g. from T232464.
+ *
+ * @ingroup Maintenance
+ * @since 1.34
+ */
+class CleanupRevActorPage extends LoggedUpdateMaintenance {
+
+       public function __construct() {
+               parent::__construct();
+               $this->addDescription(
+                       'Resyncs revactor_page with rev_page when they differ, e.g. from T232464.'
+               );
+               $this->setBatchSize( 1000 );
+       }
+
+       protected function getUpdateKey() {
+               return __CLASS__;
+       }
+
+       protected function doDBUpdates() {
+               $dbw = $this->getDB( DB_MASTER );
+               $max = $dbw->selectField( 'revision', 'MAX(rev_id)', '', __METHOD__ );
+               $batchSize = $this->mBatchSize;
+
+               $this->output( "Resyncing revactor_page with rev_page...\n" );
+
+               $count = 0;
+               for ( $start = 1; $start <= $max; $start += $batchSize ) {
+                       $end = $start + $batchSize - 1;
+                       $this->output( "... rev_id $start - $end, $count changed\n" );
+
+                       // Fetch the rows needing update
+                       $res = $dbw->select(
+                               [ 'revision', 'revision_actor_temp' ],
+                               [ 'rev_id', 'rev_page' ],
+                               [
+                                       'rev_page != revactor_page',
+                                       "rev_id >= $start",
+                                       "rev_id <= $end",
+                               ],
+                               __METHOD__,
+                               [],
+                               [ 'revision_actor_temp' => [ 'JOIN', 'rev_id = revactor_rev' ] ]
+                       );
+
+                       if ( !$res->numRows() ) {
+                               continue;
+                       }
+
+                       // Update the existing rows
+                       foreach ( $res as $row ) {
+                               $dbw->update(
+                                       'revision_actor_temp',
+                                       [ 'revactor_page' => $row->rev_page ],
+                                       [ 'revactor_rev' => $row->rev_id ],
+                                       __METHOD__
+                               );
+                               $count += $dbw->affectedRows();
+                       }
+
+                       wfWaitForSlaves();
+               }
+
+               $this->output( "Completed resync, $count row(s) updated\n" );
+
+               return true;
+       }
+}
+
+$maintClass = CleanupRevActorPage::class;
+require_once RUN_MAINTENANCE_IF_MAIN;