RevDel: Avoid log_search rows with empty values for target_author_actor
authorBrad Jorsch <bjorsch@wikimedia.org>
Thu, 7 Feb 2019 16:16:32 +0000 (11:16 -0500)
committerBrad Jorsch <bjorsch@wikimedia.org>
Thu, 21 Mar 2019 20:42:48 +0000 (16:42 -0400)
During migration, RevDel may wind up being used on items where an actor
has not been assigned yet. The code creating log_search rows for
target_author_actor needs to take this into account.

Also, to clean this up on Wikimedia wikis, I've added code to
MigrateActors to delete these rows before (re-)migrating log_search
and a --tables option so a re-run can skip trying to process all the
already-processed tables (cf. T188327#4892827).

Bug: T215525
Change-Id: Ica15e2e30445e23761e6d3d6405b3eb39a086161

RELEASE-NOTES-1.33
includes/revisiondelete/RevDelList.php
maintenance/includes/MigrateActors.php

index 3cc32cc..10b1eaa 100644 (file)
@@ -27,6 +27,12 @@ Some specific notes for MediaWiki 1.33 upgrades are below:
   run your wiki with $wgActorTableSchemaMigrationStage SCHEMA_COMPAT_READ_OLD,
   note that log_search rows needed to find revision deletions by target user
   were incorrectly deleted. See T215464 for details.
+* If revision deletions were performed when the wiki was configured with
+  $wgActorTableSchemaMigrationStage SCHEMA_COMPAT_WRITE_BOTH and without
+  migrateActors.php having been run, the log_search table may contain rows with
+  empty values for "target_author_actor" which will prevent log searches for
+  revision deletions by target user from finding those log entries. These rows
+  may be corrected by (re-)running migrateActors.php.
 
 For notes on 1.32.x and older releases, see HISTORY.
 
index 7ddf587..221359d 100644 (file)
@@ -224,8 +224,21 @@ abstract class RevDelList extends RevisionListBase {
                                        } elseif ( IP::isIPAddress( $item->getAuthorName() ) ) {
                                                $authorIPs[] = $item->getAuthorName();
                                        }
-                               }
-                               if ( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
+                                       if ( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
+                                               $actorId = $item->getAuthorActor();
+                                               // During migration, the actor field might be empty. If so, populate
+                                               // it here.
+                                               if ( !$actorId ) {
+                                                       if ( $item->getAuthorId() > 0 ) {
+                                                               $user = User::newFromId( $item->getAuthorId() );
+                                                       } else {
+                                                               $user = User::newFromName( $item->getAuthorName(), false );
+                                                       }
+                                                       $actorId = $user->getActorId( $dbw );
+                                               }
+                                               $authorActors[] = $actorId;
+                                       }
+                               } elseif ( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) {
                                        $authorActors[] = $item->getAuthorActor();
                                }
 
index ef8756f..d9c2072 100644 (file)
@@ -32,9 +32,13 @@ require_once __DIR__ . '/../Maintenance.php';
  * @ingroup Maintenance
  */
 class MigrateActors extends LoggedUpdateMaintenance {
+
+       protected $tables = null;
+
        public function __construct() {
                parent::__construct();
                $this->addDescription( 'Migrates actors from pre-1.31 columns to the \'actor\' table' );
+               $this->addOption( 'tables', 'List of tables to process, comma-separated', false, true );
                $this->setBatchSize( 100 );
        }
 
@@ -42,6 +46,10 @@ class MigrateActors extends LoggedUpdateMaintenance {
                return __CLASS__;
        }
 
+       protected function doTable( $table ) {
+               return $this->tables === null || in_array( $table, $this->tables, true );
+       }
+
        protected function doDBUpdates() {
                global $wgActorTableSchemaMigrationStage;
 
@@ -52,28 +60,51 @@ class MigrateActors extends LoggedUpdateMaintenance {
                        return false;
                }
 
-               $this->output( "Creating actor entries for all registered users\n" );
-               $end = 0;
-               $dbw = $this->getDB( DB_MASTER );
-               $max = $dbw->selectField( 'user', 'MAX(user_id)', '', __METHOD__ );
-               $count = 0;
-               while ( $end < $max ) {
-                       $start = $end + 1;
-                       $end = min( $start + $this->mBatchSize, $max );
-                       $this->output( "... $start - $end\n" );
-                       $dbw->insertSelect(
-                               'actor',
-                               'user',
-                               [ 'actor_user' => 'user_id', 'actor_name' => 'user_name' ],
-                               [ "user_id >= $start", "user_id <= $end" ],
+               $tables = $this->getOption( 'tables' );
+               if ( $tables !== null ) {
+                       $this->tables = explode( ',', $tables );
+               }
+
+               if ( $this->doTable( 'user' ) ) {
+                       $this->output( "Creating actor entries for all registered users\n" );
+                       $end = 0;
+                       $dbw = $this->getDB( DB_MASTER );
+                       $max = $dbw->selectField( 'user', 'MAX(user_id)', '', __METHOD__ );
+                       $count = 0;
+                       while ( $end < $max ) {
+                               $start = $end + 1;
+                               $end = min( $start + $this->mBatchSize, $max );
+                               $this->output( "... $start - $end\n" );
+                               $dbw->insertSelect(
+                                       'actor',
+                                       'user',
+                                       [ 'actor_user' => 'user_id', 'actor_name' => 'user_name' ],
+                                       [ "user_id >= $start", "user_id <= $end" ],
+                                       __METHOD__,
+                                       [ 'IGNORE' ],
+                                       [ 'ORDER BY' => [ 'user_id' ] ]
+                               );
+                               $count += $dbw->affectedRows();
+                               wfWaitForSlaves();
+                       }
+                       $this->output( "Completed actor creation, added $count new actor(s)\n" );
+               } else {
+                       $this->output( "Checking that actors exist for all registered users\n" );
+                       $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
+                       $anyMissing = $dbr->selectField(
+                               [ 'user', 'actor' ],
+                               '1',
+                               [ 'actor_id' => null ],
                                __METHOD__,
-                               [ 'IGNORE' ],
-                               [ 'ORDER BY' => [ 'user_id' ] ]
+                               [ 'LIMIT 1' ],
+                               [ 'actor' => [ 'LEFT JOIN', 'actor_user = user_id' ] ]
                        );
-                       $count += $dbw->affectedRows();
-                       wfWaitForSlaves();
+                       if ( $anyMissing ) {
+                               $this->error( 'Some users lack actors; run without --tables or include `user` in --tables.' );
+                               return false;
+                       }
+                       $this->output( "Ok, continuing.\n" );
                }
-               $this->output( "Completed actor creation, added $count new actor(s)\n" );
 
                $errors = 0;
                $errors += $this->migrateToTemp(
@@ -229,6 +260,11 @@ class MigrateActors extends LoggedUpdateMaintenance {
         * @return int Number of errors
         */
        protected function migrate( $table, $primaryKey, $userField, $nameField, $actorField ) {
+               if ( !$this->doTable( $table ) ) {
+                       $this->output( "Skipping $table, not included in --tables\n" );
+                       return 0;
+               }
+
                $complainedAboutUsers = [];
 
                $primaryKey = (array)$primaryKey;
@@ -325,6 +361,11 @@ class MigrateActors extends LoggedUpdateMaintenance {
        protected function migrateToTemp(
                $table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField
        ) {
+               if ( !$this->doTable( $table ) ) {
+                       $this->output( "Skipping $table, not included in --tables\n" );
+                       return 0;
+               }
+
                $complainedAboutUsers = [];
 
                $newTable = $table . '_actor_temp';
@@ -413,6 +454,11 @@ class MigrateActors extends LoggedUpdateMaintenance {
         * @return int Number of errors
         */
        protected function migrateLogSearch() {
+               if ( !$this->doTable( 'log_search' ) ) {
+                       $this->output( "Skipping log_search, not included in --tables\n" );
+                       return 0;
+               }
+
                $complainedAboutUsers = [];
 
                $primaryKey = [ 'ls_value', 'ls_log_id' ];
@@ -424,6 +470,25 @@ class MigrateActors extends LoggedUpdateMaintenance {
                $countActors = 0;
                $countErrors = 0;
 
+               $anyBad = $dbw->selectField(
+                       'log_search',
+                       1,
+                       [ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
+                       __METHOD__,
+                       [ 'LIMIT' => 1 ]
+               );
+               if ( $anyBad ) {
+                       $this->output( "... Deleting bogus rows due to T21552\n" );
+                       $dbw->delete(
+                               'log_search',
+                               [ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
+                               __METHOD__
+                       );
+                       $ct = $dbw->affectedRows();
+                       $this->output( "... Deleted $ct bogus row(s) from T21552\n" );
+                       wfWaitForSlaves();
+               }
+
                $next = '1=1';
                while ( true ) {
                        // Fetch the rows needing update