Fix slow queries in migrateActors.php
authorBrad Jorsch <bjorsch@wikimedia.org>
Mon, 17 Dec 2018 17:26:13 +0000 (12:26 -0500)
committerBrad Jorsch <bjorsch@wikimedia.org>
Mon, 17 Dec 2018 17:33:39 +0000 (12:33 -0500)
MariaDB really doesn't like the complicated OR in the join condition for
the actor table, it winds up scanning the whole actor table to find the
match. After trying several different options, the best bet seems to be
to use subqueries to get the actor_id.

MariaDB also doesn't like the DISTINCTed query for log_search, it gets
confused if the selected fields contain other fields much like it
historically gets confused when GROUP BY contains fields that are
constant in the WHERE clause. So we'll do the select from log_search in
a subquery and then join with actor, and inject the constant ls_field
into the rows as a constant field.

Bug: T188327
Change-Id: I979c040441f83bdf36e2f4031152559391233e5f

maintenance/migrateActors.php

index 5e27ac8..f5a1e44 100644 (file)
@@ -119,6 +119,29 @@ class MigrateActors extends LoggedUpdateMaintenance {
                return [ $next, $display ];
        }
 
                return [ $next, $display ];
        }
 
+       /**
+        * Make the subqueries for `actor_id`
+        * @param IDatabase $dbw
+        * @param string $userField User ID field name
+        * @param string $nameField User name field name
+        * @return string SQL fragment
+        */
+       private function makeActorIdSubquery( $dbw, $userField, $nameField ) {
+               $idSubquery = $dbw->buildSelectSubquery(
+                       'actor',
+                       'actor_id',
+                       [ "$userField = actor_user" ],
+                       __METHOD__
+               );
+               $nameSubquery = $dbw->buildSelectSubquery(
+                       'actor',
+                       'actor_id',
+                       [ "$nameField = actor_name" ],
+                       __METHOD__
+               );
+               return "CASE WHEN $userField = 0 OR $userField IS NULL THEN $nameSubquery ELSE $idSubquery END";
+       }
+
        /**
         * Add actors for anons in a set of rows
         * @param IDatabase $dbw
        /**
         * Add actors for anons in a set of rows
         * @param IDatabase $dbw
@@ -214,6 +237,7 @@ class MigrateActors extends LoggedUpdateMaintenance {
                wfWaitForSlaves();
 
                $dbw = $this->getDB( DB_MASTER );
                wfWaitForSlaves();
 
                $dbw = $this->getDB( DB_MASTER );
+               $actorIdSubquery = $this->makeActorIdSubquery( $dbw, $userField, $nameField );
                $next = '1=1';
                $countUpdated = 0;
                $countActors = 0;
                $next = '1=1';
                $countUpdated = 0;
                $countActors = 0;
@@ -221,8 +245,8 @@ class MigrateActors extends LoggedUpdateMaintenance {
                while ( true ) {
                        // Fetch the rows needing update
                        $res = $dbw->select(
                while ( true ) {
                        // Fetch the rows needing update
                        $res = $dbw->select(
-                               [ $table, 'actor' ],
-                               array_merge( $primaryKey, [ $userField, $nameField, 'actor_id' ] ),
+                               $table,
+                               array_merge( $primaryKey, [ $userField, $nameField, 'actor_id' => $actorIdSubquery ] ),
                                [
                                        $actorField => 0,
                                        $next,
                                [
                                        $actorField => 0,
                                        $next,
@@ -231,13 +255,6 @@ class MigrateActors extends LoggedUpdateMaintenance {
                                [
                                        'ORDER BY' => $primaryKey,
                                        'LIMIT' => $this->mBatchSize,
                                [
                                        'ORDER BY' => $primaryKey,
                                        'LIMIT' => $this->mBatchSize,
-                               ],
-                               [
-                                       'actor' => [
-                                               'LEFT JOIN',
-                                               "$userField != 0 AND actor_user = $userField OR "
-                                               . "($userField = 0 OR $userField IS NULL) AND actor_name = $nameField"
-                                       ]
                                ]
                        );
                        if ( !$res->numRows() ) {
                                ]
                        );
                        if ( !$res->numRows() ) {
@@ -315,6 +332,7 @@ class MigrateActors extends LoggedUpdateMaintenance {
                wfWaitForSlaves();
 
                $dbw = $this->getDB( DB_MASTER );
                wfWaitForSlaves();
 
                $dbw = $this->getDB( DB_MASTER );
+               $actorIdSubquery = $this->makeActorIdSubquery( $dbw, $userField, $nameField );
                $next = [];
                $countUpdated = 0;
                $countActors = 0;
                $next = [];
                $countUpdated = 0;
                $countActors = 0;
@@ -322,8 +340,8 @@ class MigrateActors extends LoggedUpdateMaintenance {
                while ( true ) {
                        // Fetch the rows needing update
                        $res = $dbw->select(
                while ( true ) {
                        // Fetch the rows needing update
                        $res = $dbw->select(
-                               [ $table, $newTable, 'actor' ],
-                               [ $primaryKey, $userField, $nameField, 'actor_id' ] + $extra,
+                               [ $table, $newTable ],
+                               [ $primaryKey, $userField, $nameField, 'actor_id' => $actorIdSubquery ] + $extra,
                                [ $newPrimaryKey => null ] + $next,
                                __METHOD__,
                                [
                                [ $newPrimaryKey => null ] + $next,
                                __METHOD__,
                                [
@@ -332,11 +350,6 @@ class MigrateActors extends LoggedUpdateMaintenance {
                                ],
                                [
                                        $newTable => [ 'LEFT JOIN', "{$primaryKey}={$newPrimaryKey}" ],
                                ],
                                [
                                        $newTable => [ 'LEFT JOIN', "{$primaryKey}={$newPrimaryKey}" ],
-                                       'actor' => [
-                                               'LEFT JOIN',
-                                               "$userField != 0 AND actor_user = $userField OR "
-                                               . "($userField = 0 OR $userField IS NULL) AND actor_name = $nameField"
-                                       ]
                                ]
                        );
                        if ( !$res->numRows() ) {
                                ]
                        );
                        if ( !$res->numRows() ) {
@@ -414,18 +427,31 @@ class MigrateActors extends LoggedUpdateMaintenance {
                while ( true ) {
                        // Fetch the rows needing update
                        $res = $dbw->select(
                while ( true ) {
                        // Fetch the rows needing update
                        $res = $dbw->select(
-                               [ 'log_search', 'actor' ],
-                               [ 'ls_field', 'ls_value', 'actor_id' ],
                                [
                                [
-                                       'ls_field' => 'target_author_id',
-                                       $next,
+                                       'ls' => $dbw->buildSelectSubquery(
+                                               'log_search',
+                                               'ls_value',
+                                               [
+                                                       'ls_field' => 'target_author_id',
+                                                       $next
+                                               ],
+                                               __METHOD__,
+                                               [
+                                                       'DISTINCT',
+                                                       'ORDER BY' => [ 'ls_value' ],
+                                                       'LIMIT' => $this->mBatchSize,
+                                               ]
+                                       ),
+                                       'actor'
                                ],
                                ],
-                               __METHOD__,
                                [
                                [
-                                       'DISTINCT',
-                                       'ORDER BY' => [ 'ls_value' ],
-                                       'LIMIT' => $this->mBatchSize,
+                                       'ls_field' => $dbw->addQuotes( 'target_author_id' ),
+                                       'ls_value',
+                                       'actor_id'
                                ],
                                ],
+                               [],
+                               __METHOD__,
+                               [],
                                [ 'actor' => [ 'LEFT JOIN', 'ls_value = ' . $dbw->buildStringCast( 'actor_user' ) ] ]
                        );
                        if ( !$res->numRows() ) {
                                [ 'actor' => [ 'LEFT JOIN', 'ls_value = ' . $dbw->buildStringCast( 'actor_user' ) ] ]
                        );
                        if ( !$res->numRows() ) {
@@ -474,18 +500,31 @@ class MigrateActors extends LoggedUpdateMaintenance {
                while ( true ) {
                        // Fetch the rows needing update
                        $res = $dbw->select(
                while ( true ) {
                        // Fetch the rows needing update
                        $res = $dbw->select(
-                               [ 'log_search', 'actor' ],
-                               [ 'ls_field', 'ls_value', 'actor_id' ],
                                [
                                [
-                                       'ls_field' => 'target_author_ip',
-                                       $next,
+                                       'ls' => $dbw->buildSelectSubquery(
+                                               'log_search',
+                                               'ls_value',
+                                               [
+                                                       'ls_field' => 'target_author_ip',
+                                                       $next
+                                               ],
+                                               __METHOD__,
+                                               [
+                                                       'DISTINCT',
+                                                       'ORDER BY' => [ 'ls_value' ],
+                                                       'LIMIT' => $this->mBatchSize,
+                                               ]
+                                       ),
+                                       'actor'
                                ],
                                ],
-                               __METHOD__,
                                [
                                [
-                                       'DISTINCT',
-                                       'ORDER BY' => [ 'ls_value' ],
-                                       'LIMIT' => $this->mBatchSize,
+                                       'ls_field' => $dbw->addQuotes( 'target_author_ip' ),
+                                       'ls_value',
+                                       'actor_id'
                                ],
                                ],
+                               [],
+                               __METHOD__,
+                               [],
                                [ 'actor' => [ 'LEFT JOIN', 'ls_value = actor_name' ] ]
                        );
                        if ( !$res->numRows() ) {
                                [ 'actor' => [ 'LEFT JOIN', 'ls_value = actor_name' ] ]
                        );
                        if ( !$res->numRows() ) {