Fix slow queries in migrateActors.php
[lhc/web/wiklou.git] / maintenance / migrateActors.php
1 <?php
2 /**
3 * Migrate actors from pre-1.31 columns to the 'actor' table
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24 use Wikimedia\Rdbms\IDatabase;
25
26 require_once __DIR__ . '/Maintenance.php';
27
28 /**
29 * Maintenance script that migrates actors from pre-1.31 columns to the
30 * 'actor' table
31 *
32 * @ingroup Maintenance
33 */
34 class MigrateActors extends LoggedUpdateMaintenance {
/**
 * Set up the script: description text and a default batch size of 100.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( "Migrates actors from pre-1.31 columns to the 'actor' table" );
	$this->setBatchSize( 100 );
}
40
/**
 * Return the update key for this script, which is simply this class's name.
 *
 * @return string
 */
protected function getUpdateKey() {
	return self::class;
}
44
/**
 * Create 'actor' rows for every row of the 'user' table, then run the
 * per-table migrations.
 *
 * @return bool False when $wgActorTableSchemaMigrationStage lacks
 *  SCHEMA_COMPAT_WRITE_NEW or when any migration step counted an error;
 *  true otherwise.
 */
protected function doDBUpdates() {
	global $wgActorTableSchemaMigrationStage;

	$canWriteNew = $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW;
	if ( !$canWriteNew ) {
		$this->output(
			"...cannot update while \$wgActorTableSchemaMigrationStage lacks SCHEMA_COMPAT_WRITE_NEW\n"
		);
		return false;
	}

	$this->output( "Creating actor entries for all registered users\n" );
	$dbw = $this->getDB( DB_MASTER );
	$maxUserId = $dbw->selectField( 'user', 'MAX(user_id)', '', __METHOD__ );
	$added = 0;
	$lastId = 0;
	// Walk the user_id range in consecutive windows up to MAX(user_id)
	while ( $lastId < $maxUserId ) {
		$firstId = $lastId + 1;
		$lastId = min( $firstId + $this->mBatchSize, $maxUserId );
		$this->output( "... $firstId - $lastId\n" );
		$dbw->insertSelect(
			'actor',
			'user',
			[ 'actor_user' => 'user_id', 'actor_name' => 'user_name' ],
			[ "user_id >= $firstId", "user_id <= $lastId" ],
			__METHOD__,
			[ 'IGNORE' ],
			[ 'ORDER BY' => [ 'user_id' ] ]
		);
		$added += $dbw->affectedRows();
		wfWaitForSlaves();
	}
	$this->output( "Completed actor creation, added $added new actor(s)\n" );

	// 'revision' goes to the separate revision_actor_temp table
	$errors = 0;
	$errors += $this->migrateToTemp(
		'revision', 'rev_id', [ 'revactor_timestamp' => 'rev_timestamp', 'revactor_page' => 'rev_page' ],
		'rev_user', 'rev_user_text', 'revactor_rev', 'revactor_actor'
	);

	// Tables whose actor column lives in the table itself:
	// [ table, primary key, user ID field, user name field, actor field ]
	$inPlaceMigrations = [
		[ 'archive', 'ar_id', 'ar_user', 'ar_user_text', 'ar_actor' ],
		[ 'ipblocks', 'ipb_id', 'ipb_by', 'ipb_by_text', 'ipb_by_actor' ],
		[ 'image', 'img_name', 'img_user', 'img_user_text', 'img_actor' ],
		[ 'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_user', 'oi_user_text', 'oi_actor' ],
		[ 'filearchive', 'fa_id', 'fa_user', 'fa_user_text', 'fa_actor' ],
		[ 'recentchanges', 'rc_id', 'rc_user', 'rc_user_text', 'rc_actor' ],
		[ 'logging', 'log_id', 'log_user', 'log_user_text', 'log_actor' ],
	];
	foreach ( $inPlaceMigrations as $spec ) {
		list( $table, $primaryKey, $userField, $nameField, $actorField ) = $spec;
		$errors += $this->migrate( $table, $primaryKey, $userField, $nameField, $actorField );
	}

	$errors += $this->migrateLogSearch();

	return $errors === 0;
}
97
/**
 * Build a condition matching rows that sort after the given row by primary
 * key, together with a printable rendering of that row's key.
 *
 * @param IDatabase $dbw Used to quote the key values
 * @param string[] $primaryKey Primary key of the table.
 * @param object $row Database row
 * @return array [ string $next, string $display ]
 */
private function makeNextCond( $dbw, $primaryKey, $row ) {
	// Printable form: "field=value" pairs in primary key order
	$pairs = [];
	foreach ( $primaryKey as $field ) {
		$pairs[] = $field . '=' . $row->$field;
	}
	$display = implode( ' ', $pairs );

	// The condition is assembled from the last key column outwards, each
	// earlier column wrapping what has been built so far
	$cond = '';
	foreach ( array_reverse( $primaryKey ) as $field ) {
		$quoted = $dbw->addQuotes( $row->$field );
		$cond = $cond === ''
			? "$field > $quoted"
			: "$field > $quoted OR $field = $quoted AND ($cond)";
	}

	return [ $cond, $display ];
}
121
/**
 * Build the SQL expression that resolves a row's `actor_id`.
 *
 * The expression looks the actor up by name when the user ID field is 0 or
 * NULL, and by user ID otherwise.
 *
 * @param IDatabase $dbw
 * @param string $userField User ID field name
 * @param string $nameField User name field name
 * @return string SQL fragment
 */
private function makeActorIdSubquery( $dbw, $userField, $nameField ) {
	$byUserId = $dbw->buildSelectSubquery(
		'actor',
		'actor_id',
		[ $userField . ' = actor_user' ],
		__METHOD__
	);
	$byName = $dbw->buildSelectSubquery(
		'actor',
		'actor_id',
		[ $nameField . ' = actor_name' ],
		__METHOD__
	);

	return 'CASE WHEN ' . $userField . ' = 0 OR ' . $userField . ' IS NULL'
		. ' THEN ' . $byName
		. ' ELSE ' . $byUserId
		. ' END';
}
144
/**
 * Create actors for the rows in a batch that do not have one yet.
 *
 * Rows whose actor_id is null and whose name is a usable user name are
 * reported as errors and removed from $rows. The remaining rows with a null
 * actor_id get a new 'actor' row keyed by name, and their actor_id property
 * is filled in.
 *
 * @param IDatabase $dbw
 * @param string $nameField Name of the row property holding the user name
 * @param object[] &$rows Batch of rows; filtered and updated in place
 * @param array &$complainedAboutUsers Names already reported, so that each
 *  name is only reported once
 * @param int &$countErrors Incremented once per row removed from $rows
 * @return int Count of actors inserted
 */
private function addActorsForRows(
	IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors
) {
	$pendingNames = [];
	$usableRows = [];

	foreach ( $rows as $index => $row ) {
		if ( $row->actor_id !== null ) {
			// Already has an actor, nothing to create
			$usableRows[$index] = $row;
			continue;
		}

		$name = $row->$nameField;
		if ( !User::isUsableName( $name ) ) {
			$pendingNames[$name] = 0;
			$usableRows[$index] = $row;
			continue;
		}

		// All registered users should have an actor_id already. So a usable
		// name here means maintenance/cleanupUsersWithNoId.php was not run.
		if ( !isset( $complainedAboutUsers[$name] ) ) {
			$complainedAboutUsers[$name] = true;
			$this->error(
				"User name \"$name\" is usable, cannot create an anonymous actor for it."
				. " Run maintenance/cleanupUsersWithNoId.php to fix this situation.\n"
			);
		}
		$countErrors++;
	}
	// Keys and order of the surviving rows are the same as in the input
	$rows = $usableRows;

	if ( !$pendingNames ) {
		return 0;
	}

	$names = array_keys( $pendingNames );
	$newActors = [];
	foreach ( $names as $actorName ) {
		$newActors[] = [ 'actor_name' => $actorName ];
	}
	$dbw->insert( 'actor', $newActors, __METHOD__ );
	$inserted = $dbw->affectedRows();

	// Read back the IDs of the actors with these names
	$found = $dbw->select(
		'actor',
		[ 'actor_id', 'actor_name' ],
		[ 'actor_name' => $names ],
		__METHOD__
	);
	foreach ( $found as $actor ) {
		$pendingNames[$actor->actor_name] = $actor->actor_id;
	}

	foreach ( $rows as $row ) {
		if ( $row->actor_id === null ) {
			$row->actor_id = $pendingNames[$row->$nameField];
		}
	}

	return $inserted;
}
215
/**
 * Migrate actors in a table.
 *
 * Only rows whose actor field is 0 are selected, so any row with a non-zero
 * actor field is treated as already migrated. Only the actor field is
 * written; the user ID and user name fields are left as they are.
 *
 * @param string $table Table to migrate
 * @param string|string[] $primaryKey Primary key of the table.
 * @param string $userField User ID field name
 * @param string $nameField User name field name
 * @param string $actorField Actor field name
 * @return int Number of errors
 */
protected function migrate( $table, $primaryKey, $userField, $nameField, $actorField ) {
	$primaryKey = (array)$primaryKey;
	$pkFilter = array_flip( $primaryKey );
	$complainedAboutUsers = [];

	$this->output(
		"Beginning migration of $table.$userField and $table.$nameField to $table.$actorField\n"
	);
	wfWaitForSlaves();

	$dbw = $this->getDB( DB_MASTER );

	// Loop-invariant parts of the batch query
	$fields = array_merge(
		$primaryKey,
		[
			$userField,
			$nameField,
			'actor_id' => $this->makeActorIdSubquery( $dbw, $userField, $nameField ),
		]
	);
	$selectOptions = [
		'ORDER BY' => $primaryKey,
		'LIMIT' => $this->mBatchSize,
	];

	$countUpdated = 0;
	$countActors = 0;
	$countErrors = 0;
	$next = '1=1';
	for ( ; ; ) {
		// Next batch of unmigrated rows after the previous batch's last key
		$res = $dbw->select(
			$table,
			$fields,
			[ $actorField => 0, $next ],
			__METHOD__,
			$selectOptions
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Remember the last fetched row before addActorsForRows() filters the
		// batch, so that the paging key still advances past skipped rows
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
		);

		foreach ( $rows as $row ) {
			if ( $row->actor_id ) {
				// Match on the primary key, and only while still unmigrated
				$where = array_intersect_key( (array)$row, $pkFilter ) + [ $actorField => 0 ];
				$dbw->update( $table, [ $actorField => $row->actor_id ], $where, __METHOD__ );
				$countUpdated += $dbw->affectedRows();
			} else {
				$display = $this->makeNextCond( $dbw, $primaryKey, $row )[1];
				$this->error(
					"Could not make actor for row with $display "
					. "$userField={$row->$userField} $nameField={$row->$nameField}\n"
				);
				$countErrors++;
			}
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... $display\n" );
		wfWaitForSlaves();
	}

	$this->output(
		"Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
306
/**
 * Migrate actors in a table to a temporary table.
 *
 * The new table is named "{$table}_actor_temp". For every row of $table that
 * has no matching row in the new table, one row is inserted there holding the
 * original primary key, the actor ID and the extra fields. The user ID and
 * user name columns of $table itself are not modified.
 *
 * @param string $table Table to migrate
 * @param string $primaryKey Primary key of the table.
 * @param array $extra Extra fields to copy, as [ new table field => $table field ]
 * @param string $userField User ID field name
 * @param string $nameField User name field name
 * @param string $newPrimaryKey Primary key of the new table.
 * @param string $actorField Actor field name
 * @return int Number of errors
 */
protected function migrateToTemp(
	$table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField
) {
	$complainedAboutUsers = [];

	$newTable = $table . '_actor_temp';
	$this->output(
		"Beginning migration of $table.$userField and $table.$nameField to $newTable.$actorField\n"
	);
	wfWaitForSlaves();

	$dbw = $this->getDB( DB_MASTER );
	$actorIdSubquery = $this->makeActorIdSubquery( $dbw, $userField, $nameField );
	$next = [];
	$countUpdated = 0;
	$countActors = 0;
	$countErrors = 0;
	while ( true ) {
		// Fetch the rows needing update: those for which the LEFT JOIN finds
		// no row in the new table. $extra fields are selected under their
		// new-table names.
		$res = $dbw->select(
			[ $table, $newTable ],
			[ $primaryKey, $userField, $nameField, 'actor_id' => $actorIdSubquery ] + $extra,
			[ $newPrimaryKey => null ] + $next,
			__METHOD__,
			[
				'ORDER BY' => $primaryKey,
				'LIMIT' => $this->mBatchSize,
			],
			[
				$newTable => [ 'LEFT JOIN', "{$primaryKey}={$newPrimaryKey}" ],
			]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Insert new actors for rows that need one. The last fetched row is
		// remembered first, because addActorsForRows() may drop rows.
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		$countActors += $this->addActorsForRows(
			$dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
		);

		// Build the new-table rows
		$inserts = [];
		foreach ( $rows as $row ) {
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $row );
				$this->error(
					"Could not make actor for row with $display "
					. "$userField={$row->$userField} $nameField={$row->$nameField}\n"
				);
				$countErrors++;
				continue;
			}
			$ins = [
				$newPrimaryKey => $row->$primaryKey,
				$actorField => $row->actor_id,
			];
			foreach ( $extra as $to => $from ) {
				// The select aliased the field to its new-table name
				$ins[$to] = $row->$to;
			}
			$inserts[] = $ins;
		}

		// Only open a transaction when there is something to write, so that
		// affectedRows() is always read right after a real insert
		if ( $inserts ) {
			$this->beginTransaction( $dbw, __METHOD__ );
			$dbw->insert( $newTable, $inserts, __METHOD__ );
			$countUpdated += $dbw->affectedRows();
			$this->commitTransaction( $dbw, __METHOD__ );
		}

		// Calculate the "next" condition
		list( $n, $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $lastRow );
		$next = [ $n ];
		$this->output( "... $display\n" );
	}

	$this->output(
		"Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}
408
/**
 * Migrate actors in the log_search table.
 *
 * Runs two passes: first over rows with ls_field 'target_author_id', whose
 * ls_value is matched against actor_user; then over rows with ls_field
 * 'target_author_ip', whose ls_value is matched against actor_name and for
 * which missing actors are created. Matched rows are rewritten to ls_field
 * 'target_author_actor' with the actor ID as ls_value.
 *
 * @return int Number of errors
 */
protected function migrateLogSearch() {
	$this->output( "Beginning migration of log_search\n" );
	wfWaitForSlaves();

	$dbw = $this->getDB( DB_MASTER );

	list( $updatedById, $actorsById, $errorsById ) = $this->migrateLogSearchField(
		$dbw,
		'target_author_id',
		'ls_value = ' . $dbw->buildStringCast( 'actor_user' ),
		false,
		'No actor for row with',
		__METHOD__
	);
	list( $updatedByIp, $actorsByIp, $errorsByIp ) = $this->migrateLogSearchField(
		$dbw,
		'target_author_ip',
		'ls_value = actor_name',
		true,
		'Could not make actor for row with',
		__METHOD__
	);

	$countUpdated = $updatedById + $updatedByIp;
	$countActors = $actorsById + $actorsByIp;
	$countErrors = $errorsById + $errorsByIp;

	$this->output(
		"Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
		. "$countErrors error(s)\n"
	);
	return $countErrors;
}

/**
 * Migrate all log_search rows that carry one particular old ls_field value.
 *
 * @param IDatabase $dbw
 * @param string $oldField ls_field value to migrate away from
 * @param string $joinCond Condition joining ls_value to the 'actor' table
 * @param bool $createActors Whether to call addActorsForRows() for rows
 *  without a matching actor
 * @param string $errorPrefix Start of the message reported for rows that
 *  end up without an actor; the row's key is appended
 * @param string $fname Caller name passed to the database methods
 * @return int[] [ rows updated or deleted, actors inserted, errors ]
 */
private function migrateLogSearchField(
	IDatabase $dbw, $oldField, $joinCond, $createActors, $errorPrefix, $fname
) {
	$primaryKey = [ 'ls_field', 'ls_value' ];
	$complainedAboutUsers = [];
	$countUpdated = 0;
	$countActors = 0;
	$countErrors = 0;

	$next = '1=1';
	while ( true ) {
		// Fetch the rows needing update: one batch of distinct ls_value,
		// each joined to its actor (if any). ls_field is selected as a
		// quoted constant so the rows carry the full key.
		// NOTE(review): the outer query has no ORDER BY of its own; paging
		// relies on the join returning rows in the subquery's ls_value
		// order so that the last row holds the greatest value - confirm.
		$res = $dbw->select(
			[
				'ls' => $dbw->buildSelectSubquery(
					'log_search',
					'ls_value',
					[
						'ls_field' => $oldField,
						$next
					],
					$fname,
					[
						'DISTINCT',
						'ORDER BY' => [ 'ls_value' ],
						'LIMIT' => $this->mBatchSize,
					]
				),
				'actor'
			],
			[
				'ls_field' => $dbw->addQuotes( $oldField ),
				'ls_value',
				'actor_id'
			],
			[],
			$fname,
			[],
			[ 'actor' => [ 'LEFT JOIN', $joinCond ] ]
		);
		if ( !$res->numRows() ) {
			break;
		}

		// Remember the last fetched row before any filtering, so the paging
		// key advances past rows that are dropped as errors
		$rows = iterator_to_array( $res );
		$lastRow = end( $rows );
		if ( $createActors ) {
			// Insert new actors for rows that need one
			$countActors += $this->addActorsForRows(
				$dbw, 'ls_value', $rows, $complainedAboutUsers, $countErrors
			);
		}

		// Update the rows
		$del = [];
		foreach ( $rows as $row ) {
			if ( !$row->actor_id ) {
				list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
				$this->error( "$errorPrefix $display\n" );
				$countErrors++;
				continue;
			}
			$dbw->update(
				'log_search',
				[
					'ls_field' => 'target_author_actor',
					'ls_value' => $row->actor_id,
				],
				[
					'ls_field' => $row->ls_field,
					'ls_value' => $row->ls_value,
				],
				$fname,
				[ 'IGNORE' ]
			);
			$countUpdated += $dbw->affectedRows();
			$del[] = $row->ls_value;
		}
		if ( $del ) {
			// Remove whatever still has the old ls_field for these values
			// (presumably rows the IGNOREd update could not rewrite - confirm)
			$dbw->delete(
				'log_search', [ 'ls_field' => $oldField, 'ls_value' => $del ], $fname
			);
			$countUpdated += $dbw->affectedRows();
		}

		list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
		$this->output( "... $display\n" );
		wfWaitForSlaves();
	}

	return [ $countUpdated, $countActors, $countErrors ];
}
584 }
585
// Name the class to run, then include the file named by RUN_MAINTENANCE_IF_MAIN
// (presumably the runner that executes $maintClass when invoked directly - confirm)
$maintClass = MigrateActors::class;
require_once RUN_MAINTENANCE_IF_MAIN;