Merge "Improve docs for Title::getInternalURL/getCanonicalURL"
[lhc/web/wiklou.git] / maintenance / includes / MigrateActors.php
1 <?php
2 /**
3 * Helper for migrating actors from pre-1.31 columns to the 'actor' table
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24 use Wikimedia\Rdbms\IDatabase;
25
26 require_once __DIR__ . '/../Maintenance.php';
27
28 /**
29 * Maintenance script that migrates actors from pre-1.31 columns to the
30 * 'actor' table
31 *
32 * @ingroup Maintenance
33 */
34 class MigrateActors extends LoggedUpdateMaintenance {
35
36 protected $tables = null;
37
38 public function __construct() {
39 parent::__construct();
40 $this->addDescription( 'Migrates actors from pre-1.31 columns to the \'actor\' table' );
41 $this->addOption( 'tables', 'List of tables to process, comma-separated', false, true );
42 $this->setBatchSize( 100 );
43 }
44
45 protected function getUpdateKey() {
46 return __CLASS__;
47 }
48
49 protected function doTable( $table ) {
50 return $this->tables === null || in_array( $table, $this->tables, true );
51 }
52
53 protected function doDBUpdates() {
54 global $wgActorTableSchemaMigrationStage;
55
56 if ( !( $wgActorTableSchemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) ) {
57 $this->output(
58 "...cannot update while \$wgActorTableSchemaMigrationStage lacks SCHEMA_COMPAT_WRITE_NEW\n"
59 );
60 return false;
61 }
62
63 $tables = $this->getOption( 'tables' );
64 if ( $tables !== null ) {
65 $this->tables = explode( ',', $tables );
66 }
67
68 if ( $this->doTable( 'user' ) ) {
69 $this->output( "Creating actor entries for all registered users\n" );
70 $end = 0;
71 $dbw = $this->getDB( DB_MASTER );
72 $max = $dbw->selectField( 'user', 'MAX(user_id)', '', __METHOD__ );
73 $count = 0;
74 while ( $end < $max ) {
75 $start = $end + 1;
76 $end = min( $start + $this->mBatchSize, $max );
77 $this->output( "... $start - $end\n" );
78 $dbw->insertSelect(
79 'actor',
80 'user',
81 [ 'actor_user' => 'user_id', 'actor_name' => 'user_name' ],
82 [ "user_id >= $start", "user_id <= $end" ],
83 __METHOD__,
84 [ 'IGNORE' ],
85 [ 'ORDER BY' => [ 'user_id' ] ]
86 );
87 $count += $dbw->affectedRows();
88 wfWaitForSlaves();
89 }
90 $this->output( "Completed actor creation, added $count new actor(s)\n" );
91 } else {
92 $this->output( "Checking that actors exist for all registered users\n" );
93 $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
94 $anyMissing = $dbr->selectField(
95 [ 'user', 'actor' ],
96 '1',
97 [ 'actor_id' => null ],
98 __METHOD__,
99 [ 'LIMIT 1' ],
100 [ 'actor' => [ 'LEFT JOIN', 'actor_user = user_id' ] ]
101 );
102 if ( $anyMissing ) {
103 $this->error( 'Some users lack actors; run without --tables or include `user` in --tables.' );
104 return false;
105 }
106 $this->output( "Ok, continuing.\n" );
107 }
108
109 $errors = 0;
110 $errors += $this->migrateToTemp(
111 'revision', 'rev_id', [ 'revactor_timestamp' => 'rev_timestamp', 'revactor_page' => 'rev_page' ],
112 'rev_user', 'rev_user_text', 'revactor_rev', 'revactor_actor'
113 );
114 $errors += $this->migrate( 'archive', 'ar_id', 'ar_user', 'ar_user_text', 'ar_actor' );
115 $errors += $this->migrate( 'ipblocks', 'ipb_id', 'ipb_by', 'ipb_by_text', 'ipb_by_actor' );
116 $errors += $this->migrate( 'image', 'img_name', 'img_user', 'img_user_text', 'img_actor' );
117 $errors += $this->migrate(
118 'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_user', 'oi_user_text', 'oi_actor'
119 );
120 $errors += $this->migrate( 'filearchive', 'fa_id', 'fa_user', 'fa_user_text', 'fa_actor' );
121 $errors += $this->migrate( 'recentchanges', 'rc_id', 'rc_user', 'rc_user_text', 'rc_actor' );
122 $errors += $this->migrate( 'logging', 'log_id', 'log_user', 'log_user_text', 'log_actor' );
123
124 $errors += $this->migrateLogSearch();
125
126 return $errors === 0;
127 }
128
129 /**
130 * Calculate a "next" condition and a display string
131 * @param IDatabase $dbw
132 * @param string[] $primaryKey Primary key of the table.
133 * @param object $row Database row
134 * @return array [ string $next, string $display ]
135 */
136 private function makeNextCond( $dbw, $primaryKey, $row ) {
137 $next = '';
138 $display = [];
139 for ( $i = count( $primaryKey ) - 1; $i >= 0; $i-- ) {
140 $field = $primaryKey[$i];
141 $display[] = $field . '=' . $row->$field;
142 $value = $dbw->addQuotes( $row->$field );
143 if ( $next === '' ) {
144 $next = "$field > $value";
145 } else {
146 $next = "$field > $value OR $field = $value AND ($next)";
147 }
148 }
149 $display = implode( ' ', array_reverse( $display ) );
150 return [ $next, $display ];
151 }
152
153 /**
154 * Make the subqueries for `actor_id`
155 * @param IDatabase $dbw
156 * @param string $userField User ID field name
157 * @param string $nameField User name field name
158 * @return string SQL fragment
159 */
160 private function makeActorIdSubquery( $dbw, $userField, $nameField ) {
161 $idSubquery = $dbw->buildSelectSubquery(
162 'actor',
163 'actor_id',
164 [ "$userField = actor_user" ],
165 __METHOD__
166 );
167 $nameSubquery = $dbw->buildSelectSubquery(
168 'actor',
169 'actor_id',
170 [ "$nameField = actor_name" ],
171 __METHOD__
172 );
173 return "CASE WHEN $userField = 0 OR $userField IS NULL THEN $nameSubquery ELSE $idSubquery END";
174 }
175
176 /**
177 * Add actors for anons in a set of rows
178 *
179 * @suppress SecurityCheck-SQLInjection The array_keys/array_map is too much for static analysis
180 * @param IDatabase $dbw
181 * @param string $nameField
182 * @param object[] &$rows
183 * @param array &$complainedAboutUsers
184 * @param int &$countErrors
185 * @return int Count of actors inserted
186 */
187 private function addActorsForRows(
188 IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors
189 ) {
190 $needActors = [];
191 $countActors = 0;
192
193 $keep = [];
194 foreach ( $rows as $index => $row ) {
195 $keep[$index] = true;
196 if ( $row->actor_id === null ) {
197 // All registered users should have an actor_id already. So
198 // if we have a usable name here, it means they didn't run
199 // maintenance/cleanupUsersWithNoId.php
200 $name = $row->$nameField;
201 if ( User::isUsableName( $name ) ) {
202 if ( !isset( $complainedAboutUsers[$name] ) ) {
203 $complainedAboutUsers[$name] = true;
204 $this->error(
205 "User name \"$name\" is usable, cannot create an anonymous actor for it."
206 . " Run maintenance/cleanupUsersWithNoId.php to fix this situation.\n"
207 );
208 }
209 unset( $keep[$index] );
210 $countErrors++;
211 } else {
212 $needActors[$name] = 0;
213 }
214 }
215 }
216 $rows = array_intersect_key( $rows, $keep );
217
218 if ( $needActors ) {
219 $dbw->insert(
220 'actor',
221 array_map( function ( $v ) {
222 return [
223 'actor_name' => $v,
224 ];
225 }, array_keys( $needActors ) ),
226 __METHOD__
227 );
228 $countActors += $dbw->affectedRows();
229
230 $res = $dbw->select(
231 'actor',
232 [ 'actor_id', 'actor_name' ],
233 [ 'actor_name' => array_keys( $needActors ) ],
234 __METHOD__
235 );
236 foreach ( $res as $row ) {
237 $needActors[$row->actor_name] = $row->actor_id;
238 }
239 foreach ( $rows as $row ) {
240 if ( $row->actor_id === null ) {
241 $row->actor_id = $needActors[$row->$nameField];
242 }
243 }
244 }
245
246 return $countActors;
247 }
248
249 /**
250 * Migrate actors in a table.
251 *
252 * Assumes any row with the actor field non-zero have already been migrated.
253 * Blanks the name field when migrating.
254 *
255 * @param string $table Table to migrate
256 * @param string|string[] $primaryKey Primary key of the table.
257 * @param string $userField User ID field name
258 * @param string $nameField User name field name
259 * @param string $actorField Actor field name
260 * @return int Number of errors
261 */
262 protected function migrate( $table, $primaryKey, $userField, $nameField, $actorField ) {
263 if ( !$this->doTable( $table ) ) {
264 $this->output( "Skipping $table, not included in --tables\n" );
265 return 0;
266 }
267
268 $complainedAboutUsers = [];
269
270 $primaryKey = (array)$primaryKey;
271 $pkFilter = array_flip( $primaryKey );
272 $this->output(
273 "Beginning migration of $table.$userField and $table.$nameField to $table.$actorField\n"
274 );
275 wfWaitForSlaves();
276
277 $dbw = $this->getDB( DB_MASTER );
278 $actorIdSubquery = $this->makeActorIdSubquery( $dbw, $userField, $nameField );
279 $next = '1=1';
280 $countUpdated = 0;
281 $countActors = 0;
282 $countErrors = 0;
283 while ( true ) {
284 // Fetch the rows needing update
285 $res = $dbw->select(
286 $table,
287 array_merge( $primaryKey, [ $userField, $nameField, 'actor_id' => $actorIdSubquery ] ),
288 [
289 $actorField => 0,
290 $next,
291 ],
292 __METHOD__,
293 [
294 'ORDER BY' => $primaryKey,
295 'LIMIT' => $this->mBatchSize,
296 ]
297 );
298 if ( !$res->numRows() ) {
299 break;
300 }
301
302 // Insert new actors for rows that need one
303 $rows = iterator_to_array( $res );
304 $lastRow = end( $rows );
305 $countActors += $this->addActorsForRows(
306 $dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
307 );
308
309 // Update the existing rows
310 foreach ( $rows as $row ) {
311 if ( !$row->actor_id ) {
312 list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
313 $this->error(
314 "Could not make actor for row with $display "
315 . "$userField={$row->$userField} $nameField={$row->$nameField}\n"
316 );
317 $countErrors++;
318 continue;
319 }
320 $dbw->update(
321 $table,
322 [
323 $actorField => $row->actor_id,
324 ],
325 array_intersect_key( (array)$row, $pkFilter ) + [
326 $actorField => 0
327 ],
328 __METHOD__
329 );
330 $countUpdated += $dbw->affectedRows();
331 }
332
333 list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
334 $this->output( "... $display\n" );
335 wfWaitForSlaves();
336 }
337
338 $this->output(
339 "Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
340 . "$countErrors error(s)\n"
341 );
342 return $countErrors;
343 }
344
345 /**
346 * Migrate actors in a table to a temporary table.
347 *
348 * Assumes the new table is named "{$table}_actor_temp", and it has two
349 * columns, in order, being the primary key of the original table and the
350 * actor ID field.
351 * Blanks the name field when migrating.
352 *
353 * @param string $table Table to migrate
354 * @param string $primaryKey Primary key of the table.
355 * @param array $extra Extra fields to copy
356 * @param string $userField User ID field name
357 * @param string $nameField User name field name
358 * @param string $newPrimaryKey Primary key of the new table.
359 * @param string $actorField Actor field name
360 */
361 protected function migrateToTemp(
362 $table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField
363 ) {
364 if ( !$this->doTable( $table ) ) {
365 $this->output( "Skipping $table, not included in --tables\n" );
366 return 0;
367 }
368
369 $complainedAboutUsers = [];
370
371 $newTable = $table . '_actor_temp';
372 $this->output(
373 "Beginning migration of $table.$userField and $table.$nameField to $newTable.$actorField\n"
374 );
375 wfWaitForSlaves();
376
377 $dbw = $this->getDB( DB_MASTER );
378 $actorIdSubquery = $this->makeActorIdSubquery( $dbw, $userField, $nameField );
379 $next = [];
380 $countUpdated = 0;
381 $countActors = 0;
382 $countErrors = 0;
383 while ( true ) {
384 // Fetch the rows needing update
385 $res = $dbw->select(
386 [ $table, $newTable ],
387 [ $primaryKey, $userField, $nameField, 'actor_id' => $actorIdSubquery ] + $extra,
388 [ $newPrimaryKey => null ] + $next,
389 __METHOD__,
390 [
391 'ORDER BY' => $primaryKey,
392 'LIMIT' => $this->mBatchSize,
393 ],
394 [
395 $newTable => [ 'LEFT JOIN', "{$primaryKey}={$newPrimaryKey}" ],
396 ]
397 );
398 if ( !$res->numRows() ) {
399 break;
400 }
401
402 // Insert new actors for rows that need one
403 $rows = iterator_to_array( $res );
404 $lastRow = end( $rows );
405 $countActors += $this->addActorsForRows(
406 $dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
407 );
408
409 // Update rows
410 if ( $rows ) {
411 $inserts = [];
412 $updates = [];
413 foreach ( $rows as $row ) {
414 if ( !$row->actor_id ) {
415 list( , $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $row );
416 $this->error(
417 "Could not make actor for row with $display "
418 . "$userField={$row->$userField} $nameField={$row->$nameField}\n"
419 );
420 $countErrors++;
421 continue;
422 }
423 $ins = [
424 $newPrimaryKey => $row->$primaryKey,
425 $actorField => $row->actor_id,
426 ];
427 foreach ( $extra as $to => $from ) {
428 $ins[$to] = $row->$to; // It's aliased
429 }
430 $inserts[] = $ins;
431 $updates[] = $row->$primaryKey;
432 }
433 $this->beginTransaction( $dbw, __METHOD__ );
434 $dbw->insert( $newTable, $inserts, __METHOD__ );
435 $countUpdated += $dbw->affectedRows();
436 $this->commitTransaction( $dbw, __METHOD__ );
437 }
438
439 // Calculate the "next" condition
440 list( $n, $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $lastRow );
441 $next = [ $n ];
442 $this->output( "... $display\n" );
443 }
444
445 $this->output(
446 "Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
447 . "$countErrors error(s)\n"
448 );
449 return $countErrors;
450 }
451
452 /**
453 * Migrate actors in the log_search table.
454 * @return int Number of errors
455 */
456 protected function migrateLogSearch() {
457 if ( !$this->doTable( 'log_search' ) ) {
458 $this->output( "Skipping log_search, not included in --tables\n" );
459 return 0;
460 }
461
462 $complainedAboutUsers = [];
463
464 $primaryKey = [ 'ls_value', 'ls_log_id' ];
465 $this->output( "Beginning migration of log_search\n" );
466 wfWaitForSlaves();
467
468 $dbw = $this->getDB( DB_MASTER );
469 $countInserted = 0;
470 $countActors = 0;
471 $countErrors = 0;
472
473 $anyBad = $dbw->selectField(
474 'log_search',
475 1,
476 [ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
477 __METHOD__,
478 [ 'LIMIT' => 1 ]
479 );
480 if ( $anyBad ) {
481 $this->output( "... Deleting bogus rows due to T215525\n" );
482 $dbw->delete(
483 'log_search',
484 [ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
485 __METHOD__
486 );
487 $ct = $dbw->affectedRows();
488 $this->output( "... Deleted $ct bogus row(s) from T215525\n" );
489 wfWaitForSlaves();
490 }
491
492 $next = '1=1';
493 while ( true ) {
494 // Fetch the rows needing update
495 $res = $dbw->select(
496 [ 'log_search', 'actor' ],
497 [ 'ls_value', 'ls_log_id', 'actor_id' ],
498 [
499 'ls_field' => 'target_author_id',
500 $next
501 ],
502 __METHOD__,
503 [
504 'ORDER BY' => $primaryKey,
505 'LIMIT' => $this->mBatchSize,
506 ],
507 [ 'actor' => [ 'LEFT JOIN', 'actor_user = ' . $dbw->buildIntegerCast( 'ls_value' ) ] ]
508 );
509 if ( !$res->numRows() ) {
510 break;
511 }
512
513 // Insert a 'target_author_actor' for each 'target_author_id'
514 $ins = [];
515 foreach ( $res as $row ) {
516 $lastRow = $row;
517 if ( !$row->actor_id ) {
518 list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
519 $this->error( "No actor for target_author_id row with $display\n" );
520 $countErrors++;
521 continue;
522 }
523 $ins[] = [
524 'ls_field' => 'target_author_actor',
525 'ls_value' => $row->actor_id,
526 'ls_log_id' => $row->ls_log_id,
527 ];
528 }
529 $dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
530 $countInserted += $dbw->affectedRows();
531
532 list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
533 $this->output( "... target_author_id, $display\n" );
534 wfWaitForSlaves();
535 }
536
537 $next = '1=1';
538 while ( true ) {
539 // Fetch the rows needing update
540 $res = $dbw->select(
541 [ 'log_search', 'actor' ],
542 [ 'ls_value', 'ls_log_id', 'actor_id' ],
543 [
544 'ls_field' => 'target_author_ip',
545 $next
546 ],
547 __METHOD__,
548 [
549 'ORDER BY' => $primaryKey,
550 'LIMIT' => $this->mBatchSize,
551 ],
552 [ 'actor' => [ 'LEFT JOIN', 'ls_value = actor_name' ] ]
553 );
554 if ( !$res->numRows() ) {
555 break;
556 }
557
558 // Insert new actors for rows that need one
559 $rows = iterator_to_array( $res );
560 $lastRow = end( $rows );
561 $countActors += $this->addActorsForRows(
562 $dbw, 'ls_value', $rows, $complainedAboutUsers, $countErrors
563 );
564
565 // Insert a 'target_author_actor' for each 'target_author_ip'
566 $ins = [];
567 foreach ( $rows as $row ) {
568 if ( !$row->actor_id ) {
569 list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
570 $this->error( "Could not make actor for target_author_ip row with $display\n" );
571 $countErrors++;
572 continue;
573 }
574 $ins[] = [
575 'ls_field' => 'target_author_actor',
576 'ls_value' => $row->actor_id,
577 'ls_log_id' => $row->ls_log_id,
578 ];
579 }
580 $dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
581 $countInserted += $dbw->affectedRows();
582
583 list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
584 $this->output( "... target_author_ip, $display\n" );
585 wfWaitForSlaves();
586 }
587
588 $this->output(
589 "Completed migration, inserted $countInserted row(s) with $countActors new actor(s), "
590 . "$countErrors error(s)\n"
591 );
592 return $countErrors;
593 }
594 }