Merge "StringUtils: Add a utility for checking if a string is a valid regex"
[lhc/web/wiklou.git] / maintenance / includes / MigrateActors.php
1 <?php
2 /**
3 * Helper for migrating actors from pre-1.31 columns to the 'actor' table
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @ingroup Maintenance
22 */
23
24 use Wikimedia\Rdbms\IDatabase;
25
26 require_once __DIR__ . '/../Maintenance.php';
27
28 /**
29 * Maintenance script that migrates actors from pre-1.31 columns to the
30 * 'actor' table
31 *
32 * @ingroup Maintenance
33 */
34 class MigrateActors extends LoggedUpdateMaintenance {
35
36 protected $tables = null;
37
38 public function __construct() {
39 parent::__construct();
40 $this->addDescription( 'Migrates actors from pre-1.31 columns to the \'actor\' table' );
41 $this->addOption( 'tables', 'List of tables to process, comma-separated', false, true );
42 $this->setBatchSize( 100 );
43 }
44
45 protected function getUpdateKey() {
46 return __CLASS__;
47 }
48
49 protected function doTable( $table ) {
50 return $this->tables === null || in_array( $table, $this->tables, true );
51 }
52
53 protected function doDBUpdates() {
54 $tables = $this->getOption( 'tables' );
55 if ( $tables !== null ) {
56 $this->tables = explode( ',', $tables );
57 }
58
59 if ( $this->doTable( 'user' ) ) {
60 $this->output( "Creating actor entries for all registered users\n" );
61 $end = 0;
62 $dbw = $this->getDB( DB_MASTER );
63 $max = $dbw->selectField( 'user', 'MAX(user_id)', '', __METHOD__ );
64 $count = 0;
65 while ( $end < $max ) {
66 $start = $end + 1;
67 $end = min( $start + $this->mBatchSize, $max );
68 $this->output( "... $start - $end\n" );
69 $dbw->insertSelect(
70 'actor',
71 'user',
72 [ 'actor_user' => 'user_id', 'actor_name' => 'user_name' ],
73 [ "user_id >= $start", "user_id <= $end" ],
74 __METHOD__,
75 [ 'IGNORE' ],
76 [ 'ORDER BY' => [ 'user_id' ] ]
77 );
78 $count += $dbw->affectedRows();
79 wfWaitForSlaves();
80 }
81 $this->output( "Completed actor creation, added $count new actor(s)\n" );
82 } else {
83 $this->output( "Checking that actors exist for all registered users\n" );
84 $dbr = $this->getDB( DB_REPLICA, [ 'vslow' ] );
85 $anyMissing = $dbr->selectField(
86 [ 'user', 'actor' ],
87 '1',
88 [ 'actor_id' => null ],
89 __METHOD__,
90 [ 'LIMIT 1' ],
91 [ 'actor' => [ 'LEFT JOIN', 'actor_user = user_id' ] ]
92 );
93 if ( $anyMissing ) {
94 $this->error( 'Some users lack actors; run without --tables or include `user` in --tables.' );
95 return false;
96 }
97 $this->output( "Ok, continuing.\n" );
98 }
99
100 $errors = 0;
101 $errors += $this->migrateToTemp(
102 'revision', 'rev_id', [ 'revactor_timestamp' => 'rev_timestamp', 'revactor_page' => 'rev_page' ],
103 'rev_user', 'rev_user_text', 'revactor_rev', 'revactor_actor'
104 );
105 $errors += $this->migrate( 'archive', 'ar_id', 'ar_user', 'ar_user_text', 'ar_actor' );
106 $errors += $this->migrate( 'ipblocks', 'ipb_id', 'ipb_by', 'ipb_by_text', 'ipb_by_actor' );
107 $errors += $this->migrate( 'image', 'img_name', 'img_user', 'img_user_text', 'img_actor' );
108 $errors += $this->migrate(
109 'oldimage', [ 'oi_name', 'oi_timestamp' ], 'oi_user', 'oi_user_text', 'oi_actor'
110 );
111 $errors += $this->migrate( 'filearchive', 'fa_id', 'fa_user', 'fa_user_text', 'fa_actor' );
112 $errors += $this->migrate( 'recentchanges', 'rc_id', 'rc_user', 'rc_user_text', 'rc_actor' );
113 $errors += $this->migrate( 'logging', 'log_id', 'log_user', 'log_user_text', 'log_actor' );
114
115 $errors += $this->migrateLogSearch();
116
117 return $errors === 0;
118 }
119
120 /**
121 * Calculate a "next" condition and a display string
122 * @param IDatabase $dbw
123 * @param string[] $primaryKey Primary key of the table.
124 * @param object $row Database row
125 * @return array [ string $next, string $display ]
126 */
127 private function makeNextCond( $dbw, $primaryKey, $row ) {
128 $next = '';
129 $display = [];
130 for ( $i = count( $primaryKey ) - 1; $i >= 0; $i-- ) {
131 $field = $primaryKey[$i];
132 $display[] = $field . '=' . $row->$field;
133 $value = $dbw->addQuotes( $row->$field );
134 if ( $next === '' ) {
135 $next = "$field > $value";
136 } else {
137 $next = "$field > $value OR $field = $value AND ($next)";
138 }
139 }
140 $display = implode( ' ', array_reverse( $display ) );
141 return [ $next, $display ];
142 }
143
144 /**
145 * Make the subqueries for `actor_id`
146 * @param IDatabase $dbw
147 * @param string $userField User ID field name
148 * @param string $nameField User name field name
149 * @return string SQL fragment
150 */
151 private function makeActorIdSubquery( $dbw, $userField, $nameField ) {
152 $idSubquery = $dbw->buildSelectSubquery(
153 'actor',
154 'actor_id',
155 [ "$userField = actor_user" ],
156 __METHOD__
157 );
158 $nameSubquery = $dbw->buildSelectSubquery(
159 'actor',
160 'actor_id',
161 [ "$nameField = actor_name" ],
162 __METHOD__
163 );
164 return "CASE WHEN $userField = 0 OR $userField IS NULL THEN $nameSubquery ELSE $idSubquery END";
165 }
166
167 /**
168 * Add actors for anons in a set of rows
169 *
170 * @suppress SecurityCheck-SQLInjection The array_keys/array_map is too much for static analysis
171 * @param IDatabase $dbw
172 * @param string $nameField
173 * @param object[] &$rows
174 * @param array &$complainedAboutUsers
175 * @param int &$countErrors
176 * @return int Count of actors inserted
177 */
178 private function addActorsForRows(
179 IDatabase $dbw, $nameField, array &$rows, array &$complainedAboutUsers, &$countErrors
180 ) {
181 $needActors = [];
182 $countActors = 0;
183
184 $keep = [];
185 foreach ( $rows as $index => $row ) {
186 $keep[$index] = true;
187 if ( $row->actor_id === null ) {
188 // All registered users should have an actor_id already. So
189 // if we have a usable name here, it means they didn't run
190 // maintenance/cleanupUsersWithNoId.php
191 $name = $row->$nameField;
192 if ( User::isUsableName( $name ) ) {
193 if ( !isset( $complainedAboutUsers[$name] ) ) {
194 $complainedAboutUsers[$name] = true;
195 $this->error(
196 "User name \"$name\" is usable, cannot create an anonymous actor for it."
197 . " Run maintenance/cleanupUsersWithNoId.php to fix this situation.\n"
198 );
199 }
200 unset( $keep[$index] );
201 $countErrors++;
202 } else {
203 $needActors[$name] = 0;
204 }
205 }
206 }
207 $rows = array_intersect_key( $rows, $keep );
208
209 if ( $needActors ) {
210 $dbw->insert(
211 'actor',
212 array_map( function ( $v ) {
213 return [
214 'actor_name' => $v,
215 ];
216 }, array_keys( $needActors ) ),
217 __METHOD__
218 );
219 $countActors += $dbw->affectedRows();
220
221 $res = $dbw->select(
222 'actor',
223 [ 'actor_id', 'actor_name' ],
224 [ 'actor_name' => array_keys( $needActors ) ],
225 __METHOD__
226 );
227 foreach ( $res as $row ) {
228 $needActors[$row->actor_name] = $row->actor_id;
229 }
230 foreach ( $rows as $row ) {
231 if ( $row->actor_id === null ) {
232 $row->actor_id = $needActors[$row->$nameField];
233 }
234 }
235 }
236
237 return $countActors;
238 }
239
240 /**
241 * Migrate actors in a table.
242 *
243 * Assumes any row with the actor field non-zero have already been migrated.
244 * Blanks the name field when migrating.
245 *
246 * @param string $table Table to migrate
247 * @param string|string[] $primaryKey Primary key of the table.
248 * @param string $userField User ID field name
249 * @param string $nameField User name field name
250 * @param string $actorField Actor field name
251 * @return int Number of errors
252 */
253 protected function migrate( $table, $primaryKey, $userField, $nameField, $actorField ) {
254 if ( !$this->doTable( $table ) ) {
255 $this->output( "Skipping $table, not included in --tables\n" );
256 return 0;
257 }
258
259 $dbw = $this->getDB( DB_MASTER );
260 if ( !$dbw->fieldExists( $table, $userField, __METHOD__ ) ) {
261 $this->output( "No need to migrate $table.$userField, field does not exist\n" );
262 return 0;
263 }
264
265 $complainedAboutUsers = [];
266
267 $primaryKey = (array)$primaryKey;
268 $pkFilter = array_flip( $primaryKey );
269 $this->output(
270 "Beginning migration of $table.$userField and $table.$nameField to $table.$actorField\n"
271 );
272 wfWaitForSlaves();
273
274 $actorIdSubquery = $this->makeActorIdSubquery( $dbw, $userField, $nameField );
275 $next = '1=1';
276 $countUpdated = 0;
277 $countActors = 0;
278 $countErrors = 0;
279 while ( true ) {
280 // Fetch the rows needing update
281 $res = $dbw->select(
282 $table,
283 array_merge( $primaryKey, [ $userField, $nameField, 'actor_id' => $actorIdSubquery ] ),
284 [
285 $actorField => 0,
286 $next,
287 ],
288 __METHOD__,
289 [
290 'ORDER BY' => $primaryKey,
291 'LIMIT' => $this->mBatchSize,
292 ]
293 );
294 if ( !$res->numRows() ) {
295 break;
296 }
297
298 // Insert new actors for rows that need one
299 $rows = iterator_to_array( $res );
300 $lastRow = end( $rows );
301 $countActors += $this->addActorsForRows(
302 $dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
303 );
304
305 // Update the existing rows
306 foreach ( $rows as $row ) {
307 if ( !$row->actor_id ) {
308 list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
309 $this->error(
310 "Could not make actor for row with $display "
311 . "$userField={$row->$userField} $nameField={$row->$nameField}\n"
312 );
313 $countErrors++;
314 continue;
315 }
316 $dbw->update(
317 $table,
318 [
319 $actorField => $row->actor_id,
320 ],
321 array_intersect_key( (array)$row, $pkFilter ) + [
322 $actorField => 0
323 ],
324 __METHOD__
325 );
326 $countUpdated += $dbw->affectedRows();
327 }
328
329 list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
330 $this->output( "... $display\n" );
331 wfWaitForSlaves();
332 }
333
334 $this->output(
335 "Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
336 . "$countErrors error(s)\n"
337 );
338 return $countErrors;
339 }
340
341 /**
342 * Migrate actors in a table to a temporary table.
343 *
344 * Assumes the new table is named "{$table}_actor_temp", and it has two
345 * columns, in order, being the primary key of the original table and the
346 * actor ID field.
347 * Blanks the name field when migrating.
348 *
349 * @param string $table Table to migrate
350 * @param string $primaryKey Primary key of the table.
351 * @param array $extra Extra fields to copy
352 * @param string $userField User ID field name
353 * @param string $nameField User name field name
354 * @param string $newPrimaryKey Primary key of the new table.
355 * @param string $actorField Actor field name
356 * @return int Number of errors
357 */
358 protected function migrateToTemp(
359 $table, $primaryKey, $extra, $userField, $nameField, $newPrimaryKey, $actorField
360 ) {
361 if ( !$this->doTable( $table ) ) {
362 $this->output( "Skipping $table, not included in --tables\n" );
363 return 0;
364 }
365
366 $dbw = $this->getDB( DB_MASTER );
367 if ( !$dbw->fieldExists( $table, $userField, __METHOD__ ) ) {
368 $this->output( "No need to migrate $table.$userField, field does not exist\n" );
369 return 0;
370 }
371
372 $complainedAboutUsers = [];
373
374 $newTable = $table . '_actor_temp';
375 $this->output(
376 "Beginning migration of $table.$userField and $table.$nameField to $newTable.$actorField\n"
377 );
378 wfWaitForSlaves();
379
380 $actorIdSubquery = $this->makeActorIdSubquery( $dbw, $userField, $nameField );
381 $next = [];
382 $countUpdated = 0;
383 $countActors = 0;
384 $countErrors = 0;
385 while ( true ) {
386 // Fetch the rows needing update
387 $res = $dbw->select(
388 [ $table, $newTable ],
389 [ $primaryKey, $userField, $nameField, 'actor_id' => $actorIdSubquery ] + $extra,
390 [ $newPrimaryKey => null ] + $next,
391 __METHOD__,
392 [
393 'ORDER BY' => $primaryKey,
394 'LIMIT' => $this->mBatchSize,
395 ],
396 [
397 $newTable => [ 'LEFT JOIN', "{$primaryKey}={$newPrimaryKey}" ],
398 ]
399 );
400 if ( !$res->numRows() ) {
401 break;
402 }
403
404 // Insert new actors for rows that need one
405 $rows = iterator_to_array( $res );
406 $lastRow = end( $rows );
407 $countActors += $this->addActorsForRows(
408 $dbw, $nameField, $rows, $complainedAboutUsers, $countErrors
409 );
410
411 // Update rows
412 if ( $rows ) {
413 $inserts = [];
414 $updates = [];
415 foreach ( $rows as $row ) {
416 if ( !$row->actor_id ) {
417 list( , $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $row );
418 $this->error(
419 "Could not make actor for row with $display "
420 . "$userField={$row->$userField} $nameField={$row->$nameField}\n"
421 );
422 $countErrors++;
423 continue;
424 }
425 $ins = [
426 $newPrimaryKey => $row->$primaryKey,
427 $actorField => $row->actor_id,
428 ];
429 foreach ( $extra as $to => $from ) {
430 $ins[$to] = $row->$to; // It's aliased
431 }
432 $inserts[] = $ins;
433 $updates[] = $row->$primaryKey;
434 }
435 $this->beginTransaction( $dbw, __METHOD__ );
436 $dbw->insert( $newTable, $inserts, __METHOD__ );
437 $countUpdated += $dbw->affectedRows();
438 $this->commitTransaction( $dbw, __METHOD__ );
439 }
440
441 // Calculate the "next" condition
442 list( $n, $display ) = $this->makeNextCond( $dbw, [ $primaryKey ], $lastRow );
443 $next = [ $n ];
444 $this->output( "... $display\n" );
445 }
446
447 $this->output(
448 "Completed migration, updated $countUpdated row(s) with $countActors new actor(s), "
449 . "$countErrors error(s)\n"
450 );
451 return $countErrors;
452 }
453
454 /**
455 * Migrate actors in the log_search table.
456 * @return int Number of errors
457 */
458 protected function migrateLogSearch() {
459 if ( !$this->doTable( 'log_search' ) ) {
460 $this->output( "Skipping log_search, not included in --tables\n" );
461 return 0;
462 }
463
464 $complainedAboutUsers = [];
465
466 $primaryKey = [ 'ls_value', 'ls_log_id' ];
467 $this->output( "Beginning migration of log_search\n" );
468 wfWaitForSlaves();
469
470 $dbw = $this->getDB( DB_MASTER );
471 $countInserted = 0;
472 $countActors = 0;
473 $countErrors = 0;
474
475 $anyBad = $dbw->selectField(
476 'log_search',
477 1,
478 [ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
479 __METHOD__,
480 [ 'LIMIT' => 1 ]
481 );
482 if ( $anyBad ) {
483 $this->output( "... Deleting bogus rows due to T215525\n" );
484 $dbw->delete(
485 'log_search',
486 [ 'ls_field' => 'target_author_actor', 'ls_value' => '' ],
487 __METHOD__
488 );
489 $ct = $dbw->affectedRows();
490 $this->output( "... Deleted $ct bogus row(s) from T215525\n" );
491 wfWaitForSlaves();
492 }
493
494 $next = '1=1';
495 while ( true ) {
496 // Fetch the rows needing update
497 $res = $dbw->select(
498 [ 'log_search', 'actor' ],
499 [ 'ls_value', 'ls_log_id', 'actor_id' ],
500 [
501 'ls_field' => 'target_author_id',
502 $next
503 ],
504 __METHOD__,
505 [
506 'ORDER BY' => $primaryKey,
507 'LIMIT' => $this->mBatchSize,
508 ],
509 [ 'actor' => [ 'LEFT JOIN', 'actor_user = ' . $dbw->buildIntegerCast( 'ls_value' ) ] ]
510 );
511 if ( !$res->numRows() ) {
512 break;
513 }
514
515 // Insert a 'target_author_actor' for each 'target_author_id'
516 $ins = [];
517 foreach ( $res as $row ) {
518 $lastRow = $row;
519 if ( !$row->actor_id ) {
520 list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
521 $this->error( "No actor for target_author_id row with $display\n" );
522 $countErrors++;
523 continue;
524 }
525 $ins[] = [
526 'ls_field' => 'target_author_actor',
527 'ls_value' => $row->actor_id,
528 'ls_log_id' => $row->ls_log_id,
529 ];
530 }
531 $dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
532 $countInserted += $dbw->affectedRows();
533
534 list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
535 $this->output( "... target_author_id, $display\n" );
536 wfWaitForSlaves();
537 }
538
539 $next = '1=1';
540 while ( true ) {
541 // Fetch the rows needing update
542 $res = $dbw->select(
543 [ 'log_search', 'actor' ],
544 [ 'ls_value', 'ls_log_id', 'actor_id' ],
545 [
546 'ls_field' => 'target_author_ip',
547 $next
548 ],
549 __METHOD__,
550 [
551 'ORDER BY' => $primaryKey,
552 'LIMIT' => $this->mBatchSize,
553 ],
554 [ 'actor' => [ 'LEFT JOIN', 'ls_value = actor_name' ] ]
555 );
556 if ( !$res->numRows() ) {
557 break;
558 }
559
560 // Insert new actors for rows that need one
561 $rows = iterator_to_array( $res );
562 $lastRow = end( $rows );
563 $countActors += $this->addActorsForRows(
564 $dbw, 'ls_value', $rows, $complainedAboutUsers, $countErrors
565 );
566
567 // Insert a 'target_author_actor' for each 'target_author_ip'
568 $ins = [];
569 foreach ( $rows as $row ) {
570 if ( !$row->actor_id ) {
571 list( , $display ) = $this->makeNextCond( $dbw, $primaryKey, $row );
572 $this->error( "Could not make actor for target_author_ip row with $display\n" );
573 $countErrors++;
574 continue;
575 }
576 $ins[] = [
577 'ls_field' => 'target_author_actor',
578 'ls_value' => $row->actor_id,
579 'ls_log_id' => $row->ls_log_id,
580 ];
581 }
582 $dbw->insert( 'log_search', $ins, __METHOD__, [ 'IGNORE' ] );
583 $countInserted += $dbw->affectedRows();
584
585 list( $next, $display ) = $this->makeNextCond( $dbw, $primaryKey, $lastRow );
586 $this->output( "... target_author_ip, $display\n" );
587 wfWaitForSlaves();
588 }
589
590 $this->output(
591 "Completed migration, inserted $countInserted row(s) with $countActors new actor(s), "
592 . "$countErrors error(s)\n"
593 );
594 return $countErrors;
595 }
596 }