Merge "Title: Title::getSubpage should not lose the interwiki prefix"
[lhc/web/wiklou.git] / maintenance / uppercaseTitlesForUnicodeTransition.php
1 <?php
2 /**
3 * Obligatory redundant license notice. Exception to the GPL's "keep intact all
4 * the notices" clause with respect to this notice is hereby granted.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Maintenance
23 */
24
25 use MediaWiki\MediaWikiServices;
26 use Wikimedia\Rdbms\IDatabase;
27
28 require_once __DIR__ . '/Maintenance.php';
29
30 /**
31 * Maintenance script to rename titles affected by changes to Unicode (or
32 * otherwise to Language::ucfirst).
33 *
34 * @ingroup Maintenance
35 */
36 class UppercaseTitlesForUnicodeTransition extends Maintenance {
37
/**
 * Whether changes are really made; taken from --run in execute().
 * When false the script only reports what it would do.
 * @var bool
 */
private $run = false;

/**
 * Character map loaded from the --charmap file, reduced in execute() to
 * entries whose key is a single character that differs from its value.
 * @var array
 */
private $charmap = [];

/**
 * System user passed to MovePage::move(); only set when --run is given.
 * @var User
 */
private $user;

/**
 * Reason passed to MovePage::move(); can be overridden with --reason.
 * @var string
 */
private $reason = 'Uppercasing title for Unicode upgrade';

/**
 * Change tags passed to MovePage::move(); filled from --tag.
 * @var string[]
 */
private $tags = [];

/**
 * Cache used by isUserPage(): base title => whether a matching row
 * exists in the user table.
 * @var array
 */
private $seenUsers = [];

/**
 * Namespace list computed on first use by getNamespaces().
 * @var array|null
 */
private $namespaces = null;

/**
 * Text part of --prefix, used when the target title is already taken.
 * @var string|null
 */
private $prefix = null;

/**
 * Value of --suffix, used when the target title is already taken.
 * @var string|null
 */
private $suffix = null;

/**
 * Namespace that --prefix resolved to.
 * @var int|null
 */
private $prefixNs = null;

/**
 * Tables named by --tables, or null to process every table.
 * @var string[]|null
 */
private $tables = null;
67
/**
 * Register the script description, its command-line options and the
 * default batch size.
 */
public function __construct() {
	parent::__construct();

	$description = "Rename titles when changing behavior of Language::ucfirst().\n\n"
		. 'This script skips User and User_talk pages for registered users, as renaming of users '
		. 'is too complex to try to implement here. Use something like Extension:Renameuser to '
		. 'clean those up; this script can provide a list of user names affected.';
	$this->addDescription( $description );

	// One entry per addOption() call; each entry is that call's argument
	// list, and the entries are registered in the order given here.
	$options = [
		[
			'charmap',
			'Character map generated by maintenance/language/generateUcfirstOverrides.php',
			true,
			true,
		],
		[
			'user',
			'System user to use to do the renames. Default is "Maintenance script".',
			false,
			true,
		],
		[
			'steal',
			'If the username specified by --user exists, specify this to force conversion to a system user.',
		],
		[
			'run',
			'If not specified, the script will not actually perform any moves (i.e. it will dry-run).',
		],
		[ 'prefix', 'When the new title already exists, add this prefix.', false, true ],
		[ 'suffix', 'When the new title already exists, add this suffix.', false, true ],
		[ 'reason', 'Reason to use when moving pages.', false, true ],
		[ 'tag', 'Change tag to apply when moving pages.', false, true ],
		[ 'tables', 'Comma-separated list of database tables to process.', false, true ],
		[ 'userlist', 'Filename to which to output usernames needing rename.', false, true ],
	];
	foreach ( $options as $optionArgs ) {
		$this->addOption( ...$optionArgs );
	}

	$this->setBatchSize( 1000 );
}
105
/**
 * Script entry point: validate the options, load the character map and
 * then process each table in turn, followed by the user list.
 */
public function execute() {
	$this->run = $this->getOption( 'run', false );

	if ( $this->run ) {
		// The system user is only looked up when moves will really be performed.
		$username = $this->getOption( 'user', 'Maintenance script' );
		$steal = $this->getOption( 'steal', false );
		$this->user = User::newSystemUser( $username, [ 'steal' => $steal ] );
		if ( !$this->user ) {
			$user = User::newFromName( $username );
			if ( !$steal && $user && $user->isLoggedIn() ) {
				$this->fatalError( "User $username already exists.\n"
					. "Use --steal if you really want to steal it from the human who currently owns it."
				);
			}
			$this->fatalError( "Could not obtain system user $username." );
		}
	}

	$tables = $this->getOption( 'tables' );
	if ( $tables !== null ) {
		// Trim each entry so that "page, image" selects the same tables as
		// "page,image"; processTable() compares the names strictly.
		$this->tables = array_map( 'trim', explode( ',', $tables ) );
	}

	$prefix = $this->getOption( 'prefix' );
	if ( $prefix !== null ) {
		// Parse the prefix with a dummy 'X' appended so that the namespace
		// part and the text part can be told apart.
		$title = Title::newFromText( $prefix . 'X' );
		if ( !$title || substr( $title->getDBkey(), -1 ) !== 'X' ) {
			$this->fatalError( 'Invalid --prefix.' );
		}
		if ( $title->getNamespace() <= NS_MAIN || $title->isExternal() ) {
			$this->fatalError( 'Invalid --prefix. It must not be in namespace 0 and must not be external' );
		}
		$this->prefixNs = $title->getNamespace();
		// Strip the dummy 'X' again.
		$this->prefix = substr( $title->getText(), 0, -1 );
	}
	$this->suffix = $this->getOption( 'suffix' );

	$this->reason = $this->getOption( 'reason' ) ?: $this->reason;
	$this->tags = (array)$this->getOption( 'tag', null );

	$charmapFile = $this->getOption( 'charmap' );
	if ( !file_exists( $charmapFile ) ) {
		$this->fatalError( "Charmap file $charmapFile does not exist." );
	}
	if ( !is_file( $charmapFile ) || !is_readable( $charmapFile ) ) {
		$this->fatalError( "Charmap file $charmapFile is not readable." );
	}
	// The charmap file is a PHP file that must return an array.
	$this->charmap = require $charmapFile;
	if ( !is_array( $this->charmap ) ) {
		$this->fatalError( "Charmap file $charmapFile did not return a PHP array." );
	}
	// Keep only mappings from a single character to a different value.
	$this->charmap = array_filter(
		$this->charmap,
		function ( $v, $k ) {
			if ( mb_strlen( $k ) !== 1 ) {
				$this->error( "Ignoring mapping from multi-character key '$k' to '$v'" );
				return false;
			}
			return $k !== $v;
		},
		ARRAY_FILTER_USE_BOTH
	);
	if ( !$this->charmap ) {
		$this->fatalError( "Charmap file $charmapFile did not contain any usable character mappings." );
	}

	// A dry run only reads, so a replica connection is requested for it.
	$db = $this->getDB( $this->run ? DB_MASTER : DB_REPLICA );

	// The second argument selects MovePage (true) or a direct row update (false).
	$this->processTable( $db, true, 'page', 'page_namespace', 'page_title', [ 'page_id' ] );
	$this->processTable( $db, true, 'image', NS_FILE, 'img_name', [] );
	$this->processTable(
		$db, false, 'archive', 'ar_namespace', 'ar_title', [ 'ar_timestamp', 'ar_id' ]
	);
	$this->processTable( $db, false, 'filearchive', NS_FILE, 'fa_name', [ 'fa_timestamp', 'fa_id' ] );
	$this->processTable( $db, false, 'logging', 'log_namespace', 'log_title', [ 'log_id' ] );
	$this->processTable( $db, false, 'redirect', 'rd_namespace', 'rd_title', [ 'rd_from' ] );
	$this->processTable( $db, false, 'protected_titles', 'pt_namespace', 'pt_title', [] );
	$this->processUsers( $db );
}
184
/**
 * Build LIKE conditions for the charmap, grouped into OR-ed batches
 *
 * One "begins with this character" condition is produced for every key
 * of the charmap; the conditions are then combined with OR in groups of
 * at most $batchSize.
 *
 * @param IDatabase $db Database handle
 * @param string $field Field name
 * @param int $batchSize Size of the batches
 * @return array One SQL condition string per batch
 */
private function getLikeBatches( IDatabase $db, $field, $batchSize = 100 ) {
	// A non-positive size degrades to one condition per batch.
	$groups = array_chunk( array_keys( $this->charmap ), max( 1, $batchSize ) );

	$batches = [];
	foreach ( $groups as $chars ) {
		$conds = [];
		foreach ( $chars as $char ) {
			$conds[] = $field . $db->buildLike( $char, $db->anyString() );
		}
		$batches[] = $db->makeList( $conds, $db::LIST_OR );
	}
	return $batches;
}
207
/**
 * Get the list of namespaces to operate on
 *
 * Only namespaces that are both movable and capitalized are of interest.
 * The list is computed once and cached; it is ordered by subject
 * namespace number, with each subject namespace ahead of its
 * non-subject counterpart.
 *
 * @return int[]
 */
private function getNamespaces() {
	if ( $this->namespaces !== null ) {
		return $this->namespaces;
	}

	$nsinfo = MediaWikiServices::getInstance()->getNamespaceInfo();

	$selected = [];
	foreach ( array_keys( $nsinfo->getCanonicalNamespaces() ) as $id ) {
		if ( $nsinfo->isMovable( $id ) && $nsinfo->isCapitalized( $id ) ) {
			$selected[] = $id;
		}
	}

	usort( $selected, static function ( $a, $b ) use ( $nsinfo ) {
		if ( $a === $b ) {
			return 0;
		}

		$subjectA = $nsinfo->getSubject( $a );
		$subjectB = $nsinfo->getSubject( $b );

		// Primary key: the subject namespace number
		if ( $subjectA !== $subjectB ) {
			return $subjectA <=> $subjectB;
		}

		// Same subject: the subject namespace itself sorts first
		if ( $subjectA === $a ) {
			return -1;
		}

		// Otherwise the relative order of several non-subject namespaces
		// sharing one subject does not matter.
		return $subjectB === $b ? 1 : 0;
	} );

	$this->namespaces = $selected;
	return $this->namespaces;
}
254
/**
 * Tell whether a ns+title is the page (or a subpage) of a registered user
 *
 * Lookups are cached per base title in $this->seenUsers.
 *
 * @param IDatabase $db Database handle
 * @param int $ns
 * @param string $title
 * @return bool
 */
private function isUserPage( IDatabase $db, $ns, $title ) {
	if ( !in_array( $ns, [ NS_USER, NS_USER_TALK ], true ) ) {
		return false;
	}

	// Only the part before the first slash names the user.
	$root = explode( '/', $title, 2 )[0];
	if ( isset( $this->seenUsers[$root] ) ) {
		return $this->seenUsers[$root];
	}

	// Query the table directly: going through User might uppercase the name.
	$userId = $db->selectField(
		'user',
		'user_id',
		[ 'user_name' => strtr( $root, '_', ' ' ) ],
		__METHOD__
	);
	$this->seenUsers[$root] = (bool)$userId;

	return $this->seenUsers[$root];
}
279
/**
 * Adjust a target title when it cannot be used as-is
 *
 * The target is left alone unless it is a registered user's page or
 * MovePage reports 'articleexists'. Otherwise it is replaced by a title
 * built with --prefix and/or --suffix, which must be valid and must not
 * exist yet.
 *
 * @param IDatabase $db Database handle
 * @param Title $oldTitle
 * @param Title &$newTitle Replaced by the munged title when munging is needed
 * @return bool If $newTitle is (now) ok
 */
private function mungeTitle( IDatabase $db, Title $oldTitle, Title &$newTitle ) {
	// Remember the originally wanted target for the error messages.
	$wanted = $newTitle->getPrefixedText();

	if ( $this->isUserPage( $db, $newTitle->getNamespace(), $newTitle->getText() ) ) {
		$why = "Target title's user exists";
	} else {
		$validity = ( new MovePage( $oldTitle, $newTitle ) )->isValidMove();
		if ( $validity->isOK() || !$validity->hasMessage( 'articleexists' ) ) {
			// Nothing is in the way (or the problem is not one munging solves).
			return true;
		}
		$why = 'Target title exists';
	}

	$cannotMove = "Cannot move {$oldTitle->getPrefixedText()} → $wanted: ";

	if ( $this->prefix === null && $this->suffix === null ) {
		$this->error( $cannotMove . "$why and no --prefix or --suffix was given" );
		return false;
	}

	if ( $this->prefix !== null ) {
		// The whole old prefixed title goes under the prefix namespace.
		$newTitle = Title::makeTitle(
			$this->prefixNs,
			$this->prefix . $oldTitle->getPrefixedText() . ( $this->suffix ?? '' )
		);
	} else {
		$newTitle = Title::makeTitle( $newTitle->getNamespace(), $newTitle->getText() . $this->suffix );
	}

	if ( !$newTitle->isValid() ) {
		$this->error(
			$cannotMove . "$why and munged title '{$newTitle->getPrefixedText()}' is not valid"
		);
		return false;
	}
	if ( $newTitle->exists() ) {
		$this->error(
			$cannotMove . "$why and munged title '{$newTitle->getPrefixedText()}' also exists"
		);
		return false;
	}

	return true;
}
336
/**
 * Rename one title through MovePage
 *
 * In a dry run the move is validated and reported but not performed.
 *
 * @param IDatabase $db Database handle
 * @param int $ns
 * @param string $title
 * @return bool|null True on success, false on error, null if skipped
 */
private function doMove( IDatabase $db, $ns, $title ) {
	$first = mb_substr( $title, 0, 1 );
	if ( !array_key_exists( $first, $this->charmap ) ) {
		$this->error(
			"Query returned NS$ns $title, which does not begin with a character in the charmap."
		);
		return false;
	}

	// Pages of registered users are left alone.
	if ( $this->isUserPage( $db, $ns, $title ) ) {
		$this->output( "... Skipping user page NS$ns $title\n" );
		return null;
	}

	$source = Title::makeTitle( $ns, $title );
	$target = Title::makeTitle( $ns, $this->charmap[$first] . mb_substr( $title, 1 ) );
	if ( !$this->mungeTitle( $db, $source, $target ) ) {
		return false;
	}

	$mover = new MovePage( $source, $target );
	$rename = "{$source->getPrefixedText()} → {$target->getPrefixedText()}";

	$validity = $mover->isValidMove();
	if ( !$validity->isOK() ) {
		$this->error(
			"Invalid move $rename: "
			. $validity->getMessage( false, false, 'en' )->useDatabase( false )->plain()
		);
		return false;
	}

	if ( !$this->run ) {
		$this->output( "Would rename $rename\n" );
		return true;
	}

	$result = $mover->move( $this->user, $this->reason, false, $this->tags );
	if ( $result->isOK() ) {
		return true;
	}

	$this->error(
		"Move $rename failed: "
		. $result->getMessage( false, false, 'en' )->useDatabase( false )->plain()
	);
	return false;
}
390
/**
 * Rename one title by updating its database row directly
 *
 * In a dry run the intended change is only reported.
 *
 * @param IDatabase $db Database handle
 * @param string $table
 * @param string|int $nsField Namespace field name, or the fixed namespace
 *  number when given as an int
 * @param string $titleField
 * @param object $row Row as selected by processTable(); all of its fields
 *  are used as the update's conditions
 * @return bool|null True on success, false on error, null if skipped
 */
private function doUpdate( IDatabase $db, $table, $nsField, $titleField, $row ) {
	$fixedNs = is_int( $nsField );
	$ns = $fixedNs ? $nsField : (int)$row->$nsField;
	$title = $row->$titleField;

	$first = mb_substr( $title, 0, 1 );
	if ( !array_key_exists( $first, $this->charmap ) ) {
		$r = json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
		$this->error(
			"Query returned $r, but title does not begin with a character in the charmap."
		);
		return false;
	}

	// Entries for registered users' pages are left alone.
	if ( $this->isUserPage( $db, $ns, $title ) ) {
		$this->output( "... Skipping user page NS$ns $title\n" );
		return null;
	}

	$source = Title::makeTitle( $ns, $title );
	$target = Title::makeTitle( $ns, $this->charmap[$first] . mb_substr( $title, 1 ) );
	if ( !$this->mungeTitle( $db, $source, $target ) ) {
		return false;
	}

	if ( !$this->run ) {
		$r = json_encode( $row, JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE );
		$this->output( "Would set $r to {$target->getPrefixedText()}\n" );
		return true;
	}

	// Munging may have changed the namespace, so it is written back too
	// whenever the table has a namespace field.
	$set = [ $titleField => $target->getDBkey() ];
	if ( !$fixedNs ) {
		$set = [ $nsField => $target->getNamespace() ] + $set;
	}
	$db->update( $table, $set, (array)$row, __METHOD__ );

	return true;
}
440
/**
 * Rename the affected titles found in one table
 *
 * For every namespace of interest and every batch of LIKE conditions,
 * matching rows are fetched in pages of the configured batch size and
 * each row is handed to doMove() or doUpdate().
 *
 * @param IDatabase $db Database handle
 * @param bool $doMove Whether to use MovePage or direct table manipulation
 * @param string $table
 * @param string|int $nsField Namespace field name, or the fixed namespace
 *  number when given as an int
 * @param string $titleField
 * @param string[] $pkFields Additional fields to match a unique index
 *  starting with $nsField and $titleField.
 */
private function processTable( IDatabase $db, $doMove, $table, $nsField, $titleField, $pkFields ) {
	if ( $this->tables !== null && !in_array( $table, $this->tables, true ) ) {
		$this->output( "Skipping table `$table`, not in --tables.\n" );
		return;
	}

	$batchSize = $this->getBatchSize();
	$namespaces = $this->getNamespaces();
	$likes = $this->getLikeBatches( $db, $titleField );
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();

	$fixedNs = is_int( $nsField );
	if ( $fixedNs ) {
		// The table only ever holds titles of this one namespace.
		$namespaces = array_intersect( $namespaces, [ $nsField ] );
	}

	if ( !$namespaces ) {
		$this->output( "Skipping table `$table`, no valid namespaces.\n" );
		return;
	}

	$this->output( "Processing table `$table`...\n" );

	$orderFields = array_merge( [ $titleField ], $pkFields );
	$selectFields = $fixedNs ? $orderFields : array_merge( [ $nsField ], $orderFields );
	// The continuation condition is built from the innermost field outwards.
	$contFields = array_reverse( $orderFields );
	$jsonFlags = JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE;

	$lastReplicationWait = 0.0;
	$count = 0;
	$errors = 0;
	foreach ( $namespaces as $ns ) {
		foreach ( $likes as $like ) {
			$cont = [];
			do {
				$res = $db->select(
					$table,
					$selectFields,
					array_merge( [ "$nsField = $ns", $like ], $cont ),
					__METHOD__,
					[ 'ORDER BY' => $orderFields, 'LIMIT' => $batchSize ]
				);

				$lastRow = null;
				foreach ( $res as $row ) {
					$lastRow = $row;

					if ( $doMove ) {
						$rowNs = $fixedNs ? $nsField : (int)$row->$nsField;
						$ret = $this->doMove( $db, $rowNs, $row->$titleField );
					} else {
						$ret = $this->doUpdate( $db, $table, $nsField, $titleField, $row );
					}

					// null means "skipped" and is counted as neither.
					if ( $ret === true ) {
						$count++;
					} elseif ( $ret === false ) {
						$errors++;
					}
				}

				// Continue after the last row seen; an empty batch ends the loop.
				$cont = [];
				if ( $lastRow !== null ) {
					$after = '';
					foreach ( $contFields as $field ) {
						$v = $db->addQuotes( $lastRow->$field );
						if ( $after === '' ) {
							$after = "$field > $v";
						} else {
							$after = "$field > $v OR $field = $v AND ($after)";
						}
					}
					$cont = [ $after ];
				}

				if ( $this->run ) {
					$r = $lastRow !== null ? json_encode( $lastRow, $jsonFlags ) : '<end>';
					$this->output( "... $table: $count renames, $errors errors at $r\n" );
					$lbFactory->waitForReplication(
						[ 'timeout' => 30, 'ifWritesSince' => $lastReplicationWait ]
					);
					$lastReplicationWait = microtime( true );
				}
			} while ( $cont );
		}
	}

	$this->output( "Done processing table `$table`.\n" );
}
534
/**
 * Write the list of user names that would need renaming
 *
 * Nothing is renamed here. For every user name beginning with a charmap
 * character, a line "old name<TAB>new name" is written to the file given
 * by --userlist.
 *
 * @param IDatabase $db Database handle
 */
private function processUsers( IDatabase $db ) {
	$userlistFile = $this->getOption( 'userlist' );
	if ( $userlistFile === null ) {
		$this->output( "Not generating user list, --userlist was not specified.\n" );
		return;
	}

	$fh = fopen( $userlistFile, 'wb' );
	if ( !$fh ) {
		$this->error( "Could not open user list file $userlistFile" );
		return;
	}

	$this->output( "Generating user list...\n" );
	$queryOptions = [ 'ORDER BY' => 'user_name', 'LIMIT' => $this->getBatchSize() ];
	$count = 0;
	foreach ( $this->getLikeBatches( $db, 'user_name' ) as $like ) {
		$conds = [ $like ];
		do {
			$names = $db->selectFieldValues( 'user', 'user_name', $conds, __METHOD__, $queryOptions );
			if ( $names ) {
				// The next page starts after the last name of this one.
				$last = end( $names );
				$conds = [ $like, 'user_name > ' . $db->addQuotes( $last ) ];

				foreach ( $names as $name ) {
					$first = mb_substr( $name, 0, 1 );
					if ( array_key_exists( $first, $this->charmap ) ) {
						$newName = $this->charmap[$first] . mb_substr( $name, 1 );
						fprintf( $fh, "%s\t%s\n", $name, $newName );
						$count++;
					} else {
						$this->error(
							"Query returned $name, but user name does not begin with a character in the charmap."
						);
					}
				}
				$this->output( "... at $last, $count names so far\n" );
			}
		} while ( $names );
	}

	if ( !fclose( $fh ) ) {
		$this->error( "fclose on $userlistFile failed" );
	}
	$this->output( "User list output to $userlistFile, $count users need renaming.\n" );
}
592 }
593
// Name this script's class for the maintenance runner.
// NOTE(review): RUN_MAINTENANCE_IF_MAIN is defined outside this file;
// presumably it runs $maintClass when this file is the entry script — confirm.
$maintClass = UppercaseTitlesForUnicodeTransition::class;
require_once RUN_MAINTENANCE_IF_MAIN;