build: Enable use of stylelint-config-wikimedia and make pass
[lhc/web/wiklou.git] / maintenance / namespaceDupes.php
1 <?php
2 /**
3 * Check for articles to fix after adding/deleting namespaces
4 *
5 * Copyright © 2005-2007 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup Maintenance
25 */
26
27 use MediaWiki\Linker\LinkTarget;
28 use MediaWiki\MediaWikiServices;
29
30 require_once __DIR__ . '/Maintenance.php';
31
32 /**
33 * Maintenance script that checks for articles to fix after
34 * adding/deleting namespaces.
35 *
36 * @ingroup Maintenance
37 */
38 class NamespaceConflictChecker extends Maintenance {
39
/**
 * Database connection used for every query in this script; assigned in execute().
 *
 * @var Database
 */
protected $db;

// Pages for which the chosen action succeeded (or would succeed in a dry run);
// incremented once per page in checkNamespace().
private $resolvablePages = 0;
// All conflicting pages found so far; checkNamespace() adds the size of each result set.
private $totalPages = 0;

// Link-table rows for which a valid destination title could be computed;
// incremented in checkLinkTable().
private $resolvableLinks = 0;
// All link-table rows examined by checkLinkTable().
private $totalLinks = 0;
50
/**
 * Register the script description and all command-line options.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

	// Option name => help text and whether the option takes a value.
	// The order here is the order in which the options are registered.
	$optionSpecs = [
		'fix' => [
			'help' => 'Attempt to automatically fix errors',
			'withArg' => false,
		],
		'merge' => [
			'help' => "Instead of renaming conflicts, do a history merge with " .
				"the correct title",
			'withArg' => false,
		],
		'add-suffix' => [
			'help' => "Dupes will be renamed with correct namespace with " .
				"<text> appended after the article name",
			'withArg' => true,
		],
		'add-prefix' => [
			'help' => "Dupes will be renamed with correct namespace with " .
				"<text> prepended before the article name",
			'withArg' => true,
		],
		'source-pseudo-namespace' => [
			'help' => "Move all pages with the given source " .
				"prefix (with an implied colon following it). If --dest-namespace is not specified, " .
				"the colon will be replaced with a hyphen.",
			'withArg' => true,
		],
		'dest-namespace' => [
			'help' => "In combination with --source-pseudo-namespace, " .
				"specify the namespace ID of the destination.",
			'withArg' => true,
		],
		'move-talk' => [
			'help' => "If this is specified, pages in the Talk namespace that " .
				"begin with a conflicting prefix will be renamed, for example " .
				"Talk:File:Foo -> File_Talk:Foo",
			'withArg' => false,
		],
	];

	foreach ( $optionSpecs as $optionName => $spec ) {
		if ( $spec['withArg'] ) {
			// Optional option that expects a value
			$this->addOption( $optionName, $spec['help'], false, true );
		} else {
			// Plain flag
			$this->addOption( $optionName, $spec['help'] );
		}
	}
}
71
/**
 * Script entry point: collect the command-line options, run either the
 * single-prefix check or the full namespace check, and print a verdict.
 */
public function execute() {
	$this->db = $this->getDB( DB_MASTER );

	// Normalise the command line into one array that is handed to every helper.
	$options = [];
	$options['fix'] = $this->hasOption( 'fix' );
	$options['merge'] = $this->hasOption( 'merge' );
	$options['add-suffix'] = $this->getOption( 'add-suffix', '' );
	$options['add-prefix'] = $this->getOption( 'add-prefix', '' );
	$options['move-talk'] = $this->hasOption( 'move-talk' );
	$options['source-pseudo-namespace'] = $this->getOption( 'source-pseudo-namespace', '' );
	$options['dest-namespace'] = intval( $this->getOption( 'dest-namespace', 0 ) );

	// An explicit pseudo-namespace restricts the run to that one prefix.
	$retval = ( $options['source-pseudo-namespace'] !== '' )
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $retval ? "\nLooks good!\n" : "\nOh noeees\n" );
}
96
/**
 * Check every interwiki prefix, namespace name and namespace alias (in all
 * relevant capitalisations) for conflicting page titles, then check the
 * link tables for the non-interwiki prefixes.
 *
 * @param array $options Associative array of validated command-line options
 *
 * @return bool False if checkNamespace() reported a failure for any prefix
 */
private function checkAll( $options ) {
	global $wgContLang, $wgNamespaceAliases, $wgCapitalLinks;

	// Prefix text => namespace index.
	$spaces = [];

	// Interwiki prefixes are entered first (as index 0) so that a local
	// namespace with the same name replaces them below.
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$spaces[$wgContLang->ucfirst( $iwPrefix )] = 0;
	}

	// Canonical names (this includes $wgExtraNamespaces), then the content
	// language's names. Empty names are skipped.
	$nameSets = [
		MWNamespace::getCanonicalNamespaces(),
		$wgContLang->getNamespaces(),
	];
	foreach ( $nameSets as $nameSet ) {
		foreach ( $nameSet as $index => $text ) {
			if ( $text !== '' ) {
				$spaces[$text] = $index;
			}
		}
	}

	// Configured aliases, then the content language's aliases.
	$aliasSets = [
		$wgNamespaceAliases,
		$wgContLang->getNamespaceAliases(),
	];
	foreach ( $aliasSets as $aliasSet ) {
		foreach ( $aliasSet as $text => $index ) {
			$spaces[$text] = $index;
		}
	}

	// The database searches are case-sensitive, so other capitalisations of
	// each name have to be checked as well. The loop runs over the names
	// collected so far; variants added here are not expanded again.
	foreach ( $spaces as $name => $ns ) {
		$lowered = $wgContLang->lc( $name );
		$moreNames = [
			$wgContLang->uc( $name ),
			$wgContLang->ucfirst( $lowered ),
			$wgContLang->ucwords( $name ),
			$wgContLang->ucwords( $lowered ),
			$wgContLang->ucwordbreaks( $name ),
			$wgContLang->ucwordbreaks( $lowered ),
		];
		if ( !$wgCapitalLinks ) {
			// Titles may start with a lowercase letter on this wiki, so add
			// the lowercase-first form of every variant and of the name itself.
			$lcFirstForms = array_map(
				function ( $variant ) use ( $wgContLang ) {
					return $wgContLang->lcfirst( $variant );
				},
				$moreNames
			);
			$moreNames = array_merge( $moreNames, $lcFirstForms );
			$moreNames[] = $wgContLang->lcfirst( $name );
		}
		foreach ( array_unique( $moreNames ) as $altName ) {
			if ( $altName !== $name ) {
				$spaces[$altName] = $ns;
			}
		}
	}

	// Order by namespace index; entries sharing an index are ordered by name.
	$origSpaces = $spaces;
	$byIndexThenName = function ( $a, $b ) use ( $origSpaces ) {
		$indexA = $origSpaces[$a];
		$indexB = $origSpaces[$b];
		if ( $indexA < $indexB ) {
			return -1;
		}
		if ( $indexA > $indexB ) {
			return 1;
		}
		if ( $a < $b ) {
			return -1;
		}
		return ( $a > $b ) ? 1 : 0;
	};
	uksort( $spaces, $byIndexThenName );

	// Page titles: every prefix is checked even after a failure.
	$ok = true;
	foreach ( $spaces as $name => $ns ) {
		if ( !$this->checkNamespace( $ns, $name, $options ) ) {
			$ok = false;
		}
	}

	$this->output( "{$this->totalPages} pages to fix, " .
		"{$this->resolvablePages} were resolvable.\n\n" );

	/* Link destinations are fixed up for non-interwiki prefixes only.
	 *
	 * For example if a page has [[Foo:Bar]] and then a Foo namespace
	 * is introduced, pagelinks needs to be updated to have
	 * page_namespace = NS_FOO.
	 *
	 * If instead an interwiki prefix was introduced called "Foo",
	 * the link should instead be moved to the iwlinks table. If a new
	 * language is introduced called "Foo", or if there is a pagelink
	 * [[fr:Bar]] when interlanguage magic links are turned on, the
	 * link would have to be moved to the langlinks table. Those cases
	 * are left in the too-hard basket for now. The consequences are
	 * not especially severe.
	 * @fixme Handle interwiki links, and pagelinks to Category:, File:
	 *   which probably need reparsing.
	 *
	 * The redirect table has interwiki links randomly mixed in, which
	 * need to be filtered out. For example [[w:Foo:Bar]] would have
	 * rd_interwiki=w and rd_namespace=0, which would match the query for
	 * a conflicting namespace "Foo" if filtering wasn't done. Hence the
	 * two redirect passes: one for a NULL and one for an empty rd_interwiki.
	 */
	$linkChecks = [
		[ 'pagelinks', 'pl', [] ],
		[ 'templatelinks', 'tl', [] ],
		[ 'redirect', 'rd', [ 'rd_interwiki' => null ] ],
		[ 'redirect', 'rd', [ 'rd_interwiki' => '' ] ],
	];
	foreach ( $spaces as $name => $ns ) {
		if ( $ns == 0 ) {
			continue;
		}
		foreach ( $linkChecks as $linkCheck ) {
			list( $table, $fieldPrefix, $extraConds ) = $linkCheck;
			$this->checkLinkTable( $table, $fieldPrefix, $ns, $name, $options, $extraConds );
		}
	}

	$this->output( "{$this->totalLinks} links to fix, " .
		"{$this->resolvableLinks} were resolvable.\n" );

	return $ok;
}
221
/**
 * Collect the iw_prefix value of every entry returned by the interwiki lookup.
 *
 * @return array List of interwiki prefixes
 */
private function getInterwikiList() {
	$allEntries = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();

	// array_values() keeps the result a plain 0-based list regardless of input keys.
	return array_values( array_map(
		function ( $entry ) {
			return $entry['iw_prefix'];
		},
		$allEntries
	) );
}
236
/**
 * Find all pages whose title starts with the given prefix and, for each,
 * decide whether to move it, merge it into the destination, or give up.
 * The action is only carried out when the 'fix' option is set; otherwise
 * it is just logged.
 *
 * @param int $ns Destination namespace id
 * @param string $name Conflicting prefix
 * @param array $options Associative array of validated command-line options
 * @return bool True if there was nothing to do or every page was handled
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$count = $targets->numRows();
	$this->totalPages += $count;
	if ( $count == 0 ) {
		return true;
	}

	$dryRunNote = ' DRY RUN ONLY';
	if ( $options['fix'] ) {
		$dryRunNote = '';
	}

	$ok = true;
	foreach ( $targets as $row ) {
		// Step 1: work out the destination and the action. Everything
		// defaults to 'abort'; only the successful paths override it.
		$newTitle = $this->getDestinationTitle( $ns, $name,
			$row->page_namespace, $row->page_title, $options );
		$action = 'abort';
		$logStatus = false;

		if ( !$newTitle ) {
			$logStatus = 'invalid title';
		} elseif ( !$newTitle->exists() ) {
			$action = 'move';
			$logStatus = 'no conflict';
		} elseif ( $options['merge'] ) {
			// canMerge() fills in $logStatus when it refuses
			if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
				$action = 'merge';
			}
		} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
			$logStatus = 'dest title exists and --add-prefix not specified';
		} else {
			$newTitle = $this->getAlternateTitle( $newTitle, $options );
			if ( !$newTitle ) {
				$logStatus = 'alternate title is invalid';
			} elseif ( $newTitle->exists() ) {
				$logStatus = 'title conflict';
			} else {
				$action = 'move';
				$logStatus = 'alternate';
			}
		}

		// Step 2: carry out the action, or just report it in a dry run.
		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		if ( $action === 'abort' ) {
			$this->output( "$logTitle *** $logStatus\n" );
			$pageOK = false;
		} elseif ( $action === 'move' ) {
			$this->output( "$logTitle -> " .
				$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

			if ( $options['fix'] ) {
				$pageOK = $this->movePage( $row->page_id, $newTitle );
			}
		} elseif ( $action === 'merge' ) {
			$this->output( "$logTitle => " .
				$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

			if ( $options['fix'] ) {
				$pageOK = $this->mergePage( $row, $newTitle );
			}
		}

		// Step 3: bookkeeping
		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$ok = false;
		}
	}

	return $ok;
}
331
/**
 * Scan a link table in batches for main-namespace destinations whose title
 * starts with "<name>:" and, when the 'fix' option is set, rewrite them to
 * point at the destination namespace.
 *
 * @param string $table The link table name
 * @param string $fieldPrefix The field prefix in the link table
 * @param int $ns Destination namespace id
 * @param string $name Conflicting prefix
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions for the SQL query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = []
) {
	$fromField = "{$fieldPrefix}_from";
	$namespaceField = "{$fieldPrefix}_namespace";
	$titleField = "{$fieldPrefix}_title";
	$batchSize = 500;

	// Conditions that are the same for every batch.
	$staticConds = array_merge( $extraConds, [
		$namespaceField => 0,
		$titleField . $this->db->buildLike( "$name:", $this->db->anyString() )
	] );
	$queryOptions = [
		'ORDER BY' => [ $titleField, $fromField ],
		'LIMIT' => $batchSize
	];

	// Paging condition; empty for the first batch.
	$batchConds = [];
	for ( ; ; ) {
		$res = $this->db->select(
			$table,
			[ $fromField, $namespaceField, $titleField ],
			array_merge( $batchConds, $staticConds ),
			__METHOD__,
			$queryOptions
		);

		if ( $res->numRows() == 0 ) {
			break;
		}

		foreach ( $res as $row ) {
			$logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
				"dbk={$row->$titleField}";
			$destTitle = $this->getDestinationTitle( $ns, $name,
				$row->$namespaceField, $row->$titleField, $options );
			$this->totalLinks++;
			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}
			$this->resolvableLinks++;

			if ( $options['fix'] ) {
				// IGNORE: a row that would collide with an existing one is left as it is.
				$this->db->update( $table,
					// SET
					[
						$namespaceField => $destTitle->getNamespace(),
						$titleField => $destTitle->getDBkey()
					],
					// WHERE
					[
						$namespaceField => 0,
						$titleField => $row->$titleField,
						$fromField => $row->$fromField
					],
					__METHOD__,
					[ 'IGNORE' ]
				);
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . "\n" );
			} else {
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
			}
		}

		// $row still holds the last row of this batch; continue after it.
		$encLastTitle = $this->db->addQuotes( $row->$titleField );
		$encLastFrom = $this->db->addQuotes( $row->$fromField );

		$batchConds = [
			"$titleField > $encLastTitle " .
			"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ];

		wfWaitForSlaves();
	}
}
412
/**
 * Handle a single pseudo-namespace given on the command line: announce the
 * prefix and destination namespace, then run the regular per-prefix check.
 * Pages end up either in the destination namespace or, for destination 0,
 * with the colon replaced by a hyphen (useful for pseudo-namespaces that
 * conflict with interwiki links).
 *
 * @param array $options Associative array of validated command-line options
 * @return bool Result of checkNamespace() for this prefix
 */
private function checkPrefix( $options ) {
	$ns = $options['dest-namespace'];
	$prefix = $options['source-pseudo-namespace'];

	$this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );

	$result = $this->checkNamespace( $ns, $prefix, $options );

	return $result;
}
427
/**
 * Select the pages whose title begins with "<name>:" and which therefore
 * need migrating. The main namespace is always searched; the talk namespace
 * is added when 'move-talk' is set and the destination is a subject namespace.
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix that is being made a namespace
 * @param array $options Associative array of validated command-line options
 *
 * @return ResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
	$includeTalk = $options['move-talk'] && MWNamespace::isSubject( $ns );
	$checkNamespaces = $includeTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN;

	$fields = [
		'page_id',
		'page_title',
		'page_namespace',
	];
	$conds = [
		'page_namespace' => $checkNamespaces,
		'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() ),
	];

	return $this->db->select( 'page', $fields, $conds, __METHOD__ );
}
458
/**
 * Work out where a conflicting page or link should end up.
 *
 * The "<name>:" prefix is stripped from the source DB key. For destination
 * namespace 0 the prefix is put back with a hyphen in place of the colon.
 * A source in NS_TALK is sent to the talk namespace of the destination when
 * the destination is a subject namespace.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @param array $options Associative array of validated command-line options
 * @return Title|false False if no valid title that can exist could be built
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options ) {
	$prefixLength = strlen( "$name:" );
	$remainder = substr( $sourceDbk, $prefixLength );

	// An interwiki; try an alternate encoding with '-' for ':'
	$dbk = ( $ns == 0 ) ? "$name-" . $remainder : $remainder;

	// An associated talk page moved with the --move-talk feature goes to
	// the destination's talk namespace.
	$isTalkMove = $sourceNs == NS_TALK && MWNamespace::isSubject( $ns );
	$destNS = $isTalkMove ? MWNamespace::getTalk( $ns ) : $ns;

	$newTitle = Title::makeTitleSafe( $destNS, $dbk );
	if ( $newTitle && $newTitle->canExist() ) {
		return $newTitle;
	}

	return false;
}
485
/**
 * Get an alternative title to move a page to. This is used if the
 * preferred destination title already exists.
 *
 * The alternative is the preferred DB key with the --add-prefix text put in
 * front and the --add-suffix text appended. There is only one candidate,
 * since it is fully determined by the options, so it is built once and
 * returned without checking whether it exists. The caller is expected to
 * check exists() on the result and treat an existing page as a conflict.
 *
 * @param LinkTarget $linkTarget The preferred (already existing) destination
 * @param array $options Associative array of validated command-line options
 * @return Title|bool The alternative title, or false if neither option was
 *   given or the resulting title is invalid
 */
private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
	$prefix = $options['add-prefix'];
	$suffix = $options['add-suffix'];
	if ( $prefix == '' && $suffix == '' ) {
		return false;
	}

	// Deliberately no retry loop: every attempt would produce the same
	// DB key, so looping until the title is free would never terminate
	// when the alternative title is already taken.
	$dbk = $prefix . $linkTarget->getDBkey() . $suffix;
	$title = Title::makeTitleSafe( $linkTarget->getNamespace(), $dbk );
	if ( !$title ) {
		return false;
	}

	return $title;
}
511
/**
 * Move a page by rewriting its page row directly, then bring the
 * *_from_namespace columns of the link tables in line with the new namespace.
 *
 * @param int $id The page_id
 * @param LinkTarget $newLinkTarget The new title link target
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
	$destNamespace = $newLinkTarget->getNamespace();

	$this->db->update( 'page',
		// SET
		[
			"page_namespace" => $destNamespace,
			"page_title" => $newLinkTarget->getDBkey(),
		],
		// WHERE
		[
			"page_id" => $id,
		],
		__METHOD__ );

	// Link table => field prefix, for the tables carrying a *_from_namespace column
	$fromNamespaceTables = [
		'pagelinks' => 'pl',
		'templatelinks' => 'tl',
		'imagelinks' => 'il',
	];
	foreach ( $fromNamespaceTables as $table => $fieldPrefix ) {
		$this->db->update( $table,
			// SET
			[ "{$fieldPrefix}_from_namespace" => $destNamespace ],
			// WHERE
			[ "{$fieldPrefix}_from" => $id ],
			__METHOD__ );
	}

	return true;
}
547
/**
 * Determine if we can merge a page.
 * We check if an inaccessible revision would become the latest and
 * deny the merge if so -- it's theoretically possible to update the
 * latest revision, but opens a can of worms -- search engine updates,
 * recentchanges review, etc.
 *
 * The merge is also denied when the latest revision of either page cannot
 * be loaded, since the timestamps cannot be compared in that case.
 *
 * @param int $id The page_id of the source page
 * @param LinkTarget $linkTarget The new link target
 * @param string &$logStatus This is set to the log status message on failure
 * @return bool
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
	$latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
	$latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );

	// Either lookup can come back empty (e.g. a page row without a usable
	// latest revision); refuse the merge instead of calling a method on null.
	if ( !$latestSource || !$latestDest ) {
		$logStatus = 'cannot merge since a latest revision could not be loaded';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
570
/**
 * Merge page histories: reassign every revision of the source page to the
 * destination page, delete the source page row, and then run the links
 * deletion update for the source.
 *
 * @param object $row Row of the source page; page_id, page_namespace and
 *   page_title are read from it
 * @param Title $newTitle The new title, i.e. the page receiving the revisions
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
	$id = $row->page_id;

	// Construct the WikiPage object we will need later, while the
	// page_id still exists. Note that this cannot use makeTitleSafe(),
	// we are deliberately constructing an invalid title.
	$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$sourceTitle->resetArticleID( $id );
	$wikiPage = new WikiPage( $sourceTitle );
	$wikiPage->loadPageData( 'fromdbmaster' );

	// Revision reassignment and page deletion happen in one transaction.
	$destId = $newTitle->getArticleID();
	$this->beginTransaction( $this->db, __METHOD__ );
	$this->db->update( 'revision',
		// SET
		[ 'rev_page' => $destId ],
		// WHERE
		[ 'rev_page' => $id ],
		__METHOD__ );

	$this->db->delete( 'page', [ 'page_id' => $id ], __METHOD__ );

	$this->commitTransaction( $this->db, __METHOD__ );

	/* Call LinksDeletionUpdate to delete outgoing links from the old title,
	 * and update category counts.
	 *
	 * Calling external code with a fake broken Title is a fairly dubious
	 * idea. It's necessary because it's quite a lot of code to duplicate,
	 * but that also makes it fragile since it would be easy for someone to
	 * accidentally introduce an assumption of title validity to the code we
	 * are calling.
	 */
	DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) );
	// Run the queued update right away rather than leaving it pending.
	DeferredUpdates::doUpdates();

	return true;
}
616 }
617
// Name the maintenance class defined in this file, then include the file
// named by the RUN_MAINTENANCE_IF_MAIN constant.
// NOTE(review): presumably that include instantiates and runs $maintClass
// when this script is the entry point -- confirm in Maintenance.php.
$maintClass = "NamespaceConflictChecker";
require_once RUN_MAINTENANCE_IF_MAIN;