Merge "Do not output invalid links for deleted names on Special:Contributions"
[lhc/web/wiklou.git] / maintenance / namespaceDupes.php
1 <?php
2 /**
3 * Check for articles to fix after adding/deleting namespaces
4 *
5 * Copyright © 2005-2007 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup Maintenance
25 */
26
27 require_once __DIR__ . '/Maintenance.php';
28
29 use MediaWiki\Linker\LinkTarget;
30 use MediaWiki\MediaWikiServices;
31 use Wikimedia\Rdbms\ResultWrapper;
32 use Wikimedia\Rdbms\IMaintainableDatabase;
33
34 /**
35 * Maintenance script that checks for articles to fix after
36 * adding/deleting namespaces.
37 *
38 * @ingroup Maintenance
39 */
40 class NamespaceConflictChecker extends Maintenance {
41
42 /**
43 * @var IMaintainableDatabase
44 */
45 protected $db;
46
47 private $resolvablePages = 0;
48 private $totalPages = 0;
49
50 private $resolvableLinks = 0;
51 private $totalLinks = 0;
52
53 public function __construct() {
54 parent::__construct();
55 $this->addDescription( 'Find and fix pages affected by namespace addition/removal' );
56 $this->addOption( 'fix', 'Attempt to automatically fix errors' );
57 $this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " .
58 "the correct title" );
59 $this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " .
60 "<text> appended after the article name", false, true );
61 $this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " .
62 "<text> prepended before the article name", false, true );
63 $this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " .
64 "prefix (with an implied colon following it). If --dest-namespace is not specified, " .
65 "the colon will be replaced with a hyphen.",
66 false, true );
67 $this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " .
68 "specify the namespace ID of the destination.", false, true );
69 $this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " .
70 "begin with a conflicting prefix will be renamed, for example " .
71 "Talk:File:Foo -> File_Talk:Foo" );
72 }
73
74 public function execute() {
75 $this->db = $this->getDB( DB_MASTER );
76
77 $options = [
78 'fix' => $this->hasOption( 'fix' ),
79 'merge' => $this->hasOption( 'merge' ),
80 'add-suffix' => $this->getOption( 'add-suffix', '' ),
81 'add-prefix' => $this->getOption( 'add-prefix', '' ),
82 'move-talk' => $this->hasOption( 'move-talk' ),
83 'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
84 'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ) ];
85
86 if ( $options['source-pseudo-namespace'] !== '' ) {
87 $retval = $this->checkPrefix( $options );
88 } else {
89 $retval = $this->checkAll( $options );
90 }
91
92 if ( $retval ) {
93 $this->output( "\nLooks good!\n" );
94 } else {
95 $this->output( "\nOh noeees\n" );
96 }
97 }
98
99 /**
100 * Check all namespaces
101 *
102 * @param array $options Associative array of validated command-line options
103 *
104 * @return bool
105 */
106 private function checkAll( $options ) {
107 global $wgContLang, $wgNamespaceAliases, $wgCapitalLinks;
108
109 $spaces = [];
110
111 // List interwikis first, so they'll be overridden
112 // by any conflicting local namespaces.
113 foreach ( $this->getInterwikiList() as $prefix ) {
114 $name = $wgContLang->ucfirst( $prefix );
115 $spaces[$name] = 0;
116 }
117
118 // Now pull in all canonical and alias namespaces...
119 foreach ( MWNamespace::getCanonicalNamespaces() as $ns => $name ) {
120 // This includes $wgExtraNamespaces
121 if ( $name !== '' ) {
122 $spaces[$name] = $ns;
123 }
124 }
125 foreach ( $wgContLang->getNamespaces() as $ns => $name ) {
126 if ( $name !== '' ) {
127 $spaces[$name] = $ns;
128 }
129 }
130 foreach ( $wgNamespaceAliases as $name => $ns ) {
131 $spaces[$name] = $ns;
132 }
133 foreach ( $wgContLang->getNamespaceAliases() as $name => $ns ) {
134 $spaces[$name] = $ns;
135 }
136
137 // We'll need to check for lowercase keys as well,
138 // since we're doing case-sensitive searches in the db.
139 foreach ( $spaces as $name => $ns ) {
140 $moreNames = [];
141 $moreNames[] = $wgContLang->uc( $name );
142 $moreNames[] = $wgContLang->ucfirst( $wgContLang->lc( $name ) );
143 $moreNames[] = $wgContLang->ucwords( $name );
144 $moreNames[] = $wgContLang->ucwords( $wgContLang->lc( $name ) );
145 $moreNames[] = $wgContLang->ucwordbreaks( $name );
146 $moreNames[] = $wgContLang->ucwordbreaks( $wgContLang->lc( $name ) );
147 if ( !$wgCapitalLinks ) {
148 foreach ( $moreNames as $altName ) {
149 $moreNames[] = $wgContLang->lcfirst( $altName );
150 }
151 $moreNames[] = $wgContLang->lcfirst( $name );
152 }
153 foreach ( array_unique( $moreNames ) as $altName ) {
154 if ( $altName !== $name ) {
155 $spaces[$altName] = $ns;
156 }
157 }
158 }
159
160 // Sort by namespace index, and if there are two with the same index,
161 // break the tie by sorting by name
162 $origSpaces = $spaces;
163 uksort( $spaces, function ( $a, $b ) use ( $origSpaces ) {
164 if ( $origSpaces[$a] < $origSpaces[$b] ) {
165 return -1;
166 } elseif ( $origSpaces[$a] > $origSpaces[$b] ) {
167 return 1;
168 } elseif ( $a < $b ) {
169 return -1;
170 } elseif ( $a > $b ) {
171 return 1;
172 } else {
173 return 0;
174 }
175 } );
176
177 $ok = true;
178 foreach ( $spaces as $name => $ns ) {
179 $ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
180 }
181
182 $this->output( "{$this->totalPages} pages to fix, " .
183 "{$this->resolvablePages} were resolvable.\n\n" );
184
185 foreach ( $spaces as $name => $ns ) {
186 if ( $ns != 0 ) {
187 /* Fix up link destinations for non-interwiki links only.
188 *
189 * For example if a page has [[Foo:Bar]] and then a Foo namespace
190 * is introduced, pagelinks needs to be updated to have
191 * page_namespace = NS_FOO.
192 *
193 * If instead an interwiki prefix was introduced called "Foo",
194 * the link should instead be moved to the iwlinks table. If a new
195 * language is introduced called "Foo", or if there is a pagelink
196 * [[fr:Bar]] when interlanguage magic links are turned on, the
197 * link would have to be moved to the langlinks table. Let's put
198 * those cases in the too-hard basket for now. The consequences are
199 * not especially severe.
200 * @fixme Handle interwiki links, and pagelinks to Category:, File:
201 * which probably need reparsing.
202 */
203
204 $this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
205 $this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );
206
207 // The redirect table has interwiki links randomly mixed in, we
208 // need to filter those out. For example [[w:Foo:Bar]] would
209 // have rd_interwiki=w and rd_namespace=0, which would match the
210 // query for a conflicting namespace "Foo" if filtering wasn't done.
211 $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
212 [ 'rd_interwiki' => null ] );
213 $this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
214 [ 'rd_interwiki' => '' ] );
215 }
216 }
217
218 $this->output( "{$this->totalLinks} links to fix, " .
219 "{$this->resolvableLinks} were resolvable.\n" );
220
221 return $ok;
222 }
223
224 /**
225 * Get the interwiki list
226 *
227 * @return array
228 */
229 private function getInterwikiList() {
230 $result = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();
231 $prefixes = [];
232 foreach ( $result as $row ) {
233 $prefixes[] = $row['iw_prefix'];
234 }
235
236 return $prefixes;
237 }
238
239 /**
240 * Check a given prefix and try to move it into the given destination namespace
241 *
242 * @param int $ns Destination namespace id
243 * @param string $name
244 * @param array $options Associative array of validated command-line options
245 * @return bool
246 */
247 private function checkNamespace( $ns, $name, $options ) {
248 $targets = $this->getTargetList( $ns, $name, $options );
249 $count = $targets->numRows();
250 $this->totalPages += $count;
251 if ( $count == 0 ) {
252 return true;
253 }
254
255 $dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
256
257 $ok = true;
258 foreach ( $targets as $row ) {
259
260 // Find the new title and determine the action to take
261
262 $newTitle = $this->getDestinationTitle( $ns, $name,
263 $row->page_namespace, $row->page_title, $options );
264 $logStatus = false;
265 if ( !$newTitle ) {
266 $logStatus = 'invalid title';
267 $action = 'abort';
268 } elseif ( $newTitle->exists() ) {
269 if ( $options['merge'] ) {
270 if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
271 $action = 'merge';
272 } else {
273 $action = 'abort';
274 }
275 } elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
276 $action = 'abort';
277 $logStatus = 'dest title exists and --add-prefix not specified';
278 } else {
279 $newTitle = $this->getAlternateTitle( $newTitle, $options );
280 if ( !$newTitle ) {
281 $action = 'abort';
282 $logStatus = 'alternate title is invalid';
283 } elseif ( $newTitle->exists() ) {
284 $action = 'abort';
285 $logStatus = 'title conflict';
286 } else {
287 $action = 'move';
288 $logStatus = 'alternate';
289 }
290 }
291 } else {
292 $action = 'move';
293 $logStatus = 'no conflict';
294 }
295
296 // Take the action or log a dry run message
297
298 $logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
299 $pageOK = true;
300
301 switch ( $action ) {
302 case 'abort':
303 $this->output( "$logTitle *** $logStatus\n" );
304 $pageOK = false;
305 break;
306 case 'move':
307 $this->output( "$logTitle -> " .
308 $newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
309
310 if ( $options['fix'] ) {
311 $pageOK = $this->movePage( $row->page_id, $newTitle );
312 }
313 break;
314 case 'merge':
315 $this->output( "$logTitle => " .
316 $newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
317
318 if ( $options['fix'] ) {
319 $pageOK = $this->mergePage( $row, $newTitle );
320 }
321 break;
322 }
323
324 if ( $pageOK ) {
325 $this->resolvablePages++;
326 } else {
327 $ok = false;
328 }
329 }
330
331 return $ok;
332 }
333
334 /**
335 * Check and repair the destination fields in a link table
336 * @param string $table The link table name
337 * @param string $fieldPrefix The field prefix in the link table
338 * @param int $ns Destination namespace id
339 * @param string $name
340 * @param array $options Associative array of validated command-line options
341 * @param array $extraConds Extra conditions for the SQL query
342 */
343 private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
344 $extraConds = []
345 ) {
346 $batchConds = [];
347 $fromField = "{$fieldPrefix}_from";
348 $namespaceField = "{$fieldPrefix}_namespace";
349 $titleField = "{$fieldPrefix}_title";
350 $batchSize = 500;
351 while ( true ) {
352 $res = $this->db->select(
353 $table,
354 [ $fromField, $namespaceField, $titleField ],
355 array_merge( $batchConds, $extraConds, [
356 $namespaceField => 0,
357 $titleField . $this->db->buildLike( "$name:", $this->db->anyString() )
358 ] ),
359 __METHOD__,
360 [
361 'ORDER BY' => [ $titleField, $fromField ],
362 'LIMIT' => $batchSize
363 ]
364 );
365
366 if ( $res->numRows() == 0 ) {
367 break;
368 }
369 foreach ( $res as $row ) {
370 $logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
371 "dbk={$row->$titleField}";
372 $destTitle = $this->getDestinationTitle( $ns, $name,
373 $row->$namespaceField, $row->$titleField, $options );
374 $this->totalLinks++;
375 if ( !$destTitle ) {
376 $this->output( "$table $logTitle *** INVALID\n" );
377 continue;
378 }
379 $this->resolvableLinks++;
380 if ( !$options['fix'] ) {
381 $this->output( "$table $logTitle -> " .
382 $destTitle->getPrefixedDBkey() . " DRY RUN\n" );
383 continue;
384 }
385
386 $this->db->update( $table,
387 // SET
388 [
389 $namespaceField => $destTitle->getNamespace(),
390 $titleField => $destTitle->getDBkey()
391 ],
392 // WHERE
393 [
394 $namespaceField => 0,
395 $titleField => $row->$titleField,
396 $fromField => $row->$fromField
397 ],
398 __METHOD__,
399 [ 'IGNORE' ]
400 );
401 $this->output( "$table $logTitle -> " .
402 $destTitle->getPrefixedDBkey() . "\n" );
403 }
404 $encLastTitle = $this->db->addQuotes( $row->$titleField );
405 $encLastFrom = $this->db->addQuotes( $row->$fromField );
406
407 $batchConds = [
408 "$titleField > $encLastTitle " .
409 "OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ];
410
411 wfWaitForSlaves();
412 }
413 }
414
415 /**
416 * Move the given pseudo-namespace, either replacing the colon with a hyphen
417 * (useful for pseudo-namespaces that conflict with interwiki links) or move
418 * them to another namespace if specified.
419 * @param array $options Associative array of validated command-line options
420 * @return bool
421 */
422 private function checkPrefix( $options ) {
423 $prefix = $options['source-pseudo-namespace'];
424 $ns = $options['dest-namespace'];
425 $this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );
426
427 return $this->checkNamespace( $ns, $prefix, $options );
428 }
429
430 /**
431 * Find pages in main and talk namespaces that have a prefix of the new
432 * namespace so we know titles that will need migrating
433 *
434 * @param int $ns Destination namespace id
435 * @param string $name Prefix that is being made a namespace
436 * @param array $options Associative array of validated command-line options
437 *
438 * @return ResultWrapper
439 */
440 private function getTargetList( $ns, $name, $options ) {
441 if ( $options['move-talk'] && MWNamespace::isSubject( $ns ) ) {
442 $checkNamespaces = [ NS_MAIN, NS_TALK ];
443 } else {
444 $checkNamespaces = NS_MAIN;
445 }
446
447 return $this->db->select( 'page',
448 [
449 'page_id',
450 'page_title',
451 'page_namespace',
452 ],
453 [
454 'page_namespace' => $checkNamespaces,
455 'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() ),
456 ],
457 __METHOD__
458 );
459 }
460
461 /**
462 * Get the preferred destination title for a given target page.
463 * @param integer $ns The destination namespace ID
464 * @param string $name The conflicting prefix
465 * @param integer $sourceNs The source namespace
466 * @param integer $sourceDbk The source DB key (i.e. page_title)
467 * @param array $options Associative array of validated command-line options
468 * @return Title|false
469 */
470 private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options ) {
471 $dbk = substr( $sourceDbk, strlen( "$name:" ) );
472 if ( $ns == 0 ) {
473 // An interwiki; try an alternate encoding with '-' for ':'
474 $dbk = "$name-" . $dbk;
475 }
476 $destNS = $ns;
477 if ( $sourceNs == NS_TALK && MWNamespace::isSubject( $ns ) ) {
478 // This is an associated talk page moved with the --move-talk feature.
479 $destNS = MWNamespace::getTalk( $destNS );
480 }
481 $newTitle = Title::makeTitleSafe( $destNS, $dbk );
482 if ( !$newTitle || !$newTitle->canExist() ) {
483 return false;
484 }
485 return $newTitle;
486 }
487
488 /**
489 * Get an alternative title to move a page to. This is used if the
490 * preferred destination title already exists.
491 *
492 * @param LinkTarget $linkTarget
493 * @param array $options Associative array of validated command-line options
494 * @return Title|bool
495 */
496 private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
497 $prefix = $options['add-prefix'];
498 $suffix = $options['add-suffix'];
499 if ( $prefix == '' && $suffix == '' ) {
500 return false;
501 }
502 while ( true ) {
503 $dbk = $prefix . $linkTarget->getDBkey() . $suffix;
504 $title = Title::makeTitleSafe( $linkTarget->getNamespace(), $dbk );
505 if ( !$title ) {
506 return false;
507 }
508 if ( !$title->exists() ) {
509 return $title;
510 }
511 }
512 }
513
514 /**
515 * Move a page
516 *
517 * @param integer $id The page_id
518 * @param LinkTarget $newLinkTarget The new title link target
519 * @return bool
520 */
521 private function movePage( $id, LinkTarget $newLinkTarget ) {
522 $this->db->update( 'page',
523 [
524 "page_namespace" => $newLinkTarget->getNamespace(),
525 "page_title" => $newLinkTarget->getDBkey(),
526 ],
527 [
528 "page_id" => $id,
529 ],
530 __METHOD__ );
531
532 // Update *_from_namespace in links tables
533 $fromNamespaceTables = [
534 [ 'pagelinks', 'pl' ],
535 [ 'templatelinks', 'tl' ],
536 [ 'imagelinks', 'il' ] ];
537 foreach ( $fromNamespaceTables as $tableInfo ) {
538 list( $table, $fieldPrefix ) = $tableInfo;
539 $this->db->update( $table,
540 // SET
541 [ "{$fieldPrefix}_from_namespace" => $newLinkTarget->getNamespace() ],
542 // WHERE
543 [ "{$fieldPrefix}_from" => $id ],
544 __METHOD__ );
545 }
546
547 return true;
548 }
549
550 /**
551 * Determine if we can merge a page.
552 * We check if an inaccessible revision would become the latest and
553 * deny the merge if so -- it's theoretically possible to update the
554 * latest revision, but opens a can of worms -- search engine updates,
555 * recentchanges review, etc.
556 *
557 * @param integer $id The page_id
558 * @param LinkTarget $linkTarget The new link target
559 * @param string $logStatus This is set to the log status message on failure
560 * @return bool
561 */
562 private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
563 $latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
564 $latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );
565 if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
566 $logStatus = 'cannot merge since source is later';
567 return false;
568 } else {
569 return true;
570 }
571 }
572
573 /**
574 * Merge page histories
575 *
576 * @param stdClass $row Page row
577 * @param Title $newTitle The new title
578 * @return bool
579 */
580 private function mergePage( $row, Title $newTitle ) {
581 $id = $row->page_id;
582
583 // Construct the WikiPage object we will need later, while the
584 // page_id still exists. Note that this cannot use makeTitleSafe(),
585 // we are deliberately constructing an invalid title.
586 $sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
587 $sourceTitle->resetArticleID( $id );
588 $wikiPage = new WikiPage( $sourceTitle );
589 $wikiPage->loadPageData( 'fromdbmaster' );
590
591 $destId = $newTitle->getArticleID();
592 $this->beginTransaction( $this->db, __METHOD__ );
593 $this->db->update( 'revision',
594 // SET
595 [ 'rev_page' => $destId ],
596 // WHERE
597 [ 'rev_page' => $id ],
598 __METHOD__ );
599
600 $this->db->delete( 'page', [ 'page_id' => $id ], __METHOD__ );
601
602 $this->commitTransaction( $this->db, __METHOD__ );
603
604 /* Call LinksDeletionUpdate to delete outgoing links from the old title,
605 * and update category counts.
606 *
607 * Calling external code with a fake broken Title is a fairly dubious
608 * idea. It's necessary because it's quite a lot of code to duplicate,
609 * but that also makes it fragile since it would be easy for someone to
610 * accidentally introduce an assumption of title validity to the code we
611 * are calling.
612 */
613 DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) );
614 DeferredUpdates::doUpdates();
615
616 return true;
617 }
618 }
619
620 $maintClass = "NamespaceConflictChecker";
621 require_once RUN_MAINTENANCE_IF_MAIN;