Fix use of GenderCache in ApiPageSet::processTitlesArray
[lhc/web/wiklou.git] / maintenance / namespaceDupes.php
1 <?php
2 /**
3 * Check for articles to fix after adding/deleting namespaces
4 *
5 * Copyright © 2005-2007 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup Maintenance
25 */
26
27 require_once __DIR__ . '/Maintenance.php';
28
29 use MediaWiki\Linker\LinkTarget;
30 use MediaWiki\MediaWikiServices;
31 use Wikimedia\Rdbms\IResultWrapper;
32 use Wikimedia\Rdbms\IMaintainableDatabase;
33
34 /**
35 * Maintenance script that checks for articles to fix after
36 * adding/deleting namespaces.
37 *
38 * @ingroup Maintenance
39 */
class NamespaceDupes extends Maintenance {

	/**
	 * Database handle. The methods of this class never assign or read it;
	 * they obtain their connection through getDB() instead.
	 *
	 * @var IMaintainableDatabase
	 */
	protected $db;

	/** @var int Pages for which a move or merge was possible (or succeeded with --fix) */
	private $resolvablePages = 0;

	/** @var int Pages found whose title starts with a conflicting prefix */
	private $totalPages = 0;

	/** @var int Link rows for which a valid destination title could be built */
	private $resolvableLinks = 0;

	/** @var int Link rows found whose target starts with a conflicting prefix */
	private $totalLinks = 0;

	/**
	 * Register the script description and its command-line options.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );
		$this->addOption( 'fix', 'Attempt to automatically fix errors' );
		$this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " .
			"the correct title" );
		$this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " .
			"<text> appended after the article name", false, true );
		$this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " .
			"<text> prepended before the article name", false, true );
		$this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " .
			"prefix (with an implied colon following it). If --dest-namespace is not specified, " .
			"the colon will be replaced with a hyphen.",
			false, true );
		$this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " .
			"specify the namespace ID of the destination.", false, true );
		$this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " .
			"begin with a conflicting prefix will be renamed, for example " .
			"Talk:File:Foo -> File_Talk:Foo" );
	}

	/**
	 * Collect the command-line options, run either the single-prefix check or
	 * the check of every known namespace name, and print an overall verdict.
	 */
	public function execute() {
		$options = [
			'fix' => $this->hasOption( 'fix' ),
			'merge' => $this->hasOption( 'merge' ),
			'add-suffix' => $this->getOption( 'add-suffix', '' ),
			'add-prefix' => $this->getOption( 'add-prefix', '' ),
			'move-talk' => $this->hasOption( 'move-talk' ),
			'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
			'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ),
		];

		if ( $options['source-pseudo-namespace'] !== '' ) {
			$retval = $this->checkPrefix( $options );
		} else {
			$retval = $this->checkAll( $options );
		}

		$this->output( $retval ? "\nLooks good!\n" : "\nOh noeees\n" );
	}

	/**
	 * Check all namespaces
	 *
	 * Builds a map of every interwiki prefix and namespace name/alias (plus
	 * case variants) to its namespace index, checks pages against each name,
	 * then repairs link-table targets for the non-zero namespaces.
	 *
	 * @param array $options Associative array of validated command-line options
	 *
	 * @return bool True if checkNamespace() succeeded for every name
	 */
	private function checkAll( $options ) {
		global $wgNamespaceAliases, $wgCapitalLinks;

		$services = MediaWikiServices::getInstance();
		$contLang = $services->getContentLanguage();
		$spaces = [];

		// List interwikis first, so they'll be overridden
		// by any conflicting local namespaces.
		foreach ( $this->getInterwikiList() as $prefix ) {
			$spaces[$contLang->ucfirst( $prefix )] = 0;
		}

		// Now pull in all canonical and alias namespaces...
		// The canonical list includes $wgExtraNamespaces.
		foreach ( $services->getNamespaceInfo()->getCanonicalNamespaces() as $ns => $name ) {
			if ( $name !== '' ) {
				$spaces[$name] = $ns;
			}
		}
		foreach ( $contLang->getNamespaces() as $ns => $name ) {
			if ( $name !== '' ) {
				$spaces[$name] = $ns;
			}
		}
		foreach ( $wgNamespaceAliases as $name => $ns ) {
			$spaces[$name] = $ns;
		}
		foreach ( $contLang->getNamespaceAliases() as $name => $ns ) {
			$spaces[$name] = $ns;
		}

		// We'll need to check for lowercase keys as well,
		// since we're doing case-sensitive searches in the db.
		// (foreach iterates over a snapshot, so the keys added below are not
		// themselves expanded again.)
		foreach ( $spaces as $name => $ns ) {
			$moreNames = [
				$contLang->uc( $name ),
				$contLang->ucfirst( $contLang->lc( $name ) ),
				$contLang->ucwords( $name ),
				$contLang->ucwords( $contLang->lc( $name ) ),
				$contLang->ucwordbreaks( $name ),
				$contLang->ucwordbreaks( $contLang->lc( $name ) ),
			];
			if ( !$wgCapitalLinks ) {
				// Titles may start with a lowercase letter, so add a
				// lowercase-first form of every variant and of the name itself.
				$lowerFirst = [];
				foreach ( $moreNames as $altName ) {
					$lowerFirst[] = $contLang->lcfirst( $altName );
				}
				$lowerFirst[] = $contLang->lcfirst( $name );
				$moreNames = array_merge( $moreNames, $lowerFirst );
			}
			foreach ( array_unique( $moreNames ) as $altName ) {
				if ( $altName !== $name ) {
					$spaces[$altName] = $ns;
				}
			}
		}

		// Sort by namespace index, and if there are two with the same index,
		// break the tie by sorting by name
		$origSpaces = $spaces;
		uksort( $spaces, function ( $a, $b ) use ( $origSpaces ) {
			return $origSpaces[$a] <=> $origSpaces[$b]
				?: $a <=> $b;
		} );

		$ok = true;
		foreach ( $spaces as $name => $ns ) {
			// Always run the check, even after an earlier failure.
			$ok = $this->checkNamespace( $ns, $name, $options ) && $ok;
		}

		$this->output( "{$this->totalPages} pages to fix, " .
			"{$this->resolvablePages} were resolvable.\n\n" );

		foreach ( $spaces as $name => $ns ) {
			if ( $ns == 0 ) {
				continue;
			}

			/* Fix up link destinations for non-interwiki links only.
			 *
			 * For example if a page has [[Foo:Bar]] and then a Foo namespace
			 * is introduced, pagelinks needs to be updated to have
			 * page_namespace = NS_FOO.
			 *
			 * If instead an interwiki prefix was introduced called "Foo",
			 * the link should instead be moved to the iwlinks table. If a new
			 * language is introduced called "Foo", or if there is a pagelink
			 * [[fr:Bar]] when interlanguage magic links are turned on, the
			 * link would have to be moved to the langlinks table. Let's put
			 * those cases in the too-hard basket for now. The consequences are
			 * not especially severe.
			 * @fixme Handle interwiki links, and pagelinks to Category:, File:
			 *   which probably need reparsing.
			 */
			$this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
			$this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

			// The redirect table has interwiki links randomly mixed in, we
			// need to filter those out. For example [[w:Foo:Bar]] would
			// have rd_interwiki=w and rd_namespace=0, which would match the
			// query for a conflicting namespace "Foo" if filtering wasn't done.
			$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
				[ 'rd_interwiki' => null ] );
			$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
				[ 'rd_interwiki' => '' ] );
		}

		$this->output( "{$this->totalLinks} links to fix, " .
			"{$this->resolvableLinks} were resolvable.\n" );

		return $ok;
	}

	/**
	 * Get the interwiki list
	 *
	 * @return array The 'iw_prefix' value of every row returned by the interwiki lookup
	 */
	private function getInterwikiList() {
		$rows = MediaWikiServices::getInstance()->getInterwikiLookup()->getAllPrefixes();

		return array_column( $rows, 'iw_prefix' );
	}

	/**
	 * Check a given prefix and try to move it into the given destination namespace
	 *
	 * Every page found by getTargetList() is either moved, merged into the
	 * existing destination, or reported as unresolvable. Without --fix the
	 * intended action is only printed.
	 *
	 * @param int $ns Destination namespace id
	 * @param string $name
	 * @param array $options Associative array of validated command-line options
	 * @return bool True if no page was left unresolved
	 */
	private function checkNamespace( $ns, $name, $options ) {
		$targets = $this->getTargetList( $ns, $name, $options );
		$count = $targets->numRows();
		$this->totalPages += $count;
		if ( $count == 0 ) {
			return true;
		}

		$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';

		$ok = true;
		foreach ( $targets as $row ) {
			// Find the new title and determine the action to take

			$newTitle = $this->getDestinationTitle(
				$ns, $name, $row->page_namespace, $row->page_title );
			$logStatus = false;
			if ( !$newTitle ) {
				$logStatus = 'invalid title';
				$action = 'abort';
			} elseif ( $newTitle->exists() ) {
				if ( $options['merge'] ) {
					// canMerge() fills in $logStatus when it refuses.
					if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
						$action = 'merge';
					} else {
						$action = 'abort';
					}
				} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
					$action = 'abort';
					$logStatus = 'dest title exists and --add-prefix not specified';
				} else {
					$newTitle = $this->getAlternateTitle( $newTitle, $options );
					if ( !$newTitle ) {
						$action = 'abort';
						$logStatus = 'alternate title is invalid';
					} elseif ( $newTitle->exists() ) {
						$action = 'abort';
						$logStatus = 'title conflict';
					} else {
						$action = 'move';
						$logStatus = 'alternate';
					}
				}
			} else {
				$action = 'move';
				$logStatus = 'no conflict';
			}

			// Take the action or log a dry run message

			$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
			$pageOK = true;

			switch ( $action ) {
				case 'abort':
					$this->output( "$logTitle *** $logStatus\n" );
					$pageOK = false;
					break;
				case 'move':
					$this->output( "$logTitle -> " .
						$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );

					if ( $options['fix'] ) {
						$pageOK = $this->movePage( $row->page_id, $newTitle );
					}
					break;
				case 'merge':
					$this->output( "$logTitle => " .
						$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );

					if ( $options['fix'] ) {
						$pageOK = $this->mergePage( $row, $newTitle );
					}
					break;
			}

			if ( $pageOK ) {
				$this->resolvablePages++;
			} else {
				$ok = false;
			}
		}

		return $ok;
	}

	/**
	 * Check and repair the destination fields in a link table
	 *
	 * Rows are read in batches ordered by (title, from). Each row whose target
	 * is in namespace 0 and starts with "$name:" is counted, and with --fix its
	 * namespace and title fields are rewritten to the destination title.
	 *
	 * @param string $table The link table name
	 * @param string $fieldPrefix The field prefix in the link table
	 * @param int $ns Destination namespace id
	 * @param string $name
	 * @param array $options Associative array of validated command-line options
	 * @param array $extraConds Extra conditions for the SQL query
	 */
	private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
		$extraConds = []
	) {
		$dbw = $this->getDB( DB_MASTER );

		$batchConds = [];
		$fromField = "{$fieldPrefix}_from";
		$namespaceField = "{$fieldPrefix}_namespace";
		$titleField = "{$fieldPrefix}_title";
		$batchSize = 500;
		while ( true ) {
			$res = $dbw->select(
				$table,
				[ $fromField, $namespaceField, $titleField ],
				array_merge( $batchConds, $extraConds, [
					$namespaceField => 0,
					$titleField . $dbw->buildLike( "$name:", $dbw->anyString() )
				] ),
				__METHOD__,
				[
					'ORDER BY' => [ $titleField, $fromField ],
					'LIMIT' => $batchSize
				]
			);

			if ( $res->numRows() == 0 ) {
				break;
			}
			foreach ( $res as $row ) {
				$logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
					"dbk={$row->$titleField}";
				$destTitle = $this->getDestinationTitle(
					$ns, $name, $row->$namespaceField, $row->$titleField );
				$this->totalLinks++;
				if ( !$destTitle ) {
					$this->output( "$table $logTitle *** INVALID\n" );
					continue;
				}
				$this->resolvableLinks++;
				if ( !$options['fix'] ) {
					$this->output( "$table $logTitle -> " .
						$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
					continue;
				}

				$dbw->update( $table,
					// SET
					[
						$namespaceField => $destTitle->getNamespace(),
						$titleField => $destTitle->getDBkey()
					],
					// WHERE
					[
						$namespaceField => 0,
						$titleField => $row->$titleField,
						$fromField => $row->$fromField
					],
					__METHOD__,
					[ 'IGNORE' ]
				);
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . "\n" );
			}

			// $row still holds the last row of this batch; resume after it.
			$encLastTitle = $dbw->addQuotes( $row->$titleField );
			$encLastFrom = $dbw->addQuotes( $row->$fromField );

			$batchConds = [
				"$titleField > $encLastTitle " .
				"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" ];

			wfWaitForSlaves();
		}
	}

	/**
	 * Move the given pseudo-namespace, either replacing the colon with a hyphen
	 * (useful for pseudo-namespaces that conflict with interwiki links) or move
	 * them to another namespace if specified.
	 * @param array $options Associative array of validated command-line options
	 * @return bool
	 */
	private function checkPrefix( $options ) {
		$prefix = $options['source-pseudo-namespace'];
		$ns = $options['dest-namespace'];
		$this->output( "Checking prefix \"$prefix\" vs namespace $ns\n" );

		return $this->checkNamespace( $ns, $prefix, $options );
	}

	/**
	 * Find pages in main and talk namespaces that have a prefix of the new
	 * namespace so we know titles that will need migrating
	 *
	 * The talk namespace is only searched when --move-talk is given and $ns is
	 * a subject namespace.
	 *
	 * @param int $ns Destination namespace id
	 * @param string $name Prefix that is being made a namespace
	 * @param array $options Associative array of validated command-line options
	 *
	 * @return IResultWrapper Rows with page_id, page_title and page_namespace
	 */
	private function getTargetList( $ns, $name, $options ) {
		$dbw = $this->getDB( DB_MASTER );

		$checkNamespaces = NS_MAIN;
		if (
			$options['move-talk'] &&
			MediaWikiServices::getInstance()->getNamespaceInfo()->isSubject( $ns )
		) {
			$checkNamespaces = [ NS_MAIN, NS_TALK ];
		}

		return $dbw->select( 'page',
			[
				'page_id',
				'page_title',
				'page_namespace',
			],
			[
				'page_namespace' => $checkNamespaces,
				'page_title' . $dbw->buildLike( "$name:", $dbw->anyString() ),
			],
			__METHOD__
		);
	}

	/**
	 * Get the preferred destination title for a given target page.
	 * @param int $ns The destination namespace ID
	 * @param string $name The conflicting prefix
	 * @param int $sourceNs The source namespace
	 * @param string $sourceDbk The source DB key (i.e. page_title)
	 * @return Title|false False if the title is invalid or cannot exist
	 */
	private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk ) {
		// Strip the leading "$name:" from the source key.
		$dbk = substr( $sourceDbk, strlen( "$name:" ) );
		if ( $ns == 0 ) {
			// An interwiki; try an alternate encoding with '-' for ':'
			$dbk = "$name-" . $dbk;
		}
		$destNS = $ns;
		$nsInfo = MediaWikiServices::getInstance()->getNamespaceInfo();
		if ( $sourceNs == NS_TALK && $nsInfo->isSubject( $ns ) ) {
			// This is an associated talk page moved with the --move-talk feature.
			$destNS = $nsInfo->getTalk( $destNS );
		}
		$newTitle = Title::makeTitleSafe( $destNS, $dbk );
		if ( !$newTitle || !$newTitle->canExist() ) {
			return false;
		}
		return $newTitle;
	}

	/**
	 * Get an alternative title to move a page to. This is used if the
	 * preferred destination title already exists.
	 *
	 * The --add-prefix / --add-suffix text is applied repeatedly until a title
	 * is found that does not exist yet.
	 *
	 * @param LinkTarget $linkTarget
	 * @param array $options Associative array of validated command-line options
	 * @return Title|bool A non-existing title, or false if none can be built
	 */
	private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
		$prefix = $options['add-prefix'];
		$suffix = $options['add-suffix'];
		if ( $prefix == '' && $suffix == '' ) {
			return false;
		}

		$namespace = $linkTarget->getNamespace();
		$dbk = $linkTarget->getDBkey();
		while ( true ) {
			$title = Title::makeTitleSafe( $namespace, $prefix . $dbk . $suffix );
			if ( !$title ) {
				return false;
			}
			if ( !$title->exists() ) {
				return $title;
			}
			if ( $title->getDBkey() === $dbk ) {
				// Title normalisation discarded the added text, so further
				// attempts would test the same title forever.
				return false;
			}
			// Build the next candidate on top of the one that is taken;
			// rebuilding from the original key would never make progress.
			$dbk = $title->getDBkey();
		}
	}

	/**
	 * Move a page
	 *
	 * Rewrites the page row and the *_from_namespace field of the page's rows
	 * in pagelinks, templatelinks and imagelinks.
	 *
	 * @param int $id The page_id
	 * @param LinkTarget $newLinkTarget The new title link target
	 * @return bool Always true
	 */
	private function movePage( $id, LinkTarget $newLinkTarget ) {
		$dbw = $this->getDB( DB_MASTER );
		$newNamespace = $newLinkTarget->getNamespace();

		$dbw->update( 'page',
			[
				"page_namespace" => $newNamespace,
				"page_title" => $newLinkTarget->getDBkey(),
			],
			[
				"page_id" => $id,
			],
			__METHOD__ );

		// Update *_from_namespace in links tables
		$fromNamespaceTables = [
			'pagelinks' => 'pl',
			'templatelinks' => 'tl',
			'imagelinks' => 'il',
		];
		foreach ( $fromNamespaceTables as $table => $fieldPrefix ) {
			$dbw->update( $table,
				// SET
				[ "{$fieldPrefix}_from_namespace" => $newNamespace ],
				// WHERE
				[ "{$fieldPrefix}_from" => $id ],
				__METHOD__ );
		}

		return true;
	}

	/**
	 * Determine if we can merge a page.
	 * We check if an inaccessible revision would become the latest and
	 * deny the merge if so -- it's theoretically possible to update the
	 * latest revision, but opens a can of worms -- search engine updates,
	 * recentchanges review, etc.
	 *
	 * @param int $id The page_id
	 * @param LinkTarget $linkTarget The new link target
	 * @param string &$logStatus This is set to the log status message on failure
	 * @return bool
	 */
	private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
		$latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
		$latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );
		if ( !$latestDest || !$latestSource ) {
			// Without both revisions the timestamps cannot be compared;
			// refuse the merge rather than crash on a null revision.
			$logStatus = 'cannot merge since a latest revision could not be loaded';
			return false;
		}
		if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
			$logStatus = 'cannot merge since source is later';
			return false;
		}

		return true;
	}

	/**
	 * Merge page histories
	 *
	 * Reassigns the source page's revisions to the destination page, deletes
	 * the source page row, and queues a LinksDeletionUpdate for the old page.
	 *
	 * @param stdClass $row Page row
	 * @param Title $newTitle The new title
	 * @return bool Always true
	 */
	private function mergePage( $row, Title $newTitle ) {
		$dbw = $this->getDB( DB_MASTER );

		$id = $row->page_id;

		// Construct the WikiPage object we will need later, while the
		// page_id still exists. Note that this cannot use makeTitleSafe(),
		// we are deliberately constructing an invalid title.
		$sourceTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
		$sourceTitle->resetArticleID( $id );
		$wikiPage = new WikiPage( $sourceTitle );
		$wikiPage->loadPageData( 'fromdbmaster' );

		$destId = $newTitle->getArticleID();
		$this->beginTransaction( $dbw, __METHOD__ );
		$dbw->update( 'revision',
			// SET
			[ 'rev_page' => $destId ],
			// WHERE
			[ 'rev_page' => $id ],
			__METHOD__ );

		$dbw->delete( 'page', [ 'page_id' => $id ], __METHOD__ );

		$this->commitTransaction( $dbw, __METHOD__ );

		/* Call LinksDeletionUpdate to delete outgoing links from the old title,
		 * and update category counts.
		 *
		 * Calling external code with a fake broken Title is a fairly dubious
		 * idea. It's necessary because it's quite a lot of code to duplicate,
		 * but that also makes it fragile since it would be easy for someone to
		 * accidentally introduce an assumption of title validity to the code we
		 * are calling.
		 */
		DeferredUpdates::addUpdate( new LinksDeletionUpdate( $wikiPage ) );
		DeferredUpdates::doUpdates();

		return true;
	}
}
622
// Record the class implementing this script in $maintClass, then include the
// file named by the RUN_MAINTENANCE_IF_MAIN constant (defined outside this
// file; presumably it runs the class when this script is the entry point).
623 $maintClass = NamespaceDupes::class;
624 require_once RUN_MAINTENANCE_IF_MAIN;