Merge "Add CollationFa"
[lhc/web/wiklou.git] / maintenance / namespaceDupes.php
1 <?php
2 /**
3 * Check for articles to fix after adding/deleting namespaces
4 *
5 * Copyright © 2005-2007 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup Maintenance
25 */
26
27 use MediaWiki\Linker\LinkTarget;
28
29 require_once __DIR__ . '/Maintenance.php';
30
31 /**
32 * Maintenance script that checks for articles to fix after
33 * adding/deleting namespaces.
34 *
35 * @ingroup Maintenance
36 */
37 class NamespaceConflictChecker extends Maintenance {
38
39 /**
40 * @var Database
41 */
42 protected $db;
43
44 private $resolvablePages = 0;
45 private $totalPages = 0;
46
47 private $resolvableLinks = 0;
48 private $totalLinks = 0;
49
/**
 * Register the script description and every command-line option it accepts.
 */
public function __construct() {
	parent::__construct();
	$this->addDescription( 'Find and fix pages affected by namespace addition/removal' );

	// Each entry is: option name, help text, whether the option takes a value.
	// The order is kept as-is so options are registered in the same sequence.
	$optionSpecs = [
		[ 'fix', 'Attempt to automatically fix errors', false ],
		[
			'merge',
			'Instead of renaming conflicts, do a history merge with the correct title',
			false
		],
		[
			'add-suffix',
			'Dupes will be renamed with correct namespace with ' .
				'<text> appended after the article name',
			true
		],
		[
			'add-prefix',
			'Dupes will be renamed with correct namespace with ' .
				'<text> prepended before the article name',
			true
		],
		[
			'source-pseudo-namespace',
			'Move all pages with the given source prefix ' .
				'(with an implied colon following it). ' .
				'If --dest-namespace is not specified, ' .
				'the colon will be replaced with a hyphen.',
			true
		],
		[
			'dest-namespace',
			'In combination with --source-pseudo-namespace, ' .
				'specify the namespace ID of the destination.',
			true
		],
		[
			'move-talk',
			'If this is specified, pages in the Talk namespace that ' .
				'begin with a conflicting prefix will be renamed, ' .
				'for example Talk:File:Foo -> File_Talk:Foo',
			false
		],
	];

	foreach ( $optionSpecs as $spec ) {
		list( $optName, $optHelp, $takesValue ) = $spec;
		if ( $takesValue ) {
			// Not required, but expects an argument
			$this->addOption( $optName, $optHelp, false, true );
		} else {
			$this->addOption( $optName, $optHelp );
		}
	}
}
70
/**
 * Script entry point.
 *
 * Collects the command-line options into a single array, then either
 * checks one pseudo-namespace prefix (when --source-pseudo-namespace is
 * given) or checks every known namespace name, and prints a verdict.
 */
public function execute() {
	$this->db = $this->getDB( DB_MASTER );

	$options = [];
	$options['fix'] = $this->hasOption( 'fix' );
	$options['merge'] = $this->hasOption( 'merge' );
	$options['add-suffix'] = $this->getOption( 'add-suffix', '' );
	$options['add-prefix'] = $this->getOption( 'add-prefix', '' );
	$options['move-talk'] = $this->hasOption( 'move-talk' );
	$options['source-pseudo-namespace'] = $this->getOption( 'source-pseudo-namespace', '' );
	// The destination namespace is always handled as an integer ID; default is 0.
	$options['dest-namespace'] = intval( $this->getOption( 'dest-namespace', 0 ) );

	$singlePrefixMode = $options['source-pseudo-namespace'] !== '';
	$allGood = $singlePrefixMode
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $allGood ? "\nLooks good!\n" : "\nOh noeees\n" );
}
95
/**
 * Check every known namespace name, alias and interwiki prefix.
 *
 * Builds a map of prefix => namespace ID (interwiki prefixes map to 0),
 * adds case variants of every prefix, checks pages for each prefix and
 * then checks the link tables for each prefix that maps to a non-zero
 * namespace.
 *
 * @param array $options Associative array of validated command-line options
 *
 * @return bool True if checkNamespace() succeeded for every prefix
 */
private function checkAll( $options ) {
	global $wgContLang, $wgNamespaceAliases, $wgCapitalLinks;

	// Interwiki prefixes are entered first so that a local namespace
	// with the same name overrides them further down.
	$spaces = [];
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$spaces[$wgContLang->ucfirst( $iwPrefix )] = 0;
	}

	// Namespace ID => name maps: canonical names (which include
	// $wgExtraNamespaces), then the content language's names.
	$namesById = [
		MWNamespace::getCanonicalNamespaces(),
		$wgContLang->getNamespaces(),
	];
	foreach ( $namesById as $nsNames ) {
		foreach ( $nsNames as $nsId => $nsName ) {
			if ( $nsName === '' ) {
				continue;
			}
			$spaces[$nsName] = $nsId;
		}
	}

	// Alias => namespace ID maps: configured aliases, then language aliases.
	$idsByAlias = [
		$wgNamespaceAliases,
		$wgContLang->getNamespaceAliases(),
	];
	foreach ( $idsByAlias as $aliasMap ) {
		foreach ( $aliasMap as $alias => $nsId ) {
			$spaces[$alias] = $nsId;
		}
	}

	// Database searches are case-sensitive, so register the differently
	// cased spellings of each prefix too. The foreach works on a snapshot
	// of $spaces, so entries added here are not themselves expanded.
	foreach ( $spaces as $name => $ns ) {
		$lowered = $wgContLang->lc( $name );
		$variants = [
			$wgContLang->uc( $name ),
			$wgContLang->ucfirst( $lowered ),
			$wgContLang->ucwords( $name ),
			$wgContLang->ucwords( $lowered ),
			$wgContLang->ucwordbreaks( $name ),
			$wgContLang->ucwordbreaks( $lowered ),
		];

		if ( !$wgCapitalLinks ) {
			// First letters are not forced to upper case, so the
			// lower-case-first form of every variant can exist as well.
			$lcFirstVariants = [];
			foreach ( $variants as $variant ) {
				$lcFirstVariants[] = $wgContLang->lcfirst( $variant );
			}
			$lcFirstVariants[] = $wgContLang->lcfirst( $name );
			$variants = array_merge( $variants, $lcFirstVariants );
		}

		foreach ( array_unique( $variants ) as $variant ) {
			if ( $variant !== $name ) {
				$spaces[$variant] = $ns;
			}
		}
	}

	// Order by namespace index first; equal indexes are ordered by name.
	$nsByName = $spaces;
	uksort( $spaces, function ( $left, $right ) use ( $nsByName ) {
		$leftNs = $nsByName[$left];
		$rightNs = $nsByName[$right];
		if ( $leftNs < $rightNs ) {
			return -1;
		}
		if ( $leftNs > $rightNs ) {
			return 1;
		}
		if ( $left < $right ) {
			return -1;
		}
		return ( $left > $right ) ? 1 : 0;
	} );

	// Every prefix is checked even after an earlier failure.
	$ok = true;
	foreach ( $spaces as $name => $ns ) {
		if ( !$this->checkNamespace( $ns, $name, $options ) ) {
			$ok = false;
		}
	}

	$this->output( "{$this->totalPages} pages to fix, " .
		"{$this->resolvablePages} were resolvable.\n\n" );

	foreach ( $spaces as $name => $ns ) {
		if ( $ns == 0 ) {
			// Index 0 is used for interwiki prefixes here; see below.
			continue;
		}

		/* Only link destinations for non-interwiki prefixes are fixed.
		 *
		 * Example: a page contains [[Foo:Bar]] and a Foo namespace is
		 * introduced afterwards; pagelinks then has to be updated to
		 * have page_namespace = NS_FOO.
		 *
		 * Had "Foo" been introduced as an interwiki prefix instead, the
		 * link would belong in the iwlinks table. Had "Foo" been added
		 * as a new language, or if there is a pagelink [[fr:Bar]] while
		 * interlanguage magic links are turned on, the link would belong
		 * in the langlinks table. Those cases are left in the too-hard
		 * basket for now; the consequences are not especially severe.
		 * @fixme Handle interwiki links, and pagelinks to Category:, File:
		 * which probably need reparsing.
		 */
		$this->checkLinkTable( 'pagelinks', 'pl', $ns, $name, $options );
		$this->checkLinkTable( 'templatelinks', 'tl', $ns, $name, $options );

		// The redirect table also holds interwiki redirects, which must be
		// filtered out: [[w:Foo:Bar]] would have rd_interwiki=w and
		// rd_namespace=0 and would otherwise match the query for a
		// conflicting namespace "Foo". Rows with a NULL rd_interwiki and
		// rows with an empty rd_interwiki are checked in separate passes.
		foreach ( [ null, '' ] as $localInterwikiValue ) {
			$this->checkLinkTable( 'redirect', 'rd', $ns, $name, $options,
				[ 'rd_interwiki' => $localInterwikiValue ] );
		}
	}

	$this->output( "{$this->totalLinks} links to fix, " .
		"{$this->resolvableLinks} were resolvable.\n" );

	return $ok;
}
220
/**
 * List the interwiki prefixes reported by Interwiki::getAllPrefixes().
 *
 * @return array List of the 'iw_prefix' value of each returned row
 */
private function getInterwikiList() {
	$extractPrefix = function ( $row ) {
		return $row['iw_prefix'];
	};

	// array_values() keeps the result a plain 0-based list
	return array_values( array_map( $extractPrefix, Interwiki::getAllPrefixes() ) );
}
235
/**
 * Check the pages whose title starts with the given prefix and, with
 * --fix, move or merge them into the destination namespace.
 *
 * For each page a destination title is computed and one of three actions
 * is chosen: 'move' (destination is free, or an alternate title built
 * from --add-prefix/--add-suffix is free), 'merge' (destination exists
 * and --merge is allowed by canMerge()) or 'abort'. Without --fix the
 * chosen action is only logged.
 *
 * @param int $ns Destination namespace id
 * @param string $name The conflicting title prefix
 * @param array $options Associative array of validated command-line options
 * @return bool False if any page was aborted or could not be fixed
 */
private function checkNamespace( $ns, $name, $options ) {
	$targets = $this->getTargetList( $ns, $name, $options );
	$numTargets = $targets->numRows();
	$this->totalPages += $numTargets;
	if ( $numTargets == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
	$noAlternateRequested = $options['add-prefix'] == '' && $options['add-suffix'] == '';

	$allOk = true;
	foreach ( $targets as $row ) {
		// Decide what to do with this page. 'abort' is the default and
		// is only replaced once a usable destination has been found.
		$action = 'abort';
		$logStatus = false;
		$newTitle = $this->getDestinationTitle( $ns, $name,
			$row->page_namespace, $row->page_title, $options );

		if ( !$newTitle ) {
			$logStatus = 'invalid title';
		} elseif ( !$newTitle->exists() ) {
			$action = 'move';
			$logStatus = 'no conflict';
		} elseif ( $options['merge'] ) {
			// canMerge() fills in $logStatus when it refuses
			if ( $this->canMerge( $row->page_id, $newTitle, $logStatus ) ) {
				$action = 'merge';
			}
		} elseif ( $noAlternateRequested ) {
			$logStatus = 'dest title exists and --add-prefix not specified';
		} else {
			$newTitle = $this->getAlternateTitle( $newTitle, $options );
			if ( !$newTitle ) {
				$logStatus = 'alternate title is invalid';
			} elseif ( $newTitle->exists() ) {
				$logStatus = 'title conflict';
			} else {
				$action = 'move';
				$logStatus = 'alternate';
			}
		}

		// Carry out the action, or just report it on a dry run

		$logTitle = "id={$row->page_id} ns={$row->page_namespace} dbk={$row->page_title}";
		$pageOK = true;

		if ( $action == 'abort' ) {
			$this->output( "$logTitle *** $logStatus\n" );
			$pageOK = false;
		} elseif ( $action == 'move' ) {
			$this->output( "$logTitle -> " .
				$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
			if ( $options['fix'] ) {
				$pageOK = $this->movePage( $row->page_id, $newTitle );
			}
		} elseif ( $action == 'merge' ) {
			$this->output( "$logTitle => " .
				$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
			if ( $options['fix'] ) {
				$pageOK = $this->mergePage( $row, $newTitle );
			}
		}

		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$allOk = false;
		}
	}

	return $allOk;
}
330
/**
 * Check and repair the destination fields in a link table.
 *
 * Rows whose destination is in namespace 0 with a title starting with
 * "$name:" are read in batches of 500, ordered by title and source page.
 * With --fix each row is updated to point at the destination title;
 * otherwise the change is only logged.
 *
 * @param string $table The link table name
 * @param string $fieldPrefix The field prefix in the link table
 * @param int $ns Destination namespace id
 * @param string $name The conflicting title prefix
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions for the SQL query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = []
) {
	$fromField = $fieldPrefix . '_from';
	$namespaceField = $fieldPrefix . '_namespace';
	$titleField = $fieldPrefix . '_title';
	$batchSize = 500;

	// These conditions are the same for every batch
	$prefixConds = [
		$namespaceField => 0,
		$titleField . $this->db->buildLike( "$name:", $this->db->anyString() )
	];
	$selectOptions = [
		'ORDER BY' => [ $titleField, $fromField ],
		'LIMIT' => $batchSize
	];

	// Empty for the first batch; afterwards it restricts the query to
	// rows sorting after the last row of the previous batch.
	$resumeConds = [];

	while ( true ) {
		$res = $this->db->select(
			$table,
			[ $fromField, $namespaceField, $titleField ],
			array_merge( $resumeConds, $extraConds, $prefixConds ),
			__METHOD__,
			$selectOptions
		);
		if ( $res->numRows() == 0 ) {
			break;
		}

		$lastRow = null;
		foreach ( $res as $linkRow ) {
			$lastRow = $linkRow;
			$srcFrom = $linkRow->$fromField;
			$srcNs = $linkRow->$namespaceField;
			$srcTitle = $linkRow->$titleField;
			$logTitle = "from=$srcFrom ns=$srcNs dbk=$srcTitle";

			$this->totalLinks++;
			$destTitle = $this->getDestinationTitle( $ns, $name, $srcNs, $srcTitle, $options );
			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}
			$this->resolvableLinks++;

			$logLine = "$table $logTitle -> " . $destTitle->getPrefixedDBkey();
			if ( !$options['fix'] ) {
				$this->output( $logLine . " DRY RUN\n" );
				continue;
			}

			// The update is issued with the IGNORE option
			$this->db->update( $table,
				// SET
				[
					$namespaceField => $destTitle->getNamespace(),
					$titleField => $destTitle->getDBkey()
				],
				// WHERE
				[
					$namespaceField => 0,
					$titleField => $srcTitle,
					$fromField => $srcFrom
				],
				__METHOD__,
				[ 'IGNORE' ]
			);
			$this->output( $logLine . "\n" );
		}

		// Continue after the last row seen, in (title, from) order
		$encLastTitle = $this->db->addQuotes( $lastRow->$titleField );
		$encLastFrom = $this->db->addQuotes( $lastRow->$fromField );
		$resumeConds = [
			"$titleField > $encLastTitle " .
				"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)"
		];

		wfWaitForSlaves();
	}
}
411
/**
 * Handle a single pseudo-namespace given with --source-pseudo-namespace.
 *
 * Pages with that prefix are either moved into the namespace given by
 * --dest-namespace, or (destination 0) get their colon replaced with a
 * hyphen, which is useful for pseudo-namespaces that conflict with
 * interwiki links.
 *
 * @param array $options Associative array of validated command-line options
 * @return bool Result of checkNamespace() for the prefix
 */
private function checkPrefix( $options ) {
	$sourcePrefix = $options['source-pseudo-namespace'];
	$destNs = $options['dest-namespace'];

	$this->output( 'Checking prefix "' . $sourcePrefix . '" vs namespace ' . $destNs . "\n" );

	return $this->checkNamespace( $destNs, $sourcePrefix, $options );
}
426
/**
 * Select the pages whose title starts with "$name:" and which therefore
 * need migrating into the new namespace.
 *
 * The main namespace is always searched. The Talk namespace is searched
 * too when --move-talk is set and $ns is a subject namespace.
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix that is being made a namespace
 * @param array $options Associative array of validated command-line options
 *
 * @return ResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
	$includeTalk = $options['move-talk'] && MWNamespace::isSubject( $ns );

	$conds = [
		'page_namespace' => $includeTalk ? [ NS_MAIN, NS_TALK ] : NS_MAIN,
		'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() ),
	];
	$fields = [ 'page_id', 'page_title', 'page_namespace' ];

	return $this->db->select( 'page', $fields, $conds, __METHOD__ );
}
457
/**
 * Get the preferred destination title for a given target page.
 *
 * The leading "$name:" is cut off the source DB key. When the destination
 * namespace is 0 the prefix is put back with a hyphen instead of the
 * colon. Source pages in the Talk namespace are sent to the talk
 * namespace associated with $ns when $ns is a subject namespace.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @param array $options Associative array of validated command-line options
 *   (not read by this method)
 * @return Title|false False if no valid title that can exist was produced
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options ) {
	$remainder = substr( $sourceDbk, strlen( "$name:" ) );

	// Namespace 0 means an interwiki prefix: use '-' in place of ':'
	$dbk = ( $ns == 0 ) ? "$name-" . $remainder : $remainder;

	// A talk page moved by the --move-talk feature goes to the talk
	// namespace belonging to the destination.
	$isMovedTalkPage = $sourceNs == NS_TALK && MWNamespace::isSubject( $ns );
	$destNS = $isMovedTalkPage ? MWNamespace::getTalk( $ns ) : $ns;

	$newTitle = Title::makeTitleSafe( $destNS, $dbk );
	if ( $newTitle && $newTitle->canExist() ) {
		return $newTitle;
	}

	return false;
}
484
/**
 * Get an alternative title to move a page to. This is used if the
 * preferred destination title already exists.
 *
 * The alternative is the preferred title's DB key with --add-prefix put
 * in front and --add-suffix appended, in the same namespace. The returned
 * title is NOT guaranteed to be unused: the caller has to check exists()
 * and report a conflict itself.
 *
 * This used to retry in a "while ( true )" loop, but every iteration
 * rebuilt exactly the same DB key, so an already-existing alternate
 * title made the script spin forever. The candidate is now built once.
 *
 * @param LinkTarget $linkTarget The preferred (already existing) destination
 * @param array $options Associative array of validated command-line options
 * @return Title|bool False if neither a prefix nor a suffix was given, or
 *   if the resulting title is invalid
 */
private function getAlternateTitle( LinkTarget $linkTarget, $options ) {
	$prefix = $options['add-prefix'];
	$suffix = $options['add-suffix'];
	if ( $prefix == '' && $suffix == '' ) {
		// Nothing to build an alternative from
		return false;
	}

	$dbk = $prefix . $linkTarget->getDBkey() . $suffix;
	$title = Title::makeTitleSafe( $linkTarget->getNamespace(), $dbk );

	// A falsy result means the title was invalid; callers test for false
	return $title ?: false;
}
510
/**
 * Move a page by rewriting its row in the page table directly, then
 * bring the *_from_namespace columns of the links tables in line.
 *
 * @param int $id The page_id
 * @param LinkTarget $newLinkTarget The new title link target
 * @return bool Always true
 */
private function movePage( $id, LinkTarget $newLinkTarget ) {
	$destNs = $newLinkTarget->getNamespace();

	$this->db->update( 'page',
		// SET
		[
			'page_namespace' => $destNs,
			'page_title' => $newLinkTarget->getDBkey(),
		],
		// WHERE
		[ 'page_id' => $id ],
		__METHOD__ );

	// Links tables that record the namespace of the source page,
	// as table name => field prefix
	$fromNamespaceTables = [
		'pagelinks' => 'pl',
		'templatelinks' => 'tl',
		'imagelinks' => 'il',
	];
	foreach ( $fromNamespaceTables as $table => $fieldPrefix ) {
		$this->db->update( $table,
			// SET
			[ $fieldPrefix . '_from_namespace' => $destNs ],
			// WHERE
			[ $fieldPrefix . '_from' => $id ],
			__METHOD__ );
	}

	return true;
}
546
/**
 * Determine if we can merge a page.
 * We check if an inaccessible revision would become the latest and
 * deny the merge if so -- it's theoretically possible to update the
 * latest revision, but opens a can of worms -- search engine updates,
 * recentchanges review, etc.
 *
 * The merge is also denied when the latest revision of either page
 * cannot be loaded, since the timestamps cannot be compared then.
 *
 * @param int $id The page_id of the source page
 * @param LinkTarget $linkTarget The new link target
 * @param string &$logStatus This is set to the log status message on failure
 * @return bool
 */
private function canMerge( $id, LinkTarget $linkTarget, &$logStatus ) {
	$latestDest = Revision::newFromTitle( $linkTarget, 0, Revision::READ_LATEST );
	$latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );

	// Either lookup may come back empty (e.g. a page row without any
	// revision); calling getTimestamp() on that would be a fatal error.
	if ( !$latestSource ) {
		$logStatus = 'cannot merge since the latest source revision could not be loaded';
		return false;
	}
	if ( !$latestDest ) {
		$logStatus = 'cannot merge since the latest destination revision could not be loaded';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
569
/**
 * Merge page histories.
 *
 * All revisions of the source page are re-pointed at the destination
 * page and the source page row is deleted, both inside one transaction.
 * Afterwards a LinksDeletionUpdate is run for the source page.
 *
 * @param stdClass $row Source page row; page_id, page_namespace and
 *   page_title are read
 * @param Title $newTitle The new title
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
	$sourceId = $row->page_id;

	// The WikiPage object is needed further down and has to be built
	// while the page_id still exists. makeTitleSafe() cannot be used
	// here: the title being constructed is deliberately an invalid one.
	$brokenTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$brokenTitle->resetArticleID( $sourceId );
	$sourcePage = new WikiPage( $brokenTitle );
	$sourcePage->loadPageData( 'fromdbmaster' );

	$targetId = $newTitle->getArticleID();

	$this->beginTransaction( $this->db, __METHOD__ );
	// Hand every revision of the source page over to the destination page
	$this->db->update( 'revision',
		// SET
		[ 'rev_page' => $targetId ],
		// WHERE
		[ 'rev_page' => $sourceId ],
		__METHOD__ );
	// The source page row itself is no longer needed
	$this->db->delete( 'page', [ 'page_id' => $sourceId ], __METHOD__ );
	$this->commitTransaction( $this->db, __METHOD__ );

	/* Run LinksDeletionUpdate to delete outgoing links from the old title,
	 * and update category counts.
	 *
	 * Handing a fake, broken Title to external code is a fairly dubious
	 * idea. It is done because duplicating that code here would be a lot,
	 * but it is fragile: someone could easily introduce an assumption of
	 * title validity into the code being called.
	 */
	DeferredUpdates::addUpdate( new LinksDeletionUpdate( $sourcePage ) );
	DeferredUpdates::doUpdates();

	return true;
}
615 }
616
// Name the maintenance class defined in this file, then include the
// bootstrap file named by RUN_MAINTENANCE_IF_MAIN (the constant is not
// defined in this file; presumably it comes from Maintenance.php).
$maintClass = 'NamespaceConflictChecker';
require_once RUN_MAINTENANCE_IF_MAIN;