Merge "Add tests for WikiMap and WikiReference"
[lhc/web/wiklou.git] / maintenance / namespaceDupes.php
1 <?php
2 /**
3 * Check for articles to fix after adding/deleting namespaces
4 *
5 * Copyright © 2005-2007 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup Maintenance
25 */
26
27 require_once __DIR__ . '/Maintenance.php';
28
29 /**
30 * Maintenance script that checks for articles to fix after
31 * adding/deleting namespaces.
32 *
33 * @ingroup Maintenance
34 */
35 class NamespaceConflictChecker extends Maintenance {
36
37 /**
38 * @var DatabaseBase
39 */
40 protected $db;
41
42 private $resolvablePages = 0;
43 private $totalPages = 0;
44
45 private $resolvableLinks = 0;
46 private $totalLinks = 0;
47
/**
 * Register the script description and the command-line options it accepts.
 */
public function __construct() {
	parent::__construct();
	// Shown in the --help output; it used to be an empty string, which left
	// the usage text without any explanation of what the script does.
	$this->mDescription = 'Find and fix pages affected by namespace addition/removal';
	$this->addOption( 'fix', 'Attempt to automatically fix errors' );
	$this->addOption( 'merge', "Instead of renaming conflicts, do a history merge with " .
		"the correct title" );
	// The trailing "false, true" arguments mark an option as not required
	// but taking a value.
	$this->addOption( 'add-suffix', "Dupes will be renamed with correct namespace with " .
		"<text> appended after the article name", false, true );
	$this->addOption( 'add-prefix', "Dupes will be renamed with correct namespace with " .
		"<text> prepended before the article name", false, true );
	$this->addOption( 'source-pseudo-namespace', "Move all pages with the given source " .
		"prefix (with an implied colon following it). If --dest-namespace is not specified, " .
		"the colon will be replaced with a hyphen.",
		false, true );
	$this->addOption( 'dest-namespace', "In combination with --source-pseudo-namespace, " .
		"specify the namespace ID of the destination.", false, true );
	$this->addOption( 'move-talk', "If this is specified, pages in the Talk namespace that " .
		"begin with a conflicting prefix will be renamed, for example " .
		"Talk:File:Foo -> File_Talk:Foo" );
}
68
/**
 * Script entry point: gather the command-line options, run either the
 * single-prefix check or the scan of all namespaces, and print a verdict.
 */
public function execute() {
	$this->db = wfGetDB( DB_MASTER );

	// Normalised view of the command line that is handed to every helper.
	$options = array(
		'fix' => $this->hasOption( 'fix' ),
		'merge' => $this->hasOption( 'merge' ),
		'add-suffix' => $this->getOption( 'add-suffix', '' ),
		'add-prefix' => $this->getOption( 'add-prefix', '' ),
		'move-talk' => $this->hasOption( 'move-talk' ),
		'source-pseudo-namespace' => $this->getOption( 'source-pseudo-namespace', '' ),
		'dest-namespace' => intval( $this->getOption( 'dest-namespace', 0 ) ) );

	// A source pseudo-namespace restricts the run to that one prefix.
	$singlePrefix = ( $options['source-pseudo-namespace'] !== '' );
	$allGood = $singlePrefix
		? $this->checkPrefix( $options )
		: $this->checkAll( $options );

	$this->output( $allGood ? "\nLooks good!\n" : "\nOh noeees\n" );
}
93
/**
 * Check every known prefix: interwiki prefixes, canonical and localised
 * namespace names, namespace aliases, and case variants of all of those.
 *
 * Pages are checked first via checkNamespace(); afterwards the pagelinks,
 * templatelinks and redirect tables are checked via checkLinkTable() for
 * every prefix that maps to a non-zero namespace index.
 *
 * @param array $options Associative array of validated command-line options
 *
 * @return bool False if checkNamespace() returned false for any prefix
 */
private function checkAll( $options ) {
	global $wgContLang, $wgNamespaceAliases, $wgCapitalLinks;

	// Prefix text => namespace index. Interwiki prefixes are entered first
	// with index 0, so a local namespace of the same name overwrites them.
	$prefixMap = array();
	foreach ( $this->getInterwikiList() as $iwPrefix ) {
		$prefixMap[$wgContLang->ucfirst( $iwPrefix )] = 0;
	}

	// Canonical names (this includes $wgExtraNamespaces) followed by the
	// content language's names. Empty names are skipped.
	$nameSources = array(
		MWNamespace::getCanonicalNamespaces(),
		$wgContLang->getNamespaces(),
	);
	foreach ( $nameSources as $namesByIndex ) {
		foreach ( $namesByIndex as $index => $text ) {
			if ( $text !== '' ) {
				$prefixMap[$text] = $index;
			}
		}
	}

	// Configured aliases, then the content language's aliases.
	$aliasSources = array(
		$wgNamespaceAliases,
		$wgContLang->getNamespaceAliases(),
	);
	foreach ( $aliasSources as $indexByAlias ) {
		foreach ( $indexByAlias as $text => $index ) {
			$prefixMap[$text] = $index;
		}
	}

	// The database searches are case-sensitive, so differently-cased
	// spellings of each prefix have to be registered as well.
	foreach ( $prefixMap as $text => $index ) {
		$lowered = $wgContLang->lc( $text );
		$variants = array(
			$wgContLang->uc( $text ),
			$wgContLang->ucfirst( $lowered ),
			$wgContLang->ucwords( $text ),
			$wgContLang->ucwords( $lowered ),
			$wgContLang->ucwordbreaks( $text ),
			$wgContLang->ucwordbreaks( $lowered ),
		);
		if ( !$wgCapitalLinks ) {
			// Titles may start with a lowercase letter on this wiki, so
			// add a lowercase-first form of each variant collected so far.
			foreach ( $variants as $variant ) {
				$variants[] = $wgContLang->lcfirst( $variant );
			}
			$variants[] = $wgContLang->lcfirst( $text );
		}
		foreach ( array_unique( $variants ) as $variant ) {
			if ( $variant !== $text ) {
				$prefixMap[$variant] = $index;
			}
		}
	}

	// Order by namespace index; entries sharing an index are ordered by
	// their prefix text.
	$indexOf = $prefixMap;
	uksort( $prefixMap, function ( $left, $right ) use ( $indexOf ) {
		$leftIndex = $indexOf[$left];
		$rightIndex = $indexOf[$right];
		if ( $leftIndex < $rightIndex ) {
			return -1;
		}
		if ( $leftIndex > $rightIndex ) {
			return 1;
		}
		if ( $left < $right ) {
			return -1;
		}
		return ( $left > $right ) ? 1 : 0;
	} );

	$allOk = true;
	foreach ( $prefixMap as $text => $index ) {
		if ( !$this->checkNamespace( $index, $text, $options ) ) {
			$allOk = false;
		}
	}

	$this->output(
		"{$this->totalPages} pages to fix, {$this->resolvablePages} were resolvable.\n\n"
	);

	foreach ( $prefixMap as $text => $index ) {
		if ( $index == 0 ) {
			// Index 0 entries are interwiki prefixes. Only link
			// destinations for non-interwiki prefixes are fixed up.
			//
			// Example of what is handled: a page contains [[Foo:Bar]] and
			// a Foo namespace is later introduced, so the stored link
			// needs its namespace changed to NS_FOO.
			//
			// Not handled: if "Foo" became an interwiki prefix the link
			// belongs in the iwlinks table, and if it became a language
			// code (or [[fr:Bar]] exists with interlanguage magic links
			// enabled) it belongs in langlinks. Those cases are left
			// alone for now; the consequences are not especially severe.
			//
			// @fixme Handle interwiki links, and pagelinks to Category:,
			// File: which probably need reparsing.
			continue;
		}

		$this->checkLinkTable( 'pagelinks', 'pl', $index, $text, $options );
		$this->checkLinkTable( 'templatelinks', 'tl', $index, $text, $options );

		// The redirect table mixes interwiki redirects in with local ones.
		// [[w:Foo:Bar]] is stored with rd_interwiki=w and rd_namespace=0
		// and would match the query for a conflicting "Foo" prefix, so
		// only rows with a null or empty rd_interwiki are considered.
		foreach ( array( null, '' ) as $noInterwiki ) {
			$this->checkLinkTable( 'redirect', 'rd', $index, $text, $options,
				array( 'rd_interwiki' => $noInterwiki ) );
		}
	}

	$this->output(
		"{$this->totalLinks} links to fix, {$this->resolvableLinks} were resolvable.\n"
	);

	return $allOk;
}
218
/**
 * Collect the iw_prefix value of every row returned by
 * Interwiki::getAllPrefixes().
 *
 * @return array List of interwiki prefixes
 */
private function getInterwikiList() {
	$prefixes = array_map(
		function ( $row ) {
			return $row['iw_prefix'];
		},
		Interwiki::getAllPrefixes()
	);

	// array_map() keeps the input keys; reindex to a plain list.
	return array_values( $prefixes );
}
233
/**
 * Find the pages whose title starts with "$name:" and, for each of them,
 * decide whether it can be moved or merged to its proper title. The
 * decision is always logged; it is only carried out when --fix was given.
 *
 * @param int $ns Destination namespace id
 * @param string $name The conflicting prefix
 * @param array $options Associative array of validated command-line options
 * @return bool False if any page was skipped or could not be fixed
 */
private function checkNamespace( $ns, $name, $options ) {
	$pages = $this->getTargetList( $ns, $name, $options );
	$found = $pages->numRows();
	$this->totalPages += $found;
	if ( $found == 0 ) {
		return true;
	}

	$dryRunNote = $options['fix'] ? '' : ' DRY RUN ONLY';
	$allOk = true;

	foreach ( $pages as $page ) {
		// Step 1: choose an action. Start from "abort" and upgrade to
		// "move" or "merge" only when a usable destination is found.
		$action = 'abort';
		$logStatus = false;
		$newTitle = $this->getDestinationTitle( $ns, $name,
			$page->page_namespace, $page->page_title, $options );

		if ( !$newTitle ) {
			$logStatus = 'invalid title';
		} elseif ( !$newTitle->exists() ) {
			$action = 'move';
			$logStatus = 'no conflict';
		} elseif ( $options['merge'] ) {
			// canMerge() fills in $logStatus when it refuses.
			if ( $this->canMerge( $page->page_id, $newTitle, $logStatus ) ) {
				$action = 'merge';
			}
		} elseif ( $options['add-prefix'] == '' && $options['add-suffix'] == '' ) {
			$logStatus = 'dest title exists and --add-prefix not specified';
		} else {
			$newTitle = $this->getAlternateTitle( $newTitle, $options );
			if ( !$newTitle ) {
				$logStatus = 'alternate title is invalid';
			} elseif ( $newTitle->exists() ) {
				$logStatus = 'title conflict';
			} else {
				$action = 'move';
				$logStatus = 'alternate';
			}
		}

		// Step 2: log the decision and, with --fix, act on it.
		$logTitle = "id={$page->page_id} ns={$page->page_namespace} dbk={$page->page_title}";
		$pageOK = true;

		if ( $action === 'abort' ) {
			$this->output( "$logTitle *** $logStatus\n" );
			$pageOK = false;
		} elseif ( $action === 'move' ) {
			$this->output( "$logTitle -> " .
				$newTitle->getPrefixedDBkey() . " ($logStatus)$dryRunNote\n" );
			if ( $options['fix'] ) {
				$pageOK = $this->movePage( $page->page_id, $newTitle );
			}
		} else {
			// The only remaining action is 'merge'.
			$this->output( "$logTitle => " .
				$newTitle->getPrefixedDBkey() . " (merge)$dryRunNote\n" );
			if ( $options['fix'] ) {
				$pageOK = $this->mergePage( $page, $newTitle );
			}
		}

		if ( $pageOK ) {
			$this->resolvablePages++;
		} else {
			$allOk = false;
		}
	}

	return $allOk;
}
328
/**
 * Check and repair the destination fields in a link table.
 *
 * Rows whose target is stored in namespace 0 with a title starting with
 * "$name:" are re-pointed at the title computed by getDestinationTitle().
 * Rows are read in batches ordered by (title, from). Without --fix the
 * intended changes are only logged.
 *
 * @param string $table The link table name
 * @param string $fieldPrefix The field prefix in the link table
 * @param int $ns Destination namespace id
 * @param string $name The conflicting prefix
 * @param array $options Associative array of validated command-line options
 * @param array $extraConds Extra conditions for the SQL query
 */
private function checkLinkTable( $table, $fieldPrefix, $ns, $name, $options,
	$extraConds = array()
) {
	$batchConds = array();
	$fromField = "{$fieldPrefix}_from";
	$namespaceField = "{$fieldPrefix}_namespace";
	$titleField = "{$fieldPrefix}_title";
	$batchSize = 500;
	while ( true ) {
		$res = $this->db->select(
			$table,
			array( $fromField, $namespaceField, $titleField ),
			array_merge( $batchConds, $extraConds, array(
				$namespaceField => 0,
				$titleField . $this->db->buildLike( "$name:", $this->db->anyString() )
			) ),
			__METHOD__,
			array(
				'ORDER BY' => array( $titleField, $fromField ),
				'LIMIT' => $batchSize
			)
		);

		if ( $res->numRows() == 0 ) {
			break;
		}
		foreach ( $res as $row ) {
			$logTitle = "from={$row->$fromField} ns={$row->$namespaceField} " .
				"dbk={$row->$titleField}";
			$destTitle = $this->getDestinationTitle( $ns, $name,
				$row->$namespaceField, $row->$titleField, $options );
			$this->totalLinks++;
			if ( !$destTitle ) {
				$this->output( "$table $logTitle *** INVALID\n" );
				continue;
			}
			$this->resolvableLinks++;
			if ( !$options['fix'] ) {
				$this->output( "$table $logTitle -> " .
					$destTitle->getPrefixedDBkey() . " DRY RUN\n" );
				continue;
			}

			// Identifies exactly the row that was just read.
			$rowConds = array(
				$namespaceField => 0,
				$titleField => $row->$titleField,
				$fromField => $row->$fromField
			);

			// The source page may already have a link row pointing at the
			// destination title. A plain UPDATE would then fail with a
			// duplicate-key error and stop the script, so key conflicts
			// are ignored here.
			$this->db->update( $table,
				// SET
				array(
					$namespaceField => $destTitle->getNamespace(),
					$titleField => $destTitle->getDBkey()
				),
				// WHERE
				$rowConds,
				__METHOD__,
				array( 'IGNORE' )
			);

			// A successfully updated row no longer matches $rowConds, so
			// this only removes a row that the UPDATE skipped because the
			// corrected link already exists.
			$this->db->delete( $table, $rowConds, __METHOD__ );

			$this->output( "$table $logTitle -> " .
				$destTitle->getPrefixedDBkey() . "\n" );
		}

		// Continue after the last row of this batch. $row is still set
		// because the batch contained at least one row.
		$encLastTitle = $this->db->addQuotes( $row->$titleField );
		$encLastFrom = $this->db->addQuotes( $row->$fromField );

		$batchConds = array(
			"$titleField > $encLastTitle " .
			"OR ($titleField = $encLastTitle AND $fromField > $encLastFrom)" );

		wfWaitForSlaves();
	}
}
408
/**
 * Handle a single pseudo-namespace given with --source-pseudo-namespace.
 * Its pages are checked against the namespace from --dest-namespace; when
 * that is 0 the colon after the prefix is replaced by a hyphen instead
 * (useful for pseudo-namespaces that conflict with interwiki links).
 *
 * @param array $options Associative array of validated command-line options
 * @return bool Result of checkNamespace() for the prefix
 */
private function checkPrefix( $options ) {
	$destNs = $options['dest-namespace'];
	$pseudoNs = $options['source-pseudo-namespace'];

	$this->output( "Checking prefix \"$pseudoNs\" vs namespace $destNs\n" );

	return $this->checkNamespace( $destNs, $pseudoNs, $options );
}
423
/**
 * Select the pages whose title begins with "$name:". The main namespace is
 * always searched; the Talk namespace is searched too when --move-talk was
 * given and $ns is a subject namespace.
 *
 * @param int $ns Destination namespace id
 * @param string $name Prefix that is being made a namespace
 * @param array $options Associative array of validated command-line options
 *
 * @return ResultWrapper Rows with page_id, page_title and page_namespace
 */
private function getTargetList( $ns, $name, $options ) {
	$checkNamespaces = NS_MAIN;
	if ( $options['move-talk'] && MWNamespace::isSubject( $ns ) ) {
		$checkNamespaces = array( NS_MAIN, NS_TALK );
	}

	$fields = array(
		'page_id',
		'page_title',
		'page_namespace',
	);
	$conds = array(
		'page_namespace' => $checkNamespaces,
		'page_title' . $this->db->buildLike( "$name:", $this->db->anyString() ),
	);

	return $this->db->select( 'page', $fields, $conds, __METHOD__ );
}
454
/**
 * Work out the preferred destination title for a conflicting page or link.
 *
 * The leading "$name:" is cut off the source DB key. For a destination
 * namespace of 0 the prefix is put back with a hyphen in place of the
 * colon; otherwise the remainder becomes the title in namespace $ns, or in
 * its talk namespace when the source is in NS_TALK and $ns is a subject
 * namespace.
 *
 * @param int $ns The destination namespace ID
 * @param string $name The conflicting prefix
 * @param int $sourceNs The source namespace
 * @param string $sourceDbk The source DB key (i.e. page_title)
 * @param array $options Associative array of validated command-line options
 *   (not used by this method)
 * @return Title|false False if no valid title that can exist could be built
 */
private function getDestinationTitle( $ns, $name, $sourceNs, $sourceDbk, $options ) {
	$remainder = substr( $sourceDbk, strlen( "$name:" ) );

	// Index 0 means an interwiki prefix: use '-' in place of ':'.
	$dbk = ( $ns == 0 ) ? "$name-" . $remainder : $remainder;

	// A talk page moved with the --move-talk feature goes to the talk
	// namespace associated with the destination.
	$destNS = ( $sourceNs == NS_TALK && MWNamespace::isSubject( $ns ) )
		? MWNamespace::getTalk( $ns )
		: $ns;

	$newTitle = Title::makeTitleSafe( $destNS, $dbk );
	if ( $newTitle && $newTitle->canExist() ) {
		return $newTitle;
	}

	return false;
}
481
/**
 * Build a fallback title for when the preferred destination already
 * exists. The --add-prefix / --add-suffix text is applied to the DB key
 * repeatedly until the result either does not exist yet or is invalid.
 *
 * @param Title $title The preferred (already existing) destination
 * @param array $options Associative array of validated command-line options
 * @return Title|bool A title that does not exist yet, or false if neither
 *   option was given or the generated title is invalid
 */
private function getAlternateTitle( $title, $options ) {
	$prefix = $options['add-prefix'];
	$suffix = $options['add-suffix'];
	if ( $prefix == '' && $suffix == '' ) {
		return false;
	}

	do {
		$title = Title::makeTitleSafe(
			$title->getNamespace(),
			$prefix . $title->getDBkey() . $suffix
		);
	} while ( $title && $title->exists() );

	return $title ? $title : false;
}
507
/**
 * Move a page by rewriting its row in the page table directly, then bring
 * the *_from_namespace columns of the link tables in line with the new
 * namespace.
 *
 * @param int $id The page_id
 * @param Title $newTitle The new title
 * @return bool Always true
 */
private function movePage( $id, Title $newTitle ) {
	$destNs = $newTitle->getNamespace();

	$this->db->update( 'page',
		// SET
		array(
			'page_namespace' => $destNs,
			'page_title' => $newTitle->getDBkey(),
		),
		// WHERE
		array( 'page_id' => $id ),
		__METHOD__ );

	// Link table name => field prefix, for the tables that record the
	// namespace of the linking page.
	$linkTables = array(
		'pagelinks' => 'pl',
		'templatelinks' => 'tl',
		'imagelinks' => 'il',
	);
	foreach ( $linkTables as $table => $fieldPrefix ) {
		$this->db->update( $table,
			// SET
			array( "{$fieldPrefix}_from_namespace" => $destNs ),
			// WHERE
			array( "{$fieldPrefix}_from" => $id ),
			__METHOD__ );
	}

	return true;
}
543
/**
 * Determine if we can merge a page.
 *
 * The merge is refused when the latest revision of the source page is
 * newer than the latest revision of the destination. It is theoretically
 * possible to update the destination's latest revision in that case, but
 * that opens a can of worms -- search engine updates, recentchanges
 * review, etc.
 *
 * The merge is also refused when the latest revision of either page cannot
 * be loaded, since the timestamps cannot be compared then.
 *
 * @param int $id The page_id of the source page
 * @param Title $newTitle The new title
 * @param string &$logStatus This is set to the log status message on failure
 * @return bool
 */
private function canMerge( $id, Title $newTitle, &$logStatus ) {
	$latestDest = Revision::newFromTitle( $newTitle, 0, Revision::READ_LATEST );
	$latestSource = Revision::newFromPageId( $id, 0, Revision::READ_LATEST );

	// Both lookups return null when no revision is found; calling
	// getTimestamp() on null would be a fatal error.
	if ( !$latestDest || !$latestSource ) {
		$logStatus = 'cannot merge since the latest revision could not be loaded';
		return false;
	}

	if ( $latestSource->getTimestamp() > $latestDest->getTimestamp() ) {
		$logStatus = 'cannot merge since source is later';
		return false;
	}

	return true;
}
566
/**
 * Merge page histories: reassign every revision of the source page to the
 * destination page, delete the source page row, and run the links deletion
 * update for the source page.
 *
 * @param stdClass $row Page row with page_id, page_namespace and page_title
 * @param Title $newTitle The title to merge into
 * @return bool Always true
 */
private function mergePage( $row, Title $newTitle ) {
	$sourceId = $row->page_id;

	// The WikiPage object is needed after the page row is gone, so build
	// it now while the page_id still exists. makeTitleSafe() cannot be
	// used here: the title being constructed is deliberately invalid.
	$brokenTitle = Title::makeTitle( $row->page_namespace, $row->page_title );
	$brokenTitle->resetArticleID( $sourceId );
	$sourcePage = new WikiPage( $brokenTitle );
	$sourcePage->loadPageData( 'fromdbmaster' );

	$targetId = $newTitle->getArticleID();

	$this->db->begin( __METHOD__ );

	// Hand all revisions of the source page over to the destination page.
	$this->db->update( 'revision',
		// SET
		array( 'rev_page' => $targetId ),
		// WHERE
		array( 'rev_page' => $sourceId ),
		__METHOD__ );

	$this->db->delete( 'page', array( 'page_id' => $sourceId ), __METHOD__ );

	// LinksDeletionUpdate deletes the outgoing links of the old title and
	// updates category counts.
	//
	// Passing a fake, broken Title to external code is fairly dubious. It
	// is done because duplicating that code here would be a lot of work,
	// but it is fragile: the called code could easily gain an assumption
	// that the title is valid.
	$linksCleanup = new LinksDeletionUpdate( $sourcePage );
	$linksCleanup->doUpdate();

	$this->db->commit( __METHOD__ );

	return true;
}
609 }
610
// Name the maintenance class defined in this file, then include the runner.
$maintClass = 'NamespaceConflictChecker';
require_once RUN_MAINTENANCE_IF_MAIN;