Handle missing namespace prefix in XML dumps more gracefully
author: This, that and the other <at.light@live.com.au>
Mon, 26 Dec 2016 01:58:16 +0000 (12:58 +1100)
committer: TTO <at.light@live.com.au>
Sun, 1 Jan 2017 09:11:45 +0000 (09:11 +0000)
If an XML dump of a wiki is exported using dumpBackup.php, and there are
pages in a namespace that is not registered (perhaps because of a missing
extension), they will appear in the dump in the form

<page> ... <title>PageTitle</title> <ns>1234</ns> ... </page>

This caused the ForeignTitle code to raise an undefined offset error,
because it assumed that the <title> element was of the form
"Namespace:PageTitle" when <ns> was nonzero. This assumption is not valid.

Now, the importation of such dumps will no longer throw errors and the
pages will be correctly imported, although possibly to unexpected
locations.

Bug: T114115
Change-Id: I0271435dc208e7ea118339584f8a0e359c96113a

includes/export/XmlDumpWriter.php
includes/title/NamespaceAwareForeignTitleFactory.php
tests/phpunit/includes/title/NamespaceAwareForeignTitleFactoryTest.php

index 5be166b..341e2aa 100644 (file)
@@ -431,6 +431,9 @@ class XmlDumpWriter {
                global $wgContLang;
                $prefix = $wgContLang->getFormattedNsText( $title->getNamespace() );
 
                global $wgContLang;
                $prefix = $wgContLang->getFormattedNsText( $title->getNamespace() );
 
+               // @todo Emit some kind of warning to the user if $title->getNamespace() !==
+               // NS_MAIN and $prefix === '' (viz. pages in an unregistered namespace)
+
                if ( $prefix !== '' ) {
                        $prefix .= ':';
                }
                if ( $prefix !== '' ) {
                        $prefix .= ':';
                }
index 2d67a28..4d24cb8 100644 (file)
@@ -115,15 +115,23 @@ class NamespaceAwareForeignTitleFactory implements ForeignTitleFactory {
        protected function parseTitleWithNs( $title, $ns ) {
                $pieces = explode( ':', $title, 2 );
 
        protected function parseTitleWithNs( $title, $ns ) {
                $pieces = explode( ':', $title, 2 );
 
+               // Is $title of the form Namespace:Title (true), or just Title (false)?
+               $titleIncludesNamespace = ( $ns != '0' && count( $pieces ) === 2 );
+
                if ( isset( $this->foreignNamespaces[$ns] ) ) {
                        $namespaceName = $this->foreignNamespaces[$ns];
                } else {
                if ( isset( $this->foreignNamespaces[$ns] ) ) {
                        $namespaceName = $this->foreignNamespaces[$ns];
                } else {
-                       $namespaceName = $ns == '0' ? '' : $pieces[0];
+                       // If the foreign wiki is misconfigured, XML dumps can contain a page with
+                       // a non-zero namespace ID, but whose title doesn't contain a colon
+                       // (T114115). In those cases, output a made-up namespace name to avoid
+                       // collisions. The ImportTitleFactory might replace this with something
+                       // more appropriate.
+                       $namespaceName = $titleIncludesNamespace ? $pieces[0] : "Ns$ns";
                }
 
                // We assume that the portion of the page title before the colon is the
                }
 
                // We assume that the portion of the page title before the colon is the
-               // namespace name, except in the case of namespace 0
-               if ( $ns != '0' ) {
+               // namespace name, except in the case of namespace 0.
+               if ( $titleIncludesNamespace ) {
                        $pageName = $pieces[1];
                } else {
                        $pageName = $title;
                        $pageName = $pieces[1];
                } else {
                        $pageName = $title;
index 76cedc6..520108a 100644 (file)
@@ -36,10 +36,18 @@ class NamespaceAwareForeignTitleFactoryTest extends MediaWikiTestCase {
                                'MainNamespaceArticle', null,
                                new ForeignTitle( 0, '', 'MainNamespaceArticle' ),
                        ],
                                'MainNamespaceArticle', null,
                                new ForeignTitle( 0, '', 'MainNamespaceArticle' ),
                        ],
+                       [
+                               'Magic:_The_Gathering', 0,
+                               new ForeignTitle( 0, '', 'Magic:_The_Gathering' ),
+                       ],
                        [
                                'Talk:Nice_talk', 1,
                                new ForeignTitle( 1, 'Talk', 'Nice_talk' ),
                        ],
                        [
                                'Talk:Nice_talk', 1,
                                new ForeignTitle( 1, 'Talk', 'Nice_talk' ),
                        ],
+                       [
+                               'Talk:Magic:_The_Gathering', 1,
+                               new ForeignTitle( 1, 'Talk', 'Magic:_The_Gathering' ),
+                       ],
                        [
                                'Bogus:Nice_talk', 0,
                                new ForeignTitle( 0, '', 'Bogus:Nice_talk' ),
                        [
                                'Bogus:Nice_talk', 0,
                                new ForeignTitle( 0, '', 'Bogus:Nice_talk' ),
@@ -56,6 +64,11 @@ class NamespaceAwareForeignTitleFactoryTest extends MediaWikiTestCase {
                                'Bogus:Nice_talk', 1,
                                new ForeignTitle( 1, 'Talk', 'Nice_talk' ),
                        ],
                                'Bogus:Nice_talk', 1,
                                new ForeignTitle( 1, 'Talk', 'Nice_talk' ),
                        ],
+                       // Misconfigured wiki with unregistered namespace (T114115)
+                       [
+                               'Nice_talk', 1234,
+                               new ForeignTitle( 1234, 'Ns1234', 'Nice_talk' ),
+                       ],
                ];
        }
 
                ];
        }