*/
class WikiImporter {
private $reader = null;
+ private $foreignNamespaces = null;
private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
- private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
+ private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
private $mNoticeCallback, $mDebug;
private $mImportUploads, $mImageBasePath;
private $mNoUpdates = false;
+ /** @var Config */
+ private $config;
+ /** @var ImportTitleFactory */
+ private $importTitleFactory;
/**
* Creates an ImportXMLReader drawing from the source provided
* @param ImportStreamSource $source
+ * @param Config $config
*/
- function __construct( ImportStreamSource $source ) {
+ function __construct( ImportStreamSource $source, Config $config = null ) {
$this->reader = new XMLReader();
+ if ( !$config ) {
+ wfDeprecated( __METHOD__ . ' without a Config instance', '1.25' );
+ $config = ConfigFactory::getDefaultInstance()->makeConfig( 'main' );
+ }
+ $this->config = $config;
if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
$this->setUploadCallback( array( $this, 'importUpload' ) );
$this->setLogItemCallback( array( $this, 'importLogItem' ) );
$this->setPageOutCallback( array( $this, 'finishImportPage' ) );
+
+ $this->importTitleFactory = new NaiveImportTitleFactory();
}
/**
return $previous;
}
+ /**
+ * Sets the factory object used to convert ForeignTitle objects (titles as
+ * they appear in the dump) into local Title objects.
+ *
+ * The factory is stored as-is; processTitle() later calls
+ * createTitleFromForeignTitle() on it for every imported page.
+ *
+ * @param ImportTitleFactory $factory
+ */
+ public function setImportTitleFactory( ImportTitleFactory $factory ) {
+ $this->importTitleFactory = $factory;
+ }
+
/**
* Set a target namespace to override the defaults
* @param null|int $namespace
if ( is_null( $namespace ) ) {
// Don't override namespaces
$this->mTargetNamespace = null;
- } elseif ( $namespace >= 0 ) {
- // @todo FIXME: Check for validity
- $this->mTargetNamespace = intval( $namespace );
+ $this->setImportTitleFactory( new NaiveImportTitleFactory() );
+ return true;
+ } elseif (
+ $namespace >= 0 &&
+ MWNamespace::exists( intval( $namespace ) )
+ ) {
+ $namespace = intval( $namespace );
+ $this->mTargetNamespace = $namespace;
+ $this->setImportTitleFactory( new NamespaceImportTitleFactory( $namespace ) );
+ return true;
} else {
return false;
}
$status = Status::newGood();
if ( is_null( $rootpage ) ) {
// No rootpage
- $this->mTargetRootPage = null;
+ $this->setImportTitleFactory( new NaiveImportTitleFactory() );
} elseif ( $rootpage !== '' ) {
$rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
$title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace )
: $wgContLang->getNsText( $title->getNamespace() );
$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
} else {
- // set namespace to 'all', so the namespace check in processTitle() can passed
+ // set namespace to 'all', so the namespace check in processTitle() can pass
$this->setTargetNamespace( null );
- $this->mTargetRootPage = $title->getPrefixedDBkey();
+ $this->setImportTitleFactory( new SubpageImportTitleFactory( $title ) );
}
}
}
/**
* Mostly for hook use
* @param Title $title
- * @param string $origTitle
+ * @param ForeignTitle $foreignTitle
* @param int $revCount
* @param int $sRevCount
* @param array $pageInfo
* @return bool
*/
- public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
+ public function finishImportPage( $title, $foreignTitle, $revCount,
+ $sRevCount, $pageInfo ) {
+ // Forward every received argument, unchanged, to the AfterImportPage hook
$args = func_get_args();
- return wfRunHooks( 'AfterImportPage', $args );
+ return Hooks::run( 'AfterImportPage', $args );
}
/**
$this->debug( "-- Text: " . $revision->text );
}
+ /**
+ * Hand the parsed site info over to the registered site-info callback.
+ *
+ * The callback receives the site info array and this importer instance.
+ *
+ * @param array $siteInfo
+ * @return bool|mixed False when no callback is registered, otherwise
+ *   whatever the callback returns
+ */
+ private function siteInfoCallback( $siteInfo ) {
+ if ( !isset( $this->mSiteInfoCallback ) ) {
+ return false;
+ }
+
+ $callbackArgs = array( $siteInfo, $this );
+ return call_user_func_array( $this->mSiteInfoCallback, $callbackArgs );
+ }
+
/**
* Notify the callback function when a new "<page>" is reached.
* @param Title $title
/**
* Notify the callback function when a "</page>" is closed.
* @param Title $title
- * @param Title $origTitle
+ * @param ForeignTitle $foreignTitle
* @param int $revCount
* @param int $sucCount Number of revisions for which callback returned true
* @param array $pageInfo Associative array of page information
*/
- private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
+ private function pageOutCallback( $title, $foreignTitle, $revCount,
+ $sucCount, $pageInfo ) {
if ( isset( $this->mPageOutCallback ) ) {
$args = func_get_args();
call_user_func_array( $this->mPageOutCallback, $args );
$buffer = "";
while ( $this->reader->read() ) {
switch ( $this->reader->nodeType ) {
- case XmlReader::TEXT:
- case XmlReader::SIGNIFICANT_WHITESPACE:
+ case XMLReader::TEXT:
+ case XMLReader::SIGNIFICANT_WHITESPACE:
$buffer .= $this->reader->value;
break;
- case XmlReader::END_ELEMENT:
+ case XMLReader::END_ELEMENT:
return $buffer;
}
}
$tag = $this->reader->name;
$type = $this->reader->nodeType;
- if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
+ if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
// Do nothing
- } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
+ } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
break;
} elseif ( $tag == 'siteinfo' ) {
$this->handleSiteInfo();
return true;
}
- /**
- * @return bool
- * @throws MWException
- */
+ /**
+ * Parse the "<siteinfo>" element of the dump.
+ *
+ * Every "<namespace>" child is recorded in $this->foreignNamespaces, keyed
+ * by its "key" attribute. The sitename, base, generator and case children
+ * are collected verbatim. The collected data, with the namespace map added
+ * under '_namespaces', is then passed to siteInfoCallback().
+ */
private function handleSiteInfo() {
- // Site info is useful, but not actually used for dump imports.
- // Includes a quick short-circuit to save performance.
- if ( !$this->mSiteInfoCallback ) {
- $this->reader->next();
- return true;
+ $this->debug( "Enter site info handler." );
+ $siteInfo = array();
+
+ // Fields that can just be stuffed in the siteInfo object
+ $normalFields = array( 'sitename', 'base', 'generator', 'case' );
+
+ while ( $this->reader->read() ) {
+ // Stop once the closing "</siteinfo>" tag is reached
+ if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
+ $this->reader->name == 'siteinfo' ) {
+ break;
+ }
+
+ $tag = $this->reader->name;
+
+ if ( $tag == 'namespace' ) {
+ // Kept on the importer itself: processTitle() uses this map to
+ // decide which foreign title factory to build
+ $this->foreignNamespaces[ $this->nodeAttribute( 'key' ) ] =
+ $this->nodeContents();
+ } elseif ( in_array( $tag, $normalFields ) ) {
+ $siteInfo[$tag] = $this->nodeContents();
+ }
}
- throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
+
+ $siteInfo['_namespaces'] = $this->foreignNamespaces;
+ $this->siteInfoCallback( $siteInfo );
}
private function handleLogItem() {
'logtitle', 'params' );
while ( $this->reader->read() ) {
- if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
+ if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
$this->reader->name == 'logitem' ) {
break;
}
$tag = $this->reader->name;
- if ( !wfRunHooks( 'ImportHandleLogItemXMLTag', array(
+ if ( !Hooks::run( 'ImportHandleLogItemXMLTag', array(
$this, $logInfo
) ) ) {
// Do nothing
* @return bool|mixed
*/
private function processLogItem( $logInfo ) {
- $revision = new WikiRevision;
+ $revision = new WikiRevision( $this->config );
$revision->setID( $logInfo['id'] );
$revision->setType( $logInfo['type'] );
$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );
// Fields that can just be stuffed in the pageInfo object
- $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );
+ $normalFields = array( 'title', 'ns', 'id', 'redirect', 'restrictions' );
$skip = false;
$badTitle = false;
while ( $skip ? $this->reader->next() : $this->reader->read() ) {
- if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
+ if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
$this->reader->name == 'page' ) {
break;
}
+ $skip = false;
+
$tag = $this->reader->name;
if ( $badTitle ) {
// The title is invalid, bail out of this page
$skip = true;
- } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
+ } elseif ( !Hooks::run( 'ImportHandlePageXMLTag', array( $this,
&$pageInfo ) ) ) {
// Do nothing
} elseif ( in_array( $tag, $normalFields ) ) {
$pageInfo[$tag] = $this->nodeAttribute( 'title' );
} else {
$pageInfo[$tag] = $this->nodeContents();
- if ( $tag == 'title' ) {
- $title = $this->processTitle( $pageInfo['title'] );
+ }
+ } elseif ( $tag == 'revision' || $tag == 'upload' ) {
+ if ( !isset( $title ) ) {
+ $title = $this->processTitle( $pageInfo['title'],
+ isset( $pageInfo['ns'] ) ? $pageInfo['ns'] : null );
+
+ if ( !$title ) {
+ $badTitle = true;
+ $skip = true;
+ }
- if ( !$title ) {
- $badTitle = true;
- $skip = true;
- }
+ $this->pageCallback( $title );
+ list( $pageInfo['_title'], $foreignTitle ) = $title;
+ }
- $this->pageCallback( $title );
- list( $pageInfo['_title'], $origTitle ) = $title;
+ if ( $title ) {
+ if ( $tag == 'revision' ) {
+ $this->handleRevision( $pageInfo );
+ } else {
+ $this->handleUpload( $pageInfo );
}
}
- } elseif ( $tag == 'revision' ) {
- $this->handleRevision( $pageInfo );
- } elseif ( $tag == 'upload' ) {
- $this->handleUpload( $pageInfo );
} elseif ( $tag != '#text' ) {
$this->warn( "Unhandled page XML tag $tag" );
$skip = true;
}
}
- $this->pageOutCallback( $pageInfo['_title'], $origTitle,
+ $this->pageOutCallback( $pageInfo['_title'], $foreignTitle,
$pageInfo['revisionCount'],
$pageInfo['successfulRevisionCount'],
$pageInfo );
$skip = false;
while ( $skip ? $this->reader->next() : $this->reader->read() ) {
- if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
+ if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
$this->reader->name == 'revision' ) {
break;
}
$tag = $this->reader->name;
- if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', array(
+ if ( !Hooks::run( 'ImportHandleRevisionXMLTag', array(
$this, $pageInfo, $revisionInfo
) ) ) {
// Do nothing
* @return bool|mixed
*/
private function processRevision( $pageInfo, $revisionInfo ) {
- $revision = new WikiRevision;
+ $revision = new WikiRevision( $this->config );
if ( isset( $revisionInfo['id'] ) ) {
$revision->setID( $revisionInfo['id'] );
$skip = false;
while ( $skip ? $this->reader->next() : $this->reader->read() ) {
- if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
+ if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
$this->reader->name == 'upload' ) {
break;
}
$tag = $this->reader->name;
- if ( !wfRunHooks( 'ImportHandleUploadXMLTag', array(
+ if ( !Hooks::run( 'ImportHandleUploadXMLTag', array(
$this, $pageInfo
) ) ) {
// Do nothing
* @return mixed
*/
private function processUpload( $pageInfo, $uploadInfo ) {
- $revision = new WikiRevision;
+ $revision = new WikiRevision( $this->config );
$text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';
$revision->setTitle( $pageInfo['_title'] );
$info = array();
while ( $this->reader->read() ) {
- if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
+ if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
$this->reader->name == 'contributor' ) {
break;
}
/**
* @param string $text
+ * @param string|null $ns
* @return array|bool
*/
- private function processTitle( $text ) {
- global $wgCommandLineMode;
-
- $workTitle = $text;
- $origTitle = Title::newFromText( $workTitle );
-
- if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
- # makeTitleSafe, because $origTitle can have a interwiki (different setting of interwiki map)
- # and than dbKey can begin with a lowercase char
- $title = Title::makeTitleSafe( $this->mTargetNamespace,
- $origTitle->getDBkey() );
+ private function processTitle( $text, $ns = null ) {
+ if ( is_null( $this->foreignNamespaces ) ) {
+ $foreignTitleFactory = new NaiveForeignTitleFactory();
} else {
- if ( !is_null( $this->mTargetRootPage ) ) {
- $workTitle = $this->mTargetRootPage . '/' . $workTitle;
- }
- $title = Title::newFromText( $workTitle );
+ $foreignTitleFactory = new NamespaceAwareForeignTitleFactory(
+ $this->foreignNamespaces );
}
+ $foreignTitle = $foreignTitleFactory->createForeignTitle( $text,
+ intval( $ns ) );
+
+ $title = $this->importTitleFactory->createTitleFromForeignTitle(
+ $foreignTitle );
+
+ $commandLineMode = $this->config->get( 'CommandLineMode' );
if ( is_null( $title ) ) {
# Invalid page title? Ignore the page
- $this->notice( 'import-error-invalid', $workTitle );
+ $this->notice( 'import-error-invalid', $foreignTitle->getFullText() );
return false;
} elseif ( $title->isExternal() ) {
$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
} elseif ( !$title->canExist() ) {
$this->notice( 'import-error-special', $title->getPrefixedText() );
return false;
- } elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
+ } elseif ( !$title->userCan( 'edit' ) && !$commandLineMode ) {
# Do not import if the importing wiki user cannot edit this page
$this->notice( 'import-error-edit', $title->getPrefixedText() );
return false;
- } elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
+ } elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$commandLineMode ) {
# Do not import if the importing wiki user cannot create this page
$this->notice( 'import-error-create', $title->getPrefixedText() );
return false;
}
- return array( $title, $origTitle );
+ return array( $title, $foreignTitle );
}
}
/** This is a horrible hack used to keep source compatibility */
class UploadSourceAdapter {
/** @var array */
- private static $sourceRegistrations = array();
+ public static $sourceRegistrations = array();
/** @var string */
private $mSource;
*/
class WikiRevision {
/** @todo Unused? */
- private $importer = null;
+ public $importer = null;
/** @var Title */
public $title = null;
/** @var int */
- private $id = 0;
+ public $id = 0;
/** @var string */
public $timestamp = "20010115000000";
public $user_text = "";
/** @var string */
- protected $model = null;
+ public $model = null;
/** @var string */
- protected $format = null;
+ public $format = null;
/** @var string */
public $text = "";
protected $size;
/** @var Content */
- protected $content = null;
+ public $content = null;
/** @var ContentHandler */
protected $contentHandler = null;
public $comment = "";
/** @var bool */
- protected $minor = false;
+ public $minor = false;
/** @var string */
- protected $type = "";
+ public $type = "";
/** @var string */
- protected $action = "";
+ public $action = "";
/** @var string */
- protected $params = "";
+ public $params = "";
/** @var string */
- protected $fileSrc = '';
+ public $fileSrc = '';
/** @var bool|string */
- protected $sha1base36 = false;
+ public $sha1base36 = false;
/**
* @var bool
* @todo Unused?
*/
- private $isTemp = false;
+ public $isTemp = false;
/** @var string */
- protected $archiveName = '';
+ public $archiveName = '';
protected $filename;
protected $src;
/** @todo Unused? */
- private $fileIsTemp;
+ public $fileIsTemp;
/** @var bool */
private $mNoUpdates = false;
+ /** @var Config $config */
+ private $config;
+
+ /**
+ * @param Config $config Configuration object stored on the revision;
+ *   downloadSource() reads the 'EnableUploads' setting from it
+ */
+ public function __construct( Config $config ) {
+ $this->config = $config;
+ }
+
/**
* @param Title $title
* @throws MWException
* @return bool|string
*/
function downloadSource() {
- global $wgEnableUploads;
- if ( !$wgEnableUploads ) {
+ if ( !$this->config->get( 'EnableUploads' ) ) {
return false;
}
}
/**
- * @todo document (e.g. one-sentence class description).
+ * Used for importing XML dumps where the content of the dump is in a string.
+ * This class is inefficient, and should only be used for small dumps.
+ * For larger dumps, ImportStreamSource should be used instead.
+ *
* @ingroup SpecialPage
*/
class ImportStringSource {
}
/**
- * @todo document (e.g. one-sentence class description).
+ * Imports an XML dump from a file (either from file upload, files on disk, or HTTP)
* @ingroup SpecialPage
*/
class ImportStreamSource {