Drop "left in" debugging var_dump from WikiImporter
[lhc/web/wiklou.git] / includes / Import.php
index 59fa583..3880e25 100644 (file)
@@ -40,12 +40,14 @@ class WikiImporter {
 
        /**
         * Creates an ImportXMLReader drawing from the source provided
-        * @param string $source
+        * @param ImportStreamSource $source
         */
-       function __construct( $source ) {
+       function __construct( ImportStreamSource $source ) {
                $this->reader = new XMLReader();
 
-               stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
+               if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
+                       stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
+               }
                $id = UploadSourceAdapter::registerSource( $source );
                if ( defined( 'LIBXML_PARSEHUGE' ) ) {
                        $this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
@@ -218,7 +220,11 @@ class WikiImporter {
                        $this->mTargetRootPage = null;
                } elseif ( $rootpage !== '' ) {
                        $rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
-                       $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace ) ? $this->mTargetNamespace : NS_MAIN );
+                       $title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace )
+                               ? $this->mTargetNamespace
+                               : NS_MAIN
+                       );
+
                        if ( !$title || $title->isExternal() ) {
                                $status->fatal( 'import-rootpage-invalid' );
                        } else {
@@ -259,7 +265,7 @@ class WikiImporter {
         * @return bool
         */
        public function importRevision( $revision ) {
-               if ( !$revision->getContent()->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
+               if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
                        $this->notice( 'import-error-bad-location',
                                $revision->getTitle()->getPrefixedText(),
                                $revision->getID(),
@@ -285,12 +291,12 @@ class WikiImporter {
 
        /**
         * Default per-revision callback, performs the import.
-        * @param WikiRevision $rev
+        * @param WikiRevision $revision
         * @return bool
         */
-       public function importLogItem( $rev ) {
+       public function importLogItem( $revision ) {
                $dbw = wfGetDB( DB_MASTER );
-               return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
+               return $dbw->deadlockLoop( array( $revision, 'importLogItem' ) );
        }
 
        /**
@@ -310,7 +316,7 @@ class WikiImporter {
         * @param int $revCount
         * @param int $sRevCount
         * @param array $pageInfo
-        * @return
+        * @return bool
         */
        public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
                $args = func_get_args();
@@ -387,6 +393,15 @@ class WikiImporter {
                }
        }
 
+       /**
+        * Retrieves the contents of the named attribute of the current element.
+        * @param string $attr The name of the attribute
+        * @return string|null The value of the attribute, or null if it is not set in the current element.
+        */
+       public function nodeAttribute( $attr ) {
+               return $this->reader->getAttribute( $attr );
+       }
+
        /**
         * Shouldn't something like this be built-in to XMLReader?
         * Fetches text contents of the current element, assuming
@@ -414,53 +429,12 @@ class WikiImporter {
                return '';
        }
 
-       # --------------
-
-       /** Left in for debugging */
-       private function dumpElement() {
-               static $lookup = null;
-               if ( !$lookup ) {
-                       $xmlReaderConstants = array(
-                               "NONE",
-                               "ELEMENT",
-                               "ATTRIBUTE",
-                               "TEXT",
-                               "CDATA",
-                               "ENTITY_REF",
-                               "ENTITY",
-                               "PI",
-                               "COMMENT",
-                               "DOC",
-                               "DOC_TYPE",
-                               "DOC_FRAGMENT",
-                               "NOTATION",
-                               "WHITESPACE",
-                               "SIGNIFICANT_WHITESPACE",
-                               "END_ELEMENT",
-                               "END_ENTITY",
-                               "XML_DECLARATION",
-                       );
-                       $lookup = array();
-
-                       foreach ( $xmlReaderConstants as $name ) {
-                               $lookup[constant( "XmlReader::$name" )] = $name;
-                       }
-               }
-
-               print var_dump(
-                       $lookup[$this->reader->nodeType],
-                       $this->reader->name,
-                       $this->reader->value
-               ) . "\n\n";
-       }
-
        /**
         * Primary entry point
         * @throws MWException
         * @return bool
         */
        public function doImport() {
-
                // Calls to reader->read need to be wrapped in calls to
                // libxml_disable_entity_loader() to avoid local file
                // inclusion attacks (bug 46932).
@@ -518,7 +492,7 @@ class WikiImporter {
        private function handleSiteInfo() {
                // Site info is useful, but not actually used for dump imports.
                // Includes a quick short-circuit to save performance.
-               if ( ! $this->mSiteInfoCallback ) {
+               if ( !$this->mSiteInfoCallback ) {
                        $this->reader->next();
                        return true;
                }
@@ -612,17 +586,28 @@ class WikiImporter {
                                                &$pageInfo ) ) ) {
                                // Do nothing
                        } elseif ( in_array( $tag, $normalFields ) ) {
-                               $pageInfo[$tag] = $this->nodeContents();
-                               if ( $tag == 'title' ) {
-                                       $title = $this->processTitle( $pageInfo['title'] );
+                               // An XML snippet:
+                               // <page>
+                               //     <id>123</id>
+                               //     <title>Page</title>
+                               //     <redirect title="NewTitle"/>
+                               //     ...
+                               // The redirect tag carries its value in the title attribute, not as element content, so it needs special handling.
+                               if ( $tag == 'redirect' ) {
+                                       $pageInfo[$tag] = $this->nodeAttribute( 'title' );
+                               } else {
+                                       $pageInfo[$tag] = $this->nodeContents();
+                                       if ( $tag == 'title' ) {
+                                               $title = $this->processTitle( $pageInfo['title'] );
 
-                                       if ( !$title ) {
-                                               $badTitle = true;
-                                               $skip = true;
-                                       }
+                                               if ( !$title ) {
+                                                       $badTitle = true;
+                                                       $skip = true;
+                                               }
 
-                                       $this->pageCallback( $title );
-                                       list( $pageInfo['_title'], $origTitle ) = $title;
+                                               $this->pageCallback( $title );
+                                               list( $pageInfo['_title'], $origTitle ) = $title;
+                                       }
                                }
                        } elseif ( $tag == 'revision' ) {
                                $this->handleRevision( $pageInfo );
@@ -690,9 +675,6 @@ class WikiImporter {
                if ( isset( $revisionInfo['id'] ) ) {
                        $revision->setID( $revisionInfo['id'] );
                }
-               if ( isset( $revisionInfo['text'] ) ) {
-                       $revision->setText( $revisionInfo['text'] );
-               }
                if ( isset( $revisionInfo['model'] ) ) {
                        $revision->setModel( $revisionInfo['model'] );
                }
@@ -701,6 +683,14 @@ class WikiImporter {
                }
                $revision->setTitle( $pageInfo['_title'] );
 
+               if ( isset( $revisionInfo['text'] ) ) {
+                       $handler = $revision->getContentHandler();
+                       $text = $handler->importTransform(
+                               $revisionInfo['text'],
+                               $revision->getFormat() );
+
+                       $revision->setText( $text );
+               }
                if ( isset( $revisionInfo['timestamp'] ) ) {
                        $revision->setTimestamp( $revisionInfo['timestamp'] );
                } else {
@@ -900,17 +890,23 @@ class WikiImporter {
 
 /** This is a horrible hack used to keep source compatibility */
 class UploadSourceAdapter {
-       static $sourceRegistrations = array();
+       /** @var array */
+       private static $sourceRegistrations = array();
 
+       /** @var string */
        private $mSource;
+
+       /** @var string */
        private $mBuffer;
+
+       /** @var int */
        private $mPosition;
 
        /**
-        * @param string $source
+        * @param ImportStreamSource $source
         * @return string
         */
-       static function registerSource( $source ) {
+       static function registerSource( ImportStreamSource $source ) {
                $id = wfRandomString();
 
                self::$sourceRegistrations[$id] = $source;
@@ -1013,59 +1009,88 @@ class UploadSourceAdapter {
        }
 }
 
-class XMLReader2 extends XMLReader {
-
-       /**
-        * @return bool|string
-        */
-       function nodeContents() {
-               if ( $this->isEmptyElement ) {
-                       return "";
-               }
-               $buffer = "";
-               while ( $this->read() ) {
-                       switch ( $this->nodeType ) {
-                       case XmlReader::TEXT:
-                       case XmlReader::SIGNIFICANT_WHITESPACE:
-                               $buffer .= $this->value;
-                               break;
-                       case XmlReader::END_ELEMENT:
-                               return $buffer;
-                       }
-               }
-               return $this->close();
-       }
-}
-
 /**
  * @todo document (e.g. one-sentence class description).
  * @ingroup SpecialPage
  */
 class WikiRevision {
-       var $importer = null;
-
-       /**
-        * @var Title
-        */
-       var $title = null;
-       var $id = 0;
-       var $timestamp = "20010115000000";
-       var $user = 0;
-       var $user_text = "";
-       var $model = null;
-       var $format = null;
-       var $text = "";
-       var $content = null;
-       var $comment = "";
-       var $minor = false;
-       var $type = "";
-       var $action = "";
-       var $params = "";
-       var $fileSrc = '';
-       var $sha1base36 = false;
-       var $isTemp = false;
-       var $archiveName = '';
-       var $fileIsTemp;
+       /** @todo Unused? */
+       private $importer = null;
+
+       /** @var Title */
+       public $title = null;
+
+       /** @var int */
+       private $id = 0;
+
+       /** @var string */
+       public $timestamp = "20010115000000";
+
+       /**
+        * @var int
+        * @todo No uses found. Kept public because the lack of uses is suspicious; needs clarification. */
+       public $user = 0;
+
+       /** @var string */
+       public $user_text = "";
+
+       /** @var string */
+       protected $model = null;
+
+       /** @var string */
+       protected $format = null;
+
+       /** @var string */
+       public $text = "";
+
+       /** @var int */
+       protected $size;
+
+       /** @var Content */
+       protected $content = null;
+
+       /** @var ContentHandler */
+       protected $contentHandler = null;
+
+       /** @var string */
+       public $comment = "";
+
+       /** @var bool */
+       protected $minor = false;
+
+       /** @var string */
+       protected $type = "";
+
+       /** @var string */
+       protected $action = "";
+
+       /** @var string */
+       protected $params = "";
+
+       /** @var string */
+       protected $fileSrc = '';
+
+       /** @var bool|string */
+       protected $sha1base36 = false;
+
+       /**
+        * @var bool
+        * @todo Unused?
+        */
+       private $isTemp = false;
+
+       /** @var string */
+       protected $archiveName = '';
+
+       protected $filename;
+
+       /** @var mixed */
+       protected $src;
+
+       /** @todo Unused? */
+       private $fileIsTemp;
+
+       /** @var bool */
        private $mNoUpdates = false;
 
        /**
@@ -1076,7 +1101,8 @@ class WikiRevision {
                if ( is_object( $title ) ) {
                        $this->title = $title;
                } elseif ( is_null( $title ) ) {
-                       throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
+                       throw new MWException( "WikiRevision given a null title in import. "
+                               . "You may need to adjust \$wgLegalTitleChars." );
                } else {
                        throw new MWException( "WikiRevision given non-object title in import." );
                }
@@ -1257,18 +1283,24 @@ class WikiRevision {
                return $this->text;
        }
 
+       /**
+        * @return ContentHandler
+        */
+       function getContentHandler() {
+               if ( is_null( $this->contentHandler ) ) {
+                       $this->contentHandler = ContentHandler::getForModelID( $this->getModel() );
+               }
+
+               return $this->contentHandler;
+       }
+
        /**
         * @return Content
         */
        function getContent() {
                if ( is_null( $this->content ) ) {
-                       $this->content =
-                               ContentHandler::makeContent(
-                                       $this->text,
-                                       $this->getTitle(),
-                                       $this->getModel(),
-                                       $this->getFormat()
-                               );
+                       $handler = $this->getContentHandler();
+                       $this->content = $handler->unserializeContent( $this->text, $this->getFormat() );
                }
 
                return $this->content;
@@ -1289,8 +1321,8 @@ class WikiRevision {
         * @return string
         */
        function getFormat() {
-               if ( is_null( $this->model ) ) {
-                       $this->format = ContentHandler::getForTitle( $this->getTitle() )->getDefaultFormat();
+               if ( is_null( $this->format ) ) {
+                       $this->format = $this->getContentHandler()->getDefaultFormat();
                }
 
                return $this->format;
@@ -1406,6 +1438,7 @@ class WikiRevision {
                $linkCache->clear();
 
                $page = WikiPage::factory( $this->title );
+               $page->loadPageData( 'fromdbmaster' );
                if ( !$page->exists() ) {
                        # must create the page...
                        $pageId = $page->insertOn( $dbw );
@@ -1438,7 +1471,8 @@ class WikiRevision {
                        'page' => $pageId,
                        'content_model' => $this->getModel(),
                        'content_format' => $this->getFormat(),
-                       'text' => $this->getContent()->serialize( $this->getFormat() ), //XXX: just set 'content' => $this->getContent()?
+                       //XXX: just set 'content' => $this->getContent()?
+                       'text' => $this->getContent()->serialize( $this->getFormat() ),
                        'comment' => $this->getComment(),
                        'user' => $userId,
                        'user_text' => $userText,
@@ -1450,7 +1484,11 @@ class WikiRevision {
 
                if ( $changed !== false && !$this->mNoUpdates ) {
                        wfDebug( __METHOD__ . ": running updates\n" );
-                       $page->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
+                       $page->doEditUpdates(
+                               $revision,
+                               $userObj,
+                               array( 'created' => $created, 'oldcountable' => $oldcountable )
+                       );
                }
 
                return true;
@@ -1482,8 +1520,9 @@ class WikiRevision {
                );
                // @todo FIXME: This could fail slightly for multiple matches :P
                if ( $prior ) {
-                       wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
-                               $this->timestamp . "\n" );
+                       wfDebug( __METHOD__
+                               . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp "
+                               . $this->timestamp . "\n" );
                        return;
                }
                $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
@@ -1680,13 +1719,18 @@ class ImportStreamSource {
                }
                if ( !empty( $upload['error'] ) ) {
                        switch ( $upload['error'] ) {
-                               case 1: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
+                               case 1:
+                                       # The uploaded file exceeds the upload_max_filesize directive in php.ini.
                                        return Status::newFatal( 'importuploaderrorsize' );
-                               case 2: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
+                               case 2:
+                                       # The uploaded file exceeds the MAX_FILE_SIZE directive that
+                                       # was specified in the HTML form.
                                        return Status::newFatal( 'importuploaderrorsize' );
-                               case 3: # The uploaded file was only partially uploaded
+                               case 3:
+                                       # The uploaded file was only partially uploaded
                                        return Status::newFatal( 'importuploaderrorpartial' );
-                               case 6: #Missing a temporary folder.
+                               case 6:
+                                       # Missing a temporary folder.
                                        return Status::newFatal( 'importuploaderrortemp' );
                                # case else: # Currently impossible
                        }
@@ -1731,7 +1775,9 @@ class ImportStreamSource {
         * @param int $pageLinkDepth
         * @return Status
         */
-       public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
+       public static function newFromInterwiki( $interwiki, $page, $history = false,
+               $templates = false, $pageLinkDepth = 0
+       ) {
                if ( $page == '' ) {
                        return Status::newFatal( 'import-noarticle' );
                }