reimplement r73652 having actually read coding conventions this time
[lhc/web/wiklou.git] / includes / ImportXMLReader.php
index 74183f3..9a9e6c4 100644 (file)
@@ -1,4 +1,10 @@
 <?php
+/**
+ * XML file reader for the page data importer
+ *
+ * @file
+ */
+
 /**
  * implements Special:Import
  * @ingroup SpecialPage
@@ -13,16 +19,17 @@ class WikiImporter {
         * Creates an ImportXMLReader drawing from the source provided
        */
        function __construct( $source ) {
-               $this->reader = new XMLReader();
-               
+               $this->reader = new XMLReader2();
+
                stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
                $id = UploadSourceAdapter::registerSource( $source );
                $this->reader->open( "uploadsource://$id" );
-               
+
                // Default callbacks
                $this->setRevisionCallback( array( $this, "importRevision" ) );
                $this->setUploadCallback( array( $this, 'importUpload' ) );
                $this->setLogItemCallback( array( $this, 'importLogItem' ) );
+               $this->setPageOutCallback( array( $this, 'finishImportPage' ) );
        }
 
        private function throwXmlError( $err ) {
@@ -31,11 +38,11 @@ class WikiImporter {
        }
 
        private function debug( $data ) {
-               if( $this->mDebug ) {
+               if( $this->mDebug ) {
                        wfDebug( "IMPORT: $data\n" );
-               }
+               }
        }
-       
+
        private function warn( $data ) {
                wfDebug( "IMPORT: $data\n" );
        }
@@ -49,7 +56,7 @@ class WikiImporter {
                        $wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
                }
        }
-       
+
        /**
         * Set debug mode...
         */
@@ -104,7 +111,7 @@ class WikiImporter {
                $this->mUploadCallback = $callback;
                return $previous;
        }
-       
+
        /**
         * Sets the action to perform as each log item reached.
         * @param $callback callback
@@ -115,7 +122,7 @@ class WikiImporter {
                $this->mLogItemCallback = $callback;
                return $previous;
        }
-       
+
        /**
         * Sets the action to perform when site info is encountered
         * @param $callback callback
@@ -141,7 +148,7 @@ class WikiImporter {
                        return false;
                }
        }
-       
+
        /**
         * Default per-revision callback, performs the import.
         * @param $revision WikiRevision
@@ -150,7 +157,7 @@ class WikiImporter {
                $dbw = wfGetDB( DB_MASTER );
                return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
        }
-       
+
        /**
         * Default per-revision callback, performs the import.
         * @param $rev WikiRevision
@@ -169,6 +176,14 @@ class WikiImporter {
                return false;
        }
 
+       /**
+        * Mostly for hook use
+        *
+        * Installed by the constructor as the default page-out callback;
+        * passes every argument it receives on to the 'AfterImportPage' hook.
+        *
+        * @param $title Title
+        * @param $origTitle Title
+        * @param $revCount Integer
+        * @param $sRevCount Integer: number of revisions for which callback returned true
+        * @param $pageInfo Array: associative array of page information
+        * @return whatever wfRunHooks() returns for 'AfterImportPage'
+        */
+       public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
+               $args = func_get_args();
+               return wfRunHooks( 'AfterImportPage', $args );
+       }
+
        /**
         * Alternate per-revision callback, for debugging.
         * @param $revision WikiRevision
@@ -185,13 +200,13 @@ class WikiImporter {
                $this->debug( "-- Comment: " . $revision->comment );
                $this->debug( "-- Text: " . $revision->text );
        }
-       
+
        /**
         * Notify the callback function when a new <page> is reached.
         * @param $title Title
         */
        function pageCallback( $title ) {
-               if( is_callable( $this->mPageCallback ) ) {
+               if( isset( $this->mPageCallback ) ) {
                        call_user_func( $this->mPageCallback, $title );
                }
        }
@@ -200,42 +215,43 @@ class WikiImporter {
         * Notify the callback function when a </page> is closed.
         * @param $title Title
         * @param $origTitle Title
-        * @param $revisionCount int
-        * @param $successCount Int: number of revisions for which callback returned true
+        * @param $revCount Integer
+        * @param $sucCount Int: number of revisions for which callback returned true
+        * @param $pageInfo Array: associative array of page information
         */
-       private function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
-               if( is_callable( $this->mPageOutCallback ) ) {
-                       call_user_func_array( $this->mPageOutCallback,
-                               array( $title, $origTitle, $revisionCount, $successCount ) );
+       private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
+               if( isset( $this->mPageOutCallback ) ) {
+                       $args = func_get_args();
+                       call_user_func_array( $this->mPageOutCallback, $args );
                }
        }
-       
+
        /**
         * Notify the callback function of a revision
         * @param $revision A WikiRevision object
         */
        private function revisionCallback( $revision ) {
-               if ( is_callable( $this->mRevisionCallback ) ) {
+               if ( isset( $this->mRevisionCallback ) ) {
                        return call_user_func_array( $this->mRevisionCallback,
                                        array( $revision, $this ) );
                } else {
                        return false;
                }
        }
-       
+
        /**
         * Notify the callback function of a new log item
         * @param $revision A WikiRevision object
         */
        private function logItemCallback( $revision ) {
-               if ( is_callable( $this->mLogItemCallback ) ) {
+               if ( isset( $this->mLogItemCallback ) ) {
                        return call_user_func_array( $this->mLogItemCallback,
                                        array( $revision, $this ) );
                } else {
                        return false;
                }
        }
-       
+
        /**
         * Shouldn't something like this be built-in to XMLReader?
         * Fetches text contents of the current element, assuming
@@ -244,25 +260,11 @@ class WikiImporter {
         * @access private
         */
        private function nodeContents() {
-               if( $this->reader->isEmptyElement ) {
-                       return "";
-               }
-               $buffer = "";
-               while( $this->reader->read() ) {
-                       switch( $this->reader->nodeType ) {
-                       case XmlReader::TEXT:
-                       case XmlReader::SIGNIFICANT_WHITESPACE:
-                               $buffer .= $this->reader->value;
-                               break;
-                       case XmlReader::END_ELEMENT:
-                               return $buffer;
-                       }
-               }
-               return $this->close();
+               return $this->reader->nodeContents();
        }
 
        # --------------
-       
+
        /** Left in for debugging */
        private function dumpElement() {
                static $lookup = null;
@@ -270,7 +272,7 @@ class WikiImporter {
                        $xmlReaderConstants = array(
                                "NONE",
                                "ELEMENT",
-                               "ATTRIBUTE", 
+                               "ATTRIBUTE",
                                "TEXT",
                                "CDATA",
                                "ENTITY_REF",
@@ -288,13 +290,13 @@ class WikiImporter {
                                "XML_DECLARATION",
                                );
                        $lookup = array();
-                       
+
                        foreach( $xmlReaderConstants as $name ) {
                                $lookup[constant("XmlReader::$name")] = $name;
                        }
                }
-               
-               print( var_dump( 
+
+               print( var_dump(
                        $lookup[$this->reader->nodeType],
                        $this->reader->name,
                        $this->reader->value
@@ -304,23 +306,23 @@ class WikiImporter {
        /**
         * Primary entry point
         */
-       public function doImport() {    
+       public function doImport() {
                $this->reader->read();
-               
+
                if ( $this->reader->name != 'mediawiki' ) {
                        throw new MWException( "Expected <mediawiki> tag, got ".
                                $this->reader->name );
                }
                $this->debug( "<mediawiki> tag is correct." );
-               
+
                $this->debug( "Starting primary dump processing loop." );
-               
+
                $keepReading = $this->reader->read();
                $skip = false;
                while ( $keepReading ) {
                        $tag = $this->reader->name;
                        $type = $this->reader->nodeType;
-                       
+
                        if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', $this->reader ) ) {
                                // Do nothing
                        } elseif ( $tag == 'mediawiki' && $type == XmlReader::END_ELEMENT ) {
@@ -329,14 +331,14 @@ class WikiImporter {
                                $this->handleSiteInfo();
                        } elseif ( $tag == 'page' ) {
                                $this->handlePage();
-                       } elseif ( $tag == 'logitem' ) {
-                               $this->handleLogItem();
+                       } elseif ( $tag == 'logitem' ) {
+                               $this->handleLogItem();
                        } elseif ( $tag != '#text' ) {
                                $this->warn( "Unhandled top-level XML tag $tag" );
-                               
+
                                $skip = true;
                        }
-                       
+
                        if ($skip) {
                                $keepReading = $this->reader->next();
                                $skip = false;
@@ -348,7 +350,7 @@ class WikiImporter {
 
                return true;
        }
-       
+
        private function handleSiteInfo() {
                // Site info is useful, but not actually used for dump imports.
                // Includes a quick short-circuit to save performance.
@@ -358,27 +360,27 @@ class WikiImporter {
                }
                throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
        }
-       
+
        private function handleLogItem() {
                $this->debug( "Enter log item handler." );
                $logInfo = array();
-               
+
                // Fields that can just be stuffed in the pageInfo object
                $normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
                                        'logtitle', 'params' );
-               
+
                while ( $this->reader->read() ) {
                        if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                                        $this->reader->name == 'logitem') {
                                break;
                        }
-                       
+
                        $tag = $this->reader->name;
-                       
+
                        if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
                                                $this->reader, $logInfo ) ) {
                                // Do nothing
-                       } if ( in_array( $tag, $normalFields ) ) {
+                       } elseif ( in_array( $tag, $normalFields ) ) {
                                $logInfo[$tag] = $this->nodeContents();
                        } elseif ( $tag == 'contributor' ) {
                                $logInfo['contributor'] = $this->handleContributor();
@@ -386,69 +388,69 @@ class WikiImporter {
                                $this->warn( "Unhandled log-item XML tag $tag" );
                        }
                }
-               
+
                $this->processLogItem( $logInfo );
        }
-       
+
        private function processLogItem( $logInfo ) {
                $revision = new WikiRevision;
-               
+
                $revision->setID( $logInfo['id'] );
                $revision->setType( $logInfo['type'] );
                $revision->setAction( $logInfo['action'] );
                $revision->setTimestamp( $logInfo['timestamp'] );
                $revision->setParams( $logInfo['params'] );
                $revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
-               
+
                if ( isset( $logInfo['comment'] ) ) {
                        $revision->setComment( $logInfo['comment'] );
                }
-               
+
                if ( isset( $logInfo['contributor']['ip'] ) ) {
                        $revision->setUserIP( $logInfo['contributor']['ip'] );
                }
                if ( isset( $logInfo['contributor']['username'] ) ) {
                        $revision->setUserName( $logInfo['contributor']['username'] );
                }
-               
+
                return $this->logItemCallback( $revision );
        }
-       
+
        private function handlePage() {
                // Handle page data.
                $this->debug( "Enter page handler." );
                $pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );
-               
+
                // Fields that can just be stuffed in the pageInfo object
                $normalFields = array( 'title', 'id', 'redirect', 'restrictions' );
-               
+
                $skip = false;
                $badTitle = false;
-               
+
                while ( $skip ? $this->reader->next() : $this->reader->read() ) {
                        if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                                        $this->reader->name == 'page') {
                                break;
                        }
-                       
+
                        $tag = $this->reader->name;
-                       
+
                        if ( $badTitle ) {
                                // The title is invalid, bail out of this page
                                $skip = true;
-                       } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', $this->reader,
-                                               $pageInfo ) ) {
+                       } elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this->reader,
+                                               &$pageInfo ) ) ) {
                                // Do nothing
-                       } if ( in_array( $tag, $normalFields ) ) {
+                       } elseif ( in_array( $tag, $normalFields ) ) {
                                $pageInfo[$tag] = $this->nodeContents();
                                if ( $tag == 'title' ) {
                                        $title = $this->processTitle( $pageInfo['title'] );
-                                       
+
                                        if ( !$title ) {
                                                $badTitle = true;
                                                $skip = true;
                                        }
-                                       
+
                                        $this->pageCallback( $title );
                                        list( $pageInfo['_title'], $origTitle ) = $title;
                                }
@@ -461,32 +463,33 @@ class WikiImporter {
                                $skip = true;
                        }
                }
-               
+
                $this->pageOutCallback( $pageInfo['_title'], $origTitle,
                                        $pageInfo['revisionCount'],
-                                       $pageInfo['successfulRevisionCount'] );
+                                       $pageInfo['successfulRevisionCount'],
+                                       $pageInfo );
        }
-       
+
        private function handleRevision( &$pageInfo ) {
                $this->debug( "Enter revision handler" );
                $revisionInfo = array();
-               
+
                $normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' );
-               
+
                $skip = false;
-               
+
                while ( $skip ? $this->reader->next() : $this->reader->read() ) {
                        if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                                        $this->reader->name == 'revision') {
                                break;
                        }
-                       
+
                        $tag = $this->reader->name;
-                       
+
                        if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', $this->reader,
                                                $pageInfo, $revisionInfo ) ) {
                                // Do nothing
-                       } if ( in_array( $tag, $normalFields ) ) {
+                       } elseif ( in_array( $tag, $normalFields ) ) {
                                $revisionInfo[$tag] = $this->nodeContents();
                        } elseif ( $tag == 'contributor' ) {
                                $revisionInfo['contributor'] = $this->handleContributor();
@@ -495,59 +498,59 @@ class WikiImporter {
                                $skip = true;
                        }
                }
-               
+
                $pageInfo['revisionCount']++;
                if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
                        $pageInfo['successfulRevisionCount']++;
                }
        }
-       
+
        private function processRevision( $pageInfo, $revisionInfo ) {
                $revision = new WikiRevision;
-               
+
                $revision->setID( $revisionInfo['id'] );
                $revision->setText( $revisionInfo['text'] );
                $revision->setTitle( $pageInfo['_title'] );
                $revision->setTimestamp( $revisionInfo['timestamp'] );
-               
+
                if ( isset( $revisionInfo['comment'] ) ) {
                        $revision->setComment( $revisionInfo['comment'] );
                }
-               
+
                if ( isset( $revisionInfo['minor'] ) )
                        $revision->setMinor( true );
-               
+
                if ( isset( $revisionInfo['contributor']['ip'] ) ) {
                        $revision->setUserIP( $revisionInfo['contributor']['ip'] );
                }
                if ( isset( $revisionInfo['contributor']['username'] ) ) {
                        $revision->setUserName( $revisionInfo['contributor']['username'] );
                }
-               
+
                return $this->revisionCallback( $revision );
        }
-       
+
        private function handleUpload( &$pageInfo ) {
                $this->debug( "Enter upload handler" );
                $uploadInfo = array();
-               
+
                $normalFields = array( 'timestamp', 'comment', 'filename', 'text',
                                        'src', 'size' );
-               
+
                $skip = false;
-               
+
                while ( $skip ? $this->reader->next() : $this->reader->read() ) {
                        if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                                        $this->reader->name == 'upload') {
                                break;
                        }
-                       
+
                        $tag = $this->reader->name;
-                       
+
                        if ( !wfRunHooks( 'ImportHandleUploadXMLTag', $this->reader,
                                                $pageInfo, $revisionInfo ) ) {
                                // Do nothing
-                       } if ( in_array( $tag, $normalFields ) ) {
+                       } elseif ( in_array( $tag, $normalFields ) ) {
                                $uploadInfo[$tag] = $this->nodeContents();
                        } elseif ( $tag == 'contributor' ) {
                                $uploadInfo['contributor'] = $this->handleContributor();
@@ -556,13 +559,13 @@ class WikiImporter {
                                $skip = true;
                        }
                }
-               
+
                return $this->processUpload( $pageInfo, $uploadInfo );
        }
-       
+
        private function processUpload( $pageInfo, $uploadInfo ) {
                $revision = new WikiRevision;
-               
+
                $revision->setTitle( $pageInfo['_title'] );
                $revision->setID( $uploadInfo['id'] );
                $revision->setTimestamp( $uploadInfo['timestamp'] );
@@ -571,49 +574,49 @@ class WikiImporter {
                $revision->setSrc( $uploadInfo['src'] );
                $revision->setSize( intval( $uploadInfo['size'] ) );
                $revision->setComment( $uploadInfo['comment'] );
-               
+
                if ( isset( $uploadInfo['contributor']['ip'] ) ) {
                        $revision->setUserIP( $revisionInfo['contributor']['ip'] );
                }
                if ( isset( $uploadInfo['contributor']['username'] ) ) {
                        $revision->setUserName( $revisionInfo['contributor']['username'] );
                }
-               
+
                return $this->uploadCallback( $revision );
        }
-       
+
        private function handleContributor() {
                $fields = array( 'id', 'ip', 'username' );
                $info = array();
-               
+
                while ( $this->reader->read() ) {
                        if ( $this->reader->nodeType == XmlReader::END_ELEMENT &&
                                        $this->reader->name == 'contributor') {
                                break;
                        }
-                       
+
                        $tag = $this->reader->name;
-                       
+
                        if ( in_array( $tag, $fields ) ) {
                                $info[$tag] = $this->nodeContents();
                        }
                }
-               
+
                return $info;
        }
-       
+
        private function processTitle( $text ) {
                $workTitle = $text;
                $origTitle = Title::newFromText( $workTitle );
                $title = null;
-               
+
                if( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
                        $title = Title::makeTitle( $this->mTargetNamespace,
                                $origTitle->getDBkey() );
                } else {
                        $title = Title::newFromText( $workTitle );
                }
-               
+
                if( is_null( $title ) ) {
                        // Invalid page title? Ignore the page
                        $this->notice( "Skipping invalid page title '$workTitle'" );
@@ -621,7 +624,7 @@ class WikiImporter {
                        $this->notice( "Skipping interwiki page title '$workTitle'" );
                        $title = null;
                }
-               
+
                return array( $origTitle, $title );
        }
 }
@@ -629,61 +632,61 @@ class WikiImporter {
 /** This is a horrible hack used to keep source compatibility */
 class UploadSourceAdapter {
        static $sourceRegistrations = array();
-       
+
        private $mSource;
        private $mBuffer;
        private $mPosition;
-       
+
        static function registerSource( $source ) {
                $id = wfGenerateToken();
-               
+
                self::$sourceRegistrations[$id] = $source;
-               
+
                return $id;
        }
-       
+
        function stream_open( $path, $mode, $options, &$opened_path ) {
                $url = parse_url($path);
                $id = $url['host'];
-               
+
                if ( !isset( self::$sourceRegistrations[$id] ) ) {
                        return false;
                }
-               
+
                $this->mSource = self::$sourceRegistrations[$id];
-               
+
                return true;
        }
-       
+
        function stream_read( $count ) {
                $return = '';
                $leave = false;
-               
+
                while ( !$leave && !$this->mSource->atEnd() &&
                                strlen($this->mBuffer) < $count ) {
                        $read = $this->mSource->readChunk();
-                       
+
                        if ( !strlen($read) ) {
-                               $leave = true;
+                               $leave = true;
                        }
-                       
+
                        $this->mBuffer .= $read;
                }
-       
+
                if ( strlen($this->mBuffer) ) {
                        $return = substr( $this->mBuffer, 0, $count );
                        $this->mBuffer = substr( $this->mBuffer, $count );
                }
-               
+
                $this->mPosition += strlen($return);
-               
+
                return $return;
        }
-       
+
        function stream_write( $data ) {
                return false;
        }
-       
+
        function stream_tell() {
                return $this->mPosition;
        }
@@ -694,7 +697,7 @@ class UploadSourceAdapter {
 
        function url_stat() {
                $result = array();
-               
+
                $result['dev'] = $result[0] = 0;
                $result['ino'] = $result[1] = 0;
                $result['mode'] = $result[2] = 0;
@@ -708,7 +711,27 @@ class UploadSourceAdapter {
                $result['ctime'] = $result[10] = 0;
                $result['blksize'] = $result[11] = 0;
                $result['blocks'] = $result[12] = 0;
-               
+
                return $result;
        }
 }
+
+/**
+ * XMLReader with a helper for fetching the text content of an element.
+ */
+class XMLReader2 extends XMLReader {
+       /**
+        * Collects the text content of the element the reader is positioned on.
+        *
+        * Reads forward, appending TEXT, CDATA and SIGNIFICANT_WHITESPACE nodes,
+        * and stops at the first END_ELEMENT node it meets, whichever element
+        * that node closes; nested child elements are therefore not handled.
+        *
+        * @return String: the collected text; "" for an empty element or when
+        *   the input runs out before an END_ELEMENT is seen
+        */
+       function nodeContents() {
+               if( $this->isEmptyElement ) {
+                       return "";
+               }
+               $buffer = "";
+               while( $this->read() ) {
+                       switch( $this->nodeType ) {
+                       case XMLReader::TEXT:
+                       case XMLReader::CDATA:
+                       case XMLReader::SIGNIFICANT_WHITESPACE:
+                               $buffer .= $this->value;
+                               break;
+                       case XMLReader::END_ELEMENT:
+                               return $buffer;
+                       }
+               }
+               // Input ended before the closing tag. Close the reader, but hand
+               // back a string: close() returns a boolean, which callers would
+               // otherwise store as the field's value.
+               $this->close();
+               return "";
+       }
+}