Handle nested li in ol or ul. That happens when someone uses something like:
[lhc/web/wiklou.git] / includes / SpecialImport.php
index 5765a50..91abcea 100644 (file)
@@ -3,20 +3,20 @@
  * MediaWiki page data importer
  * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
  * http://www.mediawiki.org/
- * 
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or 
+ * the Free Software Foundation; either version 2 of the License, or
  * (at your option) any later version.
- * 
+ *
  * This program is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU General Public License for more details.
- * 
+ *
  * You should have received a copy of the GNU General Public License along
  * with this program; if not, write to the Free Software Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  * http://www.gnu.org/copyleft/gpl.html
  *
  * @package MediaWiki
@@ -30,14 +30,13 @@ require_once( 'WikiError.php' );
  * Constructor
  */
 function wfSpecialImport( $page = '' ) {
-       global $wgUser, $wgOut, $wgLang, $wgRequest, $wgTitle;
-       global $wgImportSources;
-       
+       global $wgUser, $wgOut, $wgRequest, $wgTitle, $wgImportSources;
+
        ###
 #      $wgOut->addWikiText( "Special:Import is not ready for this beta release, sorry." );
 #      return;
        ###
-       
+
        if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') {
                switch( $wgRequest->getVal( "source" ) ) {
                case "upload":
@@ -55,7 +54,7 @@ function wfSpecialImport( $page = '' ) {
                default:
                        $source = new WikiError( "Unknown import source type" );
                }
-               
+
                if( WikiError::isError( $source ) ) {
                        $wgOut->addWikiText( wfEscapeWikiText( $source->getMessage() ) );
                } else {
@@ -70,9 +69,9 @@ function wfSpecialImport( $page = '' ) {
                        }
                }
        }
-       
+
        $action = $wgTitle->escapeLocalUrl( 'action=submit' );
-       
+
        if( $wgUser->isAllowed( 'importupload' ) ) {
                $wgOut->addWikiText( wfMsg( "importtext" ) );
                $wgOut->addHTML( "
@@ -92,7 +91,7 @@ function wfSpecialImport( $page = '' ) {
                        $wgOut->addWikiText( wfMsg( 'importnosources' ) );
                }
        }
-       
+
        if( !empty( $wgImportSources ) ) {
                $wgOut->addHTML( "
 <fieldset>
@@ -123,61 +122,79 @@ function wfSpecialImport( $page = '' ) {
  */
 class WikiRevision {
        var $title = NULL;
+       var $id = 0;
        var $timestamp = "20010115000000";
        var $user = 0;
        var $user_text = "";
        var $text = "";
        var $comment = "";
-       
+       var $minor = false;
+
        function setTitle( $text ) {
                $this->title = Title::newFromText( $text );
        }
-       
+
+       function setID( $id ) {
+               $this->id = $id;
+       }
+
        function setTimestamp( $ts ) {
                # 2003-08-05T18:30:02Z
-               $this->timestamp = preg_replace( '/^(....)-(..)-(..)T(..):(..):(..)Z$/', '$1$2$3$4$5$6', $ts );
+               $this->timestamp = wfTimestamp( TS_MW, $ts );
        }
-       
+
        function setUsername( $user ) {
                $this->user_text = $user;
        }
-       
+
        function setUserIP( $ip ) {
                $this->user_text = $ip;
        }
-       
+
        function setText( $text ) {
                $this->text = $text;
        }
-       
+
        function setComment( $text ) {
                $this->comment = $text;
        }
-       
+
+       function setMinor( $minor ) {
+               $this->minor = (bool)$minor;
+       }
+
        function getTitle() {
                return $this->title;
        }
-       
+
+       function getID() { 
+               return $this->id;
+       }
+
        function getTimestamp() {
                return $this->timestamp;
        }
-       
+
        function getUser() {
                return $this->user_text;
        }
-       
+
        function getText() {
                return $this->text;
        }
-       
+
        function getComment() {
                return $this->comment;
        }
 
+       function getMinor() {
+               return $this->minor;
+       }
+
        function importOldRevision() {
                $fname = "WikiImporter::importOldRevision";
                $dbw =& wfGetDB( DB_MASTER );
-               
+
                # Sneak a single revision into place
                $user = User::newFromName( $this->getUser() );
                if( $user ) {
@@ -188,21 +205,25 @@ class WikiRevision {
                        $userText = $this->getUser();
                }
 
+               // avoid memory leak...?
+               $linkCache =& LinkCache::singleton();
+               $linkCache->clear();
+
                $article = new Article( $this->title );
                $pageId = $article->getId();
                if( $pageId == 0 ) {
                        # must create the page...
                        $pageId = $article->insertOn( $dbw );
                }
-               
+
                # FIXME: Check for exact conflicts
                # FIXME: Use original rev_id optionally
                # FIXME: blah blah blah
-               
+
                #if( $numrows > 0 ) {
                #       return wfMsg( "importhistoryconflict" );
                #}
-               
+
                # Insert the row
                $revision = new Revision( array(
                        'page'       => $pageId,
@@ -211,11 +232,11 @@ class WikiRevision {
                        'user'       => $userId,
                        'user_text'  => $userText,
                        'timestamp'  => $this->timestamp,
-                       'minor_edit' => 0
+                       'minor_edit' => $this->minor,
                        ) );
                $revId = $revision->insertOn( $dbw );
                $article->updateIfNewerOn( $dbw, $revision );
-               
+
                return true;
        }
 
@@ -231,46 +252,50 @@ class WikiImporter {
        var $mPageCallback = null;
        var $mRevisionCallback = null;
        var $lastfield;
-       
+
        function WikiImporter( $source ) {
                $this->setRevisionCallback( array( &$this, "importRevision" ) );
                $this->mSource = $source;
        }
-       
+
        function throwXmlError( $err ) {
                $this->debug( "FAILURE: $err" );
+               wfDebug( "WikiImporter XML error: $err\n" );
        }
-       
+
        # --------------
-       
+
        function doImport() {
                if( empty( $this->mSource ) ) {
                        return new WikiErrorMsg( "importnotext" );
                }
-               
+
                $parser = xml_parser_create( "UTF-8" );
-               
+
                # case folding violates XML standard, turn it off
                xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
-               
-               xml_set_object( $parser, &$this );
+
+               xml_set_object( $parser, $this );
                xml_set_element_handler( $parser, "in_start", "" );
-               
+
+               $offset = 0; // for context extraction on error reporting
                do {
                        $chunk = $this->mSource->readChunk();
                        if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
-                               return new WikiXmlError( $parser );
+                               wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
+                               return new WikiXmlError( $parser, 'XML import parse failure', $chunk, $offset );
                        }
+                       $offset += strlen( $chunk );
                } while( $chunk !== false && !$this->mSource->atEnd() );
                xml_parser_free( $parser );
-               
+
                return true;
        }
-       
+
        function debug( $data ) {
                #wfDebug( "IMPORT: $data\n" );
        }
-       
+
        function notice( $data ) {
                global $wgCommandLineMode;
                if( $wgCommandLineMode ) {
@@ -280,7 +305,7 @@ class WikiImporter {
                        $wgOut->addHTML( "<li>$data</li>\n" );
                }
        }
-       
+
        /**
         * Sets the action to perform as each new page in the stream is reached.
         * @param callable $callback
@@ -291,7 +316,7 @@ class WikiImporter {
                $this->mPageCallback = $callback;
                return $previous;
        }
-       
+
        /**
         * Sets the action to perform as each page revision is reached.
         * @param callable $callback
@@ -302,7 +327,7 @@ class WikiImporter {
                $this->mRevisionCallback = $callback;
                return $previous;
        }
-       
+
        /**
         * Default per-revision callback, performs the import.
         * @param WikiRevision $revision
@@ -330,7 +355,7 @@ class WikiImporter {
                $this->debug( "-- Comment: " . $revision->comment );
                $this->debug( "-- Text: " . $revision->text );
        }
-       
+
        /**
         * Notify the callback function when a new <page> is reached.
         * @param Title $title
@@ -341,13 +366,13 @@ class WikiImporter {
                        call_user_func( $this->mPageCallback, $title );
                }
        }
-       
-       
+
+
        # XML parser callbacks from here out -- beware!
        function donothing( $parser, $x, $y="" ) {
                #$this->debug( "donothing" );
        }
-       
+
        function in_start( $parser, $name, $attribs ) {
                $this->debug( "in_start $name" );
                if( $name != "mediawiki" ) {
@@ -355,7 +380,7 @@ class WikiImporter {
                }
                xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
        }
-       
+
        function in_mediawiki( $parser, $name, $attribs ) {
                $this->debug( "in_mediawiki $name" );
                if( $name == 'siteinfo' ) {
@@ -373,13 +398,14 @@ class WikiImporter {
                }
                xml_set_element_handler( $parser, "donothing", "donothing" );
        }
-       
-       
+
+
        function in_siteinfo( $parser, $name, $attribs ) {
                // no-ops for now
                $this->debug( "in_siteinfo $name" );
                switch( $name ) {
                case "sitename":
+               case "base":
                case "generator":
                case "case":
                case "namespaces":
@@ -389,13 +415,13 @@ class WikiImporter {
                        return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
                }
        }
-       
+
        function out_siteinfo( $parser, $name ) {
                if( $name == "siteinfo" ) {
                        xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
                }
        }
-       
+
 
        function in_page( $parser, $name, $attribs ) {
                $this->debug( "in_page $name" );
@@ -418,18 +444,18 @@ class WikiImporter {
                        return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
                }
        }
-       
+
        function out_page( $parser, $name ) {
                $this->debug( "out_page $name" );
                if( $name != "page" ) {
                        return $this->throwXMLerror( "Expected </page>, got </$name>" );
                }
                xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
-               
+
                $this->workTitle = NULL;
                $this->workRevision = NULL;
        }
-       
+
        function in_nothing( $parser, $name, $attribs ) {
                $this->debug( "in_nothing $name" );
                return $this->throwXMLerror( "No child elements allowed here; got <$name>" );
@@ -445,12 +471,17 @@ class WikiImporter {
                }
                xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" );
                xml_set_character_data_handler( $parser, "donothing" );
-               
+
                switch( $this->appendfield ) {
                case "title":
                        $this->workTitle = $this->appenddata;
                        $this->pageCallback( $this->workTitle );
                        break;
+               case "id":
+                       if ( $this->parenttag == 'revision' ) {
+                               $this->workRevision->setID( $this->appenddata );
+                       }
+                       break;
                case "text":
                        $this->workRevision->setText( $this->appenddata );
                        break;
@@ -466,19 +497,23 @@ class WikiImporter {
                case "comment":
                        $this->workRevision->setComment( $this->appenddata );
                        break;
+               case "minor":
+                       $this->workRevision->setMinor( true );
+                       break;
                default:
                        $this->debug( "Bad append: {$this->appendfield}" );
                }
                $this->appendfield = "";
                $this->appenddata = "";
        }
-       
+
        function in_revision( $parser, $name, $attribs ) {
                $this->debug( "in_revision $name" );
                switch( $name ) {
                case "id":
                case "timestamp":
                case "comment":
+               case "minor":
                case "text":
                        $this->parenttag = "revision";
                        $this->appendfield = $name;
@@ -492,28 +527,28 @@ class WikiImporter {
                        return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
                }
        }
-       
+
        function out_revision( $parser, $name ) {
                $this->debug( "out_revision $name" );
                if( $name != "revision" ) {
                        return $this->throwXMLerror( "Expected </revision>, got </$name>" );
                }
                xml_set_element_handler( $parser, "in_page", "out_page" );
-               
-               $out = call_user_func( $this->mRevisionCallback,
-                       &$this->workRevision,
-                       &$this );
+
+               $out = call_user_func_array( $this->mRevisionCallback,
+                       array( &$this->workRevision, &$this ) );
                if( !empty( $out ) ) {
                        global $wgOut;
                        $wgOut->addHTML( "<li>" . $out . "</li>\n" );
                }
        }
-       
+
        function in_contributor( $parser, $name, $attribs ) {
                $this->debug( "in_contributor $name" );
                switch( $name ) {
                case "username":
                case "ip":
+               case "id":
                        $this->parenttag = "contributor";
                        $this->appendfield = $name;
                        xml_set_element_handler( $parser, "in_nothing", "out_append" );
@@ -523,7 +558,7 @@ class WikiImporter {
                        $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
                }
        }
-       
+
        function out_contributor( $parser, $name ) {
                $this->debug( "out_contributor $name" );
                if( $name != "contributor" ) {
@@ -540,11 +575,11 @@ class ImportStringSource {
                $this->mString = $string;
                $this->mRead = false;
        }
-       
+
        function atEnd() {
                return $this->mRead;
        }
-       
+
        function readChunk() {
                if( $this->atEnd() ) {
                        return false;
@@ -560,15 +595,15 @@ class ImportStreamSource {
        function ImportStreamSource( $handle ) {
                $this->mHandle = $handle;
        }
-       
+
        function atEnd() {
                return feof( $this->mHandle );
        }
-       
+
        function readChunk() {
                return fread( $this->mHandle, 32768 );
        }
-       
+
        function newFromFile( $filename ) {
                $file = @fopen( $filename, 'rt' );
                if( !$file ) {
@@ -578,10 +613,8 @@ class ImportStreamSource {
        }
 
        function newFromUpload( $fieldname = "xmlimport" ) {
-               global $wgOut;
-               
                $upload =& $_FILES[$fieldname];
-               
+
                if( !isset( $upload ) ) {
                        return new WikiErrorMsg( 'importnofile' );
                }
@@ -595,7 +628,7 @@ class ImportStreamSource {
                        return new WikiErrorMsg( 'importnofile' );
                }
        }
-       
+
        function newFromURL( $url ) {
                # fopen-wrappers are normally turned off for security.
                ini_set( "allow_url_fopen", true );
@@ -603,7 +636,7 @@ class ImportStreamSource {
                ini_set( "allow_url_fopen", false );
                return $ret;
        }
-       
+
        function newFromInterwiki( $interwiki, $page ) {
                $base = Title::getInterwikiLink( $interwiki );
                if( empty( $base ) ) {