* Some enhancements to live preview
[lhc/web/wiklou.git] / includes / Export.php
index 7f34a80..f15c778 100644 (file)
@@ -1,62 +1,65 @@
 <?php
-# Copyright (C) 2003, 2005 Brion Vibber <brion@pobox.com>
+# Copyright (C) 2003, 2005, 2006 Brion Vibber <brion@pobox.com>
 # http://www.mediawiki.org/
-# 
+#
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or 
+# the Free Software Foundation; either version 2 of the License, or
 # (at your option) any later version.
-# 
+#
 # This program is distributed in the hope that it will be useful,
 # but WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 # GNU General Public License for more details.
-# 
+#
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 # http://www.gnu.org/copyleft/gpl.html
+
 /**
  *
- * @package MediaWiki
- * @subpackage SpecialPage
+ * @addtogroup SpecialPage
  */
 
-/** */
-require_once( 'Revision.php' );
+class WikiExporter {
+       var $list_authors = false ; # Return distinct author list (when not returning full history)
+       var $author_list = "" ;
 
-define( 'MW_EXPORT_FULL',     0 );
-define( 'MW_EXPORT_CURRENT',  1 );
+       const FULL = 0;
+       const CURRENT = 1;
 
-define( 'MW_EXPORT_BUFFER',   0 );
-define( 'MW_EXPORT_STREAM',   1 );
+       const BUFFER = 0;
+       const STREAM = 1;
 
+       const TEXT = 0;
+       const STUB = 1;
 
-/**
- * @package MediaWiki
- * @subpackage SpecialPage
- */
-class WikiExporter {
        /**
-        * If using MW_EXPORT_STREAM to stream a large amount of data,
+        * If using WikiExporter::STREAM to stream a large amount of data,
         * provide a database connection which is not managed by
         * LoadBalancer to read from: some history blob types will
         * make additional queries to pull source data while the
         * main query is still running.
         *
         * @param Database $db
-        * @param int $history one of MW_EXPORT_FULL or MW_EXPORT_CURRENT
-        * @param int $buffer one of MW_EXPORT_BUFFER or MW_EXPORT_STREAM
+        * @param mixed $history one of WikiExporter::FULL or WikiExporter::CURRENT, or an
+        *                       associative array:
+        *                         offset: non-inclusive offset at which to start the query
+        *                         limit: maximum number of rows to return
+        *                         dir: "asc" or "desc" timestamp order
+        * @param int $buffer one of WikiExporter::BUFFER or WikiExporter::STREAM
         */
-       function WikiExporter( &$db, $history = MW_EXPORT_CURRENT,
-                       $buffer = MW_EXPORT_BUFFER ) {
+       function __construct( &$db, $history = WikiExporter::CURRENT,
+                       $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) {
                $this->db =& $db;
                $this->history = $history;
                $this->buffer  = $buffer;
                $this->writer  = new XmlDumpWriter();
                $this->sink    = new DumpOutput();
+               $this->text    = $text;
        }
-       
+
        /**
         * Set the DumpOutput or DumpFilter object which will receive
         * various row objects and XML output for filtering. Filters
@@ -67,7 +70,7 @@ class WikiExporter {
        function setOutputSink( &$sink ) {
                $this->sink =& $sink;
        }
-       
+
        function openStream() {
                $output = $this->writer->openStream();
                $this->sink->writeOpenStream( $output );
@@ -86,7 +89,7 @@ class WikiExporter {
        function allPages() {
                return $this->dumpFrom( '' );
        }
-       
+
        /**
         * Dumps a series of page and revision records for those pages
         * in the database falling within the page_id range given.
@@ -101,7 +104,7 @@ class WikiExporter {
                }
                return $this->dumpFrom( $condition );
        }
-       
+
        /**
         * @param Title $title
         */
@@ -110,7 +113,7 @@ class WikiExporter {
                        'page_namespace=' . $title->getNamespace() .
                        ' AND page_title=' . $this->db->addQuotes( $title->getDbKey() ) );
        }
-       
+
        function pageByName( $name ) {
                $title = Title::newFromText( $name );
                if( is_null( $title ) ) {
@@ -119,62 +122,124 @@ class WikiExporter {
                        return $this->pageByTitle( $title );
                }
        }
-       
+
        function pagesByName( $names ) {
                foreach( $names as $name ) {
                        $this->pageByName( $name );
                }
        }
 
-       
+
        // -------------------- private implementation below --------------------
-       
+
+       /**
+        * Build the <contributors> XML fragment listing the distinct authors of
+        * the pages matched by $cond, and store it in $this->author_list.
+        * Only invoked when $this->list_authors is set and a current-only export is used.
+        * @param string $page page table name, as returned by Database::tableName()
+        * @param string $revision revision table name, likewise
+        * @param string $cond SQL condition selecting the page(s); must not be empty
+        */
+       function do_list_authors ( $page , $revision , $cond ) {
+               $fname = "do_list_authors" ;
+               wfProfileIn( $fname );
+               $this->author_list = "<contributors>";
+               $sql = "SELECT DISTINCT rev_user_text,rev_user FROM {$page},{$revision} WHERE page_id=rev_page AND " . $cond ;
+               $result = $this->db->query( $sql, $fname );
+               $resultset = $this->db->resultObject( $result );
+               while( $row = $resultset->fetchObject() ) {
+                       // Escape with xmlsafe(): htmlentities() emits HTML-only entities that are invalid in XML
+                       $this->author_list .= "<contributor><username>" . xmlsafe( $row->rev_user_text ) .
+                               "</username><id>" . $row->rev_user . "</id></contributor>";
+               }
+               $this->author_list .= "</contributors>";
+               wfProfileOut( $fname );
+       }
+
        function dumpFrom( $cond = '' ) {
                $fname = 'WikiExporter::dumpFrom';
                wfProfileIn( $fname );
-               
+
                $page     = $this->db->tableName( 'page' );
                $revision = $this->db->tableName( 'revision' );
                $text     = $this->db->tableName( 'text' );
-               
-               if( $this->history == MW_EXPORT_FULL ) {
+
+               $order = 'ORDER BY page_id';
+               $limit = '';
+
+               if( $this->history == WikiExporter::FULL ) {
                        $join = 'page_id=rev_page';
-               } elseif( $this->history == MW_EXPORT_CURRENT ) {
+               } elseif( $this->history == WikiExporter::CURRENT ) {
+                       if ( $this->list_authors && $cond != '' )  { // List authors, if so desired
+                               $this->do_list_authors ( $page , $revision , $cond );
+                       }
                        $join = 'page_id=rev_page AND page_latest=rev_id';
+               } elseif ( is_array( $this->history ) ) {
+                       $join = 'page_id=rev_page';
+                       if ( $this->history['dir'] == 'asc' ) {
+                               $op = '>';
+                               $order .= ', rev_timestamp';
+                       } else {
+                               $op = '<';
+                               $order .= ', rev_timestamp DESC';
+                       }
+                       if ( !empty( $this->history['offset'] ) ) {
+                               $join .= " AND rev_timestamp $op " . $this->db->addQuotes(
+                                       $this->db->timestamp( $this->history['offset'] ) );
+                       }
+                       if ( !empty( $this->history['limit'] ) ) {
+                               $limitNum = intval( $this->history['limit'] );
+                               if ( $limitNum > 0 ) {
+                                       $limit = "LIMIT $limitNum";
+                               }
+                       }
                } else {
                        wfProfileOut( $fname );
                        return new WikiError( "$fname given invalid history dump type." );
                }
                $where = ( $cond == '' ) ? '' : "$cond AND";
-               
-               if( $this->buffer == MW_EXPORT_STREAM ) {
+
+               if( $this->buffer == WikiExporter::STREAM ) {
                        $prev = $this->db->bufferResults( false );
                }
                if( $cond == '' ) {
                        // Optimization hack for full-database dump
-                       $pageindex = 'FORCE INDEX (PRIMARY)';
-                       $revindex = 'FORCE INDEX(page_timestamp)';
+                       $revindex = $pageindex = $this->db->useIndexClause("PRIMARY");
+                       $straight = ' /*! STRAIGHT_JOIN */ ';
                } else {
                        $pageindex = '';
                        $revindex = '';
+                       $straight = '';
                }
-               $result = $this->db->query(
-                       "SELECT * FROM
-                               $page $pageindex,
-                               $revision $revindex,
-                               $text
-                               WHERE $where $join AND rev_text_id=old_id
-                               ORDER BY page_id", $fname );
+               if( $this->text == WikiExporter::STUB ) {
+                       $sql = "SELECT $straight * FROM
+                                       $page $pageindex,
+                                       $revision $revindex
+                                       WHERE $where $join
+                                       $order $limit";
+               } else {
+                       $sql = "SELECT $straight * FROM
+                                       $page $pageindex,
+                                       $revision $revindex,
+                                       $text
+                                       WHERE $where $join AND rev_text_id=old_id
+                                       $order $limit";
+               }
+               $result = $this->db->query( $sql, $fname );
                $wrapper = $this->db->resultObject( $result );
                $this->outputStream( $wrapper );
-               
-               if( $this->buffer == MW_EXPORT_STREAM ) {
+
+               // Do not stream $wrapper a second time when list_authors is set:
+               // outputStream() above has already consumed and freed the result set,
+               // and it appends $this->author_list itself when closing the page.
+
+               if( $this->buffer == WikiExporter::STREAM ) {
                        $this->db->bufferResults( $prev );
                }
-               
+
                wfProfileOut( $fname );
        }
-       
+
        /**
         * Runs through a query result set dumping page and revision records.
         * The result set should be sorted/grouped by page to avoid duplicate
@@ -206,7 +271,7 @@ class WikiExporter {
                        $this->sink->writeRevision( $row, $output );
                }
                if( isset( $last ) ) {
-                       $output = $this->writer->closePage();
+                       $output = $this->author_list . $this->writer->closePage();
                        $this->sink->writeClosePage( $output );
                }
                $resultset->free();
@@ -214,15 +279,15 @@ class WikiExporter {
 }
 
 class XmlDumpWriter {
-       
+
        /**
         * Returns the export schema version.
         * @return string
         */
        function schemaVersion() {
-               return "0.3";
+               return "0.3"; // FIXME: upgrade to 0.4 when updated XSD is ready, for the revision deletion bits
        }
-       
+
        /**
         * Opens the XML output stream's root <mediawiki> element.
         * This does not include an xml directive, so is safe to include
@@ -247,7 +312,7 @@ class XmlDumpWriter {
                        "\n" .
                        $this->siteInfo();
        }
-       
+
        function siteInfo() {
                $info = array(
                        $this->sitename(),
@@ -259,29 +324,28 @@ class XmlDumpWriter {
                        implode( "\n    ", $info ) .
                        "\n  </siteinfo>\n";
        }
-       
+
        function sitename() {
                global $wgSitename;
                return wfElement( 'sitename', array(), $wgSitename );
        }
-       
+
        function generator() {
                global $wgVersion;
                return wfElement( 'generator', array(), "MediaWiki $wgVersion" );
        }
-       
+
        function homelink() {
-               $page = Title::newFromText( wfMsgForContent( 'mainpage' ) );
-               return wfElement( 'base', array(), $page->getFullUrl() );
+               return wfElement( 'base', array(), Title::newMainPage()->getFullUrl() );
        }
-       
+
        function caseSetting() {
                global $wgCapitalLinks;
                // "case-insensitive" option is reserved for future
                $sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
                return wfElement( 'case', array(), $sensitivity );
        }
-       
+
        function namespaces() {
                global $wgContLang;
                $spaces = "  <namespaces>\n";
@@ -291,7 +355,7 @@ class XmlDumpWriter {
                $spaces .= "    </namespaces>";
                return $spaces;
        }
-       
+
        /**
         * Closes the output stream with the closing root element.
         * Call when finished dumping things.
@@ -300,7 +364,7 @@ class XmlDumpWriter {
                return "</mediawiki>\n";
        }
 
-       
+
        /**
         * Opens a <page> section on the output stream, with data
         * from the given database row.
@@ -320,7 +384,7 @@ class XmlDumpWriter {
                }
                return $out;
        }
-       
+
        /**
         * Closes a <page> section on the output stream.
         *
@@ -329,7 +393,7 @@ class XmlDumpWriter {
        function closePage() {
                return "  </page>\n";
        }
-       
+
        /**
         * Dumps a <revision> section on the output stream, with
         * data filled in from the given database row.
@@ -341,36 +405,52 @@ class XmlDumpWriter {
        function writeRevision( $row ) {
                $fname = 'WikiExporter::dumpRev';
                wfProfileIn( $fname );
-               
+
                $out  = "    <revision>\n";
                $out .= "      " . wfElement( 'id', null, strval( $row->rev_id ) ) . "\n";
-               
-               $ts = wfTimestamp2ISO8601( strval( $row->rev_timestamp ) );
+
+               $ts = wfTimestamp( TS_ISO_8601, $row->rev_timestamp );
                $out .= "      " . wfElement( 'timestamp', null, $ts ) . "\n";
-               
-               $out .= "      <contributor>\n";
-               if( $row->rev_user ) {
-                       $out .= "        " . wfElementClean( 'username', null, strval( $row->rev_user_text ) ) . "\n";
-                       $out .= "        " . wfElement( 'id', null, strval( $row->rev_user ) ) . "\n";
+
+               if( $row->rev_deleted & Revision::DELETED_USER ) {
+                       $out .= "      " . wfElement( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n";
                } else {
-                       $out .= "        " . wfElementClean( 'ip', null, strval( $row->rev_user_text ) ) . "\n";
+                       $out .= "      <contributor>\n";
+                       if( $row->rev_user ) {
+                               $out .= "        " . wfElementClean( 'username', null, strval( $row->rev_user_text ) ) . "\n";
+                               $out .= "        " . wfElement( 'id', null, strval( $row->rev_user ) ) . "\n";
+                       } else {
+                               $out .= "        " . wfElementClean( 'ip', null, strval( $row->rev_user_text ) ) . "\n";
+                       }
+                       $out .= "      </contributor>\n";
                }
-               $out .= "      </contributor>\n";
-               
+
                if( $row->rev_minor_edit ) {
                        $out .=  "      <minor/>\n";
                }
-               if( $row->rev_comment != '' ) {
+               if( $row->rev_deleted & Revision::DELETED_COMMENT ) {
+                       $out .= "      " . wfElement( 'comment', array( 'deleted' => 'deleted' ) ) . "\n";
+               } elseif( $row->rev_comment != '' ) {
                        $out .= "      " . wfElementClean( 'comment', null, strval( $row->rev_comment ) ) . "\n";
                }
-       
-               $text = strval( Revision::getRevisionText( $row ) );
-               $out .= "      " . wfElementClean( 'text',
-                       array( 'xml:space' => 'preserve' ),
-                       strval( $text ) ) . "\n";
-               
+
+               if( $row->rev_deleted & Revision::DELETED_TEXT ) {
+                       $out .= "      " . wfElement( 'text', array( 'deleted' => 'deleted' ) ) . "\n";
+               } elseif( isset( $row->old_text ) ) {
+                       // Raw text from the database may have invalid chars
+                       $text = strval( Revision::getRevisionText( $row ) );
+                       $out .= "      " . wfElementClean( 'text',
+                               array( 'xml:space' => 'preserve' ),
+                               strval( $text ) ) . "\n";
+               } else {
+                       // Stub output
+                       $out .= "      " . wfElement( 'text',
+                               array( 'id' => $row->rev_text_id ),
+                               "" ) . "\n";
+               }
+
                $out .= "    </revision>\n";
-               
+
                wfProfileOut( $fname );
                return $out;
        }
@@ -385,23 +465,23 @@ class DumpOutput {
        function writeOpenStream( $string ) {
                $this->write( $string );
        }
-       
+
        function writeCloseStream( $string ) {
                $this->write( $string );
        }
-       
+
        function writeOpenPage( $page, $string ) {
                $this->write( $string );
        }
-       
+
        function writeClosePage( $string ) {
                $this->write( $string );
        }
-       
+
        function writeRevision( $rev, $string ) {
                $this->write( $string );
        }
-       
+
        /**
         * Override to write to a different stream type.
         * @return bool
@@ -416,11 +496,11 @@ class DumpOutput {
  */
 class DumpFileOutput extends DumpOutput {
        var $handle;
-       
+
        function DumpFileOutput( $file ) {
                $this->handle = fopen( $file, "wt" );
        }
-       
+
        function write( $string ) {
                fputs( $this->handle, $string );
        }
@@ -463,7 +543,10 @@ class DumpBZip2Output extends DumpPipeOutput {
  */
 class Dump7ZipOutput extends DumpPipeOutput {
        function Dump7ZipOutput( $file ) {
-               $command = "7za a -si " . wfEscapeShellArg( $file );
+               $command = "7za a -bd -si " . wfEscapeShellArg( $file );
+               // Discard everything p7zip writes to stdout/stderr, which is mostly noise.
+               // Unfortunately this also hides any real error messages it prints.
+               $command .= " >/dev/null 2>&1";
                parent::DumpPipeOutput( $command );
        }
 }
@@ -479,40 +562,40 @@ class DumpFilter {
        function DumpFilter( &$sink ) {
                $this->sink =& $sink;
        }
-       
+
        function writeOpenStream( $string ) {
                $this->sink->writeOpenStream( $string );
        }
-       
+
        function writeCloseStream( $string ) {
                $this->sink->writeCloseStream( $string );
        }
-       
+
        function writeOpenPage( $page, $string ) {
                $this->sendingThisPage = $this->pass( $page, $string );
                if( $this->sendingThisPage ) {
                        $this->sink->writeOpenPage( $page, $string );
                }
        }
-       
+
        function writeClosePage( $string ) {
                if( $this->sendingThisPage ) {
                        $this->sink->writeClosePage( $string );
                        $this->sendingThisPage = false;
                }
        }
-       
+
        function writeRevision( $rev, $string ) {
                if( $this->sendingThisPage ) {
                        $this->sink->writeRevision( $rev, $string );
                }
        }
-       
+
        /**
         * Override for page-based filter types.
         * @return bool
         */
-       function pass( $page, $string ) {
+       function pass( $page ) {
                return true;
        }
 }
@@ -522,7 +605,7 @@ class DumpFilter {
  */
 class DumpNotalkFilter extends DumpFilter {
        function pass( $page ) {
-               return Namespace::isTalk( $page->page_namespace );
+               return !Namespace::isTalk( $page->page_namespace );
        }
 }
 
@@ -531,11 +614,11 @@ class DumpNotalkFilter extends DumpFilter {
  */
 class DumpNamespaceFilter extends DumpFilter {
        var $invert = false;
-       var $match = array();
-       
+       var $namespaces = array();
+
        function DumpNamespaceFilter( &$sink, $param ) {
                parent::DumpFilter( $sink );
-               
+
                $constants = array(
                        "NS_MAIN"           => NS_MAIN,
                        "NS_TALK"           => NS_TALK,
@@ -553,24 +636,26 @@ class DumpNamespaceFilter extends DumpFilter {
                        "NS_HELP_TALK"      => NS_HELP_TALK,
                        "NS_CATEGORY"       => NS_CATEGORY,
                        "NS_CATEGORY_TALK"  => NS_CATEGORY_TALK );
-               
+
                if( $param{0} == '!' ) {
                        $this->invert = true;
                        $param = substr( $param, 1 );
                }
-               
+
                foreach( explode( ',', $param ) as $key ) {
                        $key = trim( $key );
-                       if( isset( $contants[$key] ) ) {
+                       if( isset( $constants[$key] ) ) {
                                $ns = $constants[$key];
                                $this->namespaces[$ns] = true;
                        } elseif( is_numeric( $key ) ) {
                                $ns = intval( $key );
                                $this->namespaces[$ns] = true;
+                       } else {
+                               throw new MWException( "Unrecognized namespace key '$key'\n" );
                        }
                }
        }
-       
+
        function pass( $page ) {
                $match = isset( $this->namespaces[$page->page_namespace] );
                return $this->invert xor $match;
@@ -583,12 +668,12 @@ class DumpNamespaceFilter extends DumpFilter {
  */
 class DumpLatestFilter extends DumpFilter {
        var $page, $pageString, $rev, $revString;
-       
+
        function writeOpenPage( $page, $string ) {
                $this->page = $page;
                $this->pageString = $string;
        }
-       
+
        function writeClosePage( $string ) {
                if( $this->rev ) {
                        $this->sink->writeOpenPage( $this->page, $this->pageString );
@@ -600,7 +685,7 @@ class DumpLatestFilter extends DumpFilter {
                $this->page = null;
                $this->pageString = null;
        }
-       
+
        function writeRevision( $rev, $string ) {
                if( $rev->rev_id == $this->page->page_latest ) {
                        $this->rev = $rev;
@@ -617,31 +702,31 @@ class DumpMultiWriter {
                $this->sinks = $sinks;
                $this->count = count( $sinks );
        }
-       
+
        function writeOpenStream( $string ) {
                for( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenStream( $string );
                }
        }
-       
+
        function writeCloseStream( $string ) {
                for( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeCloseStream( $string );
                }
        }
-       
+
        function writeOpenPage( $page, $string ) {
                for( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeOpenPage( $page, $string );
                }
        }
-       
+
        function writeClosePage( $string ) {
                for( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeClosePage( $string );
                }
        }
-       
+
        function writeRevision( $rev, $string ) {
                for( $i = 0; $i < $this->count; $i++ ) {
                        $this->sinks[$i]->writeRevision( $rev, $string );
@@ -649,24 +734,17 @@ class DumpMultiWriter {
        }
 }
 
-
-
-function wfTimestamp2ISO8601( $ts ) {
-       #2003-08-05T18:30:02Z
-       return preg_replace( '/^(....)(..)(..)(..)(..)(..)$/', '$1-$2-$3T$4:$5:$6Z', wfTimestamp( TS_MW, $ts ) );
-}
-
 function xmlsafe( $string ) {
        $fname = 'xmlsafe';
        wfProfileIn( $fname );
-       
+
        /**
         * The page may contain old data which has not been properly normalized.
         * Invalid UTF-8 sequences or forbidden control characters will make our
         * XML output invalid, so be sure to strip them out.
         */
        $string = UtfNormal::cleanUp( $string );
-       
+
        $string = htmlspecialchars( $string );
        wfProfileOut( $fname );
        return $string;