X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FExport.php;h=7d0a824e3240354a486734741e60a08ee80b060e;hb=f9da6c460b6f4cfd457b0ebf1a6e7a347755706b;hp=88ef6e3dbfb9bc20e60e3803024aa39894facc5b;hpb=3a2d55705a797cf5e46051f360de1ae72d443530;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Export.php b/includes/Export.php index 88ef6e3dbf..7d0a824e32 100644 --- a/includes/Export.php +++ b/includes/Export.php @@ -1,5 +1,5 @@ +# Copyright (C) 2003, 2005, 2006 Brion Vibber # http://www.mediawiki.org/ # # This program is free software; you can redistribute it and/or modify @@ -14,45 +14,48 @@ # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., -# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # http://www.gnu.org/copyleft/gpl.html + /** - * - * @package MediaWiki - * @subpackage SpecialPage + * @defgroup Dump Dump */ -/** */ -require_once( 'Revision.php' ); +/** + * @ingroup SpecialPage Dump + */ +class WikiExporter { + var $list_authors = false ; # Return distinct author list (when not returning full history) + var $author_list = "" ; -define( 'MW_EXPORT_FULL', 0 ); -define( 'MW_EXPORT_CURRENT', 1 ); + var $dumpUploads = false; -define( 'MW_EXPORT_BUFFER', 0 ); -define( 'MW_EXPORT_STREAM', 1 ); + const FULL = 0; + const CURRENT = 1; -define( 'MW_EXPORT_TEXT', 0 ); -define( 'MW_EXPORT_STUB', 1 ); + const BUFFER = 0; + const STREAM = 1; + const TEXT = 0; + const STUB = 1; -/** - * @package MediaWiki - * @subpackage SpecialPage - */ -class WikiExporter { /** - * If using MW_EXPORT_STREAM to stream a large amount of data, + * If using WikiExporter::STREAM to stream a large amount of data, * provide a database connection which is not managed by * LoadBalancer to read from: some history blob types will * make additional queries to pull source data while the * main query is still running. * - * @param Database $db - * @param int $history one of MW_EXPORT_FULL or MW_EXPORT_CURRENT - * @param int $buffer one of MW_EXPORT_BUFFER or MW_EXPORT_STREAM + * @param $db Database + * @param $history Mixed: one of WikiExporter::FULL or WikiExporter::CURRENT, + * or an associative array: + * offset: non-inclusive offset at which to start the query + * limit: maximum number of rows to return + * dir: "asc" or "desc" timestamp order + * @param $buffer Int: one of WikiExporter::BUFFER or WikiExporter::STREAM */ - function WikiExporter( &$db, $history = MW_EXPORT_CURRENT, - $buffer = MW_EXPORT_BUFFER, $text = MW_EXPORT_TEXT ) { + function __construct( &$db, $history = WikiExporter::CURRENT, + $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) { $this->db =& $db; $this->history = $history; $this->buffer = $buffer; @@ -66,7 +69,7 @@ class WikiExporter { * various row objects and XML output for filtering. Filters * can be chained or used as callbacks. * - * @param mixed $callback + * @param $sink mixed */ function setOutputSink( &$sink ) { $this->sink =& $sink; @@ -94,8 +97,8 @@ class WikiExporter { /** * Dumps a series of page and revision records for those pages * in the database falling within the page_id range given. - * @param int $start Inclusive lower limit (this id is included) - * @param int $end Exclusive upper limit (this id is not included) + * @param $start Int: inclusive lower limit (this id is included) + * @param $end Int: Exclusive upper limit (this id is not included) * If 0, no upper limit. */ function pagesByRange( $start, $end ) { @@ -107,12 +110,12 @@ class WikiExporter { } /** - * @param Title $title + * @param $title Title */ function pageByTitle( $title ) { return $this->dumpFrom( 'page_namespace=' . $title->getNamespace() . - ' AND page_title=' . $this->db->addQuotes( $title->getDbKey() ) ); + ' AND page_title=' . $this->db->addQuotes( $title->getDBkey() ) ); } function pageByName( $name ) { @@ -133,6 +136,33 @@ class WikiExporter { // -------------------- private implementation below -------------------- + # Generates the distinct list of authors of an article + # Not called by default (depends on $this->list_authors) + # Can be set by Special:Export when not exporting whole history + function do_list_authors ( $page , $revision , $cond ) { + $fname = "do_list_authors" ; + wfProfileIn( $fname ); + $this->author_list = ""; + //rev_deleted + $nothidden = '(rev_deleted & '.Revision::DELETED_USER.') = 0'; + + $sql = "SELECT DISTINCT rev_user_text,rev_user FROM {$page},{$revision} WHERE page_id=rev_page AND $nothidden AND " . $cond ; + $result = $this->db->query( $sql, $fname ); + $resultset = $this->db->resultObject( $result ); + while( $row = $resultset->fetchObject() ) { + $this->author_list .= "" . + "" . + htmlentities( $row->rev_user_text ) . + "" . + "" . + $row->rev_user . + "" . + ""; + } + wfProfileOut( $fname ); + $this->author_list .= ""; + } + function dumpFrom( $cond = '' ) { $fname = 'WikiExporter::dumpFrom'; wfProfileIn( $fname ); @@ -141,46 +171,76 @@ class WikiExporter { $revision = $this->db->tableName( 'revision' ); $text = $this->db->tableName( 'text' ); - if( $this->history == MW_EXPORT_FULL ) { + $order = 'ORDER BY page_id'; + $limit = ''; + + if( $this->history == WikiExporter::FULL ) { $join = 'page_id=rev_page'; - } elseif( $this->history == MW_EXPORT_CURRENT ) { + } elseif( $this->history == WikiExporter::CURRENT ) { + if ( $this->list_authors && $cond != '' ) { // List authors, if so desired + $this->do_list_authors ( $page , $revision , $cond ); + } $join = 'page_id=rev_page AND page_latest=rev_id'; + } elseif ( is_array( $this->history ) ) { + $join = 'page_id=rev_page'; + if ( $this->history['dir'] == 'asc' ) { + $op = '>'; + $order .= ', rev_timestamp'; + } else { + $op = '<'; + $order .= ', rev_timestamp DESC'; + } + if ( !empty( $this->history['offset'] ) ) { + $join .= " AND rev_timestamp $op " . $this->db->addQuotes( + $this->db->timestamp( $this->history['offset'] ) ); + } + if ( !empty( $this->history['limit'] ) ) { + $limitNum = intval( $this->history['limit'] ); + if ( $limitNum > 0 ) { + $limit = "LIMIT $limitNum"; + } + } } else { wfProfileOut( $fname ); return new WikiError( "$fname given invalid history dump type." ); } $where = ( $cond == '' ) ? '' : "$cond AND"; - if( $this->buffer == MW_EXPORT_STREAM ) { + if( $this->buffer == WikiExporter::STREAM ) { $prev = $this->db->bufferResults( false ); } if( $cond == '' ) { // Optimization hack for full-database dump - $pageindex = 'FORCE INDEX (PRIMARY)'; - $revindex = 'FORCE INDEX(page_timestamp)'; + $revindex = $pageindex = $this->db->useIndexClause("PRIMARY"); + $straight = ' /*! STRAIGHT_JOIN */ '; } else { $pageindex = ''; $revindex = ''; + $straight = ''; } - if( $this->text == MW_EXPORT_STUB ) { - $sql = "SELECT * FROM + if( $this->text == WikiExporter::STUB ) { + $sql = "SELECT $straight * FROM $page $pageindex, $revision $revindex WHERE $where $join - ORDER BY page_id"; + $order $limit"; } else { - $sql = "SELECT * FROM + $sql = "SELECT $straight * FROM $page $pageindex, $revision $revindex, $text WHERE $where $join AND rev_text_id=old_id - ORDER BY page_id"; + $order $limit"; } $result = $this->db->query( $sql, $fname ); $wrapper = $this->db->resultObject( $result ); $this->outputStream( $wrapper ); - if( $this->buffer == MW_EXPORT_STREAM ) { + if ( $this->list_authors ) { + $this->outputStream( $wrapper ); + } + + if( $this->buffer == WikiExporter::STREAM ) { $this->db->bufferResults( $prev ); } @@ -197,7 +257,7 @@ class WikiExporter { * separate database connection not managed by LoadBalancer; some * blob storage types will make queries to pull source data. * - * @param ResultWrapper $resultset + * @param $resultset ResultWrapper * @access private */ function outputStream( $resultset ) { @@ -207,7 +267,11 @@ class WikiExporter { $last->page_namespace != $row->page_namespace || $last->page_title != $row->page_title ) { if( isset( $last ) ) { - $output = $this->writer->closePage(); + $output = ''; + if( $this->dumpUploads ) { + $output .= $this->writer->writeUploads( $last ); + } + $output .= $this->writer->closePage(); $this->sink->writeClosePage( $output ); } $output = $this->writer->openPage( $row ); @@ -218,13 +282,21 @@ class WikiExporter { $this->sink->writeRevision( $row, $output ); } if( isset( $last ) ) { - $output = $this->writer->closePage(); + $output = ''; + if( $this->dumpUploads ) { + $output .= $this->writer->writeUploads( $last ); + } + $output .= $this->author_list; + $output .= $this->writer->closePage(); $this->sink->writeClosePage( $output ); } $resultset->free(); } } +/** + * @ingroup Dump + */ class XmlDumpWriter { /** @@ -232,7 +304,7 @@ class XmlDumpWriter { * @return string */ function schemaVersion() { - return "0.3"; + return "0.3"; // FIXME: upgrade to 0.4 when updated XSD is ready, for the revision deletion bits } /** @@ -283,8 +355,7 @@ class XmlDumpWriter { } function homelink() { - $page = Title::newFromText( wfMsgForContent( 'mainpage' ) ); - return wfElement( 'base', array(), $page->getFullUrl() ); + return wfElement( 'base', array(), Title::newMainPage()->getFullUrl() ); } function caseSetting() { @@ -317,7 +388,7 @@ class XmlDumpWriter { * Opens a section on the output stream, with data * from the given database row. * - * @param object $row + * @param $row object * @return string * @access private */ @@ -346,7 +417,7 @@ class XmlDumpWriter { * Dumps a section on the output stream, with * data filled in from the given database row. * - * @param object $row + * @param $row object * @return string * @access private */ @@ -357,26 +428,26 @@ class XmlDumpWriter { $out = " \n"; $out .= " " . wfElement( 'id', null, strval( $row->rev_id ) ) . "\n"; - $ts = wfTimestamp( TS_ISO_8601, $row->rev_timestamp ); - $out .= " " . wfElement( 'timestamp', null, $ts ) . "\n"; + $out .= $this->writeTimestamp( $row->rev_timestamp ); - $out .= " \n"; - if( $row->rev_user ) { - $out .= " " . wfElementClean( 'username', null, strval( $row->rev_user_text ) ) . "\n"; - $out .= " " . wfElement( 'id', null, strval( $row->rev_user ) ) . "\n"; + if( $row->rev_deleted & Revision::DELETED_USER ) { + $out .= " " . wfElement( 'contributor', array( 'deleted' => 'deleted' ) ) . "\n"; } else { - $out .= " " . wfElementClean( 'ip', null, strval( $row->rev_user_text ) ) . "\n"; + $out .= $this->writeContributor( $row->rev_user, $row->rev_user_text ); } - $out .= " \n"; if( $row->rev_minor_edit ) { $out .= " \n"; } - if( $row->rev_comment != '' ) { + if( $row->rev_deleted & Revision::DELETED_COMMENT ) { + $out .= " " . wfElement( 'comment', array( 'deleted' => 'deleted' ) ) . "\n"; + } elseif( $row->rev_comment != '' ) { $out .= " " . wfElementClean( 'comment', null, strval( $row->rev_comment ) ) . "\n"; } - if( isset( $row->old_text ) ) { + if( $row->rev_deleted & Revision::DELETED_TEXT ) { + $out .= " " . wfElement( 'text', array( 'deleted' => 'deleted' ) ) . "\n"; + } elseif( isset( $row->old_text ) ) { // Raw text from the database may have invalid chars $text = strval( Revision::getRevisionText( $row ) ); $out .= " " . wfElementClean( 'text', @@ -395,11 +466,58 @@ class XmlDumpWriter { return $out; } + function writeTimestamp( $timestamp ) { + $ts = wfTimestamp( TS_ISO_8601, $timestamp ); + return " " . wfElement( 'timestamp', null, $ts ) . "\n"; + } + + function writeContributor( $id, $text ) { + $out = " \n"; + if( $id ) { + $out .= " " . wfElementClean( 'username', null, strval( $text ) ) . "\n"; + $out .= " " . wfElement( 'id', null, strval( $id ) ) . "\n"; + } else { + $out .= " " . wfElementClean( 'ip', null, strval( $text ) ) . "\n"; + } + $out .= " \n"; + return $out; + } + + /** + * Warning! This data is potentially inconsistent. :( + */ + function writeUploads( $row ) { + if( $row->page_namespace == NS_IMAGE ) { + $img = wfFindFile( $row->page_title ); + if( $img ) { + $out = ''; + foreach( array_reverse( $img->getHistory() ) as $ver ) { + $out .= $this->writeUpload( $ver ); + } + $out .= $this->writeUpload( $img ); + return $out; + } + } + return ''; + } + + function writeUpload( $file ) { + return " \n" . + $this->writeTimestamp( $file->getTimestamp() ) . + $this->writeContributor( $file->getUser( 'id' ), $file->getUser( 'text' ) ) . + " " . wfElementClean( 'comment', null, $file->getDescription() ) . "\n" . + " " . wfElement( 'filename', null, $file->getName() ) . "\n" . + " " . wfElement( 'src', null, $file->getFullUrl() ) . "\n" . + " " . wfElement( 'size', null, $file->getSize() ) . "\n" . + " \n"; + } + } /** * Base class for output stream; prints to stdout or buffer or whereever. + * @ingroup Dump */ class DumpOutput { function writeOpenStream( $string ) { @@ -433,6 +551,7 @@ class DumpOutput { /** * Stream outputter to send data to a file. + * @ingroup Dump */ class DumpFileOutput extends DumpOutput { var $handle; @@ -450,6 +569,7 @@ class DumpFileOutput extends DumpOutput { * Stream outputter to send data to a file via some filter program. * Even if compression is available in a library, using a separate * program can allow us to make use of a multi-processor system. + * @ingroup Dump */ class DumpPipeOutput extends DumpFileOutput { function DumpPipeOutput( $command, $file = null ) { @@ -462,6 +582,7 @@ class DumpPipeOutput extends DumpFileOutput { /** * Sends dump output via the gzip compressor. + * @ingroup Dump */ class DumpGZipOutput extends DumpPipeOutput { function DumpGZipOutput( $file ) { @@ -471,6 +592,7 @@ class DumpGZipOutput extends DumpPipeOutput { /** * Sends dump output via the bgzip2 compressor. + * @ingroup Dump */ class DumpBZip2Output extends DumpPipeOutput { function DumpBZip2Output( $file ) { @@ -480,10 +602,14 @@ class DumpBZip2Output extends DumpPipeOutput { /** * Sends dump output via the p7zip compressor. + * @ingroup Dump */ class Dump7ZipOutput extends DumpPipeOutput { function Dump7ZipOutput( $file ) { $command = "7za a -bd -si " . wfEscapeShellArg( $file ); + // Suppress annoying useless crap from p7zip + // Unfortunately this could suppress real error messages too + $command .= ' >' . wfGetNull() . ' 2>&1'; parent::DumpPipeOutput( $command ); } } @@ -494,6 +620,7 @@ class Dump7ZipOutput extends DumpPipeOutput { * Dump output filter class. * This just does output filtering and streaming; XML formatting is done * higher up, so be careful in what you do. + * @ingroup Dump */ class DumpFilter { function DumpFilter( &$sink ) { @@ -532,22 +659,24 @@ class DumpFilter { * Override for page-based filter types. * @return bool */ - function pass( $page, $string ) { + function pass( $page ) { return true; } } /** * Simple dump output filter to exclude all talk pages. + * @ingroup Dump */ class DumpNotalkFilter extends DumpFilter { function pass( $page ) { - return !Namespace::isTalk( $page->page_namespace ); + return !MWNamespace::isTalk( $page->page_namespace ); } } /** * Dump output filter to include or exclude pages in a given set of namespaces. + * @ingroup Dump */ class DumpNamespaceFilter extends DumpFilter { var $invert = false; @@ -588,7 +717,7 @@ class DumpNamespaceFilter extends DumpFilter { $ns = intval( $key ); $this->namespaces[$ns] = true; } else { - wfDie( "Unrecognized namespace key '$key'\n" ); + throw new MWException( "Unrecognized namespace key '$key'\n" ); } } } @@ -602,6 +731,7 @@ class DumpNamespaceFilter extends DumpFilter { /** * Dump output filter to include only the last revision in each page sequence. + * @ingroup Dump */ class DumpLatestFilter extends DumpFilter { var $page, $pageString, $rev, $revString; @@ -633,6 +763,7 @@ class DumpLatestFilter extends DumpFilter { /** * Base class for output stream; prints to stdout or buffer or whereever. + * @ingroup Dump */ class DumpMultiWriter { function DumpMultiWriter( $sinks ) { @@ -686,5 +817,3 @@ function xmlsafe( $string ) { wfProfileOut( $fname ); return $string; } - -?>