<?php
# http://www.mediawiki.org/
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# http://www.gnu.org/copyleft/gpl.html

/**
 *
 * @package MediaWiki
 * @subpackage SpecialPage
 */

/** */
require_once( 'Revision.php' );

/**
 *
 */
function wfSpecialExport( $page = '' ) {
	global $wgOut, $wgLang, $wgRequest;

	if( $wgRequest->getVal( 'action' ) == 'submit' ) {
		$page = $wgRequest->getText( 'pages' );
		$curonly = $wgRequest->getCheck( 'curonly' );
	} else {
		# Pre-check the 'current version only' box in the UI
		$curonly = true;
	}

	if( $page != '' ) {
		$wgOut->disable();

		header( "Content-type: application/xml; charset=utf-8" );
		$pages = explode( "\n", $page );

		$db =& wfGetDB( DB_SLAVE );
		$history = $curonly ? MW_EXPORT_CURRENT : MW_EXPORT_FULL;

		$exporter = new WikiExporter( $db, $history );
		$exporter->openStream();
		$exporter->pagesByName( $pages );
		$exporter->closeStream();
		return;
	}

	$wgOut->addWikiText( wfMsg( "exporttext" ) );
	$titleObj = Title::makeTitle( NS_SPECIAL, "Export" );
	$action = $titleObj->escapeLocalURL( 'action=submit' );

	# Simple export form: newline-separated page titles plus the
	# 'current revision only' checkbox, posted back with action=submit.
	$wgOut->addHTML( "
<form method='post' action=\"$action\">
<input type='hidden' name='action' value='submit' />
<textarea name='pages' cols='40' rows='10'></textarea><br />
<label><input type='checkbox' name='curonly' value='true' checked='checked' />
" . wfMsg( 'exportcuronly' ) . "
</label><br />
<input type='submit' />
</form>
" );
}
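
/**
 * Usage sketch: the same exporter calls as above, but driven from custom
 * code and captured to a string with PHP's output buffering instead of
 * being streamed to the client. 'Main Page' is only a placeholder title.
 *
 *   $db =& wfGetDB( DB_SLAVE );
 *   $exporter = new WikiExporter( $db, MW_EXPORT_CURRENT );
 *   ob_start();
 *   $exporter->openStream();
 *   $exporter->pageByName( 'Main Page' );
 *   $exporter->closeStream();
 *   $xml = ob_get_clean();   // complete export document as a string
 */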

define( 'MW_EXPORT_FULL',    0 );
define( 'MW_EXPORT_CURRENT', 1 );

define( 'MW_EXPORT_BUFFER', 0 );
define( 'MW_EXPORT_STREAM', 1 );

/**
 * @package MediaWiki
 * @subpackage SpecialPage
 */
class WikiExporter {
	var $pageCallback = null;
	var $revCallback = null;

	/**
	 * If using MW_EXPORT_STREAM to stream a large amount of data,
	 * provide a database connection which is not managed by
	 * LoadBalancer to read from: some history blob types will
	 * make additional queries to pull source data while the
	 * main query is still running.
	 *
	 * @param Database $db
	 * @param int $history one of MW_EXPORT_FULL or MW_EXPORT_CURRENT
	 * @param int $buffer  one of MW_EXPORT_BUFFER or MW_EXPORT_STREAM
	 */
	function WikiExporter( &$db, $history = MW_EXPORT_CURRENT, $buffer = MW_EXPORT_BUFFER ) {
		$this->db =& $db;
		$this->history = $history;
		$this->buffer = $buffer;
	}

	/**
	 * Set a callback to be called after each page in the output
	 * stream is closed. The callback will be passed a database row
	 * object with the last revision output.
	 *
	 * A set callback can be removed by passing null here.
	 *
	 * @param mixed $callback
	 */
	function setPageCallback( $callback ) {
		$this->pageCallback = $callback;
	}

	/**
	 * Set a callback to be called after each revision in the output
	 * stream is closed. The callback will be passed a database row
	 * object with the revision data.
	 *
	 * A set callback can be removed by passing null here.
	 *
	 * @param mixed $callback
	 */
	function setRevisionCallback( $callback ) {
		$this->revCallback = $callback;
	}

	/**
	 * Returns the export schema version.
	 * @return string
	 */
	function schemaVersion() {
		return "0.3";
	}

	/**
	 * Opens the XML output stream's root <mediawiki> element.
	 * This does not include an xml directive, so is safe to include
	 * as a subelement in a larger XML stream. Namespace and XML Schema
	 * references are included.
	 *
	 * To capture the stream to a string, use PHP's output buffering
	 * functions. Output will be encoded in UTF-8.
	 */
	function openStream() {
		global $wgContLanguageCode;
		$ver = $this->schemaVersion();
		print wfElement( 'mediawiki', array(
			'xmlns'              => "http://www.mediawiki.org/xml/export-$ver/",
			'xmlns:xsi'          => "http://www.w3.org/2001/XMLSchema-instance",
			'xsi:schemaLocation' => "http://www.mediawiki.org/xml/export-$ver/ " .
			                        "http://www.mediawiki.org/xml/export-$ver.xsd",
			'version'            => $ver,
			'xml:lang'           => $wgContLanguageCode ),
			null ) . "\n";
		$this->siteInfo();
	}

	function siteInfo() {
		$info = array(
			$this->sitename(),
			$this->homelink(),
			$this->generator(),
			$this->caseSetting(),
			$this->namespaces() );
		print "  <siteinfo>\n";
		foreach( $info as $item ) {
			print "    $item\n";
		}
		print "  </siteinfo>\n";
	}

	function sitename() {
		global $wgSitename;
		return wfElement( 'sitename', array(), $wgSitename );
	}

	function generator() {
		global $wgVersion;
		return wfElement( 'generator', array(), "MediaWiki $wgVersion" );
	}

	function homelink() {
		$page = Title::newFromText( wfMsgForContent( 'mainpage' ) );
		return wfElement( 'base', array(), $page->getFullUrl() );
	}

	function caseSetting() {
		global $wgCapitalLinks;
		// "case-insensitive" option is reserved for future
		$sensitivity = $wgCapitalLinks ? 'first-letter' : 'case-sensitive';
		return wfElement( 'case', array(), $sensitivity );
	}

	function namespaces() {
		global $wgContLang;
		$spaces = "<namespaces>\n";
		foreach( $wgContLang->getNamespaces() as $ns => $title ) {
			$spaces .= '      ' . wfElement( 'namespace',
				array( 'key' => $ns ),
				str_replace( '_', ' ', $title ) ) . "\n";
		}
		$spaces .= "    </namespaces>";
		return $spaces;
	}

	/**
	 * Closes the output stream with the closing root element.
	 * Call when finished dumping things.
	 */
	function closeStream() {
		print "</mediawiki>\n";
	}

	/**
	 * Dumps a series of page and revision records for all pages
	 * in the database, either including complete history or only
	 * the most recent version.
	 */
	function allPages() {
		return $this->dumpFrom( '' );
	}

	/**
	 * @param Title $title
	 */
	function pageByTitle( $title ) {
		return $this->dumpFrom(
			'page_namespace=' . $title->getNamespace() .
			' AND page_title=' . $this->db->addQuotes( $title->getDbKey() ) );
	}

	function pageByName( $name ) {
		$title = Title::newFromText( $name );
		if( is_null( $title ) ) {
			return new WikiError( "Can't export invalid title" );
		} else {
			return $this->pageByTitle( $title );
		}
	}

	function pagesByName( $names ) {
		foreach( $names as $name ) {
			$this->pageByName( $name );
		}
	}

	// -------------------- private implementation below --------------------

	function dumpFrom( $cond = '' ) {
		$fname = 'WikiExporter::dumpFrom';
		wfProfileIn( $fname );

		$page     = $this->db->tableName( 'page' );
		$revision = $this->db->tableName( 'revision' );
		$text     = $this->db->tableName( 'text' );

		if( $this->history == MW_EXPORT_FULL ) {
			$join = 'page_id=rev_page';
		} elseif( $this->history == MW_EXPORT_CURRENT ) {
			$join = 'page_id=rev_page AND page_latest=rev_id';
		} else {
			wfProfileOut( $fname );
			return new WikiError( "$fname given invalid history dump type." );
		}
		$where = ( $cond == '' ) ? '' : "$cond AND";

		if( $this->buffer == MW_EXPORT_STREAM ) {
			$prev = $this->db->bufferResults( false );
		}
		$result = $this->db->query(
			"SELECT * FROM
				$page FORCE INDEX (PRIMARY),
				$revision FORCE INDEX(page_timestamp),
				$text
			WHERE $where $join AND rev_text_id=old_id
			ORDER BY page_id", $fname );
		$wrapper = $this->db->resultObject( $result );
		$this->outputStream( $wrapper );

		if( $this->buffer == MW_EXPORT_STREAM ) {
			$this->db->bufferResults( $prev );
		}
		wfProfileOut( $fname );
	}

	/**
	 * Runs through a query result set dumping page and revision records.
	 * The result set should be sorted/grouped by page to avoid duplicate
	 * page records in the output.
	 *
	 * The result set will be freed once complete. Should be safe for
	 * streaming (non-buffered) queries, as long as it was made on a
	 * separate database connection not managed by LoadBalancer; some
	 * blob storage types will make queries to pull source data.
	 *
	 * @param ResultWrapper $resultset
	 * @access private
	 */
	function outputStream( $resultset ) {
		$last = null;
		while( $row = $resultset->fetchObject() ) {
			if( is_null( $last ) ||
				$last->page_namespace != $row->page_namespace ||
				$last->page_title     != $row->page_title ) {
				if( isset( $last ) ) {
					$this->closePage( $last );
				}
				$this->openPage( $row );
				$last = $row;
			}
			$this->dumpRev( $row );
		}
		if( isset( $last ) ) {
			$this->closePage( $last );
		}
		$resultset->free();
	}

	/**
	 * Opens a <page> section on the output stream, with data
	 * from the given database row.
	 *
	 * @param object $row
	 * @access private
	 */
	function openPage( $row ) {
		print "  <page>\n";
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		print '    ' . wfElementClean( 'title', array(), $title->getPrefixedText() ) . "\n";
		print '    ' . wfElement( 'id', array(), $row->page_id ) . "\n";
		if( '' != $row->page_restrictions ) {
			print '    ' . wfElement( 'restrictions', array(), $row->page_restrictions ) . "\n";
		}
	}
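
	/**
	 * Shape of the output produced by openPage(), dumpRev() and closePage()
	 * for a single page (element values here are invented examples):
	 *
	 *   <page>
	 *     <title>Main Page</title>
	 *     <id>1</id>
	 *     <revision>
	 *       <id>42</id>
	 *       <timestamp>2003-08-05T18:30:02Z</timestamp>
	 *       <contributor><username>Example</username><id>7</id></contributor>
	 *       <comment>example edit</comment>
	 *       <text xml:space="preserve">example page text</text>
	 *     </revision>
	 *   </page>
	 */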

	/**
	 * Closes a <page> section on the output stream.
	 * If a per-page callback has been set, it will be called
	 * and passed the last database row used for this page.
	 *
	 * @param object $row
	 * @access private
	 */
	function closePage( $row ) {
		print "  </page>\n";
		if( isset( $this->pageCallback ) ) {
			call_user_func( $this->pageCallback, $row );
		}
	}

	/**
	 * Dumps a <revision> section on the output stream, with
	 * data filled in from the given database row.
	 *
	 * @param object $row
	 * @access private
	 */
	function dumpRev( $row ) {
		$fname = 'WikiExporter::dumpRev';
		wfProfileIn( $fname );

		print "    <revision>\n";
		print "      " . wfElement( 'id', null, $row->rev_id ) . "\n";

		$ts = wfTimestamp2ISO8601( $row->rev_timestamp );
		print "      " . wfElement( 'timestamp', null, $ts ) . "\n";

		print "      <contributor>";
		if( $row->rev_user ) {
			print wfElementClean( 'username', null, $row->rev_user_text );
			print wfElement( 'id', null, $row->rev_user );
		} else {
			print wfElementClean( 'ip', null, $row->rev_user_text );
		}
		print "</contributor>\n";

		if( $row->rev_minor_edit ) {
			print "      <minor/>\n";
		}
		if( $row->rev_comment != '' ) {
			print "      " . wfElementClean( 'comment', null, $row->rev_comment ) . "\n";
		}

		$text = Revision::getRevisionText( $row );
		print "      " . wfElementClean( 'text',
			array( 'xml:space' => 'preserve' ), $text ) . "\n";

		print "    </revision>\n";

		wfProfileOut( $fname );

		if( isset( $this->revCallback ) ) {
			call_user_func( $this->revCallback, $row );
		}
	}
}

function wfTimestamp2ISO8601( $ts ) {
	# 2003-08-05T18:30:02Z
	return preg_replace( '/^(....)(..)(..)(..)(..)(..)$/',
		'$1-$2-$3T$4:$5:$6Z', $ts );
}

function xmlsafe( $string ) {
	$fname = 'xmlsafe';
	wfProfileIn( $fname );

	/**
	 * The page may contain old data which has not been properly normalized.
	 * Invalid UTF-8 sequences or forbidden control characters will make our
	 * XML output invalid, so be sure to strip them out.
	 */
	$string = UtfNormal::cleanUp( $string );

	$string = htmlspecialchars( $string );
	wfProfileOut( $fname );
	return $string;
}

?>