includes/specials/SpecialExport.php

   1 <?php
   2 # Copyright (C) 2003-2008 Brion Vibber <brion@pobox.com>
   3 # http://www.mediawiki.org/
   4 #
   5 # This program is free software; you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 2 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License along
  16 # with this program; if not, write to the Free Software Foundation, Inc.,
  17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18 # http://www.gnu.org/copyleft/gpl.html
  19 /**
  20  * @file
  21  * @ingroup SpecialPage
  22  */
  23
  24 class SpecialExport extends SpecialPage {
  25
  26         private $curonly, $doExport, $pageLinkDepth, $templates;
  27         private $images;
  28
  29         public function __construct() {
  30                 parent::__construct( 'Export' );
  31         }
  32
  33         public function execute( $par ) {
  34                 global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
  35                 global $wgExportAllowHistory, $wgExportMaxHistory;
  36
  37                 // Set some variables
  38                 $this->curonly = true;
  39                 $this->doExport = false;
  40                 $this->templates = $wgRequest->getCheck( 'templates' );
  41                 $this->images = $wgRequest->getCheckImages; // Doesn't do anything yet
  42                 $this->pageLinkDepth = $wgRequest->getIntOrNull( 'pagelink-depth' );
  43
  44                 if ( $wgRequest->getCheck( 'addcat' ) ) {
  45                         $page = $wgRequest->getText( 'pages' );
  46                         $catname = $wgRequest->getText( 'catname' );
  47
  48                         if ( $catname !== '' && $catname !== NULL && $catname !== false ) {
  49                                 $t = Title::makeTitleSafe( NS_MAIN, $catname );
  50                                 if ( $t ) {
  51                                         /**
  52                                          * @fixme This can lead to hitting memory limit for very large
  53                                          * categories. Ideally we would do the lookup synchronously
  54                                          * during the export in a single query.
  55                                          */
  56                                         $catpages = $this->getPagesFromCategory( $t );
  57                                         if ( $catpages ) $page .= "\n" . implode( "\n", $catpages );
  58                                 }
  59                         }
  60                 }
  61                 else if( $wgRequest->wasPosted() && $page == '' ) {
  62                         $page = $wgRequest->getText( 'pages' );
  63                         $this->curonly = $wgRequest->getCheck( 'curonly' );
  64                         $rawOffset = $wgRequest->getVal( 'offset' );
  65                         if( $rawOffset ) {
  66                                 $offset = wfTimestamp( TS_MW, $rawOffset );
  67                         } else {
  68                                 $offset = null;
  69                         }
  70                         $limit = $wgRequest->getInt( 'limit' );
  71                         $dir = $wgRequest->getVal( 'dir' );
  72                         $history = array(
  73                                 'dir' => 'asc',
  74                                 'offset' => false,
  75                                 'limit' => $wgExportMaxHistory,
  76                         );
  77                         $historyCheck = $wgRequest->getCheck( 'history' );
  78                         if ( $this->curonly ) {
  79                                 $history = WikiExporter::CURRENT;
  80                         } elseif ( !$historyCheck ) {
  81                                 if ( $limit > 0 && $limit < $wgExportMaxHistory ) {
  82                                         $history['limit'] = $limit;
  83                                 }
  84                                 if ( !is_null( $offset ) ) {
  85                                         $history['offset'] = $offset;
  86                                 }
  87                                 if ( strtolower( $dir ) == 'desc' ) {
  88                                         $history['dir'] = 'desc';
  89                                 }
  90                         }
  91
  92                         if( $page != '' ) $this->doExport = true;
  93                 } else {
  94                         // Default to current-only for GET requests
  95                         $page = $wgRequest->getText( 'pages', $page );
  96                         $historyCheck = $wgRequest->getCheck( 'history' );
  97                         if( $historyCheck ) {
  98                                 $history = WikiExporter::FULL;
  99                         } else {
 100                                 $history = WikiExporter::CURRENT;
 101                         }
 102
 103                         if( $page != '' ) $this->doExport = true;
 104                 }
 105
 106                 if( !$wgExportAllowHistory ) {
 107                         // Override
 108                         $history = WikiExporter::CURRENT;
 109                 }
 110
 111                 $list_authors = $wgRequest->getCheck( 'listauthors' );
 112                 if ( !$this->curonly || !$wgExportAllowListContributors ) $list_authors = false ;
 113
 114                 if ( $this->doExport ) {
 115                         $wgOut->disable();
 116                         // Cancel output buffering and gzipping if set
 117                         // This should provide safer streaming for pages with history
 118                         wfResetOutputBuffers();
 119                         header( "Content-type: application/xml; charset=utf-8" );
 120                         if( $wgRequest->getCheck( 'wpDownload' ) ) {
 121                                 // Provide a sane filename suggestion
 122                                 $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
 123                                 $wgRequest->response()->header( "Content-disposition: attachment;filename={$filename}" );
 124                         }
 125                         $this->doExport( $page, $history );
 126                         return;
 127                 }
 128
 129                 $wgOut->addHTML( wfMsgExt( 'exporttext', 'parse' ) );
 130
 131                 $form = Xml::openElement( 'form', array( 'method' => 'post',
 132                         'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
 133                 $form .= Xml::inputLabel( wfMsg( 'export-addcattext' )  , 'catname', 'catname', 40 ) . '&nbsp;';
 134                 $form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';
 135                 $form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
 136                 $form .= '<br />';
 137
 138                 if( $wgExportAllowHistory ) {
 139                         $form .= Xml::checkLabel( wfMsg( 'exportcuronly' ), 'curonly', 'curonly', true ) . '<br />';
 140                 } else {
 141                         $wgOut->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
 142                 }
 143                 $form .= Xml::checkLabel( wfMsg( 'export-templates' ), 'templates', 'wpExportTemplates', false ) . '<br />';
 144                 $form .= Xml::inputLabel( wfMsg( 'export-pagelinks' ), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
 145                 // Enable this when we can do something useful exporting/importing image information. :)
 146                 //$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
 147                 $form .= Xml::checkLabel( wfMsg( 'export-download' ), 'wpDownload', 'wpDownload', true ) . '<br />';
 148
 149                 $form .= Xml::submitButton( wfMsg( 'export-submit' ), array( 'accesskey' => 's' ) );
 150                 $form .= Xml::closeElement( 'form' );
 151                 $wgOut->addHTML( $form );
 152         }
 153
 154         /**
 155          * Do the actual page exporting
 156          * @param string $page User input on what page(s) to export
 157          * @param mixed  $history one of the WikiExporter history export constants
 158          */
 159         private function doExport( $page, $history ) {
 160                 global $wgExportMaxHistory;
 161
 162                 /* Split up the input and look up linked pages */
 163                 $inputPages = array_filter( explode( "\n", $page ), array( $this, 'filterPage' ) );
 164                 $pageSet = array_flip( $inputPages );
 165
 166                 if( $this->templates ) {
 167                         $pageSet = $this->getTemplates( $inputPages, $pageSet );
 168                 }
 169
 170                 if( $linkDepth = $this->pageLinkDepth ) {
 171                         $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
 172                 }
 173
 174                 /*
 175                 // Enable this when we can do something useful exporting/importing image information. :)
 176                 if( $this->images ) ) {
 177                         $pageSet = $this->getImages( $inputPages, $pageSet );
 178                 }
 179                 */
 180
 181                 $pages = array_keys( $pageSet );
 182
 183                 /* Ok, let's get to it... */
 184                 if( $history == WikiExporter::CURRENT ) {
 185                         $lb = false;
 186                         $db = wfGetDB( DB_SLAVE );
 187                         $buffer = WikiExporter::BUFFER;
 188                 } else {
 189                         // Use an unbuffered query; histories may be very long!
 190                         $lb = wfGetLBFactory()->newMainLB();
 191                         $db = $lb->getConnection( DB_SLAVE );
 192                         $buffer = WikiExporter::STREAM;
 193
 194                         // This might take a while... :D
 195                         wfSuppressWarnings();
 196                         set_time_limit(0);
 197                         wfRestoreWarnings();
 198                 }
 199                                 $exporter = new WikiExporter( $db, $history, $buffer );
 200                 $exporter->list_authors = $list_authors ;
 201                 $exporter->openStream();
 202                 foreach( $pages as $page ) {
 203                         /*
 204                         if( $wgExportMaxHistory && !$this->curonly ) {
 205                                 $title = Title::newFromText( $page );
 206                                 if( $title ) {
 207                                         $count = Revision::countByTitle( $db, $title );
 208                                         if( $count > $wgExportMaxHistory ) {
 209                                                 wfDebug( __FUNCTION__ .
 210                                                         ": Skipped $page, $count revisions too big\n" );
 211                                                 continue;
 212                                         }
 213                                 }
 214                         }*/
 215                         #Bug 8824: Only export pages the user can read
 216                         $title = Title::newFromText( $page );
 217                         if( is_null( $title ) ) continue; #TODO: perhaps output an <error> tag or something.
 218                                 if( !$title->userCanRead() ) continue; #TODO: perhaps output an <error> tag or something.
 219
 220                                 $exporter->pageByTitle( $title );
 221                 }
 222
 223                 $exporter->closeStream();
 224                 if( $lb ) {
 225                         $lb->closeAll();
 226                 }
 227         }
 228
 229         private function getPagesFromCategory( $title ) {
 230                 global $wgContLang;
 231
 232                 $name = $title->getDBkey();
 233
 234                 $dbr = wfGetDB( DB_SLAVE );
 235
 236                 list( $page, $categorylinks ) = $dbr->tableNamesN( 'page', 'categorylinks' );
 237                 $sql = "SELECT page_namespace, page_title FROM $page " .
 238                         "JOIN $categorylinks ON cl_from = page_id " .
 239                         "WHERE cl_to = " . $dbr->addQuotes( $name );
 240
 241                 $pages = array();
 242                 $res = $dbr->query( $sql, __METHOD__ );
 243                 while ( $row = $dbr->fetchObject( $res ) ) {
 244                         $n = $row->page_title;
 245                         if ($row->page_namespace) {
 246                                 $ns = $wgContLang->getNsText( $row->page_namespace );
 247                                 $n = $ns . ':' . $n;
 248                         }
 249
 250                         $pages[] = $n;
 251                 }
 252                 $dbr->freeResult($res);
 253
 254         return $pages;
 255         }
 256
 257         /**
 258          * Expand a list of pages to include templates used in those pages.
 259          * @param $inputPages array, list of titles to look up
 260          * @param $pageSet array, associative array indexed by titles for output
 261          * @return array associative array index by titles
 262          */
 263         private function getTemplates( $inputPages, $pageSet ) {
 264                 return $this->getLinks( $inputPages, $pageSet,
 265                                         'templatelinks',
 266                                         array( 'tl_namespace AS namespace', 'tl_title AS title' ),
 267                                         array( 'page_id=tl_from' ) );
 268         }
 269
 270         /** Expand a list of pages to include pages linked to from that page. */
 271         private function getPageLinks( $inputPages, $pageSet, $depth ) {
 272                 for( $depth=$depth; $depth>0; --$depth ) {
 273                 $pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks',
 274                         array( 'pl_namespace AS namespace', 'pl_title AS title' ),
 275                         array( 'page_id=pl_from' ) );
 276                 }
 277                 return $pageSet;
 278         }
 279
 280         /**
 281          * Expand a list of pages to include images used in those pages.
 282          * @param $inputPages array, list of titles to look up
 283          * @param $pageSet array, associative array indexed by titles for output
 284          * @return array associative array index by titles
 285          */
 286         private function getImages( $inputPages, $pageSet ) {
 287                 return $this->getLinks( $inputPages, $pageSet,
 288                                         'imagelinks',
 289                                         array( NS_FILE . ' AS namespace', 'il_to AS title' ),
 290                                         array( 'page_id=il_from' ) );
 291         }
 292
 293         /**
 294          * Expand a list of pages to include items used in those pages.
 295          * @private
 296          */
 297         private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
 298                 $dbr = wfGetDB( DB_SLAVE );
 299                 foreach( $inputPages as $page ) {
 300                         $title = Title::newFromText( $page );
 301                         if( $title ) {
 302                                 $pageSet[$title->getPrefixedText()] = true;
 303                                 /// @fixme May or may not be more efficient to batch these
 304                                 ///        by namespace when given multiple input pages.
 305                                 $result = $dbr->select(
 306                                         array( 'page', $table ),
 307                                         $fields,
 308                                         array_merge( $join,
 309                                                 array(
 310                                                         'page_namespace' => $title->getNamespace(),
 311                                                         'page_title' => $title->getDBKey() ) ),
 312                                         __METHOD__ );
 313                                 foreach( $result as $row ) {
 314                                         $template = Title::makeTitle( $row->namespace, $row->title );
 315                                         $pageSet[$template->getPrefixedText()] = true;
 316                                 }
 317                         }
 318                 }
 319                 return $pageSet;
 320         }
 321
 322         /**
 323          * Callback function to remove empty strings from the pages array.
 324          */
 325         private function filterPage( $page ) {
 326                 return $page !== '' && $page !== null;
 327         }
 328 }