includes/specials/SpecialExport.php

   1 <?php
   2 # Copyright (C) 2003-2008 Brion Vibber <brion@pobox.com>
   3 # http://www.mediawiki.org/
   4 #
   5 # This program is free software; you can redistribute it and/or modify
   6 # it under the terms of the GNU General Public License as published by
   7 # the Free Software Foundation; either version 2 of the License, or
   8 # (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License along
  16 # with this program; if not, write to the Free Software Foundation, Inc.,
  17 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18 # http://www.gnu.org/copyleft/gpl.html
  19 /**
  20  * @file
  21  * @ingroup SpecialPage
  22  */
  23
  24 class SpecialExport extends SpecialPage {
  25
  26         private $curonly, $doExport, $pageLinkDepth, $templates;
  27         private $images;
  28
  29         public function __construct() {
  30                 parent::__construct( 'Export' );
  31         }
  32
  33         public function execute( $par ) {
  34                 global $wgOut, $wgRequest, $wgSitename, $wgExportAllowListContributors;
  35                 global $wgExportAllowHistory, $wgExportMaxHistory;
  36
  37                 $this->setHeaders();
  38                 $this->outputHeader();
  39
  40                 // Set some variables
  41                 $this->curonly = true;
  42                 $this->doExport = false;
  43                 $this->templates = $wgRequest->getCheck( 'templates' );
  44                 $this->images = $wgRequest->getCheck( 'images' ); // Doesn't do anything yet
  45                 $this->pageLinkDepth = $wgRequest->getIntOrNull( 'pagelink-depth' );
  46
  47                 if ( $wgRequest->getCheck( 'addcat' ) ) {
  48                         $page = $wgRequest->getText( 'pages' );
  49                         $catname = $wgRequest->getText( 'catname' );
  50
  51                         if ( $catname !== '' && $catname !== NULL && $catname !== false ) {
  52                                 $t = Title::makeTitleSafe( NS_MAIN, $catname );
  53                                 if ( $t ) {
  54                                         /**
  55                                          * @fixme This can lead to hitting memory limit for very large
  56                                          * categories. Ideally we would do the lookup synchronously
  57                                          * during the export in a single query.
  58                                          */
  59                                         $catpages = $this->getPagesFromCategory( $t );
  60                                         if ( $catpages ) $page .= "\n" . implode( "\n", $catpages );
  61                                 }
  62                         }
  63                 }
  64                 else if( $wgRequest->wasPosted() && $page == '' ) {
  65                         $page = $wgRequest->getText( 'pages' );
  66                         $this->curonly = $wgRequest->getCheck( 'curonly' );
  67                         $rawOffset = $wgRequest->getVal( 'offset' );
  68                         if( $rawOffset ) {
  69                                 $offset = wfTimestamp( TS_MW, $rawOffset );
  70                         } else {
  71                                 $offset = null;
  72                         }
  73                         $limit = $wgRequest->getInt( 'limit' );
  74                         $dir = $wgRequest->getVal( 'dir' );
  75                         $history = array(
  76                                 'dir' => 'asc',
  77                                 'offset' => false,
  78                                 'limit' => $wgExportMaxHistory,
  79                         );
  80                         $historyCheck = $wgRequest->getCheck( 'history' );
  81                         if ( $this->curonly ) {
  82                                 $history = WikiExporter::CURRENT;
  83                         } elseif ( !$historyCheck ) {
  84                                 if ( $limit > 0 && $limit < $wgExportMaxHistory ) {
  85                                         $history['limit'] = $limit;
  86                                 }
  87                                 if ( !is_null( $offset ) ) {
  88                                         $history['offset'] = $offset;
  89                                 }
  90                                 if ( strtolower( $dir ) == 'desc' ) {
  91                                         $history['dir'] = 'desc';
  92                                 }
  93                         }
  94
  95                         if( $page != '' ) $this->doExport = true;
  96                 } else {
  97                         // Default to current-only for GET requests
  98                         $page = $wgRequest->getText( 'pages', $par );
  99                         $historyCheck = $wgRequest->getCheck( 'history' );
 100                         if( $historyCheck ) {
 101                                 $history = WikiExporter::FULL;
 102                         } else {
 103                                 $history = WikiExporter::CURRENT;
 104                         }
 105
 106                         if( $page != '' ) $this->doExport = true;
 107                 }
 108
 109                 if( !$wgExportAllowHistory ) {
 110                         // Override
 111                         $history = WikiExporter::CURRENT;
 112                 }
 113
 114                 $list_authors = $wgRequest->getCheck( 'listauthors' );
 115                 if ( !$this->curonly || !$wgExportAllowListContributors ) $list_authors = false ;
 116
 117                 if ( $this->doExport ) {
 118                         $wgOut->disable();
 119                         // Cancel output buffering and gzipping if set
 120                         // This should provide safer streaming for pages with history
 121                         wfResetOutputBuffers();
 122                         header( "Content-type: application/xml; charset=utf-8" );
 123                         if( $wgRequest->getCheck( 'wpDownload' ) ) {
 124                                 // Provide a sane filename suggestion
 125                                 $filename = urlencode( $wgSitename . '-' . wfTimestampNow() . '.xml' );
 126                                 $wgRequest->response()->header( "Content-disposition: attachment;filename={$filename}" );
 127                         }
 128                         $this->doExport( $page, $history );
 129                         return;
 130                 }
 131
 132                 $wgOut->addHTML( wfMsgExt( 'exporttext', 'parse' ) );
 133
 134                 $form = Xml::openElement( 'form', array( 'method' => 'post',
 135                         'action' => $this->getTitle()->getLocalUrl( 'action=submit' ) ) );
 136                 $form .= Xml::inputLabel( wfMsg( 'export-addcattext' )  , 'catname', 'catname', 40 ) . '&nbsp;';
 137                 $form .= Xml::submitButton( wfMsg( 'export-addcat' ), array( 'name' => 'addcat' ) ) . '<br />';
 138                 $form .= Xml::element( 'textarea', array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ), $page, false );
 139                 $form .= '<br />';
 140
 141                 if( $wgExportAllowHistory ) {
 142                         $form .= Xml::checkLabel( wfMsg( 'exportcuronly' ), 'curonly', 'curonly', true ) . '<br />';
 143                 } else {
 144                         $wgOut->addHTML( wfMsgExt( 'exportnohistory', 'parse' ) );
 145                 }
 146                 $form .= Xml::checkLabel( wfMsg( 'export-templates' ), 'templates', 'wpExportTemplates', false ) . '<br />';
 147                 $form .= Xml::inputLabel( wfMsg( 'export-pagelinks' ), 'pagelink-depth', 'pagelink-depth', 20, 0 ) . '<br />';
 148                 // Enable this when we can do something useful exporting/importing image information. :)
 149                 //$form .= Xml::checkLabel( wfMsg( 'export-images' ), 'images', 'wpExportImages', false ) . '<br />';
 150                 $form .= Xml::checkLabel( wfMsg( 'export-download' ), 'wpDownload', 'wpDownload', true ) . '<br />';
 151
 152                 $form .= Xml::submitButton( wfMsg( 'export-submit' ), array( 'accesskey' => 's' ) );
 153                 $form .= Xml::closeElement( 'form' );
 154                 $wgOut->addHTML( $form );
 155         }
 156
 157         /**
 158          * Do the actual page exporting
 159          * @param string $page User input on what page(s) to export
 160          * @param mixed  $history one of the WikiExporter history export constants
 161          */
 162         private function doExport( $page, $history ) {
 163                 global $wgExportMaxHistory;
 164
 165                 /* Split up the input and look up linked pages */
 166                 $inputPages = array_filter( explode( "\n", $page ), array( $this, 'filterPage' ) );
 167                 $pageSet = array_flip( $inputPages );
 168
 169                 if( $this->templates ) {
 170                         $pageSet = $this->getTemplates( $inputPages, $pageSet );
 171                 }
 172
 173                 if( $linkDepth = $this->pageLinkDepth ) {
 174                         $pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
 175                 }
 176
 177                 /*
 178                 // Enable this when we can do something useful exporting/importing image information. :)
 179                 if( $this->images ) ) {
 180                         $pageSet = $this->getImages( $inputPages, $pageSet );
 181                 }
 182                 */
 183
 184                 $pages = array_keys( $pageSet );
 185
 186                 /* Ok, let's get to it... */
 187                 if( $history == WikiExporter::CURRENT ) {
 188                         $lb = false;
 189                         $db = wfGetDB( DB_SLAVE );
 190                         $buffer = WikiExporter::BUFFER;
 191                 } else {
 192                         // Use an unbuffered query; histories may be very long!
 193                         $lb = wfGetLBFactory()->newMainLB();
 194                         $db = $lb->getConnection( DB_SLAVE );
 195                         $buffer = WikiExporter::STREAM;
 196
 197                         // This might take a while... :D
 198                         wfSuppressWarnings();
 199                         set_time_limit(0);
 200                         wfRestoreWarnings();
 201                 }
 202                                 $exporter = new WikiExporter( $db, $history, $buffer );
 203                 $exporter->list_authors = $list_authors ;
 204                 $exporter->openStream();
 205                 foreach( $pages as $page ) {
 206                         /*
 207                         if( $wgExportMaxHistory && !$this->curonly ) {
 208                                 $title = Title::newFromText( $page );
 209                                 if( $title ) {
 210                                         $count = Revision::countByTitle( $db, $title );
 211                                         if( $count > $wgExportMaxHistory ) {
 212                                                 wfDebug( __FUNCTION__ .
 213                                                         ": Skipped $page, $count revisions too big\n" );
 214                                                 continue;
 215                                         }
 216                                 }
 217                         }*/
 218                         #Bug 8824: Only export pages the user can read
 219                         $title = Title::newFromText( $page );
 220                         if( is_null( $title ) ) continue; #TODO: perhaps output an <error> tag or something.
 221                                 if( !$title->userCanRead() ) continue; #TODO: perhaps output an <error> tag or something.
 222
 223                                 $exporter->pageByTitle( $title );
 224                 }
 225
 226                 $exporter->closeStream();
 227                 if( $lb ) {
 228                         $lb->closeAll();
 229                 }
 230         }
 231
 232         private function getPagesFromCategory( $title ) {
 233                 global $wgContLang;
 234
 235                 $name = $title->getDBkey();
 236
 237                 $dbr = wfGetDB( DB_SLAVE );
 238
 239                 list( $page, $categorylinks ) = $dbr->tableNamesN( 'page', 'categorylinks' );
 240                 $sql = "SELECT page_namespace, page_title FROM $page " .
 241                         "JOIN $categorylinks ON cl_from = page_id " .
 242                         "WHERE cl_to = " . $dbr->addQuotes( $name );
 243
 244                 $pages = array();
 245                 $res = $dbr->query( $sql, __METHOD__ );
 246                 while ( $row = $dbr->fetchObject( $res ) ) {
 247                         $n = $row->page_title;
 248                         if ($row->page_namespace) {
 249                                 $ns = $wgContLang->getNsText( $row->page_namespace );
 250                                 $n = $ns . ':' . $n;
 251                         }
 252
 253                         $pages[] = $n;
 254                 }
 255                 $dbr->freeResult($res);
 256
 257         return $pages;
 258         }
 259
 260         /**
 261          * Expand a list of pages to include templates used in those pages.
 262          * @param $inputPages array, list of titles to look up
 263          * @param $pageSet array, associative array indexed by titles for output
 264          * @return array associative array index by titles
 265          */
 266         private function getTemplates( $inputPages, $pageSet ) {
 267                 return $this->getLinks( $inputPages, $pageSet,
 268                                         'templatelinks',
 269                                         array( 'tl_namespace AS namespace', 'tl_title AS title' ),
 270                                         array( 'page_id=tl_from' ) );
 271         }
 272
 273         /** Expand a list of pages to include pages linked to from that page. */
 274         private function getPageLinks( $inputPages, $pageSet, $depth ) {
 275                 for( $depth=$depth; $depth>0; --$depth ) {
 276                 $pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks',
 277                         array( 'pl_namespace AS namespace', 'pl_title AS title' ),
 278                         array( 'page_id=pl_from' ) );
 279                 }
 280                 return $pageSet;
 281         }
 282
 283         /**
 284          * Expand a list of pages to include images used in those pages.
 285          * @param $inputPages array, list of titles to look up
 286          * @param $pageSet array, associative array indexed by titles for output
 287          * @return array associative array index by titles
 288          */
 289         private function getImages( $inputPages, $pageSet ) {
 290                 return $this->getLinks( $inputPages, $pageSet,
 291                                         'imagelinks',
 292                                         array( NS_FILE . ' AS namespace', 'il_to AS title' ),
 293                                         array( 'page_id=il_from' ) );
 294         }
 295
 296         /**
 297          * Expand a list of pages to include items used in those pages.
 298          * @private
 299          */
 300         private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
 301                 $dbr = wfGetDB( DB_SLAVE );
 302                 foreach( $inputPages as $page ) {
 303                         $title = Title::newFromText( $page );
 304                         if( $title ) {
 305                                 $pageSet[$title->getPrefixedText()] = true;
 306                                 /// @fixme May or may not be more efficient to batch these
 307                                 ///        by namespace when given multiple input pages.
 308                                 $result = $dbr->select(
 309                                         array( 'page', $table ),
 310                                         $fields,
 311                                         array_merge( $join,
 312                                                 array(
 313                                                         'page_namespace' => $title->getNamespace(),
 314                                                         'page_title' => $title->getDBKey() ) ),
 315                                         __METHOD__ );
 316                                 foreach( $result as $row ) {
 317                                         $template = Title::makeTitle( $row->namespace, $row->title );
 318                                         $pageSet[$template->getPrefixedText()] = true;
 319                                 }
 320                         }
 321                 }
 322                 return $pageSet;
 323         }
 324
 325         /**
 326          * Callback function to remove empty strings from the pages array.
 327          */
 328         private function filterPage( $page ) {
 329                 return $page !== '' && $page !== null;
 330         }
 331 }