Merge "Do not flip margin of magnify icon on user interface language"
[lhc/web/wiklou.git] / includes / specials / SpecialExport.php
1 <?php
2 /**
3 * Implements Special:Export
4 *
5 * Copyright © 2003-2008 Brion Vibber <brion@pobox.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @file
23 * @ingroup SpecialPage
24 */
25
26 /**
27 * A special page that allows users to export pages in an XML file
28 *
29 * @ingroup SpecialPage
30 */
31 class SpecialExport extends SpecialPage {
32 private $curonly, $doExport, $pageLinkDepth, $templates;
33 private $images;
34
/**
 * Set up the special page under the name 'Export'.
 */
public function __construct() {
	parent::__construct( 'Export' );
}
38
/**
 * Entry point of the special page.
 *
 * Reads the request, works out which pages and how much history were asked
 * for, and then either streams the XML export (and returns early) or shows
 * the export form.
 *
 * @param string|null $par Subpage parameter; on requests that are not handled
 *   by one of the POST/button branches it is the default for the 'pages' list
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();
	$request = $this->getRequest();

	// Set some variables
	$this->curonly = true;
	$this->doExport = false;
	$this->templates = $request->getCheck( 'templates' );
	$this->images = $request->getCheck( 'images' ); // Doesn't do anything yet
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;

	// Give these a value before branching: the 'addcat' and 'addns' branches
	// never assign $history, and an undefined-variable notice must not be
	// able to foul up the XML output.
	$page = '';
	$history = WikiExporter::CURRENT;

	if ( $request->getCheck( 'addcat' ) ) {
		// "Add pages from category" button: extend the page list, show the form again
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		if ( $catname !== '' && $catname !== null && $catname !== false ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				/**
				 * @todo FIXME: This can lead to hitting memory limit for very large
				 * categories. Ideally we would do the lookup synchronously
				 * during the export in a single query.
				 */
				$catpages = $this->getPagesFromCategory( $t );
				if ( $catpages ) {
					$page .= "\n" . implode( "\n", $catpages );
				}
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
		// "Add pages from namespace" button: extend the page list, show the form again
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( strval( $nsindex ) !== '' ) {
			/**
			 * Same implementation as above, so same @todo
			 */
			$nspages = $this->getPagesFromNamespace( $nsindex );
			if ( $nspages ) {
				$page .= "\n" . implode( "\n", $nspages );
			}
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
		// Export everything. $page and $history keep the defaults assigned
		// above; doExport() ignores the page list and forces full history
		// when $exportall is set.
		$this->doExport = true;
		$exportall = true;
	} elseif ( $request->wasPosted() && $par == '' ) {
		// Regular form submission
		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );
		$rawOffset = $request->getVal( 'offset' );
		$offset = $rawOffset ? wfTimestamp( TS_MW, $rawOffset ) : null;

		$maxHistory = $config->get( 'ExportMaxHistory' );
		$limit = $request->getInt( 'limit' );
		$dir = $request->getVal( 'dir' );
		$history = array(
			'dir' => 'asc',
			'offset' => false,
			'limit' => $maxHistory,
		);
		$historyCheck = $request->getCheck( 'history' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} elseif ( !$historyCheck ) {
			// Only accept a requested limit that is below the configured
			// maximum (a maximum of 0 accepts any positive limit)
			if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
				$history['limit'] = $limit;
			}

			if ( !is_null( $offset ) ) {
				$history['offset'] = $offset;
			}

			if ( strtolower( $dir ) == 'desc' ) {
				$history['dir'] = 'desc';
			}
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par );

		if ( $request->getCheck( 'history' ) ) {
			$history = WikiExporter::FULL;
		} else {
			$history = WikiExporter::CURRENT;
		}

		if ( $page != '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( 'ExportAllowHistory' ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	$list_authors = $request->getCheck( 'listauthors' );
	if ( !$this->curonly || !$config->get( 'ExportAllowListContributors' ) ) {
		$list_authors = false;
	}

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$request->response()->header( "Content-type: application/xml; charset=utf-8" );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sane filename suggestion
			$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
			$request->response()->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	// Nothing to export (yet): show the form
	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );
	// buildExportForm() may add the 'exportnohistory' notice, so it has to
	// run after 'exporttext' was added and before the form itself is added.
	$form = $this->buildExportForm( $page, $nsindex );
	$out->addHTML( $form );
}

/**
 * Build the HTML of the export form.
 *
 * When history export is disabled by configuration this also adds the
 * 'exportnohistory' message to the page output, in place of the
 * "current revision only" checkbox.
 *
 * @param string $page Text to pre-fill the 'pages' textarea with
 * @param string $nsindex Value to pre-select in the namespace selector
 * @return string HTML of the complete form element
 */
private function buildExportForm( $page, $nsindex ) {
	$config = $this->getConfig();
	$request = $this->getRequest();
	// Checkboxes keep their submitted state on POST and use a default otherwise
	$posted = $request->wasPosted();

	$form = Xml::openElement( 'form', array( 'method' => 'post',
		'action' => $this->getPageTitle()->getLocalURL( 'action=submit' ) ) );
	$form .= Xml::inputLabel(
		$this->msg( 'export-addcattext' )->text(),
		'catname',
		'catname',
		40
	) . '&#160;';
	$form .= Xml::submitButton(
		$this->msg( 'export-addcat' )->text(),
		array( 'name' => 'addcat' )
	) . '<br />';

	if ( $config->get( 'ExportFromNamespaces' ) ) {
		$form .= Html::namespaceSelector(
			array(
				'selected' => $nsindex,
				'label' => $this->msg( 'export-addnstext' )->text()
			), array(
				'name' => 'nsindex',
				'id' => 'namespace',
				'class' => 'namespaceselector',
			)
		) . '&#160;';
		$form .= Xml::submitButton(
			$this->msg( 'export-addns' )->text(),
			array( 'name' => 'addns' )
		) . '<br />';
	}

	if ( $config->get( 'ExportAllowAll' ) ) {
		$form .= Xml::checkLabel(
			$this->msg( 'exportall' )->text(),
			'exportall',
			'exportall',
			$posted ? $request->getCheck( 'exportall' ) : false
		) . '<br />';
	}

	$form .= Xml::element(
		'textarea',
		array( 'name' => 'pages', 'cols' => 40, 'rows' => 10 ),
		$page,
		false
	);
	$form .= '<br />';

	if ( $config->get( 'ExportAllowHistory' ) ) {
		$form .= Xml::checkLabel(
			$this->msg( 'exportcuronly' )->text(),
			'curonly',
			'curonly',
			$posted ? $request->getCheck( 'curonly' ) : true
		) . '<br />';
	} else {
		$this->getOutput()->addWikiMsg( 'exportnohistory' );
	}

	$form .= Xml::checkLabel(
		$this->msg( 'export-templates' )->text(),
		'templates',
		'wpExportTemplates',
		$posted ? $request->getCheck( 'templates' ) : false
	) . '<br />';

	if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
		$form .= Xml::inputLabel(
			$this->msg( 'export-pagelinks' )->text(),
			'pagelink-depth',
			'pagelink-depth',
			20,
			0
		) . '<br />';
	}

	/* Enable this when we can do something useful exporting/importing image information.
	$form .= Xml::checkLabel(
			$this->msg( 'export-images' )->text(),
			'images',
			'wpExportImages',
			false
		) . '<br />';
	*/
	$form .= Xml::checkLabel(
		$this->msg( 'export-download' )->text(),
		'wpDownload',
		'wpDownload',
		$posted ? $request->getCheck( 'wpDownload' ) : true
	) . '<br />';

	if ( $config->get( 'ExportAllowListContributors' ) ) {
		$form .= Xml::checkLabel(
			$this->msg( 'exportlistauthors' )->text(),
			'listauthors',
			'listauthors',
			$posted ? $request->getCheck( 'listauthors' ) : false
		) . '<br />';
	}

	$form .= Xml::submitButton(
		$this->msg( 'export-submit' )->text(),
		Linker::tooltipAndAccesskeyAttribs( 'export' )
	);
	$form .= Xml::closeElement( 'form' );

	return $form;
}
292
/**
 * Tell whether the current user holds the 'override-export-depth' right.
 *
 * @return bool
 */
private function userCanOverrideExportDepth() {
	$user = $this->getUser();

	return $user->isAllowed( 'override-export-depth' );
}
299
/**
 * Stream the XML export to the client.
 *
 * Unless everything is exported, the newline-separated page list is parsed
 * into titles, optionally extended by the templates and linked pages of those
 * titles, de-duplicated, and each title the user may read is exported.
 *
 * @param string $page User input on what page(s) to export
 * @param int $history One of the WikiExporter history export constants
 * @param bool $list_authors Whether to add distinct author list (when
 *   not returning full history)
 * @param bool $exportall Whether to export everything
 */
private function doExport( $page, $history, $list_authors, $exportall ) {
	if ( $exportall ) {
		// If we are grabbing everything, enable full history and ignore the rest
		$history = WikiExporter::FULL;
	} else {
		// Inverted index of all pages to look up; keys are prefixed titles
		$pageSet = array();

		// Split up and normalize input
		foreach ( explode( "\n", $page ) as $line ) {
			$candidate = Title::newFromText( trim( $line ) );
			if ( !$candidate || $candidate->isExternal() || $candidate->getText() === '' ) {
				continue;
			}
			// Only record each page once!
			$pageSet[$candidate->getPrefixedText()] = true;
		}

		// Set of original pages to pass on to further manipulation...
		$inputPages = array_keys( $pageSet );

		// Look up any linked pages if asked...
		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		if ( $this->pageLinkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $this->pageLinkDepth );
		}

		// Enable this when we can do something useful exporting/importing image information.
		// if( $this->images ) ) {
		// 	$pageSet = $this->getImages( $inputPages, $pageSet );
		// }

		// Normalize titles to the same format and remove dupes, see bug 17374
		$pages = array_unique( str_replace( " ", "_", array_keys( $pageSet ) ) );
	}

	/* Ok, let's get to it... */
	if ( $history != WikiExporter::CURRENT ) {
		// Use an unbuffered query; histories may be very long!
		$lb = wfGetLBFactory()->newMainLB();
		$db = $lb->getConnection( DB_SLAVE );
		$buffer = WikiExporter::STREAM;

		// This might take a while... :D
		wfSuppressWarnings();
		set_time_limit( 0 );
		wfRestoreWarnings();
	} else {
		$lb = false;
		$db = wfGetDB( DB_SLAVE );
		$buffer = WikiExporter::BUFFER;
	}

	$exporter = new WikiExporter( $db, $history, $buffer );
	$exporter->list_authors = $list_authors;
	$exporter->openStream();

	if ( $exportall ) {
		$exporter->allPages();
	} else {
		foreach ( $pages as $pageName ) {
			$title = Title::newFromText( $pageName );

			// Bug 8824: Only export pages the user can read.
			// Unparsable and unreadable titles are skipped silently.
			// @todo Perhaps output an <error> tag or something.
			if ( is_null( $title ) || !$title->userCan( 'read', $this->getUser() ) ) {
				continue;
			}

			$exporter->pageByTitle( $title );
		}
	}

	$exporter->closeStream();

	// Only the streaming branch created its own load balancer
	if ( $lb ) {
		$lb->closeAll();
	}
}
402
/**
 * List the pages that are members of a category.
 *
 * The DB key of the given title is matched against categorylinks.cl_to.
 * At most 5000 pages are returned.
 *
 * @param Title $title
 * @return array Page names, prefixed with the namespace name where the
 *   namespace is not the main one
 */
private function getPagesFromCategory( $title ) {
	$name = $title->getDBkey();

	$dbr = wfGetDB( DB_SLAVE );
	$res = $dbr->select(
		array( 'page', 'categorylinks' ),
		array( 'page_namespace', 'page_title' ),
		array( 'cl_from=page_id', 'cl_to' => $name ),
		__METHOD__,
		array( 'LIMIT' => '5000' )
	);

	return $this->pageNamesFromRows( $res );
}

/**
 * List the pages of a namespace.
 *
 * At most 5000 pages are returned.
 *
 * @param int|string $nsindex Namespace index; execute() passes the raw
 *   'nsindex' request text
 * @return array Page names, prefixed with the namespace name where the
 *   namespace is not the main one
 */
private function getPagesFromNamespace( $nsindex ) {
	$dbr = wfGetDB( DB_SLAVE );
	$res = $dbr->select(
		'page',
		array( 'page_namespace', 'page_title' ),
		array( 'page_namespace' => $nsindex ),
		__METHOD__,
		array( 'LIMIT' => '5000' )
	);

	return $this->pageNamesFromRows( $res );
}

/**
 * Turn page rows into page names, shared by the category and namespace lookups.
 *
 * @param Traversable|array $rows Rows having page_namespace and page_title
 * @return array List of page names
 */
private function pageNamesFromRows( $rows ) {
	global $wgContLang;

	$pages = array();

	foreach ( $rows as $row ) {
		$name = $row->page_title;

		// A falsy namespace value is the main namespace, which has no prefix
		if ( $row->page_namespace ) {
			$name = $wgContLang->getNsText( $row->page_namespace ) . ':' . $name;
		}

		$pages[] = $name;
	}

	return $pages;
}
467
/**
 * Add the templates used by the given pages to the page set.
 *
 * @param array $inputPages List of titles to look up
 * @param array $pageSet Associative array indexed by titles for output
 * @return array Associative array indexed by titles
 */
private function getTemplates( $inputPages, $pageSet ) {
	$fields = array( 'namespace' => 'tl_namespace', 'title' => 'tl_title' );
	$join = array( 'page_id=tl_from' );

	return $this->getLinks( $inputPages, $pageSet, 'templatelinks', $fields, $join );
}
481
/**
 * Validate link depth setting, if available.
 *
 * The result is always an integer between 0 and 5. A missing or negative
 * depth yields 0. Users without the 'override-export-depth' right are
 * additionally limited to the configured ExportMaxLinkDepth.
 *
 * @param int|null $depth Requested depth; null when none was given
 * @return int
 */
private function validateLinkDepth( $depth ) {
	// Nothing (or nonsense) requested: do not follow any links
	if ( $depth === null || $depth < 0 ) {
		return 0;
	}

	$depth = intval( $depth );

	if ( !$this->userCanOverrideExportDepth() ) {
		$maxLinkDepth = intval( $this->getConfig()->get( 'ExportMaxLinkDepth' ) );
		// Clamp instead of returning right away, so that the hard limit
		// below also applies when the configured maximum exceeds it.
		$depth = min( $depth, $maxLinkDepth );
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */

	return max( 0, min( $depth, 5 ) );
}
507
/**
 * Add the pages linked from the given pages to the page set, following
 * links for $depth rounds. Each round starts from every page collected so far.
 *
 * @param array $inputPages
 * @param array $pageSet
 * @param int $depth
 * @return array
 */
private function getPageLinks( $inputPages, $pageSet, $depth ) {
	$fields = array( 'namespace' => 'pl_namespace', 'title' => 'pl_title' );
	$join = array( 'page_id=pl_from' );

	while ( $depth > 0 ) {
		$pageSet = $this->getLinks( $inputPages, $pageSet, 'pagelinks', $fields, $join );
		$inputPages = array_keys( $pageSet );
		--$depth;
	}

	return $pageSet;
}
529
/**
 * Add the images used by the given pages to the page set.
 *
 * @param array $inputPages List of titles to look up
 * @param array $pageSet Associative array indexed by titles for output
 *
 * @return array Associative array indexed by titles
 */
private function getImages( $inputPages, $pageSet ) {
	// Image links carry no namespace column; NS_FILE is selected as a constant
	$fields = array( 'namespace' => NS_FILE, 'title' => 'il_to' );
	$join = array( 'page_id=il_from' );

	return $this->getLinks( $inputPages, $pageSet, 'imagelinks', $fields, $join );
}
547
/**
 * Add the targets of one kind of link (as stored in $table) of the given
 * pages to the page set. The input pages themselves are added as well.
 *
 * @param array $inputPages Array of page titles
 * @param array $pageSet
 * @param string $table Link table to join against the page table
 * @param array $fields Array of field names, aliased to 'namespace' and 'title'
 * @param array $join Join condition(s) between the page and the link table
 * @return array
 */
private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$dbr = wfGetDB( DB_SLAVE );
	$tables = array( 'page', $table );

	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );

		// Skip input that does not parse as a title
		if ( !$source ) {
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		/// @todo FIXME: May or may not be more efficient to batch these
		///        by namespace when given multiple input pages.
		$conds = array_merge(
			$join,
			array(
				'page_namespace' => $source->getNamespace(),
				'page_title' => $source->getDBkey()
			)
		);
		$rows = $dbr->select( $tables, $fields, $conds, __METHOD__ );

		foreach ( $rows as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
589
/**
 * Name of the group this special page belongs to.
 *
 * @return string Always 'pagetools'
 */
protected function getGroupName() {
	return 'pagetools';
}
593 }