Merge "Convert Special:DeletedContributions to use OOUI."
[lhc/web/wiklou.git] / includes / specials / SpecialExport.php
1 <?php
2 /**
3 * Implements Special:Export
4 *
5 * Copyright © 2003-2008 Brion Vibber <brion@pobox.com>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @file
23 * @ingroup SpecialPage
24 */
25
26 /**
27 * A special page that allows users to export pages in an XML file
28 *
29 * @ingroup SpecialPage
30 */
31 class SpecialExport extends SpecialPage {
32 private $curonly, $doExport, $pageLinkDepth, $templates;
33
/**
 * Set up the special page under the name "Export".
 */
public function __construct() {
	$pageName = 'Export';
	parent::__construct( $pageName );
}
37
/**
 * Entry point of the special page.
 *
 * Depending on the request this either streams an XML export (and returns
 * without rendering a page) or shows the export form, optionally pre-filled
 * with the pages of a category or namespace.
 *
 * @param string|null $par Subpage text, if any; used as the default page
 *   list when the request carries no 'pages' parameter
 */
public function execute( $par ) {
	$this->setHeaders();
	$this->outputHeader();
	$config = $this->getConfig();
	$request = $this->getRequest();

	// Set some variables
	$this->curonly = true;
	$this->doExport = false;
	$this->templates = $request->getCheck( 'templates' );
	$this->pageLinkDepth = $this->validateLinkDepth(
		$request->getIntOrNull( 'pagelink-depth' )
	);
	$nsindex = '';
	$exportall = false;
	// Defined up front so that every branch below leaves them set; a PHP
	// notice about an undefined variable would foul up the XML output.
	$page = '';
	$history = WikiExporter::CURRENT;

	if ( $request->getCheck( 'addcat' ) ) {
		$page = $request->getText( 'pages' );
		$catname = $request->getText( 'catname' );

		// getText() yields a string, so comparing against '' is sufficient
		if ( $catname !== '' ) {
			$t = Title::makeTitleSafe( NS_MAIN, $catname );
			if ( $t ) {
				/**
				 * @todo FIXME: This can lead to hitting memory limit for very large
				 * categories. Ideally we would do the lookup synchronously
				 * during the export in a single query.
				 */
				$page = $this->appendPageList( $page, $this->getPagesFromCategory( $t ) );
			}
		}
	} elseif ( $request->getCheck( 'addns' ) && $config->get( 'ExportFromNamespaces' ) ) {
		$page = $request->getText( 'pages' );
		$nsindex = $request->getText( 'nsindex', '' );

		if ( $nsindex !== '' ) {
			/**
			 * Same implementation as above, so same @todo
			 */
			$page = $this->appendPageList( $page, $this->getPagesFromNamespace( $nsindex ) );
		}
	} elseif ( $request->getCheck( 'exportall' ) && $config->get( 'ExportAllowAll' ) ) {
		// $page and $history keep their initial values; doExport() ignores
		// the page list and forces full history when exporting everything.
		$this->doExport = true;
		$exportall = true;
	} elseif ( $request->wasPosted() && $par == '' ) {
		$page = $request->getText( 'pages' );
		$this->curonly = $request->getCheck( 'curonly' );

		if ( $this->curonly ) {
			$history = WikiExporter::CURRENT;
		} else {
			$maxHistory = $config->get( 'ExportMaxHistory' );
			$history = [
				'dir' => 'asc',
				'offset' => false,
				'limit' => $maxHistory,
			];

			// limit/offset/dir are only honoured when 'history' is not checked
			if ( !$request->getCheck( 'history' ) ) {
				$limit = $request->getInt( 'limit' );
				if ( $limit > 0 && ( $maxHistory == 0 || $limit < $maxHistory ) ) {
					$history['limit'] = $limit;
				}

				$rawOffset = $request->getVal( 'offset' );
				if ( $rawOffset ) {
					$history['offset'] = wfTimestamp( TS_MW, $rawOffset );
				}

				if ( strtolower( (string)$request->getVal( 'dir' ) ) === 'desc' ) {
					$history['dir'] = 'desc';
				}
			}
		}

		if ( $page !== '' ) {
			$this->doExport = true;
		}
	} else {
		// Default to current-only for GET requests.
		$page = $request->getText( 'pages', $par );
		$history = $request->getCheck( 'history' )
			? WikiExporter::FULL
			: WikiExporter::CURRENT;

		if ( $page !== '' ) {
			$this->doExport = true;
		}
	}

	if ( !$config->get( 'ExportAllowHistory' ) ) {
		// Override
		$history = WikiExporter::CURRENT;
	}

	// The author list is only produced for current-only exports
	$list_authors = $this->curonly
		&& $config->get( 'ExportAllowListContributors' )
		&& $request->getCheck( 'listauthors' );

	if ( $this->doExport ) {
		$this->getOutput()->disable();

		// Cancel output buffering and gzipping if set
		// This should provide safer streaming for pages with history
		wfResetOutputBuffers();
		$response = $request->response();
		$response->header( "Content-type: application/xml; charset=utf-8" );
		$response->header( "X-Robots-Tag: noindex,nofollow" );

		if ( $request->getCheck( 'wpDownload' ) ) {
			// Provide a sane filename suggestion
			$filename = urlencode( $config->get( 'Sitename' ) . '-' . wfTimestampNow() . '.xml' );
			$response->header( "Content-disposition: attachment;filename={$filename}" );
		}

		$this->doExport( $page, $history, $list_authors, $exportall );

		return;
	}

	$out = $this->getOutput();
	$out->addWikiMsg( 'exporttext' );
	if ( !$config->get( 'ExportAllowHistory' ) ) {
		$out->addWikiMsg( 'exportnohistory' );
	}

	$htmlForm = HTMLForm::factory(
		'ooui',
		$this->buildFormDescriptor( $page, $nsindex ),
		$this->getContext()
	);
	$htmlForm->setSubmitTextMsg( 'export-submit' );
	$htmlForm->prepareForm()->displayForm( false );
	$this->addHelpLink( 'Help:Export' );
}

/**
 * Append a list of page names to the newline-separated page list.
 *
 * @param string $page Current page list, one title per line
 * @param array $titles Page names to add
 * @return string The page list, unchanged when $titles is empty
 */
private function appendPageList( $page, array $titles ) {
	if ( !$titles ) {
		return $page;
	}

	// Only insert a separator when there is something to separate from
	if ( $page !== '' ) {
		$page .= "\n";
	}

	return $page . implode( "\n", $titles );
}

/**
 * Build the HTMLForm descriptor of the export form.
 *
 * @param string $page Page list used to pre-fill the textarea
 * @param string $nsindex Namespace pre-selected in the namespace selector
 * @return array
 */
private function buildFormDescriptor( $page, $nsindex ) {
	$config = $this->getConfig();
	$request = $this->getRequest();
	$posted = $request->wasPosted();

	// The category box is only pre-filled while the page list is still empty
	$categoryName = $page === '' ? $request->getText( 'catname' ) : '';

	$formDescriptor = [
		'catname' => [
			'type' => 'textwithbutton',
			'name' => 'catname',
			'horizontal-label' => true,
			'label-message' => 'export-addcattext',
			'default' => $categoryName,
			'size' => 40,
			'buttontype' => 'submit',
			'buttonname' => 'addcat',
			'buttondefault' => $this->msg( 'export-addcat' )->text(),
			'hide-if' => [ '===', 'exportall', '1' ],
		],
	];

	if ( $config->get( 'ExportFromNamespaces' ) ) {
		$formDescriptor['nsindex'] = [
			'type' => 'namespaceselectwithbutton',
			'default' => $nsindex,
			'label-message' => 'export-addnstext',
			'horizontal-label' => true,
			'name' => 'nsindex',
			'id' => 'namespace',
			'cssclass' => 'namespaceselector',
			'buttontype' => 'submit',
			'buttonname' => 'addns',
			'buttondefault' => $this->msg( 'export-addns' )->text(),
			'hide-if' => [ '===', 'exportall', '1' ],
		];
	}

	if ( $config->get( 'ExportAllowAll' ) ) {
		$formDescriptor['exportall'] = [
			'type' => 'check',
			'label-message' => 'exportall',
			'name' => 'exportall',
			'id' => 'exportall',
			'default' => $posted ? $request->getCheck( 'exportall' ) : false,
		];
	}

	$formDescriptor['textarea'] = [
		'class' => 'HTMLTextAreaField',
		'name' => 'pages',
		'label-message' => 'export-manual',
		'nodata' => true,
		'rows' => 10,
		'default' => $page,
		'hide-if' => [ '===', 'exportall', '1' ],
	];

	if ( $config->get( 'ExportAllowHistory' ) ) {
		$formDescriptor['curonly'] = [
			'type' => 'check',
			'label-message' => 'exportcuronly',
			'name' => 'curonly',
			'id' => 'curonly',
			'default' => $posted ? $request->getCheck( 'curonly' ) : true,
		];
	}

	$formDescriptor['templates'] = [
		'type' => 'check',
		'label-message' => 'export-templates',
		'name' => 'templates',
		'id' => 'wpExportTemplates',
		'default' => $posted ? $request->getCheck( 'templates' ) : false,
	];

	if ( $config->get( 'ExportMaxLinkDepth' ) || $this->userCanOverrideExportDepth() ) {
		$formDescriptor['pagelink-depth'] = [
			'type' => 'text',
			'name' => 'pagelink-depth',
			'id' => 'pagelink-depth',
			'label-message' => 'export-pagelinks',
			'default' => '0',
			'size' => 20,
		];
	}

	$formDescriptor['wpDownload'] = [
		'type' => 'check',
		'name' => 'wpDownload',
		'id' => 'wpDownload',
		'default' => $posted ? $request->getCheck( 'wpDownload' ) : true,
		'label-message' => 'export-download',
	];

	if ( $config->get( 'ExportAllowListContributors' ) ) {
		$formDescriptor['listauthors'] = [
			'type' => 'check',
			'label-message' => 'exportlistauthors',
			'default' => $posted ? $request->getCheck( 'listauthors' ) : false,
			'name' => 'listauthors',
			'id' => 'listauthors',
		];
	}

	return $formDescriptor;
}
313
/**
 * Check whether the current user holds the 'override-export-depth' right.
 *
 * @return bool
 */
private function userCanOverrideExportDepth() {
	$user = $this->getUser();

	return $user->isAllowed( 'override-export-depth' );
}
320
/**
 * Do the actual page exporting
 *
 * @param string $page User input on what page(s) to export, one title per line
 * @param int|array $history One of the WikiExporter history export constants,
 *   or an array of history options with 'dir', 'offset' and 'limit' keys
 * @param bool $list_authors Whether to add distinct author list (when
 *   not returning full history)
 * @param bool $exportall Whether to export everything
 */
private function doExport( $page, $history, $list_authors, $exportall ) {
	$pages = [];

	// If we are grabbing everything, enable full history and ignore the rest
	if ( $exportall ) {
		$history = WikiExporter::FULL;
	} else {
		$pageSet = []; // Inverted index of all pages to look up

		// Split up and normalize input
		foreach ( explode( "\n", $page ) as $pageName ) {
			$title = Title::newFromText( trim( $pageName ) );
			if ( $title && !$title->isExternal() && $title->getText() !== '' ) {
				// Only record each page once!
				$pageSet[$title->getPrefixedText()] = true;
			}
		}

		// Set of original pages to pass on to further manipulation...
		$inputPages = array_keys( $pageSet );

		// Look up any linked pages if asked...
		if ( $this->templates ) {
			$pageSet = $this->getTemplates( $inputPages, $pageSet );
		}
		$linkDepth = $this->pageLinkDepth;
		if ( $linkDepth ) {
			$pageSet = $this->getPageLinks( $inputPages, $pageSet, $linkDepth );
		}

		// Normalize titles to the same format and remove dupes, see bug 17374
		$pages = array_unique( str_replace( ' ', '_', array_keys( $pageSet ) ) );
	}

	/* Ok, let's get to it... */
	if ( $history === WikiExporter::CURRENT ) {
		$lb = false;
		$db = wfGetDB( DB_REPLICA );
		$buffer = WikiExporter::BUFFER;
	} else {
		// Use an unbuffered query; histories may be very long!
		$lb = wfGetLBFactory()->newMainLB();
		$db = $lb->getConnection( DB_REPLICA );
		$buffer = WikiExporter::STREAM;

		// This might take a while... :D
		MediaWiki\suppressWarnings();
		set_time_limit( 0 );
		MediaWiki\restoreWarnings();
	}

	try {
		$exporter = new WikiExporter( $db, $history, $buffer );
		$exporter->list_authors = $list_authors;
		$exporter->openStream();

		if ( $exportall ) {
			$exporter->allPages();
		} else {
			foreach ( $pages as $pageName ) {
				# Bug 8824: Only export pages the user can read
				$title = Title::newFromText( $pageName );
				if ( $title === null || !$title->userCan( 'read', $this->getUser() ) ) {
					// @todo Perhaps output an <error> tag or something.
					continue;
				}

				$exporter->pageByTitle( $title );
			}
		}

		$exporter->closeStream();
	} finally {
		// Release the dedicated load balancer even when the export fails
		if ( $lb ) {
			$lb->closeAll();
		}
	}
}
417
/**
 * List the pages that are members of a category.
 *
 * The DB key of $title is matched against cl_to, and at most
 * 'ExportPagelistLimit' rows are fetched.
 *
 * @param Title $title
 * @return array Page names; pages outside namespace 0 are prefixed with
 *   their namespace text and a colon
 */
private function getPagesFromCategory( $title ) {
	global $wgContLang;

	$rows = wfGetDB( DB_REPLICA )->select(
		[ 'page', 'categorylinks' ],
		[ 'page_namespace', 'page_title' ],
		[ 'cl_from=page_id', 'cl_to' => $title->getDBkey() ],
		__METHOD__,
		[ 'LIMIT' => $this->getConfig()->get( 'ExportPagelistLimit' ) ]
	);

	$names = [];
	foreach ( $rows as $row ) {
		$prefix = $row->page_namespace
			? $wgContLang->getNsText( $row->page_namespace ) . ':'
			: '';
		$names[] = $prefix . $row->page_title;
	}

	return $names;
}
452
/**
 * List the pages of one namespace, up to 'ExportPagelistLimit' entries.
 *
 * @param int|string $nsindex Namespace index. The value may arrive as raw
 *   request text, so it is cast to an integer before it reaches the query;
 *   non-numeric input therefore selects namespace 0.
 * @return array Page names; pages outside namespace 0 are prefixed with
 *   their namespace text and a colon
 */
private function getPagesFromNamespace( $nsindex ) {
	global $wgContLang;

	// Never hand unvalidated text to the database as a namespace number
	$nsindex = (int)$nsindex;

	$maxPages = $this->getConfig()->get( 'ExportPagelistLimit' );

	$dbr = wfGetDB( DB_REPLICA );
	$res = $dbr->select(
		'page',
		[ 'page_namespace', 'page_title' ],
		[ 'page_namespace' => $nsindex ],
		__METHOD__,
		[ 'LIMIT' => $maxPages ]
	);

	$pages = [];

	foreach ( $res as $row ) {
		$n = $row->page_title;

		if ( $row->page_namespace ) {
			$ns = $wgContLang->getNsText( $row->page_namespace );
			$n = $ns . ':' . $n;
		}

		$pages[] = $n;
	}

	return $pages;
}
486
/**
 * Add the templates used by the given pages to the page set.
 *
 * Delegates to getLinks() with the 'templatelinks' table.
 *
 * @param array $inputPages List of titles to look up
 * @param array $pageSet Associative array indexed by titles for output
 * @return array Associative array indexed by titles
 */
private function getTemplates( $inputPages, $pageSet ) {
	$fieldAliases = [ 'namespace' => 'tl_namespace', 'title' => 'tl_title' ];
	$joinConds = [ 'page_id=tl_from' ];

	return $this->getLinks( $inputPages, $pageSet, 'templatelinks', $fieldAliases, $joinConds );
}
500
/**
 * Clamp the requested link depth to the allowed range.
 *
 * Negative values become 0. Users who may not override the export depth
 * are limited to 'ExportMaxLinkDepth'. Everything else is capped at 5.
 *
 * @param int|null $depth Requested depth; execute() passes the result of
 *   getIntOrNull(), so this may be null
 * @return int
 */
private function validateLinkDepth( $depth ) {
	if ( $depth < 0 ) {
		return 0;
	}

	$restricted = !$this->userCanOverrideExportDepth();
	if ( $restricted ) {
		$configuredMax = $this->getConfig()->get( 'ExportMaxLinkDepth' );
		if ( $configuredMax < $depth ) {
			return $configuredMax;
		}
	}

	/*
	 * There's a HARD CODED limit of 5 levels of recursion here to prevent a
	 * crazy-big export from being done by someone setting the depth
	 * number too high. In other words, last resort safety net.
	 */
	$hardLimit = 5;

	return (int)min( $depth, $hardLimit );
}
526
/**
 * Expand a list of pages to include pages linked to from that page.
 *
 * Links are followed up to $depth levels. Each level only looks up pages
 * that have not been looked up at an earlier level, and the loop stops
 * early once a level adds no new page.
 *
 * @param array $inputPages Page titles to start from
 * @param array $pageSet Associative array indexed by titles
 * @param int $depth Number of link levels to follow
 * @return array Associative array indexed by titles
 */
private function getPageLinks( $inputPages, $pageSet, $depth ) {
	// Titles whose links have already been fetched, as array keys
	$expanded = [];

	while ( $depth > 0 ) {
		$pageSet = $this->getLinks(
			$inputPages, $pageSet, 'pagelinks',
			[ 'namespace' => 'pl_namespace', 'title' => 'pl_title' ],
			[ 'page_id=pl_from' ]
		);
		--$depth;

		if ( $inputPages ) {
			$expanded += array_fill_keys( $inputPages, true );
		}

		// Querying an already expanded page again cannot add anything new,
		// so the next level only needs the pages not looked up so far.
		$inputPages = array_keys( array_diff_key( $pageSet, $expanded ) );
		if ( !$inputPages ) {
			break;
		}
	}

	return $pageSet;
}
548
/**
 * Expand a list of pages to include items used in those pages.
 *
 * Every input page that parses as a title is added to the set, together
 * with each row the link table returns for it.
 *
 * @param array $inputPages Array of page titles
 * @param array $pageSet Associative array indexed by titles
 * @param string $table Link table joined against 'page'
 * @param array $fields Field list; the rows are read through their
 *   'namespace' and 'title' properties
 * @param array $join Conditions tying $table to 'page'
 * @return array Associative array indexed by titles
 */
private function getLinks( $inputPages, $pageSet, $table, $fields, $join ) {
	$dbr = wfGetDB( DB_REPLICA );

	foreach ( $inputPages as $pageName ) {
		$source = Title::newFromText( $pageName );
		if ( !$source ) {
			continue;
		}

		$pageSet[$source->getPrefixedText()] = true;

		/// @todo FIXME: May or may not be more efficient to batch these
		/// by namespace when given multiple input pages.
		$conds = array_merge(
			$join,
			[
				'page_namespace' => $source->getNamespace(),
				'page_title' => $source->getDBkey()
			]
		);
		$rows = $dbr->select( [ 'page', $table ], $fields, $conds, __METHOD__ );

		foreach ( $rows as $row ) {
			$target = Title::makeTitle( $row->namespace, $row->title );
			$pageSet[$target->getPrefixedText()] = true;
		}
	}

	return $pageSet;
}
590
/**
 * Name of the group this special page belongs to.
 *
 * @return string Always 'pagetools'
 */
protected function getGroupName() {
	$group = 'pagetools';

	return $group;
}
594 }