Merge "ResourceLoader: Test that all module dependencies are satisfiable"
[lhc/web/wiklou.git] / includes / Import.php
1 <?php
2 /**
3 * MediaWiki page data importer.
4 *
5 * Copyright © 2003,2005 Brion Vibber <brion@pobox.com>
6 * https://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup SpecialPage
25 */
26
/**
 * XML file reader for the page data importer.
 *
 * Pulls a MediaWiki XML dump through XMLReader node by node and hands the
 * pages, revisions, uploads and log items it finds to configurable callbacks.
 *
 * implements Special:Import
 * @ingroup SpecialPage
 */
class WikiImporter {
	/** @var XMLReader|null Reader positioned on the dump being imported */
	private $reader = null;
	private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
	private $mSiteInfoCallback, $mTargetNamespace, $mTargetRootPage, $mPageOutCallback;
	private $mNoticeCallback, $mDebug;
	private $mImportUploads, $mImageBasePath;
	private $mNoUpdates = false;

	/**
	 * Creates an ImportXMLReader drawing from the source provided
	 * @param ImportStreamSource $source
	 */
	function __construct( ImportStreamSource $source ) {
		$this->reader = new XMLReader();

		// XMLReader can only open URLs, so the source is exposed through a
		// custom stream wrapper and addressed by a registration id.
		if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
			stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
		}
		$id = UploadSourceAdapter::registerSource( $source );
		if ( defined( 'LIBXML_PARSEHUGE' ) ) {
			$this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
		} else {
			$this->reader->open( "uploadsource://$id" );
		}

		// Default callbacks
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, 'importUpload' ) );
		$this->setLogItemCallback( array( $this, 'importLogItem' ) );
		$this->setPageOutCallback( array( $this, 'finishImportPage' ) );
	}

	/**
	 * @return null|XMLReader
	 */
	public function getReader() {
		return $this->reader;
	}

	/**
	 * Log an XML error. Despite the name, nothing is thrown here: the
	 * message is only written to the debug log.
	 * @param string $err
	 */
	public function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Write a message to the debug log, but only while debug mode is on.
	 * @param string $data
	 */
	public function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Write a message to the debug log regardless of debug mode.
	 * @param string $data
	 */
	public function warn( $data ) {
		wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Report a notice, either through the notice callback or, when none is
	 * set, by echoing the rendered message.
	 * @param string $msg Message key; any further arguments are message parameters
	 */
	public function notice( $msg /*, $param, ...*/ ) {
		$params = func_get_args();
		array_shift( $params );

		if ( is_callable( $this->mNoticeCallback ) ) {
			call_user_func( $this->mNoticeCallback, $msg, $params );
		} else { # No ImportReporter -> CLI
			echo wfMessage( $msg, $params )->text() . "\n";
		}
	}

	/**
	 * Set debug mode...
	 * @param bool $debug
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Set 'no updates' mode. In this mode, the link tables will not be updated by the importer
	 * @param bool $noupdates
	 */
	function setNoUpdates( $noupdates ) {
		$this->mNoUpdates = $noupdates;
	}

	/**
	 * Set a callback that displays notice messages
	 *
	 * @param callable $callback
	 * @return callable
	 */
	public function setNoticeCallback( $callback ) {
		return wfSetVar( $this->mNoticeCallback, $callback );
	}

	/**
	 * Sets the action to perform as each new page in the stream is reached.
	 * @param callable $callback
	 * @return callable The previously set callback
	 */
	public function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page in the stream is completed.
	 * Callback accepts the page title (as a Title object), a second object
	 * with the original title form (in case it's been overridden into a
	 * local namespace), and a count of revisions.
	 *
	 * @param callable $callback
	 * @return callable The previously set callback
	 */
	public function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page revision is reached.
	 * @param callable $callback
	 * @return callable The previously set callback
	 */
	public function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each file upload version is reached.
	 * @param callable $callback
	 * @return callable The previously set callback
	 */
	public function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each log item reached.
	 * @param callable $callback
	 * @return callable The previously set callback
	 */
	public function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform when site info is encountered
	 * @param callable $callback
	 * @return callable The previously set callback
	 */
	public function setSiteInfoCallback( $callback ) {
		$previous = $this->mSiteInfoCallback;
		$this->mSiteInfoCallback = $callback;
		return $previous;
	}

	/**
	 * Set a target namespace to override the defaults
	 * @param null|int $namespace Null to keep each page's own namespace
	 * @return bool True when the value was accepted, false for a negative namespace
	 */
	public function setTargetNamespace( $namespace ) {
		if ( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
			return true;
		}

		if ( $namespace >= 0 ) {
			// @todo FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
			return true;
		}

		return false;
	}

	/**
	 * Set a target root page under which all pages are imported
	 * @param null|string $rootpage
	 * @return Status
	 */
	public function setTargetRootPage( $rootpage ) {
		$status = Status::newGood();
		if ( is_null( $rootpage ) ) {
			// No rootpage
			$this->mTargetRootPage = null;
		} elseif ( $rootpage !== '' ) {
			$rootpage = rtrim( $rootpage, '/' ); //avoid double slashes
			$title = Title::newFromText( $rootpage, !is_null( $this->mTargetNamespace )
				? $this->mTargetNamespace
				: NS_MAIN
			);

			if ( !$title || $title->isExternal() ) {
				$status->fatal( 'import-rootpage-invalid' );
			} else {
				if ( !MWNamespace::hasSubpages( $title->getNamespace() ) ) {
					global $wgContLang;

					$displayNSText = $title->getNamespace() == NS_MAIN
						? wfMessage( 'blanknamespace' )->text()
						: $wgContLang->getNsText( $title->getNamespace() );
					$status->fatal( 'import-rootpage-nosubpage', $displayNSText );
				} else {
					// set namespace to 'all', so the namespace check in processTitle() can pass
					$this->setTargetNamespace( null );
					$this->mTargetRootPage = $title->getPrefixedDBkey();
				}
			}
		}
		return $status;
	}

	/**
	 * @param string $dir
	 */
	public function setImageBasePath( $dir ) {
		$this->mImageBasePath = $dir;
	}

	/**
	 * @param bool $import
	 */
	public function setImportUploads( $import ) {
		$this->mImportUploads = $import;
	}

	/**
	 * Default per-revision callback, performs the import.
	 * @param WikiRevision $revision
	 * @return bool
	 */
	public function importRevision( $revision ) {
		if ( !$revision->getContentHandler()->canBeUsedOn( $revision->getTitle() ) ) {
			$this->notice( 'import-error-bad-location',
				$revision->getTitle()->getPrefixedText(),
				$revision->getID(),
				$revision->getModel(),
				$revision->getFormat() );

			return false;
		}

		try {
			$dbw = wfGetDB( DB_MASTER );
			return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
		} catch ( MWContentSerializationException $ex ) {
			$this->notice( 'import-error-unserialize',
				$revision->getTitle()->getPrefixedText(),
				$revision->getID(),
				$revision->getModel(),
				$revision->getFormat() );
		}

		return false;
	}

	/**
	 * Default per-log-item callback, performs the import.
	 * @param WikiRevision $revision
	 * @return bool
	 */
	public function importLogItem( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importLogItem' ) );
	}

	/**
	 * Default per-upload callback, performs the import.
	 * @param WikiRevision $revision
	 * @return bool
	 */
	public function importUpload( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
	}

	/**
	 * Mostly for hook use
	 * @param Title $title
	 * @param string $origTitle
	 * @param int $revCount
	 * @param int $sRevCount
	 * @param array $pageInfo
	 * @return bool
	 */
	public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
		$args = func_get_args();
		return wfRunHooks( 'AfterImportPage', $args );
	}

	/**
	 * Alternate per-revision callback, for debugging.
	 * @param WikiRevision $revision
	 */
	public function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if ( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notify the callback function when a new "<page>" is reached.
	 * @param array|bool $title Whatever processTitle() returned for the page
	 */
	function pageCallback( $title ) {
		if ( isset( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Notify the callback function when a "</page>" is closed.
	 * @param Title|null $title
	 * @param Title|null $origTitle
	 * @param int $revCount
	 * @param int $sucCount Number of revisions for which callback returned true
	 * @param array $pageInfo Associative array of page information
	 */
	private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
		if ( isset( $this->mPageOutCallback ) ) {
			$args = func_get_args();
			call_user_func_array( $this->mPageOutCallback, $args );
		}
	}

	/**
	 * Notify the callback function of a revision
	 * @param WikiRevision $revision
	 * @return bool|mixed False when no callback is set
	 */
	private function revisionCallback( $revision ) {
		if ( isset( $this->mRevisionCallback ) ) {
			return call_user_func_array( $this->mRevisionCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Notify the callback function of a new log item
	 * @param WikiRevision $revision
	 * @return bool|mixed False when no callback is set
	 */
	private function logItemCallback( $revision ) {
		if ( isset( $this->mLogItemCallback ) ) {
			return call_user_func_array( $this->mLogItemCallback,
				array( $revision, $this ) );
		} else {
			return false;
		}
	}

	/**
	 * Retrieves the contents of the named attribute of the current element.
	 * @param string $attr The name of the attribute
	 * @return string|null The value exactly as XMLReader::getAttribute() reports
	 *  it; callers should not assume an empty string for a missing attribute.
	 */
	public function nodeAttribute( $attr ) {
		return $this->reader->getAttribute( $attr );
	}

	/**
	 * Shouldn't something like this be built-in to XMLReader?
	 * Fetches text contents of the current element, assuming
	 * no sub-elements or such scary things.
	 *
	 * Text, CDATA and significant-whitespace nodes are concatenated until the
	 * next end-element node. If the input ends first, the reader is closed and
	 * an empty string is returned.
	 *
	 * @return string
	 * @access private
	 */
	public function nodeContents() {
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
				case XMLReader::TEXT:
				case XMLReader::CDATA: // CDATA sections carry character data too
				case XMLReader::SIGNIFICANT_WHITESPACE:
					$buffer .= $this->reader->value;
					break;
				case XMLReader::END_ELEMENT:
					return $buffer;
			}
		}

		$this->reader->close();
		return '';
	}

	# --------------

	/** Left in for debugging */
	private function dumpElement() {
		static $lookup = null;
		if ( !$lookup ) {
			$xmlReaderConstants = array(
				"NONE",
				"ELEMENT",
				"ATTRIBUTE",
				"TEXT",
				"CDATA",
				"ENTITY_REF",
				"ENTITY",
				"PI",
				"COMMENT",
				"DOC",
				"DOC_TYPE",
				"DOC_FRAGMENT",
				"NOTATION",
				"WHITESPACE",
				"SIGNIFICANT_WHITESPACE",
				"END_ELEMENT",
				"END_ENTITY",
				"XML_DECLARATION",
			);
			$lookup = array();

			// Map each numeric node type back to its constant name
			foreach ( $xmlReaderConstants as $name ) {
				$lookup[constant( "XMLReader::$name" )] = $name;
			}
		}

		// var_dump() writes its own output and returns nothing, so the
		// trailing blank lines are printed separately.
		var_dump(
			$lookup[$this->reader->nodeType],
			$this->reader->name,
			$this->reader->value
		);
		print "\n\n";
	}

	/**
	 * Primary entry point
	 *
	 * The previous libxml entity-loader setting is restored on every exit
	 * path, including exceptions raised by handlers, hooks or callbacks.
	 *
	 * @throws MWException
	 * @return bool
	 */
	public function doImport() {
		// Calls to reader->read need to be wrapped in calls to
		// libxml_disable_entity_loader() to avoid local file
		// inclusion attacks (bug 46932).
		$oldDisable = libxml_disable_entity_loader( true );
		try {
			$this->readDump();
		} catch ( Exception $ex ) {
			// No "finally" here: catch-and-rethrow keeps this runnable on
			// PHP versions that predate it.
			libxml_disable_entity_loader( $oldDisable );
			throw $ex;
		}

		libxml_disable_entity_loader( $oldDisable );
		return true;
	}

	/**
	 * Checks the root element and dispatches every top-level tag of the dump.
	 * @throws MWException If the document does not start with <mediawiki>
	 */
	private function readDump() {
		$this->reader->read();

		if ( $this->reader->name != 'mediawiki' ) {
			throw new MWException( "Expected <mediawiki> tag, got " .
				$this->reader->name );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		$skip = false;
		while ( $keepReading ) {
			$tag = $this->reader->name;
			$type = $this->reader->nodeType;

			if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() jumps over the whole subtree of the unknown element
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}
	}

	/**
	 * @return bool
	 * @throws MWException
	 */
	private function handleSiteInfo() {
		// Site info is useful, but not actually used for dump imports.
		// Includes a quick short-circuit to save performance.
		if ( !$this->mSiteInfoCallback ) {
			$this->reader->next();
			return true;
		}
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	/**
	 * Reads one <logitem> element and passes the collected fields on to
	 * processLogItem().
	 */
	private function handleLogItem() {
		$this->debug( "Enter log item handler." );
		$logInfo = array();

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
			'logtitle', 'params' );

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'logitem' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleLogItemXMLTag', array(
				$this, $logInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$logInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$logInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled log-item XML tag $tag" );
			}
		}

		$this->processLogItem( $logInfo );
	}

	/**
	 * Builds a WikiRevision from parsed log item fields and hands it to the
	 * log item callback.
	 * @param array $logInfo
	 * @return bool|mixed
	 */
	private function processLogItem( $logInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $logInfo['id'] );
		$revision->setType( $logInfo['type'] );
		$revision->setAction( $logInfo['action'] );
		$revision->setTimestamp( $logInfo['timestamp'] );
		$revision->setParams( $logInfo['params'] );
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );
		$revision->setNoUpdates( $this->mNoUpdates );

		if ( isset( $logInfo['comment'] ) ) {
			$revision->setComment( $logInfo['comment'] );
		}

		if ( isset( $logInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $logInfo['contributor']['ip'] );
		}
		if ( isset( $logInfo['contributor']['username'] ) ) {
			$revision->setUserName( $logInfo['contributor']['username'] );
		}

		return $this->logItemCallback( $revision );
	}

	/**
	 * Reads one <page> element: collects the page fields, dispatches its
	 * revisions and uploads, then fires the page-out callback.
	 */
	private function handlePage() {
		// Handle page data.
		$this->debug( "Enter page handler." );
		$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

		$skip = false;
		$badTitle = false;
		// Stays null when the page has no <title> or the title is rejected
		$origTitle = null;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'page' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( $badTitle ) {
				// The title is invalid, bail out of this page
				$skip = true;
			} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
				&$pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				// An XML snippet:
				// <page>
				//     <id>123</id>
				//     <title>Page</title>
				//     <redirect title="NewTitle"/>
				//     ...
				// Because the redirect tag is built differently, we need special handling for that case.
				if ( $tag == 'redirect' ) {
					$pageInfo[$tag] = $this->nodeAttribute( 'title' );
				} else {
					$pageInfo[$tag] = $this->nodeContents();
					if ( $tag == 'title' ) {
						// Either array( Title, original Title ) or false
						$title = $this->processTitle( $pageInfo['title'] );

						$this->pageCallback( $title );

						if ( $title ) {
							list( $pageInfo['_title'], $origTitle ) = $title;
						} else {
							// Same end state as destructuring false, spelled out
							$pageInfo['_title'] = null;
							$origTitle = null;
							$badTitle = true;
							$skip = true;
						}
					}
				}
			} elseif ( $tag == 'revision' ) {
				$this->handleRevision( $pageInfo );
			} elseif ( $tag == 'upload' ) {
				$this->handleUpload( $pageInfo );
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled page XML tag $tag" );
				$skip = true;
			}
		}

		// A page without any <title> element never sets '_title'
		$pageTitle = isset( $pageInfo['_title'] ) ? $pageInfo['_title'] : null;

		$this->pageOutCallback( $pageTitle, $origTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}

	/**
	 * Reads one <revision> element and updates the page's revision counters.
	 * @param array $pageInfo
	 */
	private function handleRevision( &$pageInfo ) {
		$this->debug( "Enter revision handler" );
		$revisionInfo = array();

		$normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'model', 'format', 'text' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'revision' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleRevisionXMLTag', array(
				$this, $pageInfo, $revisionInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$revisionInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$revisionInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled revision XML tag $tag" );
				$skip = true;
			}
		}

		$pageInfo['revisionCount']++;
		if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
			$pageInfo['successfulRevisionCount']++;
		}
	}

	/**
	 * Builds a WikiRevision from parsed revision fields and hands it to the
	 * revision callback.
	 * @param array $pageInfo
	 * @param array $revisionInfo
	 * @return bool|mixed
	 */
	private function processRevision( $pageInfo, $revisionInfo ) {
		$revision = new WikiRevision;

		if ( isset( $revisionInfo['id'] ) ) {
			$revision->setID( $revisionInfo['id'] );
		}
		if ( isset( $revisionInfo['model'] ) ) {
			$revision->setModel( $revisionInfo['model'] );
		}
		if ( isset( $revisionInfo['format'] ) ) {
			$revision->setFormat( $revisionInfo['format'] );
		}
		$revision->setTitle( $pageInfo['_title'] );

		if ( isset( $revisionInfo['text'] ) ) {
			$handler = $revision->getContentHandler();
			$text = $handler->importTransform(
				$revisionInfo['text'],
				$revision->getFormat() );

			$revision->setText( $text );
		}
		if ( isset( $revisionInfo['timestamp'] ) ) {
			$revision->setTimestamp( $revisionInfo['timestamp'] );
		} else {
			$revision->setTimestamp( wfTimestampNow() );
		}

		if ( isset( $revisionInfo['comment'] ) ) {
			$revision->setComment( $revisionInfo['comment'] );
		}

		// The mere presence of <minor> marks the edit as minor
		if ( isset( $revisionInfo['minor'] ) ) {
			$revision->setMinor( true );
		}
		if ( isset( $revisionInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $revisionInfo['contributor']['ip'] );
		}
		if ( isset( $revisionInfo['contributor']['username'] ) ) {
			$revision->setUserName( $revisionInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return $this->revisionCallback( $revision );
	}

	/**
	 * Reads one <upload> element and, when upload importing is enabled,
	 * passes it on to processUpload().
	 * @param array $pageInfo
	 * @return mixed Result of processUpload(), or null when uploads are not imported
	 */
	private function handleUpload( &$pageInfo ) {
		$this->debug( "Enter upload handler" );
		$uploadInfo = array();

		$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
			'src', 'size', 'sha1base36', 'archivename', 'rel' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'upload' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( !wfRunHooks( 'ImportHandleUploadXMLTag', array(
				$this, $pageInfo
			) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$uploadInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$uploadInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag == 'contents' ) {
				$contents = $this->nodeContents();
				$encoding = $this->reader->getAttribute( 'encoding' );
				if ( $encoding === 'base64' ) {
					// Embedded file data is spooled to a temporary file
					$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
					$uploadInfo['isTempSrc'] = true;
				}
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled upload XML tag $tag" );
				$skip = true;
			}
		}

		// A file found under the image base path replaces any embedded copy
		if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
			$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
			if ( file_exists( $path ) ) {
				$uploadInfo['fileSrc'] = $path;
				$uploadInfo['isTempSrc'] = false;
			}
		}

		if ( $this->mImportUploads ) {
			return $this->processUpload( $pageInfo, $uploadInfo );
		}
	}

	/**
	 * Writes the given data to a new temporary file.
	 * @param string $contents
	 * @return string Path of the temporary file
	 */
	private function dumpTemp( $contents ) {
		$filename = tempnam( wfTempDir(), 'importupload' );
		file_put_contents( $filename, $contents );
		return $filename;
	}

	/**
	 * Builds a WikiRevision from parsed upload fields and hands it to the
	 * upload callback.
	 * @param array $pageInfo
	 * @param array $uploadInfo
	 * @return mixed
	 */
	private function processUpload( $pageInfo, $uploadInfo ) {
		$revision = new WikiRevision;
		$text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

		$revision->setTitle( $pageInfo['_title'] );
		$revision->setID( $pageInfo['id'] );
		$revision->setTimestamp( $uploadInfo['timestamp'] );
		$revision->setText( $text );
		$revision->setFilename( $uploadInfo['filename'] );
		if ( isset( $uploadInfo['archivename'] ) ) {
			$revision->setArchiveName( $uploadInfo['archivename'] );
		}
		$revision->setSrc( $uploadInfo['src'] );
		if ( isset( $uploadInfo['fileSrc'] ) ) {
			$revision->setFileSrc( $uploadInfo['fileSrc'],
				!empty( $uploadInfo['isTempSrc'] ) );
		}
		if ( isset( $uploadInfo['sha1base36'] ) ) {
			$revision->setSha1Base36( $uploadInfo['sha1base36'] );
		}
		$revision->setSize( intval( $uploadInfo['size'] ) );
		$revision->setComment( $uploadInfo['comment'] );

		if ( isset( $uploadInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $uploadInfo['contributor']['ip'] );
		}
		if ( isset( $uploadInfo['contributor']['username'] ) ) {
			$revision->setUserName( $uploadInfo['contributor']['username'] );
		}
		$revision->setNoUpdates( $this->mNoUpdates );

		return call_user_func( $this->mUploadCallback, $revision );
	}

	/**
	 * Reads one <contributor> element.
	 * @return array Any of the keys 'id', 'ip' and 'username' that were present
	 */
	private function handleContributor() {
		$fields = array( 'id', 'ip', 'username' );
		$info = array();

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'contributor' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( in_array( $tag, $fields ) ) {
				$info[$tag] = $this->nodeContents();
			}
		}

		return $info;
	}

	/**
	 * Resolves the title text of a page into the title to import to, applying
	 * the target namespace or root page, and checks that it can be imported.
	 * @param string $text
	 * @return array|bool array( target Title, original Title ), or false if
	 *  the page must be skipped (a notice has been emitted in that case)
	 */
	private function processTitle( $text ) {
		global $wgCommandLineMode;

		$workTitle = $text;
		$origTitle = Title::newFromText( $workTitle );

		if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
			# makeTitleSafe, because $origTitle can have an interwiki (different setting of interwiki map)
			# and then dbKey can begin with a lowercase char
			$title = Title::makeTitleSafe( $this->mTargetNamespace,
				$origTitle->getDBkey() );
		} else {
			if ( !is_null( $this->mTargetRootPage ) ) {
				$workTitle = $this->mTargetRootPage . '/' . $workTitle;
			}
			$title = Title::newFromText( $workTitle );
		}

		if ( is_null( $title ) ) {
			# Invalid page title? Ignore the page
			$this->notice( 'import-error-invalid', $workTitle );
			return false;
		} elseif ( $title->isExternal() ) {
			$this->notice( 'import-error-interwiki', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->canExist() ) {
			$this->notice( 'import-error-special', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->userCan( 'edit' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot edit this page
			$this->notice( 'import-error-edit', $title->getPrefixedText() );
			return false;
		} elseif ( !$title->exists() && !$title->userCan( 'create' ) && !$wgCommandLineMode ) {
			# Do not import if the importing wiki user cannot create this page
			$this->notice( 'import-error-create', $title->getPrefixedText() );
			return false;
		}

		return array( $title, $origTitle );
	}
}
931
/**
 * This is a horrible hack used to keep source compatibility.
 *
 * Stream wrapper that serves the data of a registered import source, so that
 * XMLReader can open it through an "uploadsource://<id>" URL.
 */
class UploadSourceAdapter {
	/** @var array Registered sources, keyed by registration id */
	private static $sourceRegistrations = array();

	/** @var ImportStreamSource Source backing this stream; set by stream_open() */
	private $mSource;

	/** @var string Data fetched from the source but not yet handed out */
	private $mBuffer = '';

	/** @var int Number of bytes handed out so far */
	private $mPosition = 0;

	/**
	 * Registers a source and returns the id under which it can be opened.
	 * @param ImportStreamSource $source
	 * @return string
	 */
	static function registerSource( ImportStreamSource $source ) {
		$id = wfRandomString();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Attaches this stream to the source whose id is the host part of $path.
	 * @param string $path
	 * @param string $mode
	 * @param array $options
	 * @param string $opened_path
	 * @return bool False when no source is registered under that id
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Returns up to $count bytes, pulling further chunks from the source
	 * while the buffer is too short to satisfy the request.
	 * @param int $count
	 * @return string
	 */
	function stream_read( $count ) {
		while ( !$this->mSource->atEnd() && strlen( $this->mBuffer ) < $count ) {
			$read = (string)$this->mSource->readChunk();

			// An empty chunk means nothing more is available right now
			if ( $read === '' ) {
				break;
			}

			$this->mBuffer .= $read;
		}

		$return = '';
		if ( $this->mBuffer !== '' ) {
			// substr() can yield false on older PHP versions once the buffer
			// is exhausted; the casts keep both values strings.
			$return = (string)substr( $this->mBuffer, 0, $count );
			$this->mBuffer = (string)substr( $this->mBuffer, $count );
		}

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * Writing is not supported.
	 * @param string $data
	 * @return bool Always false
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return int Number of bytes read from this stream so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * @return bool True only when the source is exhausted and no buffered
	 *  data is left to hand out
	 */
	function stream_eof() {
		return $this->mBuffer === '' && $this->mSource->atEnd();
	}

	/**
	 * Returns a stat array with every field zeroed.
	 * @return array
	 */
	function url_stat() {
		$result = array();

		$result['dev'] = $result[0] = 0;
		$result['ino'] = $result[1] = 0;
		$result['mode'] = $result[2] = 0;
		$result['nlink'] = $result[3] = 0;
		$result['uid'] = $result[4] = 0;
		$result['gid'] = $result[5] = 0;
		$result['rdev'] = $result[6] = 0;
		$result['size'] = $result[7] = 0;
		$result['atime'] = $result[8] = 0;
		$result['mtime'] = $result[9] = 0;
		$result['ctime'] = $result[10] = 0;
		$result['blksize'] = $result[11] = 0;
		$result['blocks'] = $result[12] = 0;

		return $result;
	}
}
1052
1053 /**
1054 * @todo document (e.g. one-sentence class description).
1055 * @ingroup SpecialPage
1056 */
1057 class WikiRevision {
1058 /** @todo Unused? */
1059 private $importer = null;
1060
1061 /** @var Title */
1062 public $title = null;
1063
1064 /** @var int */
1065 private $id = 0;
1066
1067 /** @var string */
1068 public $timestamp = "20010115000000";
1069
1070 /**
1071 * @var int
1072 * @todo Can't find any uses. Public, because that's suspicious. Get clarity. */
1073 public $user = 0;
1074
1075 /** @var string */
1076 public $user_text = "";
1077
1078 /** @var string */
1079 protected $model = null;
1080
1081 /** @var string */
1082 protected $format = null;
1083
1084 /** @var string */
1085 public $text = "";
1086
1087 /** @var int */
1088 protected $size;
1089
1090 /** @var Content */
1091 protected $content = null;
1092
1093 /** @var ContentHandler */
1094 protected $contentHandler = null;
1095
1096 /** @var string */
1097 public $comment = "";
1098
1099 /** @var bool */
1100 protected $minor = false;
1101
1102 /** @var string */
1103 protected $type = "";
1104
1105 /** @var string */
1106 protected $action = "";
1107
1108 /** @var string */
1109 protected $params = "";
1110
1111 /** @var string */
1112 protected $fileSrc = '';
1113
1114 /** @var bool|string */
1115 protected $sha1base36 = false;
1116
1117 /**
1118 * @var bool
1119 * @todo Unused?
1120 */
1121 private $isTemp = false;
1122
1123 /** @var string */
1124 protected $archiveName = '';
1125
1126 protected $filename;
1127
1128 /** @var mixed */
1129 protected $src;
1130
1131 /** @todo Unused? */
1132 private $fileIsTemp;
1133
1134 /** @var bool */
1135 private $mNoUpdates = false;
1136
/**
 * Assigns the title this revision belongs to.
 *
 * @param Title $title
 * @throws MWException If $title is null or any other non-object
 */
public function setTitle( $title ) {
	if ( is_null( $title ) ) {
		throw new MWException( "WikiRevision given a null title in import. "
			. "You may need to adjust \$wgLegalTitleChars." );
	}

	if ( !is_object( $title ) ) {
		throw new MWException( "WikiRevision given non-object title in import." );
	}

	$this->title = $title;
}
1151
1152 /**
1153 * @param int $id
1154 */
1155 function setID( $id ) {
1156 $this->id = $id;
1157 }
1158
1159 /**
1160 * @param string $ts
1161 */
1162 function setTimestamp( $ts ) {
1163 # 2003-08-05T18:30:02Z
1164 $this->timestamp = wfTimestamp( TS_MW, $ts );
1165 }
1166
1167 /**
1168 * @param string $user
1169 */
1170 function setUsername( $user ) {
1171 $this->user_text = $user;
1172 }
1173
1174 /**
1175 * @param string $ip
1176 */
1177 function setUserIP( $ip ) {
1178 $this->user_text = $ip;
1179 }
1180
1181 /**
1182 * @param string $model
1183 */
1184 function setModel( $model ) {
1185 $this->model = $model;
1186 }
1187
1188 /**
1189 * @param string $format
1190 */
1191 function setFormat( $format ) {
1192 $this->format = $format;
1193 }
1194
1195 /**
1196 * @param string $text
1197 */
1198 function setText( $text ) {
1199 $this->text = $text;
1200 }
1201
1202 /**
1203 * @param string $text
1204 */
1205 function setComment( $text ) {
1206 $this->comment = $text;
1207 }
1208
1209 /**
1210 * @param bool $minor
1211 */
1212 function setMinor( $minor ) {
1213 $this->minor = (bool)$minor;
1214 }
1215
1216 /**
1217 * @param mixed $src
1218 */
1219 function setSrc( $src ) {
1220 $this->src = $src;
1221 }
1222
1223 /**
1224 * @param string $src
1225 * @param bool $isTemp
1226 */
1227 function setFileSrc( $src, $isTemp ) {
1228 $this->fileSrc = $src;
1229 $this->fileIsTemp = $isTemp;
1230 }
1231
1232 /**
1233 * @param string $sha1base36
1234 */
1235 function setSha1Base36( $sha1base36 ) {
1236 $this->sha1base36 = $sha1base36;
1237 }
1238
1239 /**
1240 * @param string $filename
1241 */
1242 function setFilename( $filename ) {
1243 $this->filename = $filename;
1244 }
1245
1246 /**
1247 * @param string $archiveName
1248 */
1249 function setArchiveName( $archiveName ) {
1250 $this->archiveName = $archiveName;
1251 }
1252
1253 /**
1254 * @param int $size
1255 */
1256 function setSize( $size ) {
1257 $this->size = intval( $size );
1258 }
1259
1260 /**
1261 * @param string $type
1262 */
1263 function setType( $type ) {
1264 $this->type = $type;
1265 }
1266
1267 /**
1268 * @param string $action
1269 */
1270 function setAction( $action ) {
1271 $this->action = $action;
1272 }
1273
1274 /**
1275 * @param array $params
1276 */
1277 function setParams( $params ) {
1278 $this->params = $params;
1279 }
1280
1281 /**
1282 * @param bool $noupdates
1283 */
1284 public function setNoUpdates( $noupdates ) {
1285 $this->mNoUpdates = $noupdates;
1286 }
1287
1288 /**
1289 * @return Title
1290 */
1291 function getTitle() {
1292 return $this->title;
1293 }
1294
1295 /**
1296 * @return int
1297 */
1298 function getID() {
1299 return $this->id;
1300 }
1301
1302 /**
1303 * @return string
1304 */
1305 function getTimestamp() {
1306 return $this->timestamp;
1307 }
1308
1309 /**
1310 * @return string
1311 */
1312 function getUser() {
1313 return $this->user_text;
1314 }
1315
1316 /**
1317 * @return string
1318 *
1319 * @deprecated Since 1.21, use getContent() instead.
1320 */
1321 function getText() {
1322 ContentHandler::deprecated( __METHOD__, '1.21' );
1323
1324 return $this->text;
1325 }
1326
1327 /**
1328 * @return ContentHandler
1329 */
1330 function getContentHandler() {
1331 if ( is_null( $this->contentHandler ) ) {
1332 $this->contentHandler = ContentHandler::getForModelID( $this->getModel() );
1333 }
1334
1335 return $this->contentHandler;
1336 }
1337
1338 /**
1339 * @return Content
1340 */
1341 function getContent() {
1342 if ( is_null( $this->content ) ) {
1343 $handler = $this->getContentHandler();
1344 $this->content = $handler->unserializeContent( $this->text, $this->getFormat() );
1345 }
1346
1347 return $this->content;
1348 }
1349
1350 /**
1351 * @return string
1352 */
1353 function getModel() {
1354 if ( is_null( $this->model ) ) {
1355 $this->model = $this->getTitle()->getContentModel();
1356 }
1357
1358 return $this->model;
1359 }
1360
1361 /**
1362 * @return string
1363 */
1364 function getFormat() {
1365 if ( is_null( $this->format ) ) {
1366 $this->format = $this->getContentHandler()->getDefaultFormat();
1367 }
1368
1369 return $this->format;
1370 }
1371
1372 /**
1373 * @return string
1374 */
1375 function getComment() {
1376 return $this->comment;
1377 }
1378
1379 /**
1380 * @return bool
1381 */
1382 function getMinor() {
1383 return $this->minor;
1384 }
1385
1386 /**
1387 * @return mixed
1388 */
1389 function getSrc() {
1390 return $this->src;
1391 }
1392
1393 /**
1394 * @return bool|string
1395 */
1396 function getSha1() {
1397 if ( $this->sha1base36 ) {
1398 return wfBaseConvert( $this->sha1base36, 36, 16 );
1399 }
1400 return false;
1401 }
1402
1403 /**
1404 * @return string
1405 */
1406 function getFileSrc() {
1407 return $this->fileSrc;
1408 }
1409
1410 /**
1411 * @return bool
1412 */
1413 function isTempSrc() {
1414 return $this->isTemp;
1415 }
1416
1417 /**
1418 * @return mixed
1419 */
1420 function getFilename() {
1421 return $this->filename;
1422 }
1423
1424 /**
1425 * @return string
1426 */
1427 function getArchiveName() {
1428 return $this->archiveName;
1429 }
1430
1431 /**
1432 * @return mixed
1433 */
1434 function getSize() {
1435 return $this->size;
1436 }
1437
1438 /**
1439 * @return string
1440 */
1441 function getType() {
1442 return $this->type;
1443 }
1444
1445 /**
1446 * @return string
1447 */
1448 function getAction() {
1449 return $this->action;
1450 }
1451
1452 /**
1453 * @return string
1454 */
1455 function getParams() {
1456 return $this->params;
1457 }
1458
1459 /**
1460 * @return bool
1461 */
1462 function importOldRevision() {
1463 $dbw = wfGetDB( DB_MASTER );
1464
1465 # Sneak a single revision into place
1466 $user = User::newFromName( $this->getUser() );
1467 if ( $user ) {
1468 $userId = intval( $user->getId() );
1469 $userText = $user->getName();
1470 $userObj = $user;
1471 } else {
1472 $userId = 0;
1473 $userText = $this->getUser();
1474 $userObj = new User;
1475 }
1476
1477 // avoid memory leak...?
1478 $linkCache = LinkCache::singleton();
1479 $linkCache->clear();
1480
1481 $page = WikiPage::factory( $this->title );
1482 $page->loadPageData( 'fromdbmaster' );
1483 if ( !$page->exists() ) {
1484 # must create the page...
1485 $pageId = $page->insertOn( $dbw );
1486 $created = true;
1487 $oldcountable = null;
1488 } else {
1489 $pageId = $page->getId();
1490 $created = false;
1491
1492 $prior = $dbw->selectField( 'revision', '1',
1493 array( 'rev_page' => $pageId,
1494 'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
1495 'rev_user_text' => $userText,
1496 'rev_comment' => $this->getComment() ),
1497 __METHOD__
1498 );
1499 if ( $prior ) {
1500 // @todo FIXME: This could fail slightly for multiple matches :P
1501 wfDebug( __METHOD__ . ": skipping existing revision for [[" .
1502 $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
1503 return false;
1504 }
1505 $oldcountable = $page->isCountable();
1506 }
1507
1508 # @todo FIXME: Use original rev_id optionally (better for backups)
1509 # Insert the row
1510 $revision = new Revision( array(
1511 'title' => $this->title,
1512 'page' => $pageId,
1513 'content_model' => $this->getModel(),
1514 'content_format' => $this->getFormat(),
1515 //XXX: just set 'content' => $this->getContent()?
1516 'text' => $this->getContent()->serialize( $this->getFormat() ),
1517 'comment' => $this->getComment(),
1518 'user' => $userId,
1519 'user_text' => $userText,
1520 'timestamp' => $this->timestamp,
1521 'minor_edit' => $this->minor,
1522 ) );
1523 $revision->insertOn( $dbw );
1524 $changed = $page->updateIfNewerOn( $dbw, $revision );
1525
1526 if ( $changed !== false && !$this->mNoUpdates ) {
1527 wfDebug( __METHOD__ . ": running updates\n" );
1528 $page->doEditUpdates(
1529 $revision,
1530 $userObj,
1531 array( 'created' => $created, 'oldcountable' => $oldcountable )
1532 );
1533 }
1534
1535 return true;
1536 }
1537
1538 /**
1539 * @return mixed
1540 */
1541 function importLogItem() {
1542 $dbw = wfGetDB( DB_MASTER );
1543 # @todo FIXME: This will not record autoblocks
1544 if ( !$this->getTitle() ) {
1545 wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
1546 $this->timestamp . "\n" );
1547 return;
1548 }
1549 # Check if it exists already
1550 // @todo FIXME: Use original log ID (better for backups)
1551 $prior = $dbw->selectField( 'logging', '1',
1552 array( 'log_type' => $this->getType(),
1553 'log_action' => $this->getAction(),
1554 'log_timestamp' => $dbw->timestamp( $this->timestamp ),
1555 'log_namespace' => $this->getTitle()->getNamespace(),
1556 'log_title' => $this->getTitle()->getDBkey(),
1557 'log_comment' => $this->getComment(),
1558 #'log_user_text' => $this->user_text,
1559 'log_params' => $this->params ),
1560 __METHOD__
1561 );
1562 // @todo FIXME: This could fail slightly for multiple matches :P
1563 if ( $prior ) {
1564 wfDebug( __METHOD__
1565 . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp "
1566 . $this->timestamp . "\n" );
1567 return;
1568 }
1569 $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
1570 $data = array(
1571 'log_id' => $log_id,
1572 'log_type' => $this->type,
1573 'log_action' => $this->action,
1574 'log_timestamp' => $dbw->timestamp( $this->timestamp ),
1575 'log_user' => User::idFromName( $this->user_text ),
1576 #'log_user_text' => $this->user_text,
1577 'log_namespace' => $this->getTitle()->getNamespace(),
1578 'log_title' => $this->getTitle()->getDBkey(),
1579 'log_comment' => $this->getComment(),
1580 'log_params' => $this->params
1581 );
1582 $dbw->insert( 'logging', $data, __METHOD__ );
1583 }
1584
1585 /**
1586 * @return bool
1587 */
1588 function importUpload() {
1589 # Construct a file
1590 $archiveName = $this->getArchiveName();
1591 if ( $archiveName ) {
1592 wfDebug( __METHOD__ . "Importing archived file as $archiveName\n" );
1593 $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
1594 RepoGroup::singleton()->getLocalRepo(), $archiveName );
1595 } else {
1596 $file = wfLocalFile( $this->getTitle() );
1597 wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
1598 if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
1599 $archiveName = $file->getTimestamp() . '!' . $file->getName();
1600 $file = OldLocalFile::newFromArchiveName( $this->getTitle(),
1601 RepoGroup::singleton()->getLocalRepo(), $archiveName );
1602 wfDebug( __METHOD__ . "File already exists; importing as $archiveName\n" );
1603 }
1604 }
1605 if ( !$file ) {
1606 wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
1607 return false;
1608 }
1609
1610 # Get the file source or download if necessary
1611 $source = $this->getFileSrc();
1612 $flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
1613 if ( !$source ) {
1614 $source = $this->downloadSource();
1615 $flags |= File::DELETE_SOURCE;
1616 }
1617 if ( !$source ) {
1618 wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
1619 return false;
1620 }
1621 $sha1 = $this->getSha1();
1622 if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
1623 if ( $flags & File::DELETE_SOURCE ) {
1624 # Broken file; delete it if it is a temporary file
1625 unlink( $source );
1626 }
1627 wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
1628 return false;
1629 }
1630
1631 $user = User::newFromName( $this->user_text );
1632
1633 # Do the actual upload
1634 if ( $archiveName ) {
1635 $status = $file->uploadOld( $source, $archiveName,
1636 $this->getTimestamp(), $this->getComment(), $user, $flags );
1637 } else {
1638 $status = $file->upload( $source, $this->getComment(), $this->getComment(),
1639 $flags, false, $this->getTimestamp(), $user );
1640 }
1641
1642 if ( $status->isGood() ) {
1643 wfDebug( __METHOD__ . ": Successful\n" );
1644 return true;
1645 } else {
1646 wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
1647 return false;
1648 }
1649 }
1650
1651 /**
1652 * @return bool|string
1653 */
1654 function downloadSource() {
1655 global $wgEnableUploads;
1656 if ( !$wgEnableUploads ) {
1657 return false;
1658 }
1659
1660 $tempo = tempnam( wfTempDir(), 'download' );
1661 $f = fopen( $tempo, 'wb' );
1662 if ( !$f ) {
1663 wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
1664 return false;
1665 }
1666
1667 // @todo FIXME!
1668 $src = $this->getSrc();
1669 $data = Http::get( $src );
1670 if ( !$data ) {
1671 wfDebug( "IMPORT: couldn't fetch source $src\n" );
1672 fclose( $f );
1673 unlink( $tempo );
1674 return false;
1675 }
1676
1677 fwrite( $f, $data );
1678 fclose( $f );
1679
1680 return $tempo;
1681 }
1682
1683 }
1684
/**
 * Import source that serves a complete in-memory string as a single chunk.
 *
 * @ingroup SpecialPage
 */
class ImportStringSource {
	/**
	 * @var string The data to hand out.
	 * Declared explicitly (it used to be created dynamically by the
	 * constructor) and kept public so existing outside access still works.
	 */
	public $mString;

	/** @var bool Whether the string has already been returned by readChunk() */
	public $mRead;

	/**
	 * @param string $string Data to serve
	 */
	public function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return bool True once readChunk() has handed out the string
	 */
	public function atEnd() {
		return $this->mRead;
	}

	/**
	 * Return the whole string on the first call and false afterwards.
	 *
	 * @return bool|string
	 */
	public function readChunk() {
		if ( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}
1713
/**
 * Import source that reads chunks from an open stream handle.
 *
 * The static factories wrap a local file, an HTTP upload, a URL or an
 * interwiki export page in a Status object that carries the new source
 * on success.
 *
 * @ingroup SpecialPage
 */
class ImportStreamSource {
	/**
	 * @var resource Stream handle to read from.
	 * Declared explicitly (it used to be created dynamically by the
	 * constructor) and kept public so existing outside access still works.
	 */
	public $mHandle;

	/**
	 * @param resource $handle Open, readable stream handle
	 */
	public function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return bool Whether the handle has reached end-of-file
	 */
	public function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * Read up to 32768 bytes from the handle.
	 *
	 * @return string
	 */
	public function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Open a file for reading and wrap it in an import source.
	 *
	 * @param string $filename
	 * @return Status Good with the source as value, or fatal 'importcantopen'
	 */
	public static function newFromFile( $filename ) {
		// fopen() warnings are suppressed; failure is reported through the Status
		wfSuppressWarnings();
		$file = fopen( $filename, 'rt' );
		wfRestoreWarnings();
		if ( !$file ) {
			return Status::newFatal( "importcantopen" );
		}
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Create an import source from a file uploaded in the current request.
	 *
	 * @param string $fieldname Name of the upload form field
	 * @return Status Good with the source as value, or fatal with one of the
	 *   'importnofile' / 'importuploaderror*' / 'importcantopen' messages
	 */
	public static function newFromUpload( $fieldname = "xmlimport" ) {
		// Read by value: binding a reference here would create a null entry
		// in $_FILES whenever the field is missing.
		$upload = isset( $_FILES[$fieldname] ) ? $_FILES[$fieldname] : null;

		if ( $upload === null || empty( $upload['name'] ) ) {
			return Status::newFatal( 'importnofile' );
		}
		if ( !empty( $upload['error'] ) ) {
			switch ( $upload['error'] ) {
				case UPLOAD_ERR_INI_SIZE:
					# The uploaded file exceeds the upload_max_filesize directive in php.ini.
					return Status::newFatal( 'importuploaderrorsize' );
				case UPLOAD_ERR_FORM_SIZE:
					# The uploaded file exceeds the MAX_FILE_SIZE directive that
					# was specified in the HTML form.
					return Status::newFatal( 'importuploaderrorsize' );
				case UPLOAD_ERR_PARTIAL:
					# The uploaded file was only partially uploaded
					return Status::newFatal( 'importuploaderrorpartial' );
				case UPLOAD_ERR_NO_TMP_DIR:
					# Missing a temporary folder.
					return Status::newFatal( 'importuploaderrortemp' );
				# Other codes are not mapped to a message here; they fall
				# through to the is_uploaded_file() check below.
			}

		}
		$fname = $upload['tmp_name'];
		if ( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		} else {
			return Status::newFatal( 'importnofile' );
		}
	}

	/**
	 * Fetch a URL and wrap the response body in an import source.
	 *
	 * The body is written to a temporary file, which is rewound before
	 * being handed to the new source.
	 *
	 * @param string $url
	 * @param string $method HTTP method to use
	 * @return Status Good with the source as value, or fatal 'importcantopen'
	 */
	public static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url, array( 'followRedirects' => true ) );
		if ( $data === false ) {
			return Status::newFatal( 'importcantopen' );
		}

		$file = tmpfile();
		if ( $file === false ) {
			// No temporary file available to hold the response
			return Status::newFatal( 'importcantopen' );
		}
		fwrite( $file, $data );
		fflush( $file );
		fseek( $file, 0 );
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Create an import source from Special:Export on another wiki.
	 *
	 * @param string $interwiki Interwiki prefix of the source wiki
	 * @param string $page Page name to export
	 * @param bool $history Adds history=1 to the export request
	 * @param bool $templates Adds templates=1 to the export request
	 * @param int $pageLinkDepth Sent as pagelink-depth when non-zero
	 * @return Status Fatal 'import-noarticle' for an empty page name, fatal
	 *   'importbadinterwiki' if the link is not external, otherwise the
	 *   result of newFromURL()
	 */
	public static function newFromInterwiki( $interwiki, $page, $history = false,
		$templates = false, $pageLinkDepth = 0
	) {
		if ( $page == '' ) {
			return Status::newFatal( 'import-noarticle' );
		}
		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if ( is_null( $link ) || !$link->isExternal() ) {
			return Status::newFatal( 'importbadinterwiki' );
		} else {
			$params = array();
			if ( $history ) {
				$params['history'] = 1;
			}
			if ( $templates ) {
				$params['templates'] = 1;
			}
			if ( $pageLinkDepth ) {
				$params['pagelink-depth'] = $pageLinkDepth;
			}
			$url = $link->getFullURL( $params );
			# For interwikis, use POST to avoid redirects.
			return ImportStreamSource::newFromURL( $url, "POST" );
		}
	}
}