Expand the defines from JSTokenizer::__construct() placing them at the top of the...
[lhc/web/wiklou.git] / includes / Import.php
1 <?php
2 /**
3 * MediaWiki page data importer
4 *
5 * Copyright © 2003,2005 Brion Vibber <brion@pobox.com>
6 * http://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup SpecialPage
25 */
26
/**
 * XML file reader for the page data importer.
 *
 * Walks a MediaWiki XML dump with XMLReader and hands each page, revision,
 * upload and log item it finds to a configurable callback.
 *
 * implements Special:Import
 * @ingroup SpecialPage
 */
class WikiImporter {
	private $reader = null;
	private $mLogItemCallback, $mUploadCallback, $mRevisionCallback, $mPageCallback;
	private $mSiteInfoCallback, $mTargetNamespace, $mPageOutCallback;
	private $mDebug;
	private $mImportUploads, $mImageBasePath;

	/**
	 * Creates an XMLReader drawing from the source provided and installs
	 * the default callbacks.
	 *
	 * @param $source Object handed to UploadSourceAdapter::registerSource();
	 *   the adapter calls atEnd() and readChunk() on it.
	 */
	function __construct( $source ) {
		$this->reader = new XMLReader();

		// Registering a protocol twice raises a warning, so only register
		// the wrapper the first time an importer is constructed.
		if ( !in_array( 'uploadsource', stream_get_wrappers() ) ) {
			stream_wrapper_register( 'uploadsource', 'UploadSourceAdapter' );
		}
		$id = UploadSourceAdapter::registerSource( $source );
		if ( defined( 'LIBXML_PARSEHUGE' ) ) {
			$this->reader->open( "uploadsource://$id", null, LIBXML_PARSEHUGE );
		} else {
			$this->reader->open( "uploadsource://$id" );
		}

		// Default callbacks
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, 'importUpload' ) );
		$this->setLogItemCallback( array( $this, 'importLogItem' ) );
		$this->setPageOutCallback( array( $this, 'finishImportPage' ) );
	}

	/**
	 * Logs an XML error. Despite the name, nothing is thrown.
	 * @param $err String
	 */
	private function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Writes to the debug log, but only when debug mode is enabled.
	 * @param $data String
	 */
	private function debug( $data ) {
		if ( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Writes to the debug log regardless of debug mode.
	 * @param $data String
	 */
	private function warn( $data ) {
		wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Shows a message to the user: printed in command line mode, otherwise
	 * added to the page output as an HTML-escaped list item.
	 * @param $data String
	 */
	private function notice( $data ) {
		global $wgCommandLineMode;
		if ( $wgCommandLineMode ) {
			print "$data\n";
		} else {
			global $wgOut;
			$wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
		}
	}

	/**
	 * Set debug mode...
	 * @param $debug Boolean
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the action to perform as each new page in the stream is reached.
	 * @param $callback callback
	 * @return callback The previously installed callback
	 */
	public function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page in the stream is completed.
	 * Callback accepts the page title (as a Title object), a second object
	 * with the original title form (in case it's been overridden into a
	 * local namespace), and a count of revisions.
	 *
	 * @param $callback callback
	 * @return callback The previously installed callback
	 */
	public function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page revision is reached.
	 * @param $callback callback
	 * @return callback The previously installed callback
	 */
	public function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each file upload version is reached.
	 * @param $callback callback
	 * @return callback The previously installed callback
	 */
	public function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each log item reached.
	 * @param $callback callback
	 * @return callback The previously installed callback
	 */
	public function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform when site info is encountered
	 * @param $callback callback
	 * @return callback The previously installed callback
	 */
	public function setSiteInfoCallback( $callback ) {
		$previous = $this->mSiteInfoCallback;
		$this->mSiteInfoCallback = $callback;
		return $previous;
	}

	/**
	 * Set a target namespace to override the defaults
	 *
	 * @param $namespace Integer or null; null disables the override
	 * @return Boolean true when the value was accepted, false when it was
	 *   rejected (negative namespace) and the setting was left untouched
	 */
	public function setTargetNamespace( $namespace ) {
		if ( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
			return true;
		}
		if ( $namespace >= 0 ) {
			// @todo FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
			return true;
		}
		return false;
	}

	/**
	 * Sets the directory searched for files named by an upload's <rel> tag.
	 * @param $dir String
	 */
	public function setImageBasePath( $dir ) {
		$this->mImageBasePath = $dir;
	}

	/**
	 * Enables or disables processing of <upload> elements.
	 * @param $import Boolean
	 */
	public function setImportUploads( $import ) {
		$this->mImportUploads = $import;
	}

	/**
	 * Default per-revision callback, performs the import.
	 * @param $revision WikiRevision
	 * @return Result of the database deadlock loop around importOldRevision()
	 */
	public function importRevision( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
	}

	/**
	 * Default per-log-item callback, performs the import.
	 * @param $rev WikiRevision
	 * @return Result of the database deadlock loop around importLogItem()
	 */
	public function importLogItem( $rev ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
	}

	/**
	 * Default per-upload callback, performs the import.
	 * @param $revision WikiRevision
	 * @return Result of the database deadlock loop around importUpload()
	 */
	public function importUpload( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
	}

	/**
	 * Default page-out callback; forwards its arguments to the
	 * AfterImportPage hook. Mostly for hook use.
	 */
	public function finishImportPage( $title, $origTitle, $revCount, $sRevCount, $pageInfo ) {
		$args = func_get_args();
		return wfRunHooks( 'AfterImportPage', $args );
	}

	/**
	 * Alternate per-revision callback, for debugging.
	 * @param $revision WikiRevision
	 */
	public function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if ( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notify the callback function when a new <page> is reached.
	 * @param $title Value returned by processTitle(): an array of
	 *   ( Title, original Title ), or false for a rejected title
	 */
	function pageCallback( $title ) {
		if ( isset( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Notify the callback function when a </page> is closed.
	 * @param $title Title
	 * @param $origTitle Title
	 * @param $revCount Integer
	 * @param $sucCount Int: number of revisions for which callback returned true
	 * @param $pageInfo Array: associative array of page information
	 */
	private function pageOutCallback( $title, $origTitle, $revCount, $sucCount, $pageInfo ) {
		if ( isset( $this->mPageOutCallback ) ) {
			$args = func_get_args();
			call_user_func_array( $this->mPageOutCallback, $args );
		}
	}

	/**
	 * Notify the callback function of a revision
	 * @param $revision A WikiRevision object
	 * @return The callback's result, or false when no callback is set
	 */
	private function revisionCallback( $revision ) {
		if ( !isset( $this->mRevisionCallback ) ) {
			return false;
		}
		return call_user_func_array( $this->mRevisionCallback,
			array( $revision, $this ) );
	}

	/**
	 * Notify the callback function of a new log item
	 * @param $revision A WikiRevision object
	 * @return The callback's result, or false when no callback is set
	 */
	private function logItemCallback( $revision ) {
		if ( !isset( $this->mLogItemCallback ) ) {
			return false;
		}
		return call_user_func_array( $this->mLogItemCallback,
			array( $revision, $this ) );
	}

	/**
	 * Shouldn't something like this be built-in to XMLReader?
	 * Fetches text contents of the current element, assuming
	 * no sub-elements or such scary things.
	 *
	 * Text, CDATA and significant-whitespace nodes are concatenated until
	 * the first end tag. If the input ends before an end tag is seen, the
	 * reader is closed and an empty string is returned.
	 *
	 * @return string
	 * @access private
	 */
	private function nodeContents() {
		if ( $this->reader->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->reader->read() ) {
			switch ( $this->reader->nodeType ) {
			case XMLReader::TEXT:
			case XMLReader::CDATA:
			case XMLReader::SIGNIFICANT_WHITESPACE:
				$buffer .= $this->reader->value;
				break;
			case XMLReader::END_ELEMENT:
				return $buffer;
			}
		}

		$this->reader->close();
		return '';
	}

	# --------------

	/** Left in for debugging: dumps the current node's type, name and value */
	private function dumpElement() {
		static $lookup = null;
		if ( !$lookup ) {
			$xmlReaderConstants = array(
				"NONE",
				"ELEMENT",
				"ATTRIBUTE",
				"TEXT",
				"CDATA",
				"ENTITY_REF",
				"ENTITY",
				"PI",
				"COMMENT",
				"DOC",
				"DOC_TYPE",
				"DOC_FRAGMENT",
				"NOTATION",
				"WHITESPACE",
				"SIGNIFICANT_WHITESPACE",
				"END_ELEMENT",
				"END_ENTITY",
				"XML_DECLARATION",
			);
			$lookup = array();

			// Map each numeric node type back to its constant name
			foreach ( $xmlReaderConstants as $name ) {
				$lookup[constant( "XMLReader::$name" )] = $name;
			}
		}

		// var_dump() prints directly and returns nothing
		var_dump(
			$lookup[$this->reader->nodeType],
			$this->reader->name,
			$this->reader->value
		);
		print( "\n\n" );
	}

	/**
	 * Primary entry point
	 *
	 * @return Boolean true once the document has been processed
	 * @throws MWException when the first node is not <mediawiki>
	 */
	public function doImport() {
		$this->reader->read();

		if ( $this->reader->name != 'mediawiki' ) {
			throw new MWException( "Expected <mediawiki> tag, got ".
				$this->reader->name );
		}
		$this->debug( "<mediawiki> tag is correct." );

		$this->debug( "Starting primary dump processing loop." );

		$keepReading = $this->reader->read();
		$skip = false;
		while ( $keepReading ) {
			$tag = $this->reader->name;
			$type = $this->reader->nodeType;

			// Hook arguments must be passed as an array
			if ( !wfRunHooks( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
				// Do nothing
			} elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
				break;
			} elseif ( $tag == 'siteinfo' ) {
				$this->handleSiteInfo();
			} elseif ( $tag == 'page' ) {
				$this->handlePage();
			} elseif ( $tag == 'logitem' ) {
				$this->handleLogItem();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled top-level XML tag $tag" );

				$skip = true;
			}

			if ( $skip ) {
				// next() jumps over the whole subtree of the unknown element
				$keepReading = $this->reader->next();
				$skip = false;
				$this->debug( "Skip" );
			} else {
				$keepReading = $this->reader->read();
			}
		}

		return true;
	}

	/**
	 * Handles a <siteinfo> element by skipping it.
	 *
	 * @return Boolean true when the element was skipped
	 * @throws MWException when a site info callback has been set
	 */
	private function handleSiteInfo() {
		// Site info is useful, but not actually used for dump imports.
		// Includes a quick short-circuit to save performance.
		if ( !$this->mSiteInfoCallback ) {
			$this->reader->next();
			return true;
		}
		throw new MWException( "SiteInfo tag is not yet handled, do not set mSiteInfoCallback" );
	}

	/**
	 * Reads a <logitem> element and passes the collected fields on to
	 * processLogItem().
	 */
	private function handleLogItem() {
		$this->debug( "Enter log item handler." );
		$logInfo = array();

		// Fields that can just be stuffed in the logInfo array
		$normalFields = array( 'id', 'comment', 'type', 'action', 'timestamp',
			'logtitle', 'params' );

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'logitem' ) {
				break;
			}

			$tag = $this->reader->name;

			// Hook arguments must be passed as an array
			if ( !wfRunHooks( 'ImportHandleLogItemXMLTag',
				array( $this, &$logInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$logInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$logInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled log-item XML tag $tag" );
			}
		}

		$this->processLogItem( $logInfo );
	}

	/**
	 * Builds a WikiRevision from the fields of a log item and passes it to
	 * the log item callback.
	 *
	 * @param $logInfo Array of fields collected by handleLogItem()
	 * @return Result of the log item callback
	 */
	private function processLogItem( $logInfo ) {
		$revision = new WikiRevision;

		$revision->setID( $logInfo['id'] );
		$revision->setType( $logInfo['type'] );
		$revision->setAction( $logInfo['action'] );
		$revision->setTimestamp( $logInfo['timestamp'] );
		$revision->setParams( $logInfo['params'] );
		$revision->setTitle( Title::newFromText( $logInfo['logtitle'] ) );

		if ( isset( $logInfo['comment'] ) ) {
			$revision->setComment( $logInfo['comment'] );
		}

		if ( isset( $logInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $logInfo['contributor']['ip'] );
		}
		if ( isset( $logInfo['contributor']['username'] ) ) {
			$revision->setUserName( $logInfo['contributor']['username'] );
		}

		return $this->logItemCallback( $revision );
	}

	/**
	 * Reads a <page> element, dispatching its revisions and uploads, then
	 * fires the page-out callback.
	 *
	 * Once a title has been rejected the remaining children of the page
	 * are skipped. The page and page-out callbacks still run; the
	 * page-out callback then receives null for both titles.
	 */
	private function handlePage() {
		// Handle page data.
		$this->debug( "Enter page handler." );
		$pageInfo = array( 'revisionCount' => 0, 'successfulRevisionCount' => 0 );

		// Fields that can just be stuffed in the pageInfo object
		$normalFields = array( 'title', 'id', 'redirect', 'restrictions' );

		$skip = false;
		$badTitle = false;
		// Stays null when the page has no usable <title>
		$origTitle = null;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			// A skip request only applies to the element that asked for it
			$skip = false;

			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'page' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( $badTitle ) {
				// The title is invalid, bail out of this page
				$skip = true;
			} elseif ( !wfRunHooks( 'ImportHandlePageXMLTag', array( $this,
				&$pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$pageInfo[$tag] = $this->nodeContents();
				if ( $tag == 'title' ) {
					$title = $this->processTitle( $pageInfo['title'] );

					$this->pageCallback( $title );

					if ( is_array( $title ) ) {
						list( $pageInfo['_title'], $origTitle ) = $title;
					} else {
						$badTitle = true;
						$skip = true;
						$pageInfo['_title'] = null;
						$origTitle = null;
					}
				}
			} elseif ( $tag == 'revision' ) {
				$this->handleRevision( $pageInfo );
			} elseif ( $tag == 'upload' ) {
				$this->handleUpload( $pageInfo );
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled page XML tag $tag" );
				$skip = true;
			}
		}

		// '_title' is absent when the page never contained a <title> tag
		$pageTitle = isset( $pageInfo['_title'] ) ? $pageInfo['_title'] : null;

		$this->pageOutCallback( $pageTitle, $origTitle,
			$pageInfo['revisionCount'],
			$pageInfo['successfulRevisionCount'],
			$pageInfo );
	}

	/**
	 * Reads a <revision> element, processes it and updates the revision
	 * counters in $pageInfo.
	 *
	 * @param $pageInfo Array of page information, modified in place
	 */
	private function handleRevision( &$pageInfo ) {
		$this->debug( "Enter revision handler" );
		$revisionInfo = array();

		$normalFields = array( 'id', 'timestamp', 'comment', 'minor', 'text' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			// A skip request only applies to the element that asked for it
			$skip = false;

			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'revision' ) {
				break;
			}

			$tag = $this->reader->name;

			// Hook arguments must be passed as an array
			if ( !wfRunHooks( 'ImportHandleRevisionXMLTag',
				array( $this, $pageInfo, &$revisionInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$revisionInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$revisionInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled revision XML tag $tag" );
				$skip = true;
			}
		}

		$pageInfo['revisionCount']++;
		if ( $this->processRevision( $pageInfo, $revisionInfo ) ) {
			$pageInfo['successfulRevisionCount']++;
		}
	}

	/**
	 * Builds a WikiRevision from the collected fields and passes it to the
	 * revision callback.
	 *
	 * @param $pageInfo Array of page information
	 * @param $revisionInfo Array of fields collected by handleRevision()
	 * @return Result of the revision callback
	 */
	private function processRevision( $pageInfo, $revisionInfo ) {
		$revision = new WikiRevision;

		if ( isset( $revisionInfo['id'] ) ) {
			$revision->setID( $revisionInfo['id'] );
		}
		if ( isset( $revisionInfo['text'] ) ) {
			$revision->setText( $revisionInfo['text'] );
		}
		$revision->setTitle( $pageInfo['_title'] );

		// Revisions without a timestamp are stamped with the current time
		if ( isset( $revisionInfo['timestamp'] ) ) {
			$revision->setTimestamp( $revisionInfo['timestamp'] );
		} else {
			$revision->setTimestamp( wfTimestampNow() );
		}

		if ( isset( $revisionInfo['comment'] ) ) {
			$revision->setComment( $revisionInfo['comment'] );
		}

		// The mere presence of a <minor> tag marks the edit as minor
		if ( isset( $revisionInfo['minor'] ) ) {
			$revision->setMinor( true );
		}
		if ( isset( $revisionInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $revisionInfo['contributor']['ip'] );
		}
		if ( isset( $revisionInfo['contributor']['username'] ) ) {
			$revision->setUserName( $revisionInfo['contributor']['username'] );
		}

		return $this->revisionCallback( $revision );
	}

	/**
	 * Reads an <upload> element and, when upload importing is enabled,
	 * processes it.
	 *
	 * @param $pageInfo Array of page information
	 * @return Result of processUpload(), or null when uploads are not imported
	 */
	private function handleUpload( &$pageInfo ) {
		$this->debug( "Enter upload handler" );
		$uploadInfo = array();

		$normalFields = array( 'timestamp', 'comment', 'filename', 'text',
			'src', 'size', 'sha1base36', 'archivename', 'rel' );

		$skip = false;

		while ( $skip ? $this->reader->next() : $this->reader->read() ) {
			// A skip request only applies to the element that asked for it
			$skip = false;

			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'upload' ) {
				break;
			}

			$tag = $this->reader->name;

			// Hook arguments must be passed as an array
			if ( !wfRunHooks( 'ImportHandleUploadXMLTag',
				array( $this, $pageInfo ) ) ) {
				// Do nothing
			} elseif ( in_array( $tag, $normalFields ) ) {
				$uploadInfo[$tag] = $this->nodeContents();
			} elseif ( $tag == 'contributor' ) {
				$uploadInfo['contributor'] = $this->handleContributor();
			} elseif ( $tag == 'contents' ) {
				// The attribute has to be read while the reader is still on
				// the start tag; nodeContents() leaves it on the end tag,
				// where attributes are no longer available.
				$encoding = $this->reader->getAttribute( 'encoding' );
				$contents = $this->nodeContents();
				if ( $encoding === 'base64' ) {
					$uploadInfo['fileSrc'] = $this->dumpTemp( base64_decode( $contents ) );
					$uploadInfo['isTempSrc'] = true;
				}
			} elseif ( $tag != '#text' ) {
				$this->warn( "Unhandled upload XML tag $tag" );
				$skip = true;
			}
		}

		// A file found under the image base path takes precedence over
		// contents embedded in the dump
		if ( $this->mImageBasePath && isset( $uploadInfo['rel'] ) ) {
			$path = "{$this->mImageBasePath}/{$uploadInfo['rel']}";
			if ( file_exists( $path ) ) {
				$uploadInfo['fileSrc'] = $path;
				$uploadInfo['isTempSrc'] = false;
			}
		}

		if ( $this->mImportUploads ) {
			return $this->processUpload( $pageInfo, $uploadInfo );
		}
	}

	/**
	 * Writes data to a new temporary file.
	 * @param $contents String
	 * @return String path of the temporary file
	 */
	private function dumpTemp( $contents ) {
		$filename = tempnam( wfTempDir(), 'importupload' );
		file_put_contents( $filename, $contents );
		return $filename;
	}

	/**
	 * Builds a WikiRevision describing an upload and passes it to the
	 * upload callback.
	 *
	 * @param $pageInfo Array of page information
	 * @param $uploadInfo Array of fields collected by handleUpload()
	 * @return Result of the upload callback
	 */
	private function processUpload( $pageInfo, $uploadInfo ) {
		$revision = new WikiRevision;
		$text = isset( $uploadInfo['text'] ) ? $uploadInfo['text'] : '';

		$revision->setTitle( $pageInfo['_title'] );
		$revision->setID( $pageInfo['id'] );
		$revision->setTimestamp( $uploadInfo['timestamp'] );
		$revision->setText( $text );
		$revision->setFilename( $uploadInfo['filename'] );
		if ( isset( $uploadInfo['archivename'] ) ) {
			$revision->setArchiveName( $uploadInfo['archivename'] );
		}
		$revision->setSrc( $uploadInfo['src'] );
		if ( isset( $uploadInfo['fileSrc'] ) ) {
			$revision->setFileSrc( $uploadInfo['fileSrc'],
				!empty( $uploadInfo['isTempSrc'] ) );
		}
		if ( isset( $uploadInfo['sha1base36'] ) ) {
			$revision->setSha1Base36( $uploadInfo['sha1base36'] );
		}
		$revision->setSize( intval( $uploadInfo['size'] ) );
		$revision->setComment( $uploadInfo['comment'] );

		if ( isset( $uploadInfo['contributor']['ip'] ) ) {
			$revision->setUserIP( $uploadInfo['contributor']['ip'] );
		}
		if ( isset( $uploadInfo['contributor']['username'] ) ) {
			$revision->setUserName( $uploadInfo['contributor']['username'] );
		}

		return call_user_func( $this->mUploadCallback, $revision );
	}

	/**
	 * Reads a <contributor> element.
	 * @return Array with whichever of 'id', 'ip' and 'username' were present
	 */
	private function handleContributor() {
		$fields = array( 'id', 'ip', 'username' );
		$info = array();

		while ( $this->reader->read() ) {
			if ( $this->reader->nodeType == XMLReader::END_ELEMENT &&
				$this->reader->name == 'contributor' ) {
				break;
			}

			$tag = $this->reader->name;

			if ( in_array( $tag, $fields ) ) {
				$info[$tag] = $this->nodeContents();
			}
		}

		return $info;
	}

	/**
	 * Turns the text of a <title> tag into Title objects, applying the
	 * target namespace override when one is set.
	 *
	 * @param $text String
	 * @return Array of ( Title to import to, Title as found in the dump ),
	 *   or false when the title is invalid or an interwiki title
	 */
	private function processTitle( $text ) {
		$workTitle = $text;
		$origTitle = Title::newFromText( $workTitle );

		if ( !is_null( $this->mTargetNamespace ) && !is_null( $origTitle ) ) {
			$title = Title::makeTitle( $this->mTargetNamespace,
				$origTitle->getDBkey() );
		} else {
			$title = Title::newFromText( $workTitle );
		}

		if ( is_null( $title ) ) {
			// Invalid page title? Ignore the page
			$this->notice( "Skipping invalid page title '$workTitle'" );
			return false;
		} elseif ( $title->getInterwiki() != '' ) {
			$this->notice( "Skipping interwiki page title '$workTitle'" );
			return false;
		}

		return array( $title, $origTitle );
	}
}
727
/**
 * This is a horrible hack used to keep source compatibility.
 *
 * Stream wrapper that exposes an import source object (anything offering
 * atEnd() and readChunk()) as a readable stream, so that it can be opened
 * through an "uploadsource://<id>" URL.
 */
class UploadSourceAdapter {
	/** Registered sources, keyed by the id used as the URL host */
	public static $sourceRegistrations = array();

	private $mSource;
	/** Data fetched from the source but not yet handed to the reader */
	private $mBuffer = '';
	/** Number of bytes handed out so far */
	private $mPosition = 0;

	/**
	 * Registers a source and returns the id under which it can be opened.
	 * @param $source Object offering atEnd() and readChunk()
	 * @return String
	 */
	static function registerSource( $source ) {
		$id = wfGenerateToken();

		self::$sourceRegistrations[$id] = $source;

		return $id;
	}

	/**
	 * Opens the source whose id is the host part of $path.
	 * @return Boolean false when the path is malformed or the id is unknown
	 */
	function stream_open( $path, $mode, $options, &$opened_path ) {
		$url = parse_url( $path );
		if ( !is_array( $url ) || !isset( $url['host'] ) ) {
			return false;
		}
		$id = $url['host'];

		if ( !isset( self::$sourceRegistrations[$id] ) ) {
			return false;
		}

		$this->mSource = self::$sourceRegistrations[$id];

		return true;
	}

	/**
	 * Returns up to $count bytes, pulling chunks from the source until
	 * enough data is buffered, the source ends, or it yields an empty chunk.
	 * @param $count Integer
	 * @return String
	 */
	function stream_read( $count ) {
		$leave = false;

		while ( !$leave && !$this->mSource->atEnd() &&
			strlen( $this->mBuffer ) < $count ) {
			// readChunk() may return false; treat that as an empty chunk
			$read = (string)$this->mSource->readChunk();

			if ( $read === '' ) {
				$leave = true;
			}

			$this->mBuffer .= $read;
		}

		// substr() can return false on older PHP versions; keep the buffer
		// and the result as strings in every case
		$return = (string)substr( $this->mBuffer, 0, $count );
		$this->mBuffer = (string)substr( $this->mBuffer, $count );

		$this->mPosition += strlen( $return );

		return $return;
	}

	/**
	 * The stream is read-only.
	 * @return Boolean always false
	 */
	function stream_write( $data ) {
		return false;
	}

	/**
	 * @return Integer number of bytes read so far
	 */
	function stream_tell() {
		return $this->mPosition;
	}

	/**
	 * The stream has ended only once the source is exhausted AND the
	 * buffered remainder has been handed out; a source can report atEnd()
	 * while this adapter still holds unread bytes.
	 * @return Boolean
	 */
	function stream_eof() {
		return $this->mSource->atEnd() && $this->mBuffer === '';
	}

	/**
	 * Returns a stat array with every field zeroed, under both the numeric
	 * and the named keys.
	 * @return Array
	 */
	function url_stat() {
		$fields = array( 'dev', 'ino', 'mode', 'nlink', 'uid', 'gid', 'rdev',
			'size', 'atime', 'mtime', 'ctime', 'blksize', 'blocks' );
		$result = array();

		foreach ( $fields as $index => $name ) {
			$result[$index] = 0;
			$result[$name] = 0;
		}

		return $result;
	}
}
814
/**
 * XMLReader with a helper for fetching the text of the current element.
 */
class XMLReader2 extends XMLReader {
	/**
	 * Fetches the text contents of the current element, assuming it has no
	 * sub-elements. Text, CDATA and significant-whitespace nodes are
	 * concatenated until the first end tag.
	 *
	 * @return string The text, or an empty string for an empty element or
	 *   when the input ends before an end tag is seen (the reader is then
	 *   closed)
	 */
	function nodeContents() {
		if ( $this->isEmptyElement ) {
			return "";
		}
		$buffer = "";
		while ( $this->read() ) {
			switch ( $this->nodeType ) {
			case XMLReader::TEXT:
			case XMLReader::CDATA:
			case XMLReader::SIGNIFICANT_WHITESPACE:
				$buffer .= $this->value;
				break;
			case XMLReader::END_ELEMENT:
				return $buffer;
			}
		}

		// Ran out of input: always hand back a string, never the boolean
		// result of close()
		$this->close();
		return '';
	}
}
834
835 /**
836 * @todo document (e.g. one-sentence class description).
837 * @ingroup SpecialPage
838 */
839 class WikiRevision {
840 var $importer = null;
841 var $title = null;
842 var $id = 0;
843 var $timestamp = "20010115000000";
844 var $user = 0;
845 var $user_text = "";
846 var $text = "";
847 var $comment = "";
848 var $minor = false;
849 var $type = "";
850 var $action = "";
851 var $params = "";
852 var $fileSrc = '';
853 var $sha1base36 = false;
854 var $isTemp = false;
855 var $archiveName = '';
856
857 function setTitle( $title ) {
858 if( is_object( $title ) ) {
859 $this->title = $title;
860 } elseif( is_null( $title ) ) {
861 throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
862 } else {
863 throw new MWException( "WikiRevision given non-object title in import." );
864 }
865 }
866
867 function setID( $id ) {
868 $this->id = $id;
869 }
870
871 function setTimestamp( $ts ) {
872 # 2003-08-05T18:30:02Z
873 $this->timestamp = wfTimestamp( TS_MW, $ts );
874 }
875
876 function setUsername( $user ) {
877 $this->user_text = $user;
878 }
879
880 function setUserIP( $ip ) {
881 $this->user_text = $ip;
882 }
883
884 function setText( $text ) {
885 $this->text = $text;
886 }
887
888 function setComment( $text ) {
889 $this->comment = $text;
890 }
891
892 function setMinor( $minor ) {
893 $this->minor = (bool)$minor;
894 }
895
896 function setSrc( $src ) {
897 $this->src = $src;
898 }
899 function setFileSrc( $src, $isTemp ) {
900 $this->fileSrc = $src;
901 $this->fileIsTemp = $isTemp;
902 }
903 function setSha1Base36( $sha1base36 ) {
904 $this->sha1base36 = $sha1base36;
905 }
906
907 function setFilename( $filename ) {
908 $this->filename = $filename;
909 }
910 function setArchiveName( $archiveName ) {
911 $this->archiveName = $archiveName;
912 }
913
914 function setSize( $size ) {
915 $this->size = intval( $size );
916 }
917
918 function setType( $type ) {
919 $this->type = $type;
920 }
921
922 function setAction( $action ) {
923 $this->action = $action;
924 }
925
926 function setParams( $params ) {
927 $this->params = $params;
928 }
929
930 /**
931 * @return Title
932 */
933 function getTitle() {
934 return $this->title;
935 }
936
937 function getID() {
938 return $this->id;
939 }
940
941 function getTimestamp() {
942 return $this->timestamp;
943 }
944
945 function getUser() {
946 return $this->user_text;
947 }
948
949 function getText() {
950 return $this->text;
951 }
952
953 function getComment() {
954 return $this->comment;
955 }
956
957 function getMinor() {
958 return $this->minor;
959 }
960
961 function getSrc() {
962 return $this->src;
963 }
964 function getSha1() {
965 if ( $this->sha1base36 ) {
966 return wfBaseConvert( $this->sha1base36, 36, 16 );
967 }
968 return false;
969 }
970 function getFileSrc() {
971 return $this->fileSrc;
972 }
973 function isTempSrc() {
974 return $this->isTemp;
975 }
976
977 function getFilename() {
978 return $this->filename;
979 }
980 function getArchiveName() {
981 return $this->archiveName;
982 }
983
984 function getSize() {
985 return $this->size;
986 }
987
988 function getType() {
989 return $this->type;
990 }
991
992 function getAction() {
993 return $this->action;
994 }
995
996 function getParams() {
997 return $this->params;
998 }
999
1000 function importOldRevision() {
1001 $dbw = wfGetDB( DB_MASTER );
1002
1003 # Sneak a single revision into place
1004 $user = User::newFromName( $this->getUser() );
1005 if( $user ) {
1006 $userId = intval( $user->getId() );
1007 $userText = $user->getName();
1008 $userObj = $user;
1009 } else {
1010 $userId = 0;
1011 $userText = $this->getUser();
1012 $userObj = new User;
1013 }
1014
1015 // avoid memory leak...?
1016 $linkCache = LinkCache::singleton();
1017 $linkCache->clear();
1018
1019 $article = new Article( $this->title );
1020 $pageId = $article->getId();
1021 if( $pageId == 0 ) {
1022 # must create the page...
1023 $pageId = $article->insertOn( $dbw );
1024 $created = true;
1025 $oldcountable = null;
1026 } else {
1027 $created = false;
1028
1029 $prior = $dbw->selectField( 'revision', '1',
1030 array( 'rev_page' => $pageId,
1031 'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
1032 'rev_user_text' => $userText,
1033 'rev_comment' => $this->getComment() ),
1034 __METHOD__
1035 );
1036 if( $prior ) {
1037 // @todo FIXME: This could fail slightly for multiple matches :P
1038 wfDebug( __METHOD__ . ": skipping existing revision for [[" .
1039 $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
1040 return false;
1041 }
1042 $oldcountable = $article->isCountable();
1043 }
1044
1045 # @todo FIXME: Use original rev_id optionally (better for backups)
1046 # Insert the row
1047 $revision = new Revision( array(
1048 'page' => $pageId,
1049 'text' => $this->getText(),
1050 'comment' => $this->getComment(),
1051 'user' => $userId,
1052 'user_text' => $userText,
1053 'timestamp' => $this->timestamp,
1054 'minor_edit' => $this->minor,
1055 ) );
1056 $revision->insertOn( $dbw );
1057 $changed = $article->updateIfNewerOn( $dbw, $revision );
1058
1059 if ( $changed !== false ) {
1060 wfDebug( __METHOD__ . ": running updates\n" );
1061 $article->doEditUpdates( $revision, $userObj, array( 'created' => $created, 'oldcountable' => $oldcountable ) );
1062 }
1063
1064 return true;
1065 }
1066
1067 function importLogItem() {
1068 $dbw = wfGetDB( DB_MASTER );
1069 # @todo FIXME: This will not record autoblocks
1070 if( !$this->getTitle() ) {
1071 wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
1072 $this->timestamp . "\n" );
1073 return;
1074 }
1075 # Check if it exists already
1076 // @todo FIXME: Use original log ID (better for backups)
1077 $prior = $dbw->selectField( 'logging', '1',
1078 array( 'log_type' => $this->getType(),
1079 'log_action' => $this->getAction(),
1080 'log_timestamp' => $dbw->timestamp( $this->timestamp ),
1081 'log_namespace' => $this->getTitle()->getNamespace(),
1082 'log_title' => $this->getTitle()->getDBkey(),
1083 'log_comment' => $this->getComment(),
1084 #'log_user_text' => $this->user_text,
1085 'log_params' => $this->params ),
1086 __METHOD__
1087 );
1088 // @todo FIXME: This could fail slightly for multiple matches :P
1089 if( $prior ) {
1090 wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
1091 $this->timestamp . "\n" );
1092 return false;
1093 }
1094 $log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
1095 $data = array(
1096 'log_id' => $log_id,
1097 'log_type' => $this->type,
1098 'log_action' => $this->action,
1099 'log_timestamp' => $dbw->timestamp( $this->timestamp ),
1100 'log_user' => User::idFromName( $this->user_text ),
1101 #'log_user_text' => $this->user_text,
1102 'log_namespace' => $this->getTitle()->getNamespace(),
1103 'log_title' => $this->getTitle()->getDBkey(),
1104 'log_comment' => $this->getComment(),
1105 'log_params' => $this->params
1106 );
1107 $dbw->insert( 'logging', $data, __METHOD__ );
1108 }
1109
/**
 * Import the file upload described by this object into the local repository.
 *
 * The target is chosen as follows:
 *  - if an archive name is set, the file is imported as that old version;
 *  - otherwise it is imported as the current version, unless a local file
 *    already exists with a newer timestamp, in which case the import is
 *    stored as an old version named "<existing timestamp>!<file name>".
 *
 * The source is taken from getFileSrc(), falling back to downloadSource().
 * When getSha1() yields a value it is compared against sha1_file() of the
 * source, and a mismatching temporary source is deleted.
 *
 * @return bool True if the upload status is good, false on any failure
 */
function importUpload() {
	# Construct a file
	$archiveName = $this->getArchiveName();
	if ( $archiveName ) {
		wfDebug( __METHOD__ . ": Importing archived file as $archiveName\n" );
		$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
			RepoGroup::singleton()->getLocalRepo(), $archiveName );
	} else {
		$file = wfLocalFile( $this->getTitle() );
		wfDebug( __METHOD__ . ': Importing new file as ' . $file->getName() . "\n" );
		if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
			# The local file is newer than the one being imported, so the
			# import has to become an old (archived) version instead.
			$archiveName = $file->getTimestamp() . '!' . $file->getName();
			$file = OldLocalFile::newFromArchiveName( $this->getTitle(),
				RepoGroup::singleton()->getLocalRepo(), $archiveName );
			wfDebug( __METHOD__ . ": File already exists; importing as $archiveName\n" );
		}
	}
	if ( !$file ) {
		wfDebug( __METHOD__ . ': Bad file for ' . $this->getTitle() . "\n" );
		return false;
	}

	# Get the file source or download if necessary
	$source = $this->getFileSrc();
	$flags = $this->isTempSrc() ? File::DELETE_SOURCE : 0;
	if ( !$source ) {
		$source = $this->downloadSource();
		# A downloaded copy is always temporary
		$flags |= File::DELETE_SOURCE;
	}
	if ( !$source ) {
		wfDebug( __METHOD__ . ": Could not fetch remote file.\n" );
		return false;
	}

	# Verify the checksum, when one is available
	$sha1 = $this->getSha1();
	if ( $sha1 && ( $sha1 !== sha1_file( $source ) ) ) {
		if ( $flags & File::DELETE_SOURCE ) {
			# Broken file; delete it if it is a temporary file
			unlink( $source );
		}
		wfDebug( __METHOD__ . ": Corrupt file $source.\n" );
		return false;
	}

	$user = User::newFromName( $this->user_text );

	# Do the actual upload
	if ( $archiveName ) {
		$status = $file->uploadOld( $source, $archiveName,
			$this->getTimestamp(), $this->getComment(), $user, $flags );
	} else {
		$status = $file->upload( $source, $this->getComment(), $this->getComment(),
			$flags, false, $this->getTimestamp(), $user );
	}

	if ( $status->isGood() ) {
		wfDebug( __METHOD__ . ": Successful\n" );
		return true;
	} else {
		wfDebug( __METHOD__ . ': failed: ' . $status->getXml() . "\n" );
		return false;
	}
}
1172
/**
 * Fetch the remote source of this upload into a local temporary file.
 *
 * Nothing is downloaded unless $wgEnableUploads is true.
 *
 * @return string|bool Path of the temporary file holding the downloaded
 *   data, or false if uploads are disabled, the temporary file could not
 *   be created or written, or the source could not be fetched
 */
function downloadSource() {
	global $wgEnableUploads;
	if ( !$wgEnableUploads ) {
		return false;
	}

	$tempo = tempnam( wfTempDir(), 'download' );
	if ( $tempo === false ) {
		# tempnam() reports failure with false; do not hand that to fopen()
		wfDebug( "IMPORT: couldn't create temp file\n" );
		return false;
	}
	$f = fopen( $tempo, 'wb' );
	if ( !$f ) {
		wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
		return false;
	}

	// @todo FIXME!
	$src = $this->getSrc();
	$data = Http::get( $src );
	if ( !$data ) {
		wfDebug( "IMPORT: couldn't fetch source $src\n" );
		fclose( $f );
		unlink( $tempo );
		return false;
	}

	$written = fwrite( $f, $data );
	fclose( $f );
	if ( $written === false || $written < strlen( $data ) ) {
		# Never return a truncated file as if it were the complete download
		wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
		unlink( $tempo );
		return false;
	}

	return $tempo;
}
1201
1202 }
1203
/**
 * Import source that hands out an in-memory string as one single chunk.
 *
 * @ingroup SpecialPage
 */
class ImportStringSource {
	/** The complete string given to the constructor */
	public $mString;

	/** True once readChunk() has handed out the string */
	public $mRead = false;

	/**
	 * @param $string String: the data this source will deliver
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return bool True once the string has been read
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * Return the whole string on the first call and false on every later call.
	 *
	 * @return string|bool
	 */
	function readChunk() {
		if ( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}
1227
/**
 * Import source that reads its data in chunks from an open stream handle.
 *
 * The static factories wrap their result in a Status object: a good Status
 * carrying an ImportStreamSource on success, a fatal Status otherwise.
 *
 * @ingroup SpecialPage
 */
class ImportStreamSource {
	/** Stream resource the data is read from */
	public $mHandle;

	/**
	 * @param $handle Resource: an open, readable stream
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return bool Result of feof() on the underlying stream
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * Read up to 32768 bytes from the underlying stream.
	 *
	 * @return string|bool Result of fread() on the stream
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Open a file and wrap it in an import source.
	 *
	 * @param $filename String: path of the file to open
	 * @return Status Fatal "importcantopen" if the file cannot be opened
	 */
	static function newFromFile( $filename ) {
		wfSuppressWarnings();
		$file = fopen( $filename, 'rt' );
		wfRestoreWarnings();
		if ( !$file ) {
			return Status::newFatal( "importcantopen" );
		}
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Build an import source from a file uploaded through an HTML form.
	 *
	 * @param $fieldname String: name of the upload field in $_FILES
	 * @return Status
	 */
	static function newFromUpload( $fieldname = "xmlimport" ) {
		# Test $_FILES directly: binding a reference to a missing key would
		# silently create a null entry for it in $_FILES.
		if ( !isset( $_FILES[$fieldname] ) || !$_FILES[$fieldname]['name'] ) {
			return Status::newFatal( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		if ( !empty( $upload['error'] ) ) {
			switch ( $upload['error'] ) {
				case UPLOAD_ERR_INI_SIZE: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
					return Status::newFatal( 'importuploaderrorsize' );
				case UPLOAD_ERR_FORM_SIZE: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
					return Status::newFatal( 'importuploaderrorsize' );
				case UPLOAD_ERR_PARTIAL: # The uploaded file was only partially uploaded
					return Status::newFatal( 'importuploaderrorpartial' );
				case UPLOAD_ERR_NO_TMP_DIR: # Missing a temporary folder.
					return Status::newFatal( 'importuploaderrortemp' );
				# Any other error code falls through to the
				# is_uploaded_file() check below.
			}
		}

		$fname = $upload['tmp_name'];
		if ( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		} else {
			return Status::newFatal( 'importnofile' );
		}
	}

	/**
	 * Fetch a URL and expose the response body as an import source.
	 *
	 * @param $url String: address to fetch
	 * @param $method String: HTTP method handed to Http::request()
	 * @return Status Fatal "importcantopen" if the request fails or no
	 *   temporary file can be created for the response
	 */
	static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url );
		if ( $data === false ) {
			return Status::newFatal( 'importcantopen' );
		}

		$file = tmpfile();
		if ( !$file ) {
			# tmpfile() returns false when no temporary file can be created
			return Status::newFatal( 'importcantopen' );
		}
		fwrite( $file, $data );
		fflush( $file );
		# Rewind so that reading starts at the beginning of the data
		fseek( $file, 0 );
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Build an import source from Special:Export on another wiki.
	 *
	 * @param $interwiki String: interwiki prefix of the source wiki
	 * @param $page String: name of the page to export
	 * @param $history Bool: adds history=1 to the request when true
	 * @param $templates Bool: adds templates=1 to the request when true
	 * @param $pageLinkDepth Int: sent as pagelink-depth when non-zero
	 * @return Status Fatal "import-noarticle" for an empty page name, fatal
	 *   "importbadinterwiki" if the link is not a valid interwiki title
	 */
	public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
		if ( $page == '' ) {
			return Status::newFatal( 'import-noarticle' );
		}
		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if ( is_null( $link ) || $link->getInterwiki() == '' ) {
			return Status::newFatal( 'importbadinterwiki' );
		}

		$params = array();
		if ( $history ) {
			$params['history'] = 1;
		}
		if ( $templates ) {
			$params['templates'] = 1;
		}
		if ( $pageLinkDepth ) {
			$params['pagelink-depth'] = $pageLinkDepth;
		}
		$url = $link->getFullUrl( $params );
		# For interwikis, use POST to avoid redirects.
		return ImportStreamSource::newFromURL( $url, "POST" );
	}
}