a4eeb92c52500c7270d21731ab1ccf63d78fff7d
[lhc/web/wiklou.git] / includes / Import.php
1 <?php
2 /**
3 * MediaWiki page data importer
4 * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5 * http://www.mediawiki.org/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @file
23 * @ingroup SpecialPage
24 */
25
/**
 * One item read from an XML dump -- a page revision, a file upload or a log
 * entry -- plus the code that writes that item into the local wiki.
 *
 * WikiImporter fills an instance through the set*() methods while parsing and
 * then hands it to one of the import*() methods via a callback.
 *
 * @ingroup SpecialPage
 */
class WikiRevision {
	var $title = null;
	var $id = 0;
	var $timestamp = "20010115000000";
	var $user = 0;
	var $user_text = "";
	var $text = "";
	var $comment = "";
	var $minor = false;
	var $type = "";
	var $action = "";
	var $params = "";

	// Upload metadata. Declared here so that getSrc()/getFilename()/getSize()
	// are safe to call before the matching setter has run.
	var $src = null;
	var $filename = null;
	var $size = 0;

	/**
	 * Attach the title this item belongs to.
	 *
	 * @param $title Object: the title; anything that is not an object is rejected
	 * @throws MWException if $title is null or otherwise not an object
	 */
	function setTitle( $title ) {
		if( is_object( $title ) ) {
			$this->title = $title;
		} elseif( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	/** @param $id Mixed: ID of the item as given in the dump */
	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Store the timestamp, converted to TS_MW form by wfTimestamp().
	 * @param $ts String: timestamp as found in the dump, e.g. 2003-08-05T18:30:02Z
	 */
	function setTimestamp( $ts ) {
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	/** @param $user String: name of the contributor */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/** @param $ip String: IP of the contributor; stored in the same field as the user name */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/** @param $text String: revision text */
	function setText( $text ) {
		$this->text = $text;
	}

	/** @param $text String: edit summary / log comment */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/** @param $minor Mixed: cast to bool */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/** @param $src String: location to fetch an uploaded file from */
	function setSrc( $src ) {
		$this->src = $src;
	}

	/** @param $filename String: file name of an upload */
	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/** @param $size Mixed: file size; stored as an integer */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	/** @param $type String: log type */
	function setType( $type ) {
		$this->type = $type;
	}

	/** @param $action String: log action */
	function setAction( $action ) {
		$this->action = $action;
	}

	/** @param $params String: log parameters */
	function setParams( $params ) {
		$this->params = $params;
	}

	function getTitle() {
		return $this->title;
	}

	function getID() {
		return $this->id;
	}

	function getTimestamp() {
		return $this->timestamp;
	}

	/** @return String: the contributor's user name or IP (the user_text field) */
	function getUser() {
		return $this->user_text;
	}

	function getText() {
		return $this->text;
	}

	function getComment() {
		return $this->comment;
	}

	function getMinor() {
		return $this->minor;
	}

	/** @return String|null: null until setSrc() has been called */
	function getSrc() {
		return $this->src;
	}

	/** @return String|null: null until setFilename() has been called */
	function getFilename() {
		return $this->filename;
	}

	/** @return Integer: 0 until setSize() has been called */
	function getSize() {
		return $this->size;
	}

	function getType() {
		return $this->type;
	}

	function getAction() {
		return $this->action;
	}

	function getParams() {
		return $this->params;
	}

	/**
	 * Insert this revision into the local database, creating the page when it
	 * does not exist yet.
	 *
	 * A revision is treated as already present when the page has a row with the
	 * same timestamp, user text and comment; in that case nothing is written.
	 *
	 * @return Boolean: false if the revision was skipped as a duplicate,
	 *   true once it has been inserted
	 */
	function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
		} else {
			// No usable User object: record the raw name with user ID 0.
			$userId = 0;
			$userText = $this->getUser();
		}

		// avoid memory leak...?
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		$article = new Article( $this->title );
		$pageId = $article->getId();
		if( $pageId == 0 ) {
			# must create the page...
			$pageId = $article->insertOn( $dbw );
			$created = true;
		} else {
			$created = false;

			$prior = $dbw->selectField( 'revision', '1',
				array( 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment' => $this->getComment() ),
				__METHOD__
			);
			if( $prior ) {
				// FIXME: this could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
		}

		# FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( array(
			'page' => $pageId,
			'text' => $this->getText(),
			'comment' => $this->getComment(),
			'user' => $userId,
			'user_text' => $userText,
			'timestamp' => $this->timestamp,
			'minor_edit' => $this->minor,
		) );
		$revId = $revision->insertOn( $dbw );
		$changed = $article->updateIfNewerOn( $dbw, $revision );

		if( $created ) {
			wfDebug( __METHOD__ . ": running onArticleCreate\n" );
			Article::onArticleCreate( $this->title );

			wfDebug( __METHOD__ . ": running create updates\n" );
			$article->createUpdates( $revision );

		} elseif( $changed ) {
			wfDebug( __METHOD__ . ": running onArticleEdit\n" );
			Article::onArticleEdit( $this->title );

			wfDebug( __METHOD__ . ": running edit updates\n" );
			$article->editUpdates(
				$this->getText(),
				$this->getComment(),
				$this->minor,
				$this->timestamp,
				$revId );
		}

		return true;
	}

	/**
	 * Insert this item into the logging table unless an entry with the same
	 * type, action, timestamp and params is already there.
	 *
	 * @return Boolean: true if a row was inserted, false if the item was
	 *   skipped (no title, or a matching entry already exists)
	 */
	function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );
		# FIXME: this will not record autoblocks
		if( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log item, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}
		# Check if it exists already
		// FIXME: use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			array( 'log_type' => $this->getType(),
				'log_action' => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				#'log_user_text' => $this->user_text,
				'log_params' => $this->params ),
			__METHOD__
		);
		// FIXME: this could fail slightly for multiple matches :P
		if( $prior ) {
			wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}
		$log_id = $dbw->nextSequenceValue( 'log_log_id_seq' );
		$data = array(
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			// Force an integer so an unknown user is stored as 0, the same
			// convention importOldRevision() uses for users without an account.
			'log_user' => intval( User::idFromName( $this->user_text ) ),
			#'log_user_text' => $this->user_text,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params
		);
		$dbw->insert( 'logging', $data, __METHOD__ );

		// Report success so the importer can count this item.
		return true;
	}

	/**
	 * Fetch the file named by getSrc() and store it as an upload for this
	 * item's title. Still experimental (it logs itself as a STUB).
	 *
	 * @return Boolean: true if the upload status was good, false otherwise
	 */
	function importUpload() {
		wfDebug( __METHOD__ . ": STUB\n" );

		// @fixme upload() uses $wgUser, which is wrong here
		// it may also create a page without our desire, also wrong potentially.
		// and, it will record a *current* upload, but we might want an archive version here

		$file = wfLocalFile( $this->getTitle() );
		if( !$file ) {
			wfDebug( "IMPORT: Bad file. :(\n" );
			return false;
		}

		$source = $this->downloadSource();
		if( !$source ) {
			wfDebug( "IMPORT: Could not fetch remote file. :(\n" );
			return false;
		}

		$status = $file->upload( $source,
			$this->getComment(),
			$this->getComment(), // Initial page, if none present...
			File::DELETE_SOURCE,
			false, // props...
			$this->getTimestamp() );

		if( $status->isGood() ) {
			// yay?
			wfDebug( "IMPORT: is ok?\n" );
			return true;
		}

		wfDebug( "IMPORT: is bad? " . $status->getXml() . "\n" );
		return false;
	}

	/**
	 * Download getSrc() into a fresh temporary file.
	 *
	 * Does nothing unless $wgEnableUploads is set. The temporary file is
	 * removed again when the fetch or the write fails.
	 *
	 * @return String|false: path of the temporary file, or false on failure
	 */
	function downloadSource() {
		global $wgEnableUploads;
		if( !$wgEnableUploads ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		// tempnam() itself can fail; do not hand false to fopen().
		$f = ( $tempo === false ) ? false : fopen( $tempo, 'wb' );
		if( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			return false;
		}

		// @fixme!
		$src = $this->getSrc();
		$data = Http::get( $src );
		if( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		$written = fwrite( $f, $data );
		fclose( $f );
		if( $written !== strlen( $data ) ) {
			// A truncated file must not be passed on as a complete download.
			wfDebug( "IMPORT: short write to temp file $tempo\n" );
			unlink( $tempo );
			return false;
		}

		return $tempo;
	}

}
364
/**
 * XML dump importer; implements Special:Import.
 *
 * The source is fed chunk by chunk to an expat parser. The element handlers
 * (in_* / out_* methods) act as a state machine: each one installs the handler
 * pair for the element being entered or left, and $tagStack remembers which
 * container element to return to. Completed items are passed to the
 * configurable revision / upload / log item callbacks.
 *
 * @ingroup SpecialPage
 */
class WikiImporter {
	var $mDebug = false;
	var $mSource = null;
	var $mPageCallback = null;
	var $mPageOutCallback = null;
	var $mRevisionCallback = null;
	var $mLogItemCallback = null;
	var $mUploadCallback = null;
	var $mTargetNamespace = null;
	var $lastfield;
	var $tagStack = array();

	// Parser state. Declared up front so the handlers never read a property
	// that has not been assigned yet.
	var $workTitle = null;         // raw text of the current <title>
	var $workRevision = null;      // WikiRevision being filled, or null when skipping
	var $pageTitle = null;         // title the current page is imported to
	var $origTitle = null;         // title as given in the dump
	var $workRevisionCount = 0;
	var $workSuccessCount = 0;
	var $uploadCount = 0;
	var $uploadSuccessCount = 0;
	var $appendfield = "";         // element whose character data is being collected
	var $appenddata = "";          // character data collected so far

	/**
	 * @param $source Object providing readChunk() and atEnd(), such as
	 *   ImportStringSource or ImportStreamSource
	 */
	function __construct( $source ) {
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, "importUpload" ) );
		$this->setLogItemCallback( array( $this, "importLogItem" ) );
		$this->mSource = $source;
	}

	/**
	 * Log a structural problem in the dump. Despite the name nothing is
	 * thrown: the message only goes to the debug log and parsing continues.
	 *
	 * @param $err String: description of the problem
	 */
	function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	# --------------

	/**
	 * Run the import over the whole source.
	 *
	 * @return true on success, a WikiErrorMsg when there is no source, or a
	 *   WikiXmlError when the XML parser rejects a chunk
	 */
	function doImport() {
		if( empty( $this->mSource ) ) {
			return new WikiErrorMsg( "importnotext" );
		}

		$parser = xml_parser_create( "UTF-8" );

		# case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		xml_set_object( $parser, $this );
		xml_set_element_handler( $parser, "in_start", "" );

		$offset = 0; // for context extraction on error reporting
		do {
			$chunk = $this->mSource->readChunk();
			if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
				wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
				// NOTE(review): the parser is not freed on this path because the
				// error object receives it; confirm whether WikiXmlError still
				// needs it after construction.
				return new WikiXmlError( $parser, wfMsgHtml( 'import-parse-failure' ), $chunk, $offset );
			}
			$offset += strlen( $chunk );
		} while( $chunk !== false && !$this->mSource->atEnd() );
		xml_parser_free( $parser );

		return true;
	}

	/**
	 * Write a line to the debug log, but only in debug mode.
	 * @param $data String
	 */
	function debug( $data ) {
		if( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Show a message to the user: printed as plain text in command line mode,
	 * otherwise added to the output page as an HTML-escaped list item.
	 * @param $data String
	 */
	function notice( $data ) {
		global $wgCommandLineMode;
		if( $wgCommandLineMode ) {
			print "$data\n";
		} else {
			global $wgOut;
			$wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
		}
	}

	/**
	 * Set debug mode...
	 * @param $debug Boolean
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the action to perform as each new page in the stream is reached.
	 * @param $callback callback
	 * @return callback: the previously registered callback
	 */
	function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page in the stream is completed.
	 * Callback accepts the page title (as a Title object), a second object
	 * with the original title form (in case it's been overridden into a
	 * local namespace), a count of revisions, and a count of the revisions
	 * that were imported successfully.
	 *
	 * @param $callback callback
	 * @return callback: the previously registered callback
	 */
	function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page revision is reached.
	 * @param $callback callback
	 * @return callback: the previously registered callback
	 */
	function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each file upload version is reached.
	 * @param $callback callback
	 * @return callback: the previously registered callback
	 */
	function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each log item is reached.
	 * @param $callback callback
	 * @return callback: the previously registered callback
	 */
	function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Set a target namespace to override the defaults
	 *
	 * @param $namespace Mixed: null to keep the namespaces from the dump,
	 *   otherwise a non-negative namespace index
	 * @return Boolean: false if $namespace was rejected (negative), else true
	 */
	function setTargetNamespace( $namespace ) {
		if( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
		} elseif( $namespace >= 0 ) {
			// FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
		} else {
			return false;
		}
		return true;
	}

	/**
	 * Default per-revision callback, performs the import.
	 * @param $revision WikiRevision
	 * @private
	 */
	function importRevision( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
	}

	/**
	 * Default per-log-item callback, performs the import.
	 * @param $rev WikiRevision
	 * @private
	 */
	function importLogItem( $rev ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
	}

	/**
	 * Default per-upload callback. Dummy for now: nothing is imported and
	 * false is always returned.
	 * @param $revision WikiRevision
	 */
	function importUpload( $revision ) {
		//$dbw = wfGetDB( DB_MASTER );
		//return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
		return false;
	}

	/**
	 * Alternate per-revision callback, for debugging.
	 * @param $revision WikiRevision
	 * @private
	 */
	function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notify the callback function when a new <page> is reached.
	 * @param $title Mixed: passed through to the callback unchanged
	 *   (out_append() passes the raw title text)
	 * @private
	 */
	function pageCallback( $title ) {
		if( is_callable( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Notify the callback function when a </page> is closed.
	 * @param $title Title
	 * @param $origTitle Title
	 * @param $revisionCount int
	 * @param $successCount Int: number of revisions for which callback returned true
	 * @private
	 */
	function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
		if( is_callable( $this->mPageOutCallback ) ) {
			call_user_func( $this->mPageOutCallback, $title, $origTitle,
				$revisionCount, $successCount );
		}
	}

	# XML parser callbacks from here out -- beware!

	/** Handler that ignores whatever it is given. */
	function donothing( $parser, $x, $y="" ) {
		#$this->debug( "donothing" );
	}

	/** Start handler for the document root; only <mediawiki> is accepted. */
	function in_start( $parser, $name, $attribs ) {
		$this->debug( "in_start $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
	}

	/** Start handler directly below <mediawiki>: siteinfo, page or logitem. */
	function in_mediawiki( $parser, $name, $attribs ) {
		$this->debug( "in_mediawiki $name" );
		if( $name == 'siteinfo' ) {
			xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
		} elseif( $name == 'page' ) {
			$this->push( $name );
			$this->workRevisionCount = 0;
			$this->workSuccessCount = 0;
			$this->uploadCount = 0;
			$this->uploadSuccessCount = 0;
			xml_set_element_handler( $parser, "in_page", "out_page" );
		} elseif( $name == 'logitem' ) {
			$this->push( $name );
			$this->workRevision = new WikiRevision;
			xml_set_element_handler( $parser, "in_logitem", "out_logitem" );
		} else {
			return $this->throwXMLerror( "Expected <page>, got <$name>" );
		}
	}

	/** End handler for </mediawiki>; everything after it is ignored. */
	function out_mediawiki( $parser, $name ) {
		$this->debug( "out_mediawiki $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
		}
		xml_set_element_handler( $parser, "donothing", "donothing" );
	}

	/** Start handler inside <siteinfo>; known children are accepted and ignored. */
	function in_siteinfo( $parser, $name, $attribs ) {
		// no-ops for now
		$this->debug( "in_siteinfo $name" );
		switch( $name ) {
		case "sitename":
		case "base":
		case "generator":
		case "case":
		case "namespaces":
		case "namespace":
			break;
		default:
			return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
		}
	}

	/** End handler inside <siteinfo>; hands control back on </siteinfo>. */
	function out_siteinfo( $parser, $name ) {
		if( $name == "siteinfo" ) {
			xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
		}
	}

	/**
	 * Start handler inside <page>. Text fields are collected through the
	 * append handlers; <revision> and <upload> open a new WikiRevision, or
	 * null when the page title was invalid so that the item gets skipped.
	 */
	function in_page( $parser, $name, $attribs ) {
		$this->debug( "in_page $name" );
		switch( $name ) {
		case "id":
		case "title":
		case "restrictions":
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "revision":
			$this->push( "revision" );
			if( is_object( $this->pageTitle ) ) {
				$this->workRevision = new WikiRevision;
				$this->workRevision->setTitle( $this->pageTitle );
				$this->workRevisionCount++;
			} else {
				// Skipping items due to invalid page title
				$this->workRevision = null;
			}
			xml_set_element_handler( $parser, "in_revision", "out_revision" );
			break;
		case "upload":
			$this->push( "upload" );
			if( is_object( $this->pageTitle ) ) {
				$this->workRevision = new WikiRevision;
				$this->workRevision->setTitle( $this->pageTitle );
				$this->uploadCount++;
			} else {
				// Skipping items due to invalid page title
				$this->workRevision = null;
			}
			xml_set_element_handler( $parser, "in_upload", "out_upload" );
			break;
		default:
			return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
		}
	}

	/** End handler for </page>: report the page to the callback and reset state. */
	function out_page( $parser, $name ) {
		$this->debug( "out_page $name" );
		$this->pop();
		if( $name != "page" ) {
			return $this->throwXMLerror( "Expected </page>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		$this->pageOutCallback( $this->pageTitle, $this->origTitle,
			$this->workRevisionCount, $this->workSuccessCount );

		$this->workTitle = null;
		$this->workRevision = null;
		$this->workRevisionCount = 0;
		$this->workSuccessCount = 0;
		$this->pageTitle = null;
		$this->origTitle = null;
	}

	/** Start handler used while collecting text: child elements are an error. */
	function in_nothing( $parser, $name, $attribs ) {
		$this->debug( "in_nothing $name" );
		return $this->throwXMLerror( "No child elements allowed here; got <$name>" );
	}

	/** Character data handler: accumulate text for the current field. */
	function char_append( $parser, $data ) {
		$this->debug( "char_append '$data'" );
		$this->appenddata .= $data;
	}

	/**
	 * End handler for a text field: store the collected text where it
	 * belongs, then reinstall the handlers of the enclosing element.
	 */
	function out_append( $parser, $name ) {
		$this->debug( "out_append $name" );
		if( $name != $this->appendfield ) {
			return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
		}

		switch( $this->appendfield ) {
		case "title":
			$this->workTitle = $this->appenddata;
			$this->origTitle = Title::newFromText( $this->workTitle );
			if( !is_null( $this->mTargetNamespace ) && !is_null( $this->origTitle ) ) {
				// Same page name, but moved into the target namespace.
				$this->pageTitle = Title::makeTitle( $this->mTargetNamespace,
					$this->origTitle->getDBkey() );
			} else {
				$this->pageTitle = Title::newFromText( $this->workTitle );
			}
			if( is_null( $this->pageTitle ) ) {
				// Invalid page title? Ignore the page
				$this->notice( "Skipping invalid page title '$this->workTitle'" );
			} else {
				$this->pageCallback( $this->workTitle );
			}
			break;
		case "id":
			// <id> also occurs in <page> and <contributor>; only the ID of the
			// revision or log item itself is kept.
			if ( $this->parentTag() == 'revision' || $this->parentTag() == 'logitem' ) {
				if( $this->workRevision ) {
					$this->workRevision->setID( $this->appenddata );
				}
			}
			break;
		case "text":
			if( $this->workRevision ) {
				$this->workRevision->setText( $this->appenddata );
			}
			break;
		case "username":
			if( $this->workRevision ) {
				$this->workRevision->setUsername( $this->appenddata );
			}
			break;
		case "ip":
			if( $this->workRevision ) {
				$this->workRevision->setUserIP( $this->appenddata );
			}
			break;
		case "timestamp":
			if( $this->workRevision ) {
				$this->workRevision->setTimestamp( $this->appenddata );
			}
			break;
		case "comment":
			if( $this->workRevision ) {
				$this->workRevision->setComment( $this->appenddata );
			}
			break;
		case "type":
			if( $this->workRevision ) {
				$this->workRevision->setType( $this->appenddata );
			}
			break;
		case "action":
			if( $this->workRevision ) {
				$this->workRevision->setAction( $this->appenddata );
			}
			break;
		case "logtitle":
			if( $this->workRevision ) {
				$this->workRevision->setTitle( Title::newFromText( $this->appenddata ) );
			}
			break;
		case "params":
			if( $this->workRevision ) {
				$this->workRevision->setParams( $this->appenddata );
			}
			break;
		case "minor":
			// The mere presence of the element marks the edit as minor.
			if( $this->workRevision ) {
				$this->workRevision->setMinor( true );
			}
			break;
		case "filename":
			if( $this->workRevision ) {
				$this->workRevision->setFilename( $this->appenddata );
			}
			break;
		case "src":
			if( $this->workRevision ) {
				$this->workRevision->setSrc( $this->appenddata );
			}
			break;
		case "size":
			if( $this->workRevision ) {
				$this->workRevision->setSize( intval( $this->appenddata ) );
			}
			break;
		default:
			$this->debug( "Bad append: {$this->appendfield}" );
		}
		$this->appendfield = "";
		$this->appenddata = "";

		$parent = $this->parentTag();
		xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
		xml_set_character_data_handler( $parser, "donothing" );
	}

	/** Start handler inside <revision>. */
	function in_revision( $parser, $name, $attribs ) {
		$this->debug( "in_revision $name" );
		switch( $name ) {
		case "id":
		case "timestamp":
		case "comment":
		case "minor":
		case "text":
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			$this->push( "contributor" );
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
		}
	}

	/** End handler for </revision>: pass the revision to the revision callback. */
	function out_revision( $parser, $name ) {
		$this->debug( "out_revision $name" );
		$this->pop();
		if( $name != "revision" ) {
			return $this->throwXMLerror( "Expected </revision>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );

		if( $this->workRevision ) {
			$ok = call_user_func_array( $this->mRevisionCallback,
				array( $this->workRevision, $this ) );
			if( $ok ) {
				$this->workSuccessCount++;
			}
		}
	}

	/** Start handler inside <logitem>. */
	function in_logitem( $parser, $name, $attribs ) {
		$this->debug( "in_logitem $name" );
		switch( $name ) {
		case "id":
		case "timestamp":
		case "comment":
		case "type":
		case "action":
		case "logtitle":
		case "params":
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			$this->push( "contributor" );
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXMLerror( "Element <$name> not allowed in a <logitem>." );
		}
	}

	/** End handler for </logitem>: pass the item to the log item callback. */
	function out_logitem( $parser, $name ) {
		$this->debug( "out_logitem $name" );
		$this->pop();
		if( $name != "logitem" ) {
			return $this->throwXMLerror( "Expected </logitem>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		if( $this->workRevision ) {
			$ok = call_user_func_array( $this->mLogItemCallback,
				array( $this->workRevision, $this ) );
			if( $ok ) {
				$this->workSuccessCount++;
			}
		}
	}

	/** Start handler inside <upload>. */
	function in_upload( $parser, $name, $attribs ) {
		$this->debug( "in_upload $name" );
		switch( $name ) {
		case "timestamp":
		case "comment":
		case "text":
		case "filename":
		case "src":
		case "size":
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			$this->push( "contributor" );
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXMLerror( "Element <$name> not allowed in an <upload>." );
		}
	}

	/** End handler for </upload>: pass the item to the upload callback. */
	function out_upload( $parser, $name ) {
		$this->debug( "out_upload $name" );
		$this->pop();
		if( $name != "upload" ) {
			return $this->throwXMLerror( "Expected </upload>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );

		if( $this->workRevision ) {
			$ok = call_user_func_array( $this->mUploadCallback,
				array( $this->workRevision, $this ) );
			if( $ok ) {
				// Must be the counter that in_mediawiki() resets for each page.
				$this->uploadSuccessCount++;
			}
		}
	}

	/** Start handler inside <contributor>. */
	function in_contributor( $parser, $name, $attribs ) {
		$this->debug( "in_contributor $name" );
		switch( $name ) {
		case "username":
		case "ip":
		case "id":
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		default:
			$this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
		}
	}

	/** End handler for </contributor>: return to the enclosing element. */
	function out_contributor( $parser, $name ) {
		$this->debug( "out_contributor $name" );
		$this->pop();
		if( $name != "contributor" ) {
			return $this->throwXMLerror( "Expected </contributor>, got </$name>" );
		}
		$parent = $this->parentTag();
		xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
	}

	/** Remember that a container element has been entered. */
	private function push( $name ) {
		array_push( $this->tagStack, $name );
		$this->debug( "PUSH $name" );
	}

	/** Leave the innermost container element and return its name. */
	private function pop() {
		$name = array_pop( $this->tagStack );
		$this->debug( "POP $name" );
		return $name;
	}

	/** Name of the innermost container element currently open. */
	private function parentTag() {
		$name = $this->tagStack[count( $this->tagStack ) - 1];
		$this->debug( "PARENT $name" );
		return $name;
	}

}
977
/**
 * Import source backed by a string held in memory. The whole string is
 * delivered as a single chunk.
 *
 * @ingroup SpecialPage
 */
class ImportStringSource {
	// Declared explicitly instead of being created on the fly in the constructor.
	var $mString = null; // the data to hand out
	var $mRead = false;  // true once the string has been handed out

	/**
	 * @param $string String: the complete data to import
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return Boolean: true once readChunk() has returned the string
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return String|false: the whole string on the first call, false afterwards
	 */
	function readChunk() {
		if( $this->atEnd() ) {
			return false;
		} else {
			$this->mRead = true;
			return $this->mString;
		}
	}
}
1001
/**
 * Import source backed by an open stream handle, read in chunks of up to
 * 32768 bytes. The static constructors open that stream from a local file,
 * an HTTP upload, a URL or an interwiki export page; each of them returns a
 * WikiErrorMsg instead of a source when the stream cannot be obtained.
 *
 * @ingroup SpecialPage
 */
class ImportStreamSource {
	// Declared explicitly instead of being created on the fly in the constructor.
	var $mHandle = null;

	/**
	 * @param $handle resource: readable stream
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return Boolean: whether the stream has reached end of file
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return String|false: up to 32768 bytes from the stream
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * @param $filename String: path of the file to read
	 * @return ImportStreamSource, or WikiErrorMsg if the file cannot be opened
	 */
	static function newFromFile( $filename ) {
		$file = @fopen( $filename, 'rt' );
		if( !$file ) {
			return new WikiErrorMsg( "importcantopen" );
		}
		return new ImportStreamSource( $file );
	}

	/**
	 * @param $fieldname String: name of the file field in the upload form
	 * @return ImportStreamSource, or WikiErrorMsg describing the upload problem
	 */
	static function newFromUpload( $fieldname = "xmlimport" ) {
		// Read by value: taking a reference (=&) to a missing key would
		// silently add that key to $_FILES.
		$upload = isset( $_FILES[$fieldname] ) ? $_FILES[$fieldname] : null;

		if( is_null( $upload ) || empty( $upload['name'] ) ) {
			return new WikiErrorMsg( 'importnofile' );
		}
		if( !empty( $upload['error'] ) ) {
			switch( $upload['error'] ) {
				case UPLOAD_ERR_INI_SIZE: # 1: exceeds the upload_max_filesize directive in php.ini
				case UPLOAD_ERR_FORM_SIZE: # 2: exceeds the MAX_FILE_SIZE given in the HTML form
					return new WikiErrorMsg( 'importuploaderrorsize' );
				case UPLOAD_ERR_PARTIAL: # 3: the file was only partially uploaded
					return new WikiErrorMsg( 'importuploaderrorpartial' );
				case UPLOAD_ERR_NO_TMP_DIR: # 6: missing a temporary folder
					return new WikiErrorMsg( 'importuploaderrortemp' );
				# Any other code falls through to the is_uploaded_file() check below.
			}
		}
		$fname = $upload['tmp_name'];
		if( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		} else {
			return new WikiErrorMsg( 'importnofile' );
		}
	}

	/**
	 * Fetch a URL and wrap the response body in a temporary file.
	 *
	 * @param $url String
	 * @param $method String: HTTP method, 'GET' by default
	 * @return ImportStreamSource, or WikiErrorMsg if the request fails or no
	 *   temporary file can be created
	 */
	static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url );
		if( $data === false ) {
			return new WikiErrorMsg( 'importcantopen' );
		}

		$file = tmpfile();
		if( $file === false ) {
			// No temporary file available: report it rather than writing to false.
			return new WikiErrorMsg( 'importcantopen' );
		}
		fwrite( $file, $data );
		fflush( $file );
		fseek( $file, 0 );
		return new ImportStreamSource( $file );
	}

	/**
	 * Import from Special:Export on another wiki, addressed by interwiki prefix.
	 *
	 * @param $interwiki String: interwiki prefix of the source wiki
	 * @param $page String: name of the page to export
	 * @param $history Boolean: ask for the full history (history=1)
	 * @return ImportStreamSource, or WikiErrorMsg on an empty page name, a bad
	 *   interwiki prefix or a failed request
	 */
	public static function newFromInterwiki( $interwiki, $page, $history=false ) {
		if( $page == '' ) {
			return new WikiErrorMsg( 'import-noarticle' );
		}
		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if( is_null( $link ) || $link->getInterwiki() == '' ) {
			return new WikiErrorMsg( 'importbadinterwiki' );
		} else {
			$params = $history ? 'history=1' : '';
			$url = $link->getFullUrl( $params );
			# For interwikis, use POST to avoid redirects.
			return ImportStreamSource::newFromURL( $url, "POST" );
		}
	}
}