check that $wgArticle is an instance of the Article class in Skin::pageStats() per...
[lhc/web/wiklou.git] / includes / Import.php
1 <?php
2 /**
3 * MediaWiki page data importer
4 * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5 * http://www.mediawiki.org/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @file
23 * @ingroup SpecialPage
24 */
25
/**
 * Data holder for a single imported item -- a page revision, a log entry or
 * a file upload -- together with the routines that write it to the wiki.
 *
 * WikiImporter fills an instance through the setters while it walks the XML
 * and then hands it to one of the import*() methods via its callbacks.
 *
 * @ingroup SpecialPage
 */
class WikiRevision {
	var $title = null;
	var $id = 0;
	var $timestamp = "20010115000000";
	var $user = 0;
	var $user_text = "";
	var $text = "";
	var $comment = "";
	var $minor = false;
	var $type = "";
	var $action = "";
	var $params = "";

	// Upload fields. These used to be created on the fly by their setters,
	// so reading them before they were set touched an undefined property.
	var $src = null;
	var $filename = null;
	var $size = null;

	/**
	 * Attach the title this item belongs to.
	 *
	 * @param $title Object: title object (only is_object() is checked here)
	 * @throws MWException if $title is null or any other non-object
	 */
	function setTitle( $title ) {
		if( is_object( $title ) ) {
			$this->title = $title;
		} elseif( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Store the timestamp, converted to TS_MW form by wfTimestamp().
	 * @param $ts String: timestamp as found in the dump, e.g. 2003-08-05T18:30:02Z
	 */
	function setTimestamp( $ts ) {
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * Anonymous contributors are stored in the same field as user names.
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	function setText( $text ) {
		$this->text = $text;
	}

	function setComment( $text ) {
		$this->comment = $text;
	}

	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	function setSrc( $src ) {
		$this->src = $src;
	}

	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	function setSize( $size ) {
		$this->size = intval( $size );
	}

	function setType( $type ) {
		$this->type = $type;
	}

	function setAction( $action ) {
		$this->action = $action;
	}

	function setParams( $params ) {
		$this->params = $params;
	}

	function getTitle() {
		return $this->title;
	}

	function getID() {
		return $this->id;
	}

	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return String: the contributor's name or IP as stored by
	 *   setUsername()/setUserIP() (not a User object)
	 */
	function getUser() {
		return $this->user_text;
	}

	function getText() {
		return $this->text;
	}

	function getComment() {
		return $this->comment;
	}

	function getMinor() {
		return $this->minor;
	}

	function getSrc() {
		return $this->src;
	}

	function getFilename() {
		return $this->filename;
	}

	function getSize() {
		return $this->size;
	}

	function getType() {
		return $this->type;
	}

	function getAction() {
		return $this->action;
	}

	function getParams() {
		return $this->params;
	}

	/**
	 * Insert this revision into the page history, creating the page first
	 * if it does not exist yet, and run the create/edit update hooks.
	 *
	 * A revision on an existing page is skipped when a row with the same
	 * page, timestamp, user text and comment is already present.
	 *
	 * @return Boolean: true if the revision was inserted, false if skipped
	 */
	function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
		} else {
			$userId = 0;
			$userText = $this->getUser();
		}

		// avoid memory leak...?
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		$article = new Article( $this->title );
		$pageId = $article->getId();
		if( $pageId == 0 ) {
			# must create the page...
			$pageId = $article->insertOn( $dbw );
			$created = true;
		} else {
			$created = false;

			$prior = $dbw->selectField( 'revision', '1',
				array( 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment' => $this->getComment() ),
				__METHOD__
			);
			if( $prior ) {
				// FIXME: this could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
		}

		# FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( array(
			'page' => $pageId,
			'text' => $this->getText(),
			'comment' => $this->getComment(),
			'user' => $userId,
			'user_text' => $userText,
			'timestamp' => $this->timestamp,
			'minor_edit' => $this->minor,
			) );
		$revId = $revision->insertOn( $dbw );
		$changed = $article->updateIfNewerOn( $dbw, $revision );

		# To be on the safe side, point $wgTitle at the imported page while
		# the update hooks run, then put the previous value back.
		$tempTitle = $GLOBALS['wgTitle'];
		$GLOBALS['wgTitle'] = $this->title;

		if( $created ) {
			wfDebug( __METHOD__ . ": running onArticleCreate\n" );
			Article::onArticleCreate( $this->title );

			wfDebug( __METHOD__ . ": running create updates\n" );
			$article->createUpdates( $revision );

		} elseif( $changed ) {
			wfDebug( __METHOD__ . ": running onArticleEdit\n" );
			Article::onArticleEdit( $this->title, 'skiptransclusions' ); // leave templatelinks for editUpdates()

			wfDebug( __METHOD__ . ": running edit updates\n" );
			$article->editUpdates(
				$this->getText(),
				$this->getComment(),
				$this->minor,
				$this->timestamp,
				$revId );
		}
		$GLOBALS['wgTitle'] = $tempTitle;

		return true;
	}

	/**
	 * Insert this item into the logging table unless an entry with the
	 * same type, action, timestamp, target, comment and params exists.
	 *
	 * @return Boolean: true if a row was inserted, false if the item was
	 *   skipped (no title, or already present)
	 */
	function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );
		# FIXME: this will not record autoblocks
		if( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}
		# Check if it exists already
		// FIXME: use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			array( 'log_type' => $this->getType(),
				'log_action' => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				'log_namespace' => $this->getTitle()->getNamespace(),
				'log_title' => $this->getTitle()->getDBkey(),
				'log_comment' => $this->getComment(),
				#'log_user_text' => $this->user_text,
				'log_params' => $this->params ),
			__METHOD__
		);
		// FIXME: this could fail slightly for multiple matches :P
		if( $prior ) {
			wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}
		$log_id = $dbw->nextSequenceValue( 'log_log_id_seq' );
		$data = array(
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			'log_user' => User::idFromName( $this->user_text ),
			#'log_user_text' => $this->user_text,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params
		);
		$dbw->insert( 'logging', $data, __METHOD__ );

		// Report success so the caller's success counter can pick it up
		return true;
	}

	/**
	 * Fetch the file named by getSrc() and record it as an upload of the
	 * file identified by this item's title.
	 *
	 * @return Boolean: true if the upload status is good, false otherwise
	 */
	function importUpload() {
		wfDebug( __METHOD__ . ": STUB\n" );

		// Reference snippet, from file revert...
		//   $source = $this->file->getArchiveVirtualUrl( $this->oldimage );
		//   $comment = $wgRequest->getText( 'wpComment' );
		//   // TODO: Preserve file properties from database instead of reloading from file
		//   $status = $this->file->upload( $source, $comment, $comment );
		//   if( $status->isGood() ) {

		// Reference snippet, from file upload...
		//   $this->mLocalFile = wfLocalFile( $nt );
		//   $this->mDestName = $this->mLocalFile->getName();
		//   //....
		//   $status = $this->mLocalFile->upload( $this->mTempPath, $this->mComment, $pageText,
		//       File::DELETE_SOURCE, $this->mFileProps );
		//   if ( !$status->isGood() ) {
		//       $resultDetails = array( 'internal' => $status->getWikiText() );

		// @fixme upload() uses $wgUser, which is wrong here
		// it may also create a page without our desire, also wrong potentially.
		// and, it will record a *current* upload, but we might want an archive version here

		$file = wfLocalFile( $this->getTitle() );
		if( !$file ) {
			wfDebug( "IMPORT: Bad file. :(\n" );
			return false;
		}

		$source = $this->downloadSource();
		if( !$source ) {
			wfDebug( "IMPORT: Could not fetch remote file. :(\n" );
			return false;
		}

		$status = $file->upload( $source,
			$this->getComment(),
			$this->getComment(), // Initial page, if none present...
			File::DELETE_SOURCE,
			false, // props...
			$this->getTimestamp() );

		if( $status->isGood() ) {
			// yay?
			wfDebug( "IMPORT: is ok?\n" );
			return true;
		}

		wfDebug( "IMPORT: is bad? " . $status->getXml() . "\n" );
		return false;
	}

	/**
	 * Download getSrc() into a fresh temporary file.
	 *
	 * Does nothing when $wgEnableUploads is off. On any failure the
	 * temporary file is removed again so nothing is left behind.
	 *
	 * @return Mixed: path of the temporary file, or false on failure
	 */
	function downloadSource() {
		global $wgEnableUploads;
		if( !$wgEnableUploads ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		if( $tempo === false ) {
			wfDebug( "IMPORT: couldn't create temp file\n" );
			return false;
		}

		$f = fopen( $tempo, 'wb' );
		if( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			// tempnam() has already created the file on disk
			if( file_exists( $tempo ) ) {
				unlink( $tempo );
			}
			return false;
		}

		// @fixme!
		$src = $this->getSrc();
		$data = Http::get( $src );
		if( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		$written = fwrite( $f, $data );
		fclose( $f );
		if( $written === false || $written < strlen( $data ) ) {
			// A truncated file must not be handed on as a valid upload
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			unlink( $tempo );
			return false;
		}

		return $tempo;
	}

}
372
/**
 * Implements Special:Import: reads an XML dump chunk by chunk from a source
 * object (anything offering readChunk() and atEnd()) and feeds the pages,
 * revisions, uploads and log items it finds to configurable callbacks.
 *
 * Parsing is a hand-rolled state machine: each in_*()/out_*() pair is
 * installed as the expat element handler for one nesting level, and
 * $tagStack remembers which level to return to.
 *
 * @ingroup SpecialPage
 */
class WikiImporter {
	var $mDebug = false;
	var $mSource = null;
	var $mPageCallback = null;
	var $mPageOutCallback = null;
	var $mRevisionCallback = null;
	var $mLogItemCallback = null;
	var $mUploadCallback = null;
	var $mTargetNamespace = null;
	var $mXmlNamespace = false;
	var $lastfield;
	var $tagStack = array();

	// Parser working state. Declared here so that every handler can read
	// them safely even when the dump presents elements in an unusual order.
	var $workTitle = null;
	var $pageTitle = null;
	var $origTitle = null;
	var $workRevision = null;
	var $workRevisionCount = 0;
	var $workSuccessCount = 0;
	var $uploadCount = 0;
	var $uploadSuccessCount = 0;
	var $appendfield = "";
	var $appenddata = "";

	/**
	 * @param $source Object: chunk source providing readChunk() and atEnd()
	 */
	function __construct( $source ) {
		$this->setRevisionCallback( array( $this, "importRevision" ) );
		$this->setUploadCallback( array( $this, "importUpload" ) );
		$this->setLogItemCallback( array( $this, "importLogItem" ) );
		$this->mSource = $source;
	}

	/**
	 * Log a structural problem in the dump. Despite the name nothing is
	 * thrown; the message only goes to the debug log and parsing continues.
	 */
	function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	/**
	 * Namespace declaration handler registered in doImport().
	 *
	 * expat passes ( parser, prefix, uri ) to this kind of handler, so the
	 * parameter named $prefix actually receives the namespace URI. When it
	 * is a mediawiki.org export namespace, remember a pattern that strips
	 * "<uri>:" from element names.
	 */
	function handleXmlNamespace ( $parser, $data, $prefix=false, $uri=false ) {
		if( preg_match( '/www\.mediawiki\.org/', $prefix ) ) {
			// preg_quote() so that '.', '-' etc. in the URI match literally
			$this->mXmlNamespace = '/^' . preg_quote( $prefix, '/' ) . ':/';
		}
	}

	/**
	 * @param $name String: element name as reported by the parser
	 * @return String: $name without the export namespace prefix, if one
	 *   has been seen
	 */
	function stripXmlNamespace($name) {
		if( $this->mXmlNamespace ) {
			return preg_replace( $this->mXmlNamespace, '', $name, 1 );
		}
		return $name;
	}

	# --------------

	/**
	 * Run the import over the whole source.
	 *
	 * @return Mixed: true when the source was parsed to the end, a
	 *   WikiErrorMsg if there is no source, or a WikiXmlError on a parse
	 *   failure
	 */
	function doImport() {
		if( empty( $this->mSource ) ) {
			return new WikiErrorMsg( "importnotext" );
		}

		$parser = xml_parser_create_ns( "UTF-8" );

		# case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		xml_set_object( $parser, $this );
		xml_set_element_handler( $parser, "in_start", "" );
		xml_set_start_namespace_decl_handler( $parser, "handleXmlNamespace" );

		$offset = 0; // for context extraction on error reporting
		do {
			$chunk = $this->mSource->readChunk();
			if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
				wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
				return new WikiXmlError( $parser, wfMsgHtml( 'import-parse-failure' ), $chunk, $offset );
			}
			$offset += strlen( $chunk );
		} while( $chunk !== false && !$this->mSource->atEnd() );
		xml_parser_free( $parser );

		return true;
	}

	/**
	 * Write to the debug log, but only while debug mode is switched on.
	 */
	function debug( $data ) {
		if( $this->mDebug ) {
			wfDebug( "IMPORT: $data\n" );
		}
	}

	/**
	 * Show a message to whoever is running the import: printed directly in
	 * command line mode, otherwise added to the page as an escaped list item.
	 */
	function notice( $data ) {
		global $wgCommandLineMode;
		if( $wgCommandLineMode ) {
			print "$data\n";
		} else {
			global $wgOut;
			$wgOut->addHTML( "<li>" . htmlspecialchars( $data ) . "</li>\n" );
		}
	}

	/**
	 * Set debug mode...
	 */
	function setDebug( $debug ) {
		$this->mDebug = $debug;
	}

	/**
	 * Sets the action to perform as each new page in the stream is reached.
	 * @param $callback callback
	 * @return callback the previously installed callback
	 */
	function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page in the stream is completed.
	 * Callback accepts the page title (as a Title object), a second object
	 * with the original title form (in case it's been overridden into a
	 * local namespace), and a count of revisions.
	 *
	 * @param $callback callback
	 * @return callback the previously installed callback
	 */
	function setPageOutCallback( $callback ) {
		$previous = $this->mPageOutCallback;
		$this->mPageOutCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page revision is reached.
	 * @param $callback callback
	 * @return callback the previously installed callback
	 */
	function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each file upload version is reached.
	 * @param $callback callback
	 * @return callback the previously installed callback
	 */
	function setUploadCallback( $callback ) {
		$previous = $this->mUploadCallback;
		$this->mUploadCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each log item reached.
	 * @param $callback callback
	 * @return callback the previously installed callback
	 */
	function setLogItemCallback( $callback ) {
		$previous = $this->mLogItemCallback;
		$this->mLogItemCallback = $callback;
		return $previous;
	}

	/**
	 * Set a target namespace to override the defaults
	 *
	 * @param $namespace Mixed: null to keep each page's own namespace, or
	 *   a non-negative namespace index
	 * @return Mixed: false if $namespace is negative; nothing otherwise
	 */
	function setTargetNamespace( $namespace ) {
		if( is_null( $namespace ) ) {
			// Don't override namespaces
			$this->mTargetNamespace = null;
		} elseif( $namespace >= 0 ) {
			// FIXME: Check for validity
			$this->mTargetNamespace = intval( $namespace );
		} else {
			return false;
		}
	}

	/**
	 * Default per-revision callback, performs the import.
	 * @param $revision WikiRevision
	 * @private
	 */
	function importRevision( $revision ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $revision, 'importOldRevision' ) );
	}

	/**
	 * Default per-log-item callback, performs the import.
	 * @param $rev WikiRevision
	 * @private
	 */
	function importLogItem( $rev ) {
		$dbw = wfGetDB( DB_MASTER );
		return $dbw->deadlockLoop( array( $rev, 'importLogItem' ) );
	}

	/**
	 * Dummy for now...
	 */
	function importUpload( $revision ) {
		//$dbw = wfGetDB( DB_MASTER );
		//return $dbw->deadlockLoop( array( $revision, 'importUpload' ) );
		return false;
	}

	/**
	 * Alternate per-revision callback, for debugging.
	 * @param $revision WikiRevision
	 * @private
	 */
	function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notify the callback function when a new <page> is reached.
	 * @param $title Mixed: passed through unchanged; out_append() currently
	 *   hands over the raw title text
	 * @private
	 */
	function pageCallback( $title ) {
		if( is_callable( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}

	/**
	 * Notify the callback function when a </page> is closed.
	 * @param $title Title
	 * @param $origTitle Title
	 * @param $revisionCount int
	 * @param $successCount Int: number of revisions for which callback returned true
	 * @private
	 */
	function pageOutCallback( $title, $origTitle, $revisionCount, $successCount ) {
		if( is_callable( $this->mPageOutCallback ) ) {
			call_user_func( $this->mPageOutCallback, $title, $origTitle,
				$revisionCount, $successCount );
		}
	}

	# XML parser callbacks from here out -- beware!
	function donothing( $parser, $x, $y="" ) {
		#$this->debug( "donothing" );
	}

	/** Start handler for the document root; expects <mediawiki>. */
	function in_start( $parser, $name, $attribs ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "in_start $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXmlError( "Expected <mediawiki>, got <$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
	}

	/** Start handler directly below <mediawiki>: siteinfo, page or logitem. */
	function in_mediawiki( $parser, $name, $attribs ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "in_mediawiki $name" );
		if( $name == 'siteinfo' ) {
			xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
		} elseif( $name == 'page' ) {
			$this->push( $name );
			$this->workRevisionCount = 0;
			$this->workSuccessCount = 0;
			$this->uploadCount = 0;
			$this->uploadSuccessCount = 0;
			xml_set_element_handler( $parser, "in_page", "out_page" );
		} elseif( $name == 'logitem' ) {
			$this->push( $name );
			$this->workRevision = new WikiRevision;
			xml_set_element_handler( $parser, "in_logitem", "out_logitem" );
		} else {
			return $this->throwXmlError( "Expected <page>, got <$name>" );
		}
	}

	/** End handler for </mediawiki>; everything after it is ignored. */
	function out_mediawiki( $parser, $name ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "out_mediawiki $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXmlError( "Expected </mediawiki>, got </$name>" );
		}
		xml_set_element_handler( $parser, "donothing", "donothing" );
	}

	/** Start handler inside <siteinfo>; known children are accepted and ignored. */
	function in_siteinfo( $parser, $name, $attribs ) {
		// no-ops for now
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "in_siteinfo $name" );
		switch( $name ) {
		case "sitename":
		case "base":
		case "generator":
		case "case":
		case "namespaces":
		case "namespace":
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in <siteinfo>." );
		}
	}

	/** End handler inside <siteinfo>; only </siteinfo> itself changes state. */
	function out_siteinfo( $parser, $name ) {
		$name = $this->stripXmlNamespace( $name );
		if( $name == "siteinfo" ) {
			xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
		}
	}

	/** Start handler inside <page>. */
	function in_page( $parser, $name, $attribs ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "in_page $name" );
		switch( $name ) {
		case "id":
		case "title":
		case "restrictions":
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "revision":
			$this->push( "revision" );
			if( is_object( $this->pageTitle ) ) {
				$this->workRevision = new WikiRevision;
				$this->workRevision->setTitle( $this->pageTitle );
				$this->workRevisionCount++;
			} else {
				// Skipping items due to invalid page title
				$this->workRevision = null;
			}
			xml_set_element_handler( $parser, "in_revision", "out_revision" );
			break;
		case "upload":
			$this->push( "upload" );
			if( is_object( $this->pageTitle ) ) {
				$this->workRevision = new WikiRevision;
				$this->workRevision->setTitle( $this->pageTitle );
				$this->uploadCount++;
			} else {
				// Skipping items due to invalid page title
				$this->workRevision = null;
			}
			xml_set_element_handler( $parser, "in_upload", "out_upload" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in a <page>." );
		}
	}

	/** End handler for </page>: report the page and reset per-page state. */
	function out_page( $parser, $name ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "out_page $name" );
		$this->pop();
		if( $name != "page" ) {
			return $this->throwXmlError( "Expected </page>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		$this->pageOutCallback( $this->pageTitle, $this->origTitle,
			$this->workRevisionCount, $this->workSuccessCount );

		$this->workTitle = null;
		$this->workRevision = null;
		$this->workRevisionCount = 0;
		$this->workSuccessCount = 0;
		$this->pageTitle = null;
		$this->origTitle = null;
	}

	/** Start handler used while collecting text; child elements are errors. */
	function in_nothing( $parser, $name, $attribs ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "in_nothing $name" );
		return $this->throwXmlError( "No child elements allowed here; got <$name>" );
	}

	/** Character data handler: accumulate the text of the current field. */
	function char_append( $parser, $data ) {
		$this->debug( "char_append '$data'" );
		$this->appenddata .= $data;
	}

	/**
	 * End handler for a text field: store the collected text where it
	 * belongs, then hand control back to the enclosing element's handlers.
	 */
	function out_append( $parser, $name ) {
		// Fields whose text goes unchanged into one WikiRevision setter
		static $setters = array(
			'text' => 'setText',
			'username' => 'setUsername',
			'ip' => 'setUserIP',
			'timestamp' => 'setTimestamp',
			'comment' => 'setComment',
			'type' => 'setType',
			'action' => 'setAction',
			'params' => 'setParams',
			'filename' => 'setFilename',
			'src' => 'setSrc',
		);

		$name = $this->stripXmlNamespace( $name );
		$this->debug( "out_append $name" );
		if( $name != $this->appendfield ) {
			return $this->throwXmlError( "Expected </{$this->appendfield}>, got </$name>" );
		}

		$field = $this->appendfield;
		if( $field == 'title' ) {
			$this->resolvePageTitle();
		} elseif( $field == 'id' ) {
			// Page and contributor ids are read but deliberately not kept
			$parent = $this->parentTag();
			if( ( $parent == 'revision' || $parent == 'logitem' ) && $this->workRevision ) {
				$this->workRevision->setID( $this->appenddata );
			}
		} elseif( $field == 'minor' ) {
			// Presence of the element is what matters, not its content
			if( $this->workRevision ) {
				$this->workRevision->setMinor( true );
			}
		} elseif( $field == 'logtitle' ) {
			if( $this->workRevision ) {
				$this->workRevision->setTitle( Title::newFromText( $this->appenddata ) );
			}
		} elseif( $field == 'size' ) {
			if( $this->workRevision ) {
				$this->workRevision->setSize( intval( $this->appenddata ) );
			}
		} elseif( isset( $setters[$field] ) ) {
			if( $this->workRevision ) {
				$setter = $setters[$field];
				$this->workRevision->$setter( $this->appenddata );
			}
		} else {
			$this->debug( "Bad append: {$this->appendfield}" );
		}

		$this->appendfield = "";
		$this->appenddata = "";

		$parent = $this->parentTag();
		xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
		xml_set_character_data_handler( $parser, "donothing" );
	}

	/**
	 * Turn the collected <title> text into the page's working titles and
	 * announce the page, or mark it as skipped when the title is unusable.
	 */
	private function resolvePageTitle() {
		$this->workTitle = $this->appenddata;
		$this->origTitle = Title::newFromText( $this->workTitle );
		if( !is_null( $this->mTargetNamespace ) && !is_null( $this->origTitle ) ) {
			$this->pageTitle = Title::makeTitle( $this->mTargetNamespace,
				$this->origTitle->getDBkey() );
		} else {
			$this->pageTitle = Title::newFromText( $this->workTitle );
		}

		if( is_null( $this->pageTitle ) ) {
			// Invalid page title? Ignore the page
			$this->notice( "Skipping invalid page title '$this->workTitle'" );
		} elseif( $this->pageTitle->getInterwiki() != '' ) {
			$this->notice( "Skipping interwiki page title '$this->workTitle'" );
			$this->pageTitle = null;
		} else {
			$this->pageCallback( $this->workTitle );
		}
	}

	/** Start handler inside <revision>. */
	function in_revision( $parser, $name, $attribs ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "in_revision $name" );
		switch( $name ) {
		case "id":
		case "timestamp":
		case "comment":
		case "minor":
		case "text":
			$this->appendfield = $name;
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			$this->push( "contributor" );
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in a <revision>." );
		}
	}

	/** End handler for </revision>: pass the finished revision to its callback. */
	function out_revision( $parser, $name ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "out_revision $name" );
		$this->pop();
		if( $name != "revision" ) {
			return $this->throwXmlError( "Expected </revision>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );

		if( $this->workRevision ) {
			$ok = call_user_func_array( $this->mRevisionCallback,
				array( $this->workRevision, $this ) );
			if( $ok ) {
				$this->workSuccessCount++;
			}
		}
	}

	/** Start handler inside <logitem>. */
	function in_logitem( $parser, $name, $attribs ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "in_logitem $name" );
		switch( $name ) {
		case "id":
		case "timestamp":
		case "comment":
		case "type":
		case "action":
		case "logtitle":
		case "params":
			$this->appendfield = $name;
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			$this->push( "contributor" );
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in a <logitem>." );
		}
	}

	/** End handler for </logitem>: pass the finished item to its callback. */
	function out_logitem( $parser, $name ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "out_logitem $name" );
		$this->pop();
		if( $name != "logitem" ) {
			return $this->throwXmlError( "Expected </logitem>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		if( $this->workRevision ) {
			$ok = call_user_func_array( $this->mLogItemCallback,
				array( $this->workRevision, $this ) );
			if( $ok ) {
				$this->workSuccessCount++;
			}
		}
	}

	/** Start handler inside <upload>. */
	function in_upload( $parser, $name, $attribs ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "in_upload $name" );
		switch( $name ) {
		case "timestamp":
		case "comment":
		case "text":
		case "filename":
		case "src":
		case "size":
			$this->appendfield = $name;
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			$this->push( "contributor" );
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in an <upload>." );
		}
	}

	/** End handler for </upload>: pass the finished upload to its callback. */
	function out_upload( $parser, $name ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "out_upload $name" );
		$this->pop();
		if( $name != "upload" ) {
			return $this->throwXmlError( "Expected </upload>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );

		if( $this->workRevision ) {
			$ok = call_user_func_array( $this->mUploadCallback,
				array( $this->workRevision, $this ) );
			if( $ok ) {
				// Same counter that in_mediawiki() resets for each page
				$this->uploadSuccessCount++;
			}
		}
	}

	/** Start handler inside <contributor>. */
	function in_contributor( $parser, $name, $attribs ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "in_contributor $name" );
		switch( $name ) {
		case "username":
		case "ip":
		case "id":
			$this->appendfield = $name;
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		default:
			$this->throwXmlError( "Invalid tag <$name> in <contributor>" );
		}
	}

	/** End handler for </contributor>: return to the enclosing element. */
	function out_contributor( $parser, $name ) {
		$name = $this->stripXmlNamespace( $name );
		$this->debug( "out_contributor $name" );
		$this->pop();
		if( $name != "contributor" ) {
			return $this->throwXmlError( "Expected </contributor>, got </$name>" );
		}
		$parent = $this->parentTag();
		xml_set_element_handler( $parser, "in_$parent", "out_$parent" );
	}

	/** Remember that we have descended into element $name. */
	private function push( $name ) {
		array_push( $this->tagStack, $name );
		$this->debug( "PUSH $name" );
	}

	/** Leave the innermost element and return its name. */
	private function pop() {
		$name = array_pop( $this->tagStack );
		$this->debug( "POP $name" );
		return $name;
	}

	/**
	 * @return Mixed: name of the innermost open element that was pushed,
	 *   or null when the stack is empty
	 */
	private function parentTag() {
		$depth = count( $this->tagStack );
		$name = $depth > 0 ? $this->tagStack[$depth - 1] : null;
		$this->debug( "PARENT $name" );
		return $name;
	}

}
1023
/**
 * Import source backed by a string held in memory. The whole string is
 * handed out as a single chunk; after that the source reports its end.
 *
 * @ingroup SpecialPage
 */
class ImportStringSource {
	/** The complete text to hand out */
	var $mString = '';
	/** Whether the text has already been handed out */
	var $mRead = false;

	/**
	 * @param $string String: the full text to import
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return Boolean: true once readChunk() has returned the string
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return Mixed: the whole string on the first call, false afterwards
	 */
	function readChunk() {
		if( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}
1047
/**
 * Import source backed by an open stream handle, read in 32 KiB chunks.
 * The static constructors open such a stream from a local file, an
 * uploaded file, a URL or another wiki's Special:Export page.
 *
 * @ingroup SpecialPage
 */
class ImportStreamSource {
	/** Open stream resource the chunks are read from */
	var $mHandle = null;

	/**
	 * @param $handle Resource: readable stream
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return Boolean: result of feof() on the stream
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return Mixed: up to 32768 bytes from the stream, as fread() returns them
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * @param $filename String: path of the file to read
	 * @return Mixed: ImportStreamSource, or WikiErrorMsg if the file cannot be opened
	 */
	static function newFromFile( $filename ) {
		// Failure is reported through the return value, hence the quiet open
		$file = @fopen( $filename, 'rt' );
		if( !$file ) {
			return new WikiErrorMsg( "importcantopen" );
		}
		return new ImportStreamSource( $file );
	}

	/**
	 * @param $fieldname String: name of the file field in the submitted form
	 * @return Mixed: ImportStreamSource, or WikiErrorMsg describing why the
	 *   upload cannot be used
	 */
	static function newFromUpload( $fieldname = "xmlimport" ) {
		// Look the entry up directly: taking a reference to a missing key
		// would silently create it in $_FILES.
		if( !isset( $_FILES[$fieldname] ) || empty( $_FILES[$fieldname]['name'] ) ) {
			return new WikiErrorMsg( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		if( !empty( $upload['error'] ) ) {
			switch( $upload['error'] ) {
			case UPLOAD_ERR_INI_SIZE: # exceeds the upload_max_filesize directive in php.ini
			case UPLOAD_ERR_FORM_SIZE: # exceeds the MAX_FILE_SIZE given in the HTML form
				return new WikiErrorMsg( 'importuploaderrorsize' );
			case UPLOAD_ERR_PARTIAL: # the file was only partially uploaded
				return new WikiErrorMsg( 'importuploaderrorpartial' );
			case UPLOAD_ERR_NO_TMP_DIR: # missing a temporary folder
				return new WikiErrorMsg( 'importuploaderrortemp' );
			# Any other code falls through to the is_uploaded_file() check below
			}
		}

		$fname = isset( $upload['tmp_name'] ) ? $upload['tmp_name'] : '';
		if( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		}
		return new WikiErrorMsg( 'importnofile' );
	}

	/**
	 * Fetch $url and wrap the response body in a temporary stream.
	 *
	 * @param $url String
	 * @param $method String: HTTP method handed to Http::request()
	 * @return Mixed: ImportStreamSource, or WikiErrorMsg if the fetch or the
	 *   temporary file fails
	 */
	static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url );
		if( $data === false ) {
			return new WikiErrorMsg( 'importcantopen' );
		}

		$file = tmpfile();
		if( $file === false ) {
			// No temporary file available; nothing to read the data back from
			return new WikiErrorMsg( 'importcantopen' );
		}
		fwrite( $file, $data );
		fflush( $file );
		fseek( $file, 0 );
		return new ImportStreamSource( $file );
	}

	/**
	 * Build a source from Special:Export on another wiki.
	 *
	 * @param $interwiki String: interwiki prefix of the source wiki
	 * @param $page String: page name to export
	 * @param $history Boolean: request the full history (history=1)
	 * @return Mixed: ImportStreamSource or WikiErrorMsg
	 */
	public static function newFromInterwiki( $interwiki, $page, $history=false ) {
		if( $page == '' ) {
			return new WikiErrorMsg( 'import-noarticle' );
		}
		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if( is_null( $link ) || $link->getInterwiki() == '' ) {
			return new WikiErrorMsg( 'importbadinterwiki' );
		}

		$params = $history ? 'history=1' : '';
		$url = $link->getFullUrl( $params );
		# For interwikis, use POST to avoid redirects.
		return ImportStreamSource::newFromURL( $url, "POST" );
	}
}