Bug 3491: typo
[lhc/web/wiklou.git] / includes / SpecialImport.php
1 <?php
2 /**
3 * MediaWiki page data importer
4 * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5 * http://www.mediawiki.org/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @package MediaWiki
23 * @subpackage SpecialPage
24 */
25
26 /** */
27 require_once( 'WikiError.php' );
28
/**
 * Entry point for Special:Import.
 *
 * When the request is a POST with action=submit, an import source is built
 * from either an uploaded XML file ("upload") or an export fetched through an
 * interwiki prefix ("interwiki"), the import is run, and the outcome is
 * written to the page. Afterwards the upload form is shown to users holding
 * the 'importupload' right, and the interwiki form is shown whenever
 * $wgImportSources is non-empty.
 *
 * @param string $page Not used by this function
 */
function wfSpecialImport( $page = '' ) {
	global $wgUser, $wgOut, $wgRequest, $wgTitle;
	global $wgImportSources;

	###
	#	$wgOut->addWikiText( "Special:Import is not ready for this beta release, sorry." );
	#	return;
	###

	if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') {
		switch( $wgRequest->getVal( "source" ) ) {
		case "upload":
			if( $wgUser->isAllowed( 'importupload' ) ) {
				$source = ImportStreamSource::newFromUpload( "xmlimport" );
			} else {
				return $wgOut->permissionRequired( 'importupload' );
			}
			break;
		case "interwiki":
			# The form below only offers prefixes listed in $wgImportSources,
			# but the posted value is client-controlled: enforce the same
			# allow-list here so that no other interwiki prefix can be used.
			$interwiki = $wgRequest->getVal( "interwiki" );
			if( is_array( $wgImportSources )
				&& in_array( $interwiki, $wgImportSources, true ) ) {
				$source = ImportStreamSource::newFromInterwiki(
					$interwiki,
					$wgRequest->getText( "frompage" ) );
			} else {
				$source = new WikiError( "Interwiki prefix is not an allowed import source" );
			}
			break;
		default:
			$source = new WikiError( "Unknown import source type" );
		}

		if( WikiError::isError( $source ) ) {
			$wgOut->addWikiText( wfEscapeWikiText( $source->getMessage() ) );
		} else {
			$importer = new WikiImporter( $source );
			$result = $importer->doImport();
			if( WikiError::isError( $result ) ) {
				$wgOut->addWikiText( wfMsg( "importfailed",
					wfEscapeWikiText( $result->getMessage() ) ) );
			} else {
				# Success!
				$wgOut->addWikiText( wfMsg( "importsuccess" ) );
			}
		}
	}

	$action = $wgTitle->escapeLocalUrl( 'action=submit' );

	if( $wgUser->isAllowed( 'importupload' ) ) {
		$wgOut->addWikiText( wfMsg( "importtext" ) );
		# The button label goes into a double-quoted attribute: wfMsgHtml()
		# presumably leaves apostrophes unescaped (TODO confirm), which would
		# terminate a single-quoted attribute early.
		$wgOut->addHTML( "
<fieldset>
	<legend>" . wfMsgHtml('upload') . "</legend>
	<form enctype='multipart/form-data' method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='upload' />
		<input type='hidden' name='MAX_FILE_SIZE' value='2000000' />
		<input type='file' name='xmlimport' value='' size='30' />
		<input type='submit' value=\"" . wfMsgHtml( "uploadbtn" ) . "\" />
	</form>
</fieldset>
" );
	} else {
		if( empty( $wgImportSources ) ) {
			# Neither upload nor interwiki import is available to this user
			$wgOut->addWikiText( wfMsg( 'importnosources' ) );
		}
	}

	if( !empty( $wgImportSources ) ) {
		$wgOut->addHTML( "
<fieldset>
	<legend>" . wfMsgHtml('importinterwiki') . "</legend>
	<form method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='interwiki' />
		<select name='interwiki'>
" );
		foreach( $wgImportSources as $interwiki ) {
			$iw = htmlspecialchars( $interwiki );
			$wgOut->addHTML( "<option value=\"$iw\">$iw</option>\n" );
		}
		$wgOut->addHTML( "
		</select>
		<input name='frompage' />
		<input type='submit' />
	</form>
</fieldset>
" );
	}
}
118
/**
 * One page revision read from an import stream: a plain holder for the
 * revision's fields plus the logic that writes it into the database.
 *
 * @package MediaWiki
 * @subpackage SpecialPage
 */
class WikiRevision {
	var $title = NULL;
	var $timestamp = "20010115000000";
	var $user = 0;
	var $user_text = "";
	var $text = "";
	var $comment = "";
	var $minor = false;

	/**
	 * Sets the page this revision belongs to.
	 * @param string $text Page title text, handed to Title::newFromText()
	 */
	function setTitle( $text ) {
		$this->title = Title::newFromText( $text );
	}

	/**
	 * Stores the timestamp with the punctuation stripped, turning
	 * "2003-08-05T18:30:02Z" into "20030805183002". Input that does not
	 * match that layout is stored unchanged.
	 * @param string $ts
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = preg_replace( '/^(....)-(..)-(..)T(..):(..):(..)Z$/', '$1$2$3$4$5$6', $ts );
	}

	/**
	 * @param string $user Contributor name; stored in the same field as setUserIP()
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * @param string $ip Contributor IP; stored in the same field as setUsername()
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param string $text Revision text
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param string $text Edit comment
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @param mixed $minor Cast to bool
	 */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/**
	 * @return mixed Whatever Title::newFromText() produced in setTitle(), or NULL if never set
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return string
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string Contributor name or IP, whichever was set last
	 */
	function getUser() {
		return $this->user_text;
	}

	/**
	 * @return string
	 */
	function getText() {
		return $this->text;
	}

	/**
	 * @return string
	 */
	function getComment() {
		return $this->comment;
	}

	/**
	 * @return bool
	 */
	function getMinor() {
		return $this->minor;
	}

	/**
	 * Writes this revision into the database on the master connection,
	 * creating the page first if it does not exist yet.
	 *
	 * @return bool true once the revision has been inserted; false if the
	 *   revision has no title object, in which case nothing is written
	 */
	function importOldRevision() {
		# $this->title stays NULL until setTitle() stores an object in it.
		# Building an Article from a non-object would fail on the first
		# method call, so refuse before touching the database.
		if( !is_object( $this->title ) ) {
			return false;
		}

		$dbw =& wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
		} else {
			# No user object for this name: keep the imported text, id 0
			$userId = 0;
			$userText = $this->getUser();
		}

		$article = new Article( $this->title );
		$pageId = $article->getId();
		if( $pageId == 0 ) {
			# must create the page...
			$pageId = $article->insertOn( $dbw );
		}

		# FIXME: Check for exact conflicts
		# FIXME: Use original rev_id optionally
		# FIXME: blah blah blah

		#if( $numrows > 0 ) {
		#	return wfMsg( "importhistoryconflict" );
		#}

		# Insert the row
		$revision = new Revision( array(
			'page'       => $pageId,
			'text'       => $this->getText(),
			'comment'    => $this->getComment(),
			'user'       => $userId,
			'user_text'  => $userText,
			'timestamp'  => $this->timestamp,
			'minor_edit' => $this->minor,
			) );
		$revision->insertOn( $dbw );
		$article->updateIfNewerOn( $dbw, $revision );

		return true;
	}

}
232
/**
 * Importer for MediaWiki XML export streams.
 *
 * Reads chunks from a source object (anything offering readChunk() and
 * atEnd()) and feeds them to an expat parser. The parser's element handlers
 * are swapped as the document is descended, so each in_* and out_* method
 * below only sees the children of one kind of element. Every completed
 * <revision> is handed to the revision callback, which by default writes it
 * to the database.
 *
 * @package MediaWiki
 * @subpackage SpecialPage
 */
class WikiImporter {
	var $mSource = null;
	var $mPageCallback = null;
	var $mRevisionCallback = null;
	var $lastfield;

	# Working state shared between the XML handlers
	var $appendfield = "";   # name of the element whose text is being collected
	var $appenddata = "";    # text collected so far for that element
	var $parenttag = "";     # element whose handlers are restored by out_append()
	var $workTitle = null;   # title text of the <page> being read
	var $workRevision = null; # WikiRevision being filled in

	/**
	 * @param object $source Chunk source providing readChunk() and atEnd()
	 */
	function WikiImporter( $source ) {
		$this->setRevisionCallback( array( &$this, "importRevision" ) );
		$this->mSource = $source;
	}

	/**
	 * Reports a structural problem in the XML. This only logs; it does not
	 * stop the parser.
	 * @param string $err
	 */
	function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
		wfDebug( "WikiImporter XML error: $err\n" );
	}

	# --------------

	/**
	 * Runs the import over the whole source.
	 *
	 * @return mixed true when the stream was parsed to the end; a
	 *   WikiErrorMsg if no source was given; a WikiXmlError if expat
	 *   rejected a chunk
	 */
	function doImport() {
		if( empty( $this->mSource ) ) {
			return new WikiErrorMsg( "importnotext" );
		}

		$parser = xml_parser_create( "UTF-8" );

		# case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		# xml_set_object() already declares its object parameter by
		# reference; a call-time '&' here is a fatal error on PHP 5.4+.
		xml_set_object( $parser, $this );
		xml_set_element_handler( $parser, "in_start", "" );

		$offset = 0; // for context extraction on error reporting
		do {
			$chunk = $this->mSource->readChunk();
			if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
				wfDebug( "WikiImporter::doImport encountered XML parsing error\n" );
				return new WikiXmlError( $parser, 'XML import parse failure', $chunk, $offset );
			}
			$offset += strlen( $chunk );
		} while( $chunk !== false && !$this->mSource->atEnd() );
		xml_parser_free( $parser );

		return true;
	}

	/**
	 * Debug hook; currently a no-op (the wfDebug call is commented out).
	 * @param string $data
	 */
	function debug( $data ) {
		#wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Emits a progress line: printed in command-line mode, otherwise added
	 * to the page output as a list item.
	 * @param string $data Inserted into the HTML as-is
	 */
	function notice( $data ) {
		global $wgCommandLineMode;
		if( $wgCommandLineMode ) {
			print "$data\n";
		} else {
			global $wgOut;
			$wgOut->addHTML( "<li>$data</li>\n" );
		}
	}

	/**
	 * Sets the action to perform as each new page in the stream is reached.
	 * @param callable $callback
	 * @return callable The callback that was set before
	 */
	function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page revision is reached.
	 * @param callable $callback
	 * @return callable The callback that was set before
	 */
	function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Default per-revision callback, performs the import.
	 * @param WikiRevision $revision
	 * @access private
	 */
	function importRevision( &$revision ) {
		$dbw =& wfGetDB( DB_MASTER );
		$dbw->deadlockLoop( array( &$revision, 'importOldRevision' ) );
	}

	/**
	 * Alternate per-revision callback, for debugging.
	 * @param WikiRevision $revision
	 * @access private
	 */
	function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notify the callback function when a new <page> is reached.
	 * @param string $title Title text as read from the <title> element
	 * @access private
	 */
	function pageCallback( $title ) {
		if( is_callable( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}


	# XML parser callbacks from here out -- beware!

	/**
	 * Handler that ignores whatever it is given. Usable both as an element
	 * handler and as a character data handler.
	 */
	function donothing( $parser, $x, $y="" ) {
		#$this->debug( "donothing" );
	}

	/**
	 * Start handler for the document root; expects <mediawiki>.
	 */
	function in_start( $parser, $name, $attribs ) {
		$this->debug( "in_start $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXMLerror( "Expected <mediawiki>, got <$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
	}

	/**
	 * Start handler for children of <mediawiki>: <siteinfo> or <page>.
	 */
	function in_mediawiki( $parser, $name, $attribs ) {
		$this->debug( "in_mediawiki $name" );
		if( $name == 'siteinfo' ) {
			xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
		} elseif( $name == 'page' ) {
			xml_set_element_handler( $parser, "in_page", "out_page" );
		} else {
			return $this->throwXMLerror( "Expected <page>, got <$name>" );
		}
	}

	/**
	 * End handler for <mediawiki>; everything after it is ignored.
	 */
	function out_mediawiki( $parser, $name ) {
		$this->debug( "out_mediawiki $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXMLerror( "Expected </mediawiki>, got </$name>" );
		}
		xml_set_element_handler( $parser, "donothing", "donothing" );
	}


	/**
	 * Start handler inside <siteinfo>. The known elements are accepted and
	 * their content is discarded.
	 */
	function in_siteinfo( $parser, $name, $attribs ) {
		// no-ops for now
		$this->debug( "in_siteinfo $name" );
		switch( $name ) {
		case "sitename":
		case "base":
		case "generator":
		case "case":
		case "namespaces":
		case "namespace":
			break;
		default:
			return $this->throwXMLerror( "Element <$name> not allowed in <siteinfo>." );
		}
	}

	/**
	 * End handler inside <siteinfo>; returns to the <mediawiki> handlers
	 * once </siteinfo> itself is seen.
	 */
	function out_siteinfo( $parser, $name ) {
		if( $name == "siteinfo" ) {
			xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
		}
	}


	/**
	 * Start handler for children of <page>. Text fields are collected via
	 * char_append()/out_append(); <revision> starts a fresh WikiRevision
	 * carrying the current page title.
	 */
	function in_page( $parser, $name, $attribs ) {
		$this->debug( "in_page $name" );
		switch( $name ) {
		case "id":
		case "title":
		case "restrictions":
			$this->appendfield = $name;
			$this->appenddata = "";
			$this->parenttag = "page";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "revision":
			$this->workRevision = new WikiRevision;
			$this->workRevision->setTitle( $this->workTitle );
			xml_set_element_handler( $parser, "in_revision", "out_revision" );
			break;
		default:
			return $this->throwXMLerror( "Element <$name> not allowed in a <page>." );
		}
	}

	/**
	 * End handler for <page>; clears the per-page working state.
	 */
	function out_page( $parser, $name ) {
		$this->debug( "out_page $name" );
		if( $name != "page" ) {
			return $this->throwXMLerror( "Expected </page>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		$this->workTitle = NULL;
		$this->workRevision = NULL;
	}

	/**
	 * Start handler used while a text field is being collected, where no
	 * child elements are expected.
	 */
	function in_nothing( $parser, $name, $attribs ) {
		$this->debug( "in_nothing $name" );
		return $this->throwXMLerror( "No child elements allowed here; got <$name>" );
	}

	/**
	 * Character data handler: accumulates the text of the current field.
	 */
	function char_append( $parser, $data ) {
		$this->debug( "char_append '$data'" );
		$this->appenddata .= $data;
	}

	/**
	 * End handler for a text field: restores the parent element's handlers
	 * and stores the collected text where it belongs.
	 */
	function out_append( $parser, $name ) {
		$this->debug( "out_append $name" );
		if( $name != $this->appendfield ) {
			return $this->throwXMLerror( "Expected </{$this->appendfield}>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" );
		xml_set_character_data_handler( $parser, "donothing" );

		switch( $this->appendfield ) {
		case "title":
			$this->workTitle = $this->appenddata;
			$this->pageCallback( $this->workTitle );
			break;
		case "text":
			$this->workRevision->setText( $this->appenddata );
			break;
		case "username":
			$this->workRevision->setUsername( $this->appenddata );
			break;
		case "ip":
			$this->workRevision->setUserIP( $this->appenddata );
			break;
		case "timestamp":
			$this->workRevision->setTimestamp( $this->appenddata );
			break;
		case "comment":
			$this->workRevision->setComment( $this->appenddata );
			break;
		case "minor":
			# Presence of the element is what matters, not its content
			$this->workRevision->setMinor( true );
			break;
		default:
			# Fields such as "id" and "restrictions" are read but not stored
			$this->debug( "Bad append: {$this->appendfield}" );
		}
		$this->appendfield = "";
		$this->appenddata = "";
	}

	/**
	 * Start handler for children of <revision>.
	 */
	function in_revision( $parser, $name, $attribs ) {
		$this->debug( "in_revision $name" );
		switch( $name ) {
		case "id":
		case "timestamp":
		case "comment":
		case "minor":
		case "text":
			$this->parenttag = "revision";
			$this->appendfield = $name;
			# Start from an empty buffer, as in_page() does
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXMLerror( "Element <$name> not allowed in a <revision>." );
		}
	}

	/**
	 * End handler for <revision>: passes the finished revision (and this
	 * importer) to the revision callback. Any non-empty return value from
	 * the callback is added to the page output as a list item.
	 */
	function out_revision( $parser, $name ) {
		$this->debug( "out_revision $name" );
		if( $name != "revision" ) {
			return $this->throwXMLerror( "Expected </revision>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );

		# References are passed through an argument array: a call-time '&'
		# in the argument list is a fatal error on PHP 5.4+.
		$out = call_user_func_array( $this->mRevisionCallback,
			array( &$this->workRevision, &$this ) );
		if( !empty( $out ) ) {
			global $wgOut;
			$wgOut->addHTML( "<li>" . $out . "</li>\n" );
		}
	}

	/**
	 * Start handler for children of <contributor>.
	 */
	function in_contributor( $parser, $name, $attribs ) {
		$this->debug( "in_contributor $name" );
		switch( $name ) {
		case "username":
		case "ip":
		case "id":
			$this->parenttag = "contributor";
			$this->appendfield = $name;
			# Start from an empty buffer, as in_page() does
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		default:
			return $this->throwXMLerror( "Invalid tag <$name> in <contributor>" );
		}
	}

	/**
	 * End handler for <contributor>; returns to the <revision> handlers.
	 */
	function out_contributor( $parser, $name ) {
		$this->debug( "out_contributor $name" );
		if( $name != "contributor" ) {
			return $this->throwXMLerror( "Expected </contributor>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_revision", "out_revision" );
	}

}
555
/**
 * Import source backed by a string held in memory. The whole string is
 * delivered by the first readChunk() call; after that the source is at its
 * end.
 * @package MediaWiki
 */
class ImportStringSource {
	/**
	 * @param string $string Complete text to serve
	 */
	function ImportStringSource( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return bool true once the string has been handed out
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return mixed The full string on the first call, false afterwards
	 */
	function readChunk() {
		# Already consumed: nothing more to give
		if( $this->atEnd() ) {
			return false;
		}

		$this->mRead = true;
		return $this->mString;
	}
}
576
/**
 * Import source backed by an open stream handle, read in 32768-byte chunks.
 * The newFrom*() functions are factories; each returns either a new
 * ImportStreamSource or a WikiError / WikiErrorMsg object.
 * @package MediaWiki
 */
class ImportStreamSource {
	/**
	 * @param resource $handle Open, readable stream
	 */
	function ImportStreamSource( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return bool Result of feof() on the stream
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return mixed Up to 32768 bytes from the stream, as returned by fread()
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Opens a file (or anything else fopen() accepts) as an import source.
	 * @param string $filename
	 * @return mixed ImportStreamSource, or a WikiError if it could not be opened
	 */
	function newFromFile( $filename ) {
		$file = @fopen( $filename, 'rt' );
		if( !$file ) {
			return new WikiError( "Couldn't open import file" );
		}
		return new ImportStreamSource( $file );
	}

	/**
	 * Builds a source from a file uploaded in the current request.
	 * @param string $fieldname Name of the upload form field
	 * @return mixed ImportStreamSource, or a WikiError / WikiErrorMsg on failure
	 */
	function newFromUpload( $fieldname = "xmlimport" ) {
		# Test the entry directly: taking a reference to a missing key would
		# add a NULL entry to $_FILES as a side effect.
		if( !isset( $_FILES[$fieldname] ) ) {
			return new WikiErrorMsg( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		if( !empty( $upload['error'] ) ) {
			return new WikiErrorMsg( 'importuploaderror', $upload['error'] );
		}
		$fname = $upload['tmp_name'];
		if( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		} else {
			return new WikiErrorMsg( 'importnofile' );
		}
	}

	/**
	 * Builds a source that reads from a URL through the fopen wrappers.
	 * @param string $url
	 * @return mixed ImportStreamSource, or a WikiError if it could not be opened
	 */
	function newFromURL( $url ) {
		# fopen-wrappers are normally turned off for security.
		# Enable them only for this call, then put back whatever was
		# configured before instead of forcing the setting off.
		$previous = ini_set( "allow_url_fopen", true );
		$ret = ImportStreamSource::newFromFile( $url );
		if( $previous !== false ) {
			# ini_set() returned the old value, so the change took effect
			ini_set( "allow_url_fopen", $previous );
		}
		return $ret;
	}

	/**
	 * Builds a source that fetches Special:Export/<page> from another wiki.
	 * @param string $interwiki Interwiki prefix
	 * @param string $page Page name on the remote wiki
	 * @return mixed ImportStreamSource, or a WikiError on failure
	 */
	function newFromInterwiki( $interwiki, $page ) {
		$base = Title::getInterwikiLink( $interwiki );
		if( empty( $base ) ) {
			return new WikiError( 'Bad interwiki link' );
		} else {
			$import = wfUrlencode( "Special:Export/$page" );
			$url = str_replace( "$1", $import, $base );
			return ImportStreamSource::newFromURL( $url );
		}
	}
}
637
638
639 ?>