Fix #153 path by <stehan dot walter at epfl dot ch>
[lhc/web/wiklou.git] / includes / SpecialImport.php
1 <?php
2 /**
3 * MediaWiki page data importer
4 * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5 * http://www.mediawiki.org/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @package MediaWiki
23 * @subpackage SpecialPage
24 */
25
26 /** */
27 require_once( 'WikiError.php' );
28
/**
 * Entry point for Special:Import.
 *
 * When the request is a POST with action=submit, an import source is built
 * from either an uploaded XML file ("upload") or another wiki's export
 * ("interwiki"), fed through WikiImporter, and the outcome is written to the
 * page. In every case that does not return early, the input forms are then
 * rendered: the upload form for users with the 'importupload' right, and the
 * interwiki form when $wgImportSources is non-empty.
 *
 * @param string $page Not used by this function.
 * @return mixed Whatever $wgOut->permissionRequired() returns when an upload
 *               is attempted without the 'importupload' right; otherwise
 *               nothing.
 */
function wfSpecialImport( $page = '' ) {
	global $wgUser, $wgOut, $wgRequest, $wgTitle;
	global $wgImportSources;

	if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit' ) {
		switch( $wgRequest->getVal( "source" ) ) {
		case "upload":
			if( !$wgUser->isAllowed( 'importupload' ) ) {
				return $wgOut->permissionRequired( 'importupload' );
			}
			$source = ImportStreamSource::newFromUpload( "xmlimport" );
			break;
		case "interwiki":
			$interwiki = $wgRequest->getVal( "interwiki" );
			# The form below only offers prefixes listed in $wgImportSources,
			# but a hand-crafted request can carry any value, so the list has
			# to be enforced here as well.
			if( is_array( $wgImportSources )
				&& in_array( $interwiki, $wgImportSources, true ) ) {
				$source = ImportStreamSource::newFromInterwiki(
					$interwiki,
					$wgRequest->getText( "frompage" ) );
			} else {
				$source = new WikiError( 'Bad interwiki link' );
			}
			break;
		default:
			$source = new WikiError( "Unknown import source type" );
		}

		if( WikiError::isError( $source ) ) {
			$wgOut->addWikiText( wfEscapeWikiText( $source->getMessage() ) );
		} else {
			$importer = new WikiImporter( $source );
			$result = $importer->doImport();
			if( WikiError::isError( $result ) ) {
				$wgOut->addWikiText( wfMsg( "importfailed",
					wfEscapeWikiText( $result->getMessage() ) ) );
			} else {
				# Success!
				$wgOut->addWikiText( wfMsg( "importsuccess" ) );
			}
		}
	}

	$action = $wgTitle->escapeLocalUrl( 'action=submit' );

	if( $wgUser->isAllowed( 'importupload' ) ) {
		$wgOut->addWikiText( wfMsg( "importtext" ) );
		# The button label is placed in a double-quoted attribute, like the
		# other escaped values in these forms.
		# NOTE(review): assumes wfMsgHtml() escapes double quotes -- confirm.
		$wgOut->addHTML( "
<fieldset>
	<legend>" . wfMsgHtml('upload') . "</legend>
	<form enctype='multipart/form-data' method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='upload' />
		<input type='hidden' name='MAX_FILE_SIZE' value='2000000' />
		<input type='file' name='xmlimport' value='' size='30' />
		<input type='submit' value=\"" . wfMsgHtml( "uploadbtn" ) . "\"/>
	</form>
</fieldset>
" );
	} else {
		if( empty( $wgImportSources ) ) {
			# Neither upload nor interwiki import is available to this user.
			$wgOut->addWikiText( wfMsg( 'importnosources' ) );
		}
	}

	if( !empty( $wgImportSources ) ) {
		$wgOut->addHTML( "
<fieldset>
	<legend>" . wfMsgHtml('importinterwiki') . "</legend>
	<form method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='interwiki' />
		<select name='interwiki'>
" );
		foreach( $wgImportSources as $interwiki ) {
			$iw = htmlspecialchars( $interwiki );
			$wgOut->addHTML( "<option value=\"$iw\">$iw</option>\n" );
		}
		$wgOut->addHTML( "
		</select>
		<input name='frompage' />
		<input type='submit' />
	</form>
</fieldset>
" );
	}
}
118
/**
 * A single page revision read from an import stream, together with the
 * code that stores it in the local database.
 *
 * @package MediaWiki
 * @subpackage SpecialPage
 */
class WikiRevision {
	var $title = NULL;
	var $timestamp = "20010115000000";
	var $user = 0;
	var $user_text = "";
	var $text = "";
	var $comment = "";

	/**
	 * Set the page this revision belongs to.
	 * @param string $text Page title text, handed to Title::newFromText().
	 */
	function setTitle( $text ) {
		$this->title = Title::newFromText( $text );
	}

	/**
	 * Set the revision timestamp from the export format.
	 *
	 * Input of the form 2003-08-05T18:30:02Z is stored as 20030805183002.
	 * A string that does not match that pattern is stored unchanged.
	 *
	 * @param string $ts
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = preg_replace( '/^(....)-(..)-(..)T(..):(..):(..)Z$/', '$1$2$3$4$5$6', $ts );
	}

	/**
	 * Record the author by user name.
	 * @param string $user
	 */
	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/**
	 * Record the author by IP address; stored in the same field as a name.
	 * @param string $ip
	 */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	/**
	 * @param string $text Revision text.
	 */
	function setText( $text ) {
		$this->text = $text;
	}

	/**
	 * @param string $text Edit summary.
	 */
	function setComment( $text ) {
		$this->comment = $text;
	}

	/**
	 * @return mixed The value stored by setTitle(), or NULL if never set.
	 */
	function getTitle() {
		return $this->title;
	}

	/**
	 * @return string
	 */
	function getTimestamp() {
		return $this->timestamp;
	}

	/**
	 * @return string The author text set by setUsername() or setUserIP().
	 */
	function getUser() {
		return $this->user_text;
	}

	/**
	 * @return string
	 */
	function getText() {
		return $this->text;
	}

	/**
	 * @return string
	 */
	function getComment() {
		return $this->comment;
	}

	/**
	 * Store this revision in the database, creating the page row first when
	 * the page does not exist yet.
	 *
	 * @return bool False when no title object is available (nothing is
	 *              written in that case), true after the revision has been
	 *              inserted.
	 */
	function importOldRevision() {
		# Without a title object there is no page to attach the revision to;
		# bail out before any database work instead of failing inside Article.
		if( !is_object( $this->title ) ) {
			return false;
		}

		$dbw =& wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
		} else {
			# No user object for this name: keep the raw text, with id 0.
			$userId = 0;
			$userText = $this->getUser();
		}

		$article = new Article( $this->title );
		$pageId = $article->getId();
		if( $pageId == 0 ) {
			# must create the page...
			$pageId = $article->insertOn( $dbw );
		}

		# FIXME: Check for exact conflicts
		# FIXME: Use original rev_id optionally
		# FIXME: blah blah blah

		# Insert the row
		$revision = new Revision( array(
			'page'       => $pageId,
			'text'       => $this->getText(),
			'comment'    => $this->getComment(),
			'user'       => $userId,
			'user_text'  => $userText,
			'timestamp'  => $this->timestamp,
			'minor_edit' => 0
			) );
		$revision->insertOn( $dbw );
		$article->updateIfNewerOn( $dbw, $revision );

		return true;
	}

}
223
/**
 * Event-driven reader for MediaWiki XML export streams.
 *
 * The importer pulls chunks from a source object (anything offering
 * readChunk() and atEnd()), feeds them to an expat parser and switches the
 * parser's element handlers as it descends through
 * <mediawiki> / <siteinfo> / <page> / <revision> / <contributor>.
 * A page callback is notified for each <title>; a revision callback
 * (by default importRevision()) is invoked at each </revision>.
 *
 * @package MediaWiki
 * @subpackage SpecialPage
 */
class WikiImporter {
	var $mSource = null;
	var $mPageCallback = null;
	var $mRevisionCallback = null;
	var $lastfield;

	# Parser state. Declared here so the handlers below do not create them
	# on the fly.
	var $appendfield = "";   # element whose character data is being collected
	var $appenddata = "";    # character data collected so far
	var $parenttag = "";     # element to return to after the current field
	var $workTitle = NULL;   # title text of the page being read
	var $workRevision = NULL; # WikiRevision being assembled

	/**
	 * @param object $source Chunk source providing readChunk() and atEnd().
	 */
	function __construct( $source ) {
		$this->setRevisionCallback( array( &$this, "importRevision" ) );
		$this->mSource = $source;
	}

	/**
	 * Legacy class-named constructor; forwards to __construct() so the
	 * object is initialised regardless of which of the two PHP treats as
	 * the constructor.
	 * @param object $source
	 */
	function WikiImporter( $source ) {
		$this->__construct( $source );
	}

	/**
	 * Report a structural problem in the input. Currently this only emits
	 * a debug line; parsing continues.
	 * @param string $err
	 */
	function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
	}

	# --------------

	/**
	 * Parse the whole source, firing the page and revision callbacks.
	 *
	 * @return mixed True on success, a WikiErrorMsg when no source is set,
	 *               or a WikiXmlError when the XML parser rejects a chunk.
	 */
	function doImport() {
		if( empty( $this->mSource ) ) {
			return new WikiErrorMsg( "importnotext" );
		}

		$parser = xml_parser_create( "UTF-8" );

		# case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		# xml_set_object() declares its object parameter by reference itself,
		# so no call-time '&' is needed (and later PHP versions reject it).
		xml_set_object( $parser, $this );
		xml_set_element_handler( $parser, "in_start", "" );

		do {
			$chunk = $this->mSource->readChunk();
			if( !xml_parse( $parser, $chunk, $this->mSource->atEnd() ) ) {
				# NOTE(review): the parser is not freed on this path here;
				# confirm whether WikiXmlError takes care of it.
				return new WikiXmlError( $parser );
			}
		} while( $chunk !== false && !$this->mSource->atEnd() );
		xml_parser_free( $parser );

		return true;
	}

	/**
	 * Debug hook; the actual logging call is commented out.
	 * @param string $data
	 */
	function debug( $data ) {
		#wfDebug( "IMPORT: $data\n" );
	}

	/**
	 * Emit a progress line: printed in command-line mode, otherwise added
	 * to the page output as a list item. $data is inserted as-is.
	 * @param string $data
	 */
	function notice( $data ) {
		global $wgCommandLineMode;
		if( $wgCommandLineMode ) {
			print "$data\n";
		} else {
			global $wgOut;
			$wgOut->addHTML( "<li>$data</li>\n" );
		}
	}

	/**
	 * Sets the action to perform as each new page in the stream is reached.
	 * @param callable $callback
	 * @return callable The previously set callback.
	 */
	function setPageCallback( $callback ) {
		$previous = $this->mPageCallback;
		$this->mPageCallback = $callback;
		return $previous;
	}

	/**
	 * Sets the action to perform as each page revision is reached.
	 * @param callable $callback
	 * @return callable The previously set callback.
	 */
	function setRevisionCallback( $callback ) {
		$previous = $this->mRevisionCallback;
		$this->mRevisionCallback = $callback;
		return $previous;
	}

	/**
	 * Default per-revision callback, performs the import.
	 * @param WikiRevision $revision
	 * @access private
	 */
	function importRevision( &$revision ) {
		$dbw =& wfGetDB( DB_MASTER );
		$dbw->deadlockLoop( array( &$revision, 'importOldRevision' ) );
	}

	/**
	 * Alternate per-revision callback, for debugging.
	 * @param WikiRevision $revision
	 * @access private
	 */
	function debugRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}

	/**
	 * Notify the callback function when a new <page> is reached.
	 * @param string $title Title text as read from the <title> element.
	 * @access private
	 */
	function pageCallback( $title ) {
		if( is_callable( $this->mPageCallback ) ) {
			call_user_func( $this->mPageCallback, $title );
		}
	}


	# XML parser callbacks from here out -- beware!
	# Each in_* / out_* pair handles the children of one element and
	# re-registers the handlers for whichever element is entered or left.

	/** Handler that ignores whatever it is given. */
	function donothing( $parser, $x, $y="" ) {
		#$this->debug( "donothing" );
	}

	/** Start handler for the document root; expects <mediawiki>. */
	function in_start( $parser, $name, $attribs ) {
		$this->debug( "in_start $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXmlError( "Expected <mediawiki>, got <$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
	}

	/** Start handler inside <mediawiki>; accepts <siteinfo> and <page>. */
	function in_mediawiki( $parser, $name, $attribs ) {
		$this->debug( "in_mediawiki $name" );
		if( $name == 'siteinfo' ) {
			xml_set_element_handler( $parser, "in_siteinfo", "out_siteinfo" );
		} elseif( $name == 'page' ) {
			xml_set_element_handler( $parser, "in_page", "out_page" );
		} else {
			return $this->throwXmlError( "Expected <page>, got <$name>" );
		}
	}

	/** End handler inside <mediawiki>; after </mediawiki> everything is ignored. */
	function out_mediawiki( $parser, $name ) {
		$this->debug( "out_mediawiki $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXmlError( "Expected </mediawiki>, got </$name>" );
		}
		xml_set_element_handler( $parser, "donothing", "donothing" );
	}


	/** Start handler inside <siteinfo>; known children are accepted and ignored. */
	function in_siteinfo( $parser, $name, $attribs ) {
		// no-ops for now
		$this->debug( "in_siteinfo $name" );
		switch( $name ) {
		case "sitename":
		case "generator":
		case "case":
		case "namespaces":
		case "namespace":
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in <siteinfo>." );
		}
	}

	/** End handler inside <siteinfo>; only </siteinfo> itself changes state. */
	function out_siteinfo( $parser, $name ) {
		if( $name == "siteinfo" ) {
			xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
		}
	}


	/**
	 * Start handler inside <page>: text fields are collected via
	 * char_append()/out_append(); <revision> starts a new WikiRevision.
	 */
	function in_page( $parser, $name, $attribs ) {
		$this->debug( "in_page $name" );
		switch( $name ) {
		case "id":
		case "title":
		case "restrictions":
			$this->appendfield = $name;
			$this->appenddata = "";
			$this->parenttag = "page";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "revision":
			$this->workRevision = new WikiRevision;
			$this->workRevision->setTitle( $this->workTitle );
			xml_set_element_handler( $parser, "in_revision", "out_revision" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in a <page>." );
		}
	}

	/** End handler inside <page>; clears the per-page working state. */
	function out_page( $parser, $name ) {
		$this->debug( "out_page $name" );
		if( $name != "page" ) {
			return $this->throwXmlError( "Expected </page>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		$this->workTitle = NULL;
		$this->workRevision = NULL;
	}

	/** Start handler used while collecting a text field; no children expected. */
	function in_nothing( $parser, $name, $attribs ) {
		$this->debug( "in_nothing $name" );
		return $this->throwXmlError( "No child elements allowed here; got <$name>" );
	}

	/** Character-data handler used while collecting a text field. */
	function char_append( $parser, $data ) {
		$this->debug( "char_append '$data'" );
		$this->appenddata .= $data;
	}

	/**
	 * End handler for a collected text field: restores the parent element's
	 * handlers and stores the collected text where it belongs.
	 */
	function out_append( $parser, $name ) {
		$this->debug( "out_append $name" );
		if( $name != $this->appendfield ) {
			return $this->throwXmlError( "Expected </{$this->appendfield}>, got </$name>" );
		}
		# Hand control back to the in_/out_ pair of the enclosing element.
		xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" );
		xml_set_character_data_handler( $parser, "donothing" );

		switch( $this->appendfield ) {
		case "title":
			$this->workTitle = $this->appenddata;
			$this->pageCallback( $this->workTitle );
			break;
		case "text":
			$this->workRevision->setText( $this->appenddata );
			break;
		case "username":
			$this->workRevision->setUsername( $this->appenddata );
			break;
		case "ip":
			$this->workRevision->setUserIP( $this->appenddata );
			break;
		case "timestamp":
			$this->workRevision->setTimestamp( $this->appenddata );
			break;
		case "comment":
			$this->workRevision->setComment( $this->appenddata );
			break;
		default:
			# Fields such as "id" and "restrictions" are collected but not used.
			$this->debug( "Bad append: {$this->appendfield}" );
		}
		$this->appendfield = "";
		$this->appenddata = "";
	}

	/** Start handler inside <revision>. */
	function in_revision( $parser, $name, $attribs ) {
		$this->debug( "in_revision $name" );
		switch( $name ) {
		case "id":
		case "timestamp":
		case "comment":
		case "text":
			$this->parenttag = "revision";
			$this->appendfield = $name;
			# Start from an empty buffer, as in_page() does.
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in a <revision>." );
		}
	}

	/**
	 * End handler inside <revision>: hands the finished revision to the
	 * revision callback and lists any non-empty result on the page.
	 */
	function out_revision( $parser, $name ) {
		$this->debug( "out_revision $name" );
		if( $name != "revision" ) {
			return $this->throwXmlError( "Expected </revision>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );

		# References go into the argument array rather than being forced at
		# the call site, so by-reference callbacks such as importRevision()
		# still receive a reference.
		$out = call_user_func_array( $this->mRevisionCallback,
			array( &$this->workRevision, &$this ) );
		if( !empty( $out ) ) {
			global $wgOut;
			$wgOut->addHTML( "<li>" . $out . "</li>\n" );
		}
	}

	/** Start handler inside <contributor>; accepts <username> and <ip>. */
	function in_contributor( $parser, $name, $attribs ) {
		$this->debug( "in_contributor $name" );
		switch( $name ) {
		case "username":
		case "ip":
			$this->parenttag = "contributor";
			$this->appendfield = $name;
			# Start from an empty buffer, as in_page() does.
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		default:
			return $this->throwXmlError( "Invalid tag <$name> in <contributor>" );
		}
	}

	/** End handler inside <contributor>. */
	function out_contributor( $parser, $name ) {
		$this->debug( "out_contributor $name" );
		if( $name != "contributor" ) {
			return $this->throwXmlError( "Expected </contributor>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_revision", "out_revision" );
	}

}
536
/**
 * Import source that serves an in-memory string as one single chunk.
 * @package MediaWiki
 */
class ImportStringSource {
	var $mString = '';
	var $mRead = false;

	/**
	 * @param string $string The complete data to hand out.
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * Legacy class-named constructor; forwards to __construct() so the
	 * object is initialised regardless of which of the two PHP treats as
	 * the constructor.
	 * @param string $string
	 */
	function ImportStringSource( $string ) {
		$this->__construct( $string );
	}

	/**
	 * @return bool True once the string has been handed out by readChunk().
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return mixed The whole string on the first call, false afterwards.
	 */
	function readChunk() {
		if( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}
557
/**
 * Import source that reads chunks from an open stream handle, plus factory
 * methods that open such a stream from a file, an upload, a URL or another
 * wiki's export page.
 *
 * NOTE(review): the newFrom*() factories are called statically
 * (ImportStreamSource::newFromX) but are not declared `static`. The keyword
 * is required for such calls on current PHP but is not valid PHP 4 syntax;
 * decide according to the supported-version policy.
 *
 * @package MediaWiki
 */
class ImportStreamSource {
	var $mHandle = null;

	/**
	 * @param resource $handle Open stream to read from.
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * Legacy class-named constructor; forwards to __construct() so the
	 * object is initialised regardless of which of the two PHP treats as
	 * the constructor.
	 * @param resource $handle
	 */
	function ImportStreamSource( $handle ) {
		$this->__construct( $handle );
	}

	/**
	 * @return bool Result of feof() on the handle.
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return mixed Up to 32768 bytes from the handle, as returned by fread().
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Open a file (or anything else fopen() accepts) as an import source.
	 * @param string $filename
	 * @return mixed ImportStreamSource, or a WikiError if fopen() fails.
	 */
	function newFromFile( $filename ) {
		# Warnings are suppressed; failure is reported through the WikiError.
		$file = @fopen( $filename, 'rt' );
		if( !$file ) {
			return new WikiError( "Couldn't open import file" );
		}
		return new ImportStreamSource( $file );
	}

	/**
	 * Build an import source from an uploaded file.
	 * @param string $fieldname Name of the file field in the form.
	 * @return mixed ImportStreamSource, or a WikiErrorMsg/WikiError when the
	 *               upload is missing, reported an error, or cannot be opened.
	 */
	function newFromUpload( $fieldname = "xmlimport" ) {
		if( !isset( $_FILES[$fieldname] ) ) {
			return new WikiErrorMsg( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		if( !empty( $upload['error'] ) ) {
			return new WikiErrorMsg( 'importuploaderror', $upload['error'] );
		}
		$fname = $upload['tmp_name'];
		# Only read files that PHP itself received as an upload.
		if( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		}
		return new WikiErrorMsg( 'importnofile' );
	}

	/**
	 * Build an import source from a URL.
	 * @param string $url
	 * @return mixed ImportStreamSource, or a WikiError if it cannot be opened.
	 */
	function newFromURL( $url ) {
		# fopen-wrappers are normally turned off for security.
		# Try to enable them for this one call, then put back whatever was
		# configured before rather than forcing the setting off.
		$previous = ini_set( "allow_url_fopen", true );
		$ret = ImportStreamSource::newFromFile( $url );
		if( $previous !== false ) {
			# ini_set() gave us the old value, so the change took effect.
			ini_set( "allow_url_fopen", $previous );
		}
		return $ret;
	}

	/**
	 * Build an import source from another wiki's Special:Export page.
	 * @param string $interwiki Interwiki prefix of the source wiki.
	 * @param string $page Title of the page to fetch.
	 * @return mixed ImportStreamSource, or a WikiError when the prefix is
	 *               unknown or the export cannot be opened.
	 */
	function newFromInterwiki( $interwiki, $page ) {
		$base = Title::getInterwikiLink( $interwiki );
		if( empty( $base ) ) {
			return new WikiError( 'Bad interwiki link' );
		}
		# The interwiki URL pattern carries a "$1" placeholder for the title.
		$import = wfUrlencode( "Special:Export/$page" );
		$url = str_replace( "$1", $import, $base );
		return ImportStreamSource::newFromURL( $url );
	}
}
618
619
620 ?>