* Indent.
[lhc/web/wiklou.git] / includes / SpecialImport.php
1 <?php
2 /**
3 * MediaWiki page data importer
4 * Copyright (C) 2003,2005 Brion Vibber <brion@pobox.com>
5 * http://www.mediawiki.org/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 * http://www.gnu.org/copyleft/gpl.html
21 *
22 * @package MediaWiki
23 * @subpackage SpecialPage
24 */
25
26 require_once( 'WikiError.php' );
27
/**
 * Entry point for the Special:Import page.
 *
 * When the request is a POST carrying action=submit, an import is attempted
 * from the selected source ("upload" or "interwiki") and the outcome is
 * written to the output page. Afterwards the XML upload form is always
 * rendered, followed by the interwiki form when $wgImportSources is
 * non-empty.
 *
 * @param string $page not used by this function
 */
function wfSpecialImport( $page = '' ) {
	global $wgOut, $wgRequest, $wgTitle;
	global $wgImportSources;

	if( $wgRequest->wasPosted() && $wgRequest->getVal( 'action' ) == 'submit') {
		$importer = new WikiImporter();

		switch( $wgRequest->getVal( "source" ) ) {
		case "upload":
			$result = $importer->setupFromUpload( "xmlimport" );
			break;
		case "interwiki":
			# The <select> below only offers the prefixes listed in
			# $wgImportSources, but a hand-made POST can carry anything;
			# refuse prefixes that were not explicitly configured.
			$interwiki = $wgRequest->getVal( "interwiki" );
			if( is_array( $wgImportSources )
				&& in_array( $interwiki, $wgImportSources, true ) ) {
				$result = $importer->setupFromInterwiki(
					$interwiki,
					$wgRequest->getText( "frompage" ) );
			} else {
				$result = new WikiError( 'Bad interwiki link' );
			}
			break;
		default:
			$result = new WikiError( "Unknown import source type" );
		}

		if( WikiError::isError( $result ) ) {
			$wgOut->addWikiText( htmlspecialchars( $result->toString() ) );
		} else {
			$importer->setRevisionHandler( "wfImportOldRevision" );
			$result = $importer->doImport();
			if( WikiError::isError( $result ) ) {
				$wgOut->addHTML( "<p>" . wfMsg( "importfailed",
					htmlspecialchars( $result->toString() ) ) . "</p>" );
			} else {
				# Success!
				$wgOut->addHTML( "<p>" . wfMsg( "importsuccess" ) . "</p>" );
			}
		}
	}

	$wgOut->addWikiText( "<p>" . wfMsg( "importtext" ) . "</p>" );
	$action = $wgTitle->escapeLocalUrl( 'action=submit' );

	# The button label lands inside a single-quoted attribute, so single
	# quotes have to be escaped as well (ENT_QUOTES).
	$uploadLabel = htmlspecialchars( wfMsg( "uploadbtn" ), ENT_QUOTES );
	$wgOut->addHTML( "
<fieldset>
	<legend>Upload XML</legend>
	<form enctype='multipart/form-data' method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='upload' />
		<input type='hidden' name='MAX_FILE_SIZE' value='2000000' />
		<input type='file' name='xmlimport' value='' size='30' />
		<input type='submit' value='" . $uploadLabel . "'/>
	</form>
</fieldset>
" );

	if( !empty( $wgImportSources ) ) {
		$wgOut->addHTML( "
<fieldset>
	<legend>Interwiki import</legend>
	<form method='post' action=\"$action\">
		<input type='hidden' name='action' value='submit' />
		<input type='hidden' name='source' value='interwiki' />
		<select name='interwiki'>
" );
		foreach( $wgImportSources as $interwiki ) {
			$iw = htmlspecialchars( $interwiki );
			$wgOut->addHTML( "<option value=\"$iw\">$iw</option>\n" );
		}
		$wgOut->addHTML( "
		</select>
		<input name='frompage' />
		<input type='submit' />
	</form>
</fieldset>
" );
	}
}
108
/**
 * Revision callback installed by wfSpecialImport(): hands the revision's
 * importOldRevision() method to deadlockLoop() on the DB_MASTER connection.
 *
 * @param object $revision object exposing an importOldRevision() method
 */
function wfImportOldRevision( &$revision ) {
	$importCallback = array( &$revision, 'importOldRevision' );
	$masterDb =& wfGetDB( DB_MASTER );
	$masterDb->deadlockLoop( $importCallback );
}
113
/**
 * Holder for a single revision read from an import file, plus the logic
 * to write that revision into the database.
 *
 * @package MediaWiki
 * @subpackage SpecialPage
 */
class WikiRevision {
	var $title = NULL;
	var $timestamp = "20010115000000";
	var $user = 0;
	var $user_text = "";
	var $text = "";
	var $comment = "";

	/**
	 * Store whatever Title::newFromText() yields for the given page name.
	 * @param string $text
	 */
	function setTitle( $text ) {
		$this->title = Title::newFromText( $text );
	}

	/**
	 * Convert a timestamp such as 2003-08-05T18:30:02Z to the 14-digit form
	 * 20030805183002. Input that does not match that pattern is stored
	 * unchanged.
	 * @param string $ts
	 */
	function setTimestamp( $ts ) {
		# 2003-08-05T18:30:02Z
		$this->timestamp = preg_replace( '/^(....)-(..)-(..)T(..):(..):(..)Z$/', '$1$2$3$4$5$6', $ts );
	}

	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/** An IP contributor is recorded in the same field as a user name. */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	function setText( $text ) {
		$this->text = $text;
	}

	function setComment( $text ) {
		$this->comment = $text;
	}

	function getTitle() {
		return $this->title;
	}

	function getTimestamp() {
		return $this->timestamp;
	}

	/** @return string the contributor name or IP (user_text, not the numeric $user) */
	function getUser() {
		return $this->user_text;
	}

	function getText() {
		return $this->text;
	}

	function getComment() {
		return $this->comment;
	}

	/**
	 * Insert this revision into the database, creating the page row first
	 * when the page does not exist yet.
	 *
	 * @return bool true once the revision has been inserted; false when the
	 *         revision has no usable title and was therefore skipped
	 */
	function importOldRevision() {
		# $title is NULL until setTitle() has stored an object in it; going
		# on without one would end in a method call on a non-object.
		if( !is_object( $this->title ) ) {
			return false;
		}

		$dbw =& wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		# NOTE(review): User::newFromName() presumably yields a non-object
		# for names it cannot use (e.g. an empty contributor); in that case
		# attribute the edit to user id 0 under the raw name from the file.
		$user = User::newFromName( $this->getUser() );
		if( is_object( $user ) ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
		} else {
			$userId = 0;
			$userText = $this->getUser();
		}

		$article = new Article( $this->title );
		$pageId = $article->getId();
		if( $pageId == 0 ) {
			# must create the page...
			$pageId = $article->insertOn( $dbw );
		}

		# FIXME: Check for exact conflicts
		# FIXME: Use original rev_id optionally
		# FIXME: blah blah blah

		# Insert the row
		$revision = new Revision( array(
			'page'       => $pageId,
			'text'       => $this->getText(),
			'comment'    => $this->getComment(),
			'user'       => $userId,
			'user_text'  => $userText,
			'timestamp'  => $this->timestamp,
			'minor_edit' => 0
			) );
		$revision->insertOn( $dbw );
		$article->updateIfNewerOn( $dbw, $revision );

		return true;
	}

}
211
/**
 * Importer for MediaWiki XML page dumps, built on the event-based XML
 * parser functions.
 *
 * The element handlers form a small state machine: each in_* / out_*
 * callback checks the tag it was given and then installs the handler pair
 * for the next expected nesting level. Every completed <revision> is passed
 * to the callback registered with setRevisionHandler().
 *
 * Structural problems are reported through throwXmlError(), which only
 * forwards to debug(); they do not stop the parse.
 *
 * @package MediaWiki
 * @subpackage SpecialPage
 */
class WikiImporter {
	var $mSource = NULL;
	var $mRevisionHandler = NULL;
	var $lastfield;

	# Working state shared by the XML callbacks
	var $appendfield = "";    # tag whose character data is being collected
	var $appenddata = "";     # character data collected so far
	var $parenttag = "";      # level to return to once $appendfield closes
	var $workTitle = NULL;    # text of the current page's <title>
	var $workRevision = NULL; # WikiRevision currently being filled in

	function WikiImporter() {
		$this->setRevisionHandler( array( &$this, "defaultRevisionHandler" ) );
	}

	/**
	 * Report a structural problem in the input. Only logs via debug().
	 * @param string $err
	 */
	function throwXmlError( $err ) {
		$this->debug( "FAILURE: $err" );
	}

	/**
	 * Read the whole import source into memory.
	 * @param string $filename
	 * @return mixed true on success, WikiError when the file can't be read
	 */
	function setupFromFile( $filename ) {
		$this->mSource = @file_get_contents( $filename );
		if( $this->mSource === false ) {
			return new WikiError( "Couldn't open import file" );
		}
		return true;
	}

	/**
	 * Use an HTTP file upload as the import source.
	 * @param string $fieldname name of the form's file field
	 * @return mixed true on success, WikiError otherwise
	 */
	function setupFromUpload( $fieldname = "xmlimport" ) {
		if( !isset( $_FILES[$fieldname] ) ) {
			return new WikiErrorMsg( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		if( !empty( $upload['error'] ) ) {
			return new WikiErrorMsg( 'importuploaderror', $upload['error'] );
		}
		$fname = $upload['tmp_name'];
		if( is_uploaded_file( $fname ) ) {
			return $this->setupFromFile( $fname );
		} else {
			return new WikiErrorMsg( 'importnofile' );
		}
	}

	/**
	 * Fetch the import source from a URL.
	 * @param string $url
	 * @return mixed true on success, WikiError otherwise
	 */
	function setupFromURL( $url ) {
		# fopen-wrappers are normally turned off for security.
		# ini_set() hands back the previous value, or false when the setting
		# could not be changed; put that value back afterwards instead of
		# forcing the option off for the rest of the request.
		$previous = ini_set( "allow_url_fopen", true );
		$ret = $this->setupFromFile( $url );
		if( $previous !== false ) {
			ini_set( "allow_url_fopen", $previous );
		}
		return $ret;
	}

	/**
	 * Fetch Special:Export/<page> from the wiki behind an interwiki prefix.
	 * @param string $interwiki
	 * @param string $page
	 * @return mixed true on success, WikiError otherwise
	 */
	function setupFromInterwiki( $interwiki, $page ) {
		$base = Title::getInterwikiLink( $interwiki );
		if( empty( $base ) ) {
			return new WikiError( 'Bad interwiki link' );
		} else {
			$import = wfUrlencode( "Special:Export/$page" );
			$url = str_replace( "$1", $import, $base );
			$this->notice( "Importing from $url" );
			return $this->setupFromURL( $url );
		}
	}

	# --------------

	/**
	 * Parse the loaded source, firing the revision handler for each
	 * revision encountered.
	 * @return mixed true on success, WikiError on empty input or XML error
	 */
	function doImport() {
		if( empty( $this->mSource ) ) {
			return new WikiErrorMsg( "importnotext" );
		}

		$parser = xml_parser_create( "UTF-8" );

		# case folding violates XML standard, turn it off
		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );

		# No call-time '&' here: that syntax is deprecated and is a parse
		# error on later PHP versions.
		xml_set_object( $parser, $this );
		xml_set_element_handler( $parser, "in_start", "" );

		if( !xml_parse( $parser, $this->mSource, true ) ) {
			# NOTE(review): the parser is handed to WikiXmlError and not
			# freed here -- confirm that class releases it.
			return new WikiXmlError( $parser );
		}
		xml_parser_free( $parser );

		return true;
	}

	/** Debug hook; currently disabled. */
	function debug( $data ) {
		#$this->notice( "DEBUG: $data\n" );
	}

	/**
	 * Emit a progress message: plain text in command-line mode, otherwise
	 * an HTML list item on the output page. $data is inserted as-is.
	 */
	function notice( $data ) {
		global $wgCommandLineMode;
		if( $wgCommandLineMode ) {
			print "$data\n";
		} else {
			global $wgOut;
			$wgOut->addHTML( "<li>$data</li>\n" );
		}
	}

	/**
	 * Register the callback invoked for every parsed revision. It is called
	 * with the WikiRevision and this importer as arguments.
	 */
	function setRevisionHandler( $functionref ) {
		$this->mRevisionHandler = $functionref;
	}

	/** Default revision callback: dumps the revision through debug(). */
	function defaultRevisionHandler( &$revision ) {
		$this->debug( "Got revision:" );
		if( is_object( $revision->title ) ) {
			$this->debug( "-- Title: " . $revision->title->getPrefixedText() );
		} else {
			$this->debug( "-- Title: <invalid>" );
		}
		$this->debug( "-- User: " . $revision->user_text );
		$this->debug( "-- Timestamp: " . $revision->timestamp );
		$this->debug( "-- Comment: " . $revision->comment );
		$this->debug( "-- Text: " . $revision->text );
	}



	# XML parser callbacks from here out -- beware!
	function donothing( $parser, $x, $y="" ) {
		#$this->debug( "donothing" );
	}

	function in_start( $parser, $name, $attribs ) {
		$this->debug( "in_start $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXmlError( "Expected <mediawiki>, got <$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );
	}

	function in_mediawiki( $parser, $name, $attribs ) {
		$this->debug( "in_mediawiki $name" );
		if( $name != "page" ) {
			return $this->throwXmlError( "Expected <page>, got <$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );
	}

	function out_mediawiki( $parser, $name ) {
		$this->debug( "out_mediawiki $name" );
		if( $name != "mediawiki" ) {
			return $this->throwXmlError( "Expected </mediawiki>, got </$name>" );
		}
		xml_set_element_handler( $parser, "donothing", "donothing" );
	}

	function in_page( $parser, $name, $attribs ) {
		$this->debug( "in_page $name" );
		switch( $name ) {
		case "id":
		case "title":
		case "restrictions":
			$this->appendfield = $name;
			$this->appenddata = "";
			$this->parenttag = "page";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "revision":
			$this->workRevision = new WikiRevision;
			$this->workRevision->setTitle( $this->workTitle );
			xml_set_element_handler( $parser, "in_revision", "out_revision" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in a <page>." );
		}
	}

	function out_page( $parser, $name ) {
		$this->debug( "out_page $name" );
		if( $name != "page" ) {
			return $this->throwXmlError( "Expected </page>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_mediawiki", "out_mediawiki" );

		$this->workTitle = NULL;
		$this->workRevision = NULL;
	}

	function in_nothing( $parser, $name, $attribs ) {
		$this->debug( "in_nothing $name" );
		return $this->throwXmlError( "No child elements allowed here; got <$name>" );
	}

	function char_append( $parser, $data ) {
		$this->debug( "char_append '$data'" );
		$this->appenddata .= $data;
	}

	/**
	 * Closing tag of a text-only element: return to the parent level and
	 * store the collected character data where it belongs.
	 */
	function out_append( $parser, $name ) {
		$this->debug( "out_append $name" );
		if( $name != $this->appendfield ) {
			return $this->throwXmlError( "Expected </{$this->appendfield}>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_$this->parenttag", "out_$this->parenttag" );
		xml_set_character_data_handler( $parser, "donothing" );
		switch( $this->appendfield ) {
		case "title":
			$this->workTitle = $this->appenddata;
			break;
		case "text":
			$this->workRevision->setText( $this->appenddata );
			break;
		case "username":
			$this->workRevision->setUsername( $this->appenddata );
			break;
		case "ip":
			$this->workRevision->setUserIP( $this->appenddata );
			break;
		case "timestamp":
			$this->workRevision->setTimestamp( $this->appenddata );
			break;
		case "comment":
			$this->workRevision->setComment( $this->appenddata );
			break;
		default:
			$this->debug( "Bad append: {$this->appendfield}" );
		}
		$this->appendfield = "";
		$this->appenddata = "";
	}

	function in_revision( $parser, $name, $attribs ) {
		$this->debug( "in_revision $name" );
		switch( $name ) {
		case "id":
		case "timestamp":
		case "comment":
		case "text":
			$this->parenttag = "revision";
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		case "contributor":
			xml_set_element_handler( $parser, "in_contributor", "out_contributor" );
			break;
		default:
			return $this->throwXmlError( "Element <$name> not allowed in a <revision>." );
		}
	}

	function out_revision( $parser, $name ) {
		$this->debug( "out_revision $name" );
		if( $name != "revision" ) {
			return $this->throwXmlError( "Expected </revision>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_page", "out_page" );

		# References travel inside the argument array, so the handler still
		# receives them by reference without call-time '&' syntax.
		$out = call_user_func_array( $this->mRevisionHandler,
			array( &$this->workRevision, &$this ) );
		if( !empty( $out ) ) {
			global $wgOut;
			$wgOut->addHTML( "<li>" . $out . "</li>\n" );
		}
	}

	function in_contributor( $parser, $name, $attribs ) {
		$this->debug( "in_contributor $name" );
		switch( $name ) {
		case "username":
		case "ip":
			$this->parenttag = "contributor";
			$this->appendfield = $name;
			$this->appenddata = "";
			xml_set_element_handler( $parser, "in_nothing", "out_append" );
			xml_set_character_data_handler( $parser, "char_append" );
			break;
		default:
			return $this->throwXmlError( "Invalid tag <$name> in <contributor>" );
		}
	}

	function out_contributor( $parser, $name ) {
		$this->debug( "out_contributor $name" );
		if( $name != "contributor" ) {
			return $this->throwXmlError( "Expected </contributor>, got </$name>" );
		}
		xml_set_element_handler( $parser, "in_revision", "out_revision" );
	}

}
493
494
495 ?>