Don't look for pipes in the root node.
[lhc/web/wiklou.git] / includes / Import.php
1 <?php
2 /**
3 * MediaWiki page data importer
4 *
5 * Copyright © 2003,2005 Brion Vibber <brion@pobox.com>
6 * http://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup SpecialPage
25 */
26
/**
 * Holds the data of a single imported item -- a page revision, a log entry
 * or a file upload -- and provides the methods that store it in the local wiki.
 *
 * Fields are filled in through the set*() methods and then one of
 * importOldRevision(), importLogItem() or importUpload() is called.
 *
 * @ingroup SpecialPage
 */
class WikiRevision {
	/** Title object of the page the item belongs to; null until setTitle() is called */
	public $title = null;
	public $id = 0;
	/** Timestamp as stored by setTimestamp(), i.e. converted with wfTimestamp( TS_MW, ... ) */
	public $timestamp = "20010115000000";
	public $user = 0;
	/** User name or IP address of the author */
	public $user_text = "";
	public $text = "";
	public $comment = "";
	public $minor = false;
	/** Log type, action and parameters; only used by importLogItem() */
	public $type = "";
	public $action = "";
	public $params = "";
	/**
	 * Upload-related fields. These used to be created on the fly by their
	 * setters, so reading them on a fresh object raised an "undefined
	 * property" notice; they are now declared with empty defaults.
	 */
	public $src = "";
	public $filename = "";
	public $size = 0;

	/**
	 * Set the title of the page this item belongs to.
	 *
	 * @param $title Object the title object; anything that is not an object is rejected
	 * @throws MWException if $title is null or any other non-object value
	 */
	function setTitle( $title ) {
		if ( is_object( $title ) ) {
			$this->title = $title;
			return;
		}
		if ( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		}
		throw new MWException( "WikiRevision given non-object title in import." );
	}

	function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Store the timestamp after converting it with wfTimestamp( TS_MW, ... ).
	 *
	 * @param $ts String timestamp as found in the dump, e.g. 2003-08-05T18:30:02Z
	 */
	function setTimestamp( $ts ) {
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	function setUsername( $user ) {
		$this->user_text = $user;
	}

	/** An IP address is stored in the same field as a user name. */
	function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	function setText( $text ) {
		$this->text = $text;
	}

	function setComment( $text ) {
		$this->comment = $text;
	}

	/** @param $minor Mixed any value; it is cast to bool */
	function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	/** @param $src String location that downloadSource() fetches the file from */
	function setSrc( $src ) {
		$this->src = $src;
	}

	function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/** @param $size Mixed any value; it is converted with intval() */
	function setSize( $size ) {
		$this->size = intval( $size );
	}

	function setType( $type ) {
		$this->type = $type;
	}

	function setAction( $action ) {
		$this->action = $action;
	}

	function setParams( $params ) {
		$this->params = $params;
	}

	function getTitle() {
		return $this->title;
	}

	function getID() {
		return $this->id;
	}

	function getTimestamp() {
		return $this->timestamp;
	}

	/** @return String the stored user name or IP address (not a user ID) */
	function getUser() {
		return $this->user_text;
	}

	function getText() {
		return $this->text;
	}

	function getComment() {
		return $this->comment;
	}

	function getMinor() {
		return $this->minor;
	}

	function getSrc() {
		return $this->src;
	}

	function getFilename() {
		return $this->filename;
	}

	function getSize() {
		return $this->size;
	}

	function getType() {
		return $this->type;
	}

	function getAction() {
		return $this->action;
	}

	function getParams() {
		return $this->params;
	}

	/**
	 * Insert this item as a page revision, creating the page if needed.
	 *
	 * A revision of the same page with the same timestamp, user text and
	 * comment is treated as already imported and skipped.
	 *
	 * @return Boolean false if a matching revision already exists, true otherwise
	 */
	function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if ( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
		} else {
			# No usable User object: keep the raw text and attribute to user 0
			$userId = 0;
			$userText = $this->getUser();
		}

		// avoid memory leak...?
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		$article = new Article( $this->title );
		$pageId = $article->getId();
		$created = ( $pageId == 0 );
		if ( $created ) {
			# must create the page...
			$pageId = $article->insertOn( $dbw );
		} else {
			$prior = $dbw->selectField( 'revision', '1',
				array(
					'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment' => $this->getComment(),
				),
				__METHOD__
			);
			if ( $prior ) {
				// FIXME: this could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
		}

		# FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( array(
			'page' => $pageId,
			'text' => $this->getText(),
			'comment' => $this->getComment(),
			'user' => $userId,
			'user_text' => $userText,
			'timestamp' => $this->timestamp,
			'minor_edit' => $this->minor,
		) );
		$revId = $revision->insertOn( $dbw );
		$changed = $article->updateIfNewerOn( $dbw, $revision );

		# To be on the safe side, point the global title at the imported page
		# while the update hooks run, then put the previous value back.
		$tempTitle = $GLOBALS['wgTitle'];
		$GLOBALS['wgTitle'] = $this->title;

		if ( $created ) {
			wfDebug( __METHOD__ . ": running onArticleCreate\n" );
			Article::onArticleCreate( $this->title );

			wfDebug( __METHOD__ . ": running create updates\n" );
			$article->createUpdates( $revision );
		} elseif ( $changed ) {
			wfDebug( __METHOD__ . ": running onArticleEdit\n" );
			Article::onArticleEdit( $this->title );

			wfDebug( __METHOD__ . ": running edit updates\n" );
			$article->editUpdates(
				$this->getText(),
				$this->getComment(),
				$this->minor,
				$this->timestamp,
				$revId
			);
		}
		$GLOBALS['wgTitle'] = $tempTitle;

		return true;
	}

	/**
	 * Insert this item as a row of the logging table.
	 *
	 * An entry with the same type, action, timestamp, target, comment and
	 * parameters is treated as already imported and skipped.
	 *
	 * @return Boolean true if a row was inserted; false if the title is missing
	 *   or a matching entry already exists. (Earlier the method returned null
	 *   both for a missing title and on success.)
	 */
	function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );
		# FIXME: this will not record autoblocks
		$title = $this->getTitle();
		if ( !$title ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}

		$dbTimestamp = $dbw->timestamp( $this->timestamp );

		# Check if it exists already
		// FIXME: use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			array(
				'log_type' => $this->getType(),
				'log_action' => $this->getAction(),
				'log_timestamp' => $dbTimestamp,
				'log_namespace' => $title->getNamespace(),
				'log_title' => $title->getDBkey(),
				'log_comment' => $this->getComment(),
				#'log_user_text' => $this->user_text,
				'log_params' => $this->params,
			),
			__METHOD__
		);
		// FIXME: this could fail slightly for multiple matches :P
		if ( $prior ) {
			wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}

		$log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
		$data = array(
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbTimestamp,
			'log_user' => User::idFromName( $this->user_text ),
			#'log_user_text' => $this->user_text,
			'log_namespace' => $title->getNamespace(),
			'log_title' => $title->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params,
		);
		$dbw->insert( 'logging', $data, __METHOD__ );

		return true;
	}

	/**
	 * Fetch the file named by the source location and upload it under this
	 * item's title. Still marked as a stub in the debug log.
	 *
	 * @return Boolean true if the upload status is good, false otherwise
	 */
	function importUpload() {
		wfDebug( __METHOD__ . ": STUB\n" );

		// @todo Fixme: upload() uses $wgUser, which is wrong here
		// it may also create a page without our desire, also wrong potentially.
		// and, it will record a *current* upload, but we might want an archive version here

		$file = wfLocalFile( $this->getTitle() );
		if ( !$file ) {
			wfDebug( "IMPORT: Bad file. :(\n" );
			return false;
		}

		$source = $this->downloadSource();
		if ( !$source ) {
			wfDebug( "IMPORT: Could not fetch remote file. :(\n" );
			return false;
		}

		$status = $file->upload( $source,
			$this->getComment(),
			$this->getComment(), // Initial page, if none present...
			File::DELETE_SOURCE,
			false, // props...
			$this->getTimestamp() );

		if ( !$status->isGood() ) {
			wfDebug( "IMPORT: is bad? " . $status->getXml() . "\n" );
			return false;
		}

		// yay?
		wfDebug( "IMPORT: is ok?\n" );
		return true;
	}

	/**
	 * Download the source location into a temporary file.
	 *
	 * Nothing is fetched unless $wgEnableUploads is set. On any failure the
	 * temporary file is removed again so no partial download is left behind.
	 *
	 * @return Mixed path of the temporary file, or false on failure
	 */
	function downloadSource() {
		global $wgEnableUploads;
		if ( !$wgEnableUploads ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		if ( $tempo === false ) {
			wfDebug( "IMPORT: couldn't create a temp file\n" );
			return false;
		}

		$f = fopen( $tempo, 'wb' );
		if ( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			# tempnam() normally creates the file, so clean it up
			if ( file_exists( $tempo ) ) {
				unlink( $tempo );
			}
			return false;
		}

		// @todo Fixme!
		$src = $this->getSrc();
		$data = Http::get( $src );
		if ( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		$written = fwrite( $f, $data );
		fclose( $f );
		if ( $written === false || $written < strlen( $data ) ) {
			# Do not hand a truncated file on to the uploader
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			unlink( $tempo );
			return false;
		}

		return $tempo;
	}

}
372
/**
 * Import source backed by an in-memory string.
 *
 * The whole string is handed out by the first readChunk() call; after that
 * the source reports that it is at its end.
 *
 * @ingroup SpecialPage
 */
class ImportStringSource {
	/** The data given to the constructor */
	public $mString;
	/** Whether the string has already been returned by readChunk() */
	public $mRead = false;

	/**
	 * @param $string String the complete data to serve
	 */
	function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return Boolean true once readChunk() has returned the string
	 */
	function atEnd() {
		return $this->mRead;
	}

	/**
	 * @return Mixed the full string on the first call, false on later calls
	 */
	function readChunk() {
		if ( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}
396
/**
 * Import source that reads from an open stream handle in chunks of up to
 * 32768 bytes. The static new*() factories return a Status object which, on
 * success, wraps an ImportStreamSource.
 *
 * @ingroup SpecialPage
 */
class ImportStreamSource {
	/** The stream handle given to the constructor */
	public $mHandle;

	/**
	 * @param $handle Resource an open, readable stream
	 */
	function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return Boolean result of feof() on the handle
	 */
	function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * @return Mixed result of fread() on the handle, asking for 32768 bytes
	 */
	function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Open a local file for reading.
	 *
	 * @param $filename String path of the file
	 * @return Status fatal 'importcantopen' if the file cannot be opened
	 */
	static function newFromFile( $filename ) {
		# Warnings are suppressed; failure is reported through the Status
		$file = @fopen( $filename, 'rt' );
		if ( !$file ) {
			return Status::newFatal( "importcantopen" );
		}
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Use a file uploaded through an HTML form.
	 *
	 * $_FILES is only read here. An earlier version bound a reference to
	 * $_FILES[$fieldname], which created an empty entry for a missing field.
	 *
	 * @param $fieldname String name of the upload field
	 * @return Status fatal with an import* message key when the upload is
	 *   missing or failed
	 */
	static function newFromUpload( $fieldname = "xmlimport" ) {
		if ( !isset( $_FILES[$fieldname] ) || empty( $_FILES[$fieldname]['name'] ) ) {
			return Status::newFatal( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		if ( !empty( $upload['error'] ) ) {
			switch ( $upload['error'] ) {
				case UPLOAD_ERR_INI_SIZE: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
				case UPLOAD_ERR_FORM_SIZE: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
					return Status::newFatal( 'importuploaderrorsize' );
				case UPLOAD_ERR_PARTIAL: # The uploaded file was only partially uploaded
					return Status::newFatal( 'importuploaderrorpartial' );
				case UPLOAD_ERR_NO_TMP_DIR: # Missing a temporary folder.
					return Status::newFatal( 'importuploaderrortemp' );
				# Any other code is not mapped here and continues to the
				# is_uploaded_file() check below.
			}
		}

		$fname = $upload['tmp_name'];
		if ( !is_uploaded_file( $fname ) ) {
			return Status::newFatal( 'importnofile' );
		}
		return self::newFromFile( $fname );
	}

	/**
	 * Fetch a URL and serve the response body from a temporary file.
	 *
	 * @param $url String address to fetch
	 * @param $method String HTTP method passed to Http::request()
	 * @return Status fatal 'importcantopen' if the request fails or no
	 *   temporary file can be created
	 */
	static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url );
		if ( $data === false ) {
			return Status::newFatal( 'importcantopen' );
		}

		$file = tmpfile();
		if ( !$file ) {
			# tmpfile() returns false when no temporary file can be created
			return Status::newFatal( 'importcantopen' );
		}
		fwrite( $file, $data );
		fflush( $file );
		# Rewind so reading starts at the beginning of the data just written
		fseek( $file, 0 );
		return Status::newGood( new ImportStreamSource( $file ) );
	}

	/**
	 * Import from Special:Export on another wiki reached via an interwiki prefix.
	 *
	 * @param $interwiki String interwiki prefix
	 * @param $page String page name; must not be empty
	 * @param $history Boolean adds history=1 to the request when true
	 * @param $templates Boolean adds templates=1 to the request when true
	 * @param $pageLinkDepth Integer sent as pagelink-depth when non-zero
	 * @return Status fatal 'import-noarticle' for an empty page name,
	 *   'importbadinterwiki' if the link is not an interwiki link, otherwise
	 *   the result of newFromURL()
	 */
	public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
		if ( $page == '' ) {
			return Status::newFatal( 'import-noarticle' );
		}

		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if ( is_null( $link ) || $link->getInterwiki() == '' ) {
			return Status::newFatal( 'importbadinterwiki' );
		}

		$params = array();
		if ( $history ) {
			$params['history'] = 1;
		}
		if ( $templates ) {
			$params['templates'] = 1;
		}
		if ( $pageLinkDepth ) {
			$params['pagelink-depth'] = $pageLinkDepth;
		}
		$url = $link->getFullUrl( $params );
		# For interwikis, use POST to avoid redirects.
		return self::newFromURL( $url, "POST" );
	}
}