* Added file description headers
[lhc/web/wiklou.git] / includes / Import.php
1 <?php
2 /**
3 * MediaWiki page data importer
4 *
5 * Copyright © 2003,2005 Brion Vibber <brion@pobox.com>
6 * http://www.mediawiki.org/
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License along
19 * with this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
21 * http://www.gnu.org/copyleft/gpl.html
22 *
23 * @file
24 * @ingroup SpecialPage
25 */
26
/**
 * Holds the data of one imported item (a page revision, a log entry or a
 * file upload) and knows how to write that item into the local wiki.
 *
 * The setters are used to fill the object; importOldRevision(),
 * importLogItem() and importUpload() then store it.
 *
 * @ingroup SpecialPage
 */
class WikiRevision {
	/** Title object of the target page; must be set through setTitle() */
	public $title = null;
	public $id = 0;
	/** Timestamp as stored by setTimestamp(), i.e. converted with wfTimestamp( TS_MW, ... ) */
	public $timestamp = "20010115000000";
	public $user = 0;
	/** User name or IP address of the contributor */
	public $user_text = "";
	public $text = "";
	public $comment = "";
	public $minor = false;
	/** Log type, used by importLogItem() */
	public $type = "";
	/** Log action, used by importLogItem() */
	public $action = "";
	/** Log parameters, used by importLogItem() */
	public $params = "";
	/** Source location of an uploaded file, fetched by downloadSource() */
	public $src = null;
	public $filename = null;
	public $size = 0;

	/**
	 * Set the title this item belongs to.
	 *
	 * @param $title object Title object
	 * @throws MWException if $title is null or not an object
	 */
	public function setTitle( $title ) {
		if( is_object( $title ) ) {
			$this->title = $title;
		} elseif( is_null( $title ) ) {
			throw new MWException( "WikiRevision given a null title in import. You may need to adjust \$wgLegalTitleChars." );
		} else {
			throw new MWException( "WikiRevision given non-object title in import." );
		}
	}

	public function setID( $id ) {
		$this->id = $id;
	}

	/**
	 * Store the timestamp, converted to TS_MW format by wfTimestamp().
	 *
	 * @param $ts string Timestamp, e.g. 2003-08-05T18:30:02Z
	 */
	public function setTimestamp( $ts ) {
		$this->timestamp = wfTimestamp( TS_MW, $ts );
	}

	public function setUsername( $user ) {
		$this->user_text = $user;
	}

	/** Store an IP address as contributor; shares its storage with setUsername(). */
	public function setUserIP( $ip ) {
		$this->user_text = $ip;
	}

	public function setText( $text ) {
		$this->text = $text;
	}

	public function setComment( $text ) {
		$this->comment = $text;
	}

	public function setMinor( $minor ) {
		$this->minor = (bool)$minor;
	}

	public function setSrc( $src ) {
		$this->src = $src;
	}

	public function setFilename( $filename ) {
		$this->filename = $filename;
	}

	/** Store the size; the value is converted to an integer. */
	public function setSize( $size ) {
		$this->size = intval( $size );
	}

	public function setType( $type ) {
		$this->type = $type;
	}

	public function setAction( $action ) {
		$this->action = $action;
	}

	public function setParams( $params ) {
		$this->params = $params;
	}

	public function getTitle() {
		return $this->title;
	}

	public function getID() {
		return $this->id;
	}

	public function getTimestamp() {
		return $this->timestamp;
	}

	/** @return string The contributor's name or IP address (not a User object) */
	public function getUser() {
		return $this->user_text;
	}

	public function getText() {
		return $this->text;
	}

	public function getComment() {
		return $this->comment;
	}

	public function getMinor() {
		return $this->minor;
	}

	public function getSrc() {
		return $this->src;
	}

	public function getFilename() {
		return $this->filename;
	}

	public function getSize() {
		return $this->size;
	}

	public function getType() {
		return $this->type;
	}

	public function getAction() {
		return $this->action;
	}

	public function getParams() {
		return $this->params;
	}

	/**
	 * Insert this item as a page revision, creating the page if needed.
	 *
	 * A revision of the same page with identical timestamp, user text and
	 * comment is treated as already imported and skipped.
	 *
	 * @return bool true if the revision was inserted, false if it was skipped
	 */
	public function importOldRevision() {
		$dbw = wfGetDB( DB_MASTER );

		# Sneak a single revision into place
		$user = User::newFromName( $this->getUser() );
		if( $user ) {
			$userId = intval( $user->getId() );
			$userText = $user->getName();
		} else {
			# No usable User object: attribute the edit to user ID 0
			# and keep the raw name text
			$userId = 0;
			$userText = $this->getUser();
		}

		// avoid memory leak...?
		$linkCache = LinkCache::singleton();
		$linkCache->clear();

		$article = new Article( $this->title );
		$pageId = $article->getId();
		if( $pageId == 0 ) {
			# must create the page...
			$pageId = $article->insertOn( $dbw );
			$created = true;
		} else {
			$created = false;

			$prior = $dbw->selectField( 'revision', '1',
				array( 'rev_page' => $pageId,
					'rev_timestamp' => $dbw->timestamp( $this->timestamp ),
					'rev_user_text' => $userText,
					'rev_comment' => $this->getComment() ),
				__METHOD__
			);
			if( $prior ) {
				// FIXME: this could fail slightly for multiple matches :P
				wfDebug( __METHOD__ . ": skipping existing revision for [[" .
					$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
				return false;
			}
		}

		# FIXME: Use original rev_id optionally (better for backups)
		# Insert the row
		$revision = new Revision( array(
			'page' => $pageId,
			'text' => $this->getText(),
			'comment' => $this->getComment(),
			'user' => $userId,
			'user_text' => $userText,
			'timestamp' => $this->timestamp,
			'minor_edit' => $this->minor,
		) );
		$revId = $revision->insertOn( $dbw );
		$changed = $article->updateIfNewerOn( $dbw, $revision );

		# To be on the safe side, point the global title at the imported
		# page while the update hooks run, then restore it.
		$tempTitle = $GLOBALS['wgTitle'];
		$GLOBALS['wgTitle'] = $this->title;

		if( $created ) {
			wfDebug( __METHOD__ . ": running onArticleCreate\n" );
			Article::onArticleCreate( $this->title );

			wfDebug( __METHOD__ . ": running create updates\n" );
			$article->createUpdates( $revision );

		} elseif( $changed ) {
			wfDebug( __METHOD__ . ": running onArticleEdit\n" );
			Article::onArticleEdit( $this->title );

			wfDebug( __METHOD__ . ": running edit updates\n" );
			$article->editUpdates(
				$this->getText(),
				$this->getComment(),
				$this->minor,
				$this->timestamp,
				$revId );
		}
		$GLOBALS['wgTitle'] = $tempTitle;

		return true;
	}

	/**
	 * Insert this item as a row of the logging table.
	 *
	 * An entry with identical type, action, timestamp, title, comment and
	 * parameters is treated as already imported and skipped.
	 *
	 * @return bool true if the entry was inserted, false if it was skipped
	 *  (no title set, or an identical entry already exists)
	 */
	public function importLogItem() {
		$dbw = wfGetDB( DB_MASTER );
		# FIXME: this will not record autoblocks
		if( !$this->getTitle() ) {
			wfDebug( __METHOD__ . ": skipping invalid {$this->type}/{$this->action} log time, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}
		# Check if it exists already
		// FIXME: use original log ID (better for backups)
		$prior = $dbw->selectField( 'logging', '1',
			array( 'log_type' => $this->getType(),
				'log_action' => $this->getAction(),
				'log_timestamp' => $dbw->timestamp( $this->timestamp ),
				'log_namespace' => $this->getTitle()->getNamespace(),
				'log_title' => $this->getTitle()->getDBkey(),
				'log_comment' => $this->getComment(),
				#'log_user_text' => $this->user_text,
				'log_params' => $this->params ),
			__METHOD__
		);
		// FIXME: this could fail slightly for multiple matches :P
		if( $prior ) {
			wfDebug( __METHOD__ . ": skipping existing item for Log:{$this->type}/{$this->action}, timestamp " .
				$this->timestamp . "\n" );
			return false;
		}
		$log_id = $dbw->nextSequenceValue( 'logging_log_id_seq' );
		$data = array(
			'log_id' => $log_id,
			'log_type' => $this->type,
			'log_action' => $this->action,
			'log_timestamp' => $dbw->timestamp( $this->timestamp ),
			'log_user' => User::idFromName( $this->user_text ),
			#'log_user_text' => $this->user_text,
			'log_namespace' => $this->getTitle()->getNamespace(),
			'log_title' => $this->getTitle()->getDBkey(),
			'log_comment' => $this->getComment(),
			'log_params' => $this->params
		);
		$dbw->insert( 'logging', $data, __METHOD__ );

		return true;
	}

	/**
	 * Fetch the file named by getSrc() and upload it under this item's title.
	 *
	 * Still marked as a stub by its own debug output; see the todo below.
	 *
	 * @return bool true if the upload status is good, false otherwise
	 */
	public function importUpload() {
		wfDebug( __METHOD__ . ": STUB\n" );

		/*
			// from file revert...
			$source = $this->file->getArchiveVirtualUrl( $this->oldimage );
			$comment = $wgRequest->getText( 'wpComment' );
			// TODO: Preserve file properties from database instead of reloading from file
			$status = $this->file->upload( $source, $comment, $comment );
			if( $status->isGood() ) {
		*/

		/*
			// from file upload...
			$this->mLocalFile = wfLocalFile( $nt );
			$this->mDestName = $this->mLocalFile->getName();
			//....
			$status = $this->mLocalFile->upload( $this->mTempPath, $this->mComment, $pageText,
				File::DELETE_SOURCE, $this->mFileProps );
			if ( !$status->isGood() ) {
				$resultDetails = array( 'internal' => $status->getWikiText() );
		*/

		// @todo Fixme: upload() uses $wgUser, which is wrong here
		// it may also create a page without our desire, also wrong potentially.
		// and, it will record a *current* upload, but we might want an archive version here

		$file = wfLocalFile( $this->getTitle() );
		if( !$file ) {
			wfDebug( "IMPORT: Bad file. :(\n" );
			return false;
		}

		$source = $this->downloadSource();
		if( !$source ) {
			wfDebug( "IMPORT: Could not fetch remote file. :(\n" );
			return false;
		}

		$status = $file->upload( $source,
			$this->getComment(),
			$this->getComment(), // Initial page, if none present...
			File::DELETE_SOURCE,
			false, // props...
			$this->getTimestamp() );

		if( $status->isGood() ) {
			// yay?
			wfDebug( "IMPORT: is ok?\n" );
			return true;
		}

		wfDebug( "IMPORT: is bad? " . $status->getXml() . "\n" );
		return false;
	}

	/**
	 * Download the file at getSrc() into a temporary file.
	 *
	 * Does nothing when $wgEnableUploads is off.
	 *
	 * @return string|bool Path of the temporary file, or false on failure
	 */
	public function downloadSource() {
		global $wgEnableUploads;
		if( !$wgEnableUploads ) {
			return false;
		}

		$tempo = tempnam( wfTempDir(), 'download' );
		$f = fopen( $tempo, 'wb' );
		if( !$f ) {
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			return false;
		}

		// @todo Fixme!
		$src = $this->getSrc();
		$data = Http::get( $src );
		if( !$data ) {
			wfDebug( "IMPORT: couldn't fetch source $src\n" );
			fclose( $f );
			unlink( $tempo );
			return false;
		}

		$written = fwrite( $f, $data );
		fclose( $f );
		if( $written !== strlen( $data ) ) {
			# A failed or short write would hand a truncated file to the
			# uploader; treat it like any other temp file failure.
			wfDebug( "IMPORT: couldn't write to temp file $tempo\n" );
			unlink( $tempo );
			return false;
		}

		return $tempo;
	}

}
373
/**
 * Import source that serves an in-memory string as one single chunk.
 *
 * @ingroup SpecialPage
 */
class ImportStringSource {
	/** The complete data to hand out */
	public $mString;
	/** Whether the string has already been returned by readChunk() */
	public $mRead;

	/**
	 * @param $string string Data to be read through this source
	 */
	public function __construct( $string ) {
		$this->mString = $string;
		$this->mRead = false;
	}

	/**
	 * @return bool true once readChunk() has delivered the string
	 */
	public function atEnd() {
		return $this->mRead;
	}

	/**
	 * Return the whole string on the first call, false on every later call.
	 *
	 * @return string|bool
	 */
	public function readChunk() {
		if( $this->atEnd() ) {
			return false;
		}
		$this->mRead = true;
		return $this->mString;
	}
}
397
/**
 * Import source that reads its data in chunks from an open stream handle.
 *
 * The static factories build such a source from a local file, an HTTP file
 * upload, a URL or an interwiki export link; on failure they return a
 * WikiErrorMsg instead of a source.
 *
 * @ingroup SpecialPage
 */
class ImportStreamSource {
	/** Stream handle the data is read from */
	public $mHandle;

	/**
	 * @param $handle resource Open, readable stream handle
	 */
	public function __construct( $handle ) {
		$this->mHandle = $handle;
	}

	/**
	 * @return bool Whether the end of the stream has been reached
	 */
	public function atEnd() {
		return feof( $this->mHandle );
	}

	/**
	 * Read the next chunk of up to 32768 bytes from the stream.
	 *
	 * @return string|bool Result of fread()
	 */
	public function readChunk() {
		return fread( $this->mHandle, 32768 );
	}

	/**
	 * Open a local file as import source.
	 *
	 * @param $filename string Path of the file
	 * @return ImportStreamSource|WikiErrorMsg
	 */
	public static function newFromFile( $filename ) {
		# Warnings are suppressed; failure is reported through the return value
		$file = @fopen( $filename, 'rt' );
		if( !$file ) {
			return new WikiErrorMsg( "importcantopen" );
		}
		return new ImportStreamSource( $file );
	}

	/**
	 * Use a file uploaded through an HTML form as import source.
	 *
	 * @param $fieldname string Name of the form's file field
	 * @return ImportStreamSource|WikiErrorMsg
	 */
	public static function newFromUpload( $fieldname = "xmlimport" ) {
		# Test with empty() rather than binding a reference, which would
		# create a null $_FILES entry for a field that was never submitted.
		if( empty( $_FILES[$fieldname]['name'] ) ) {
			return new WikiErrorMsg( 'importnofile' );
		}
		$upload = $_FILES[$fieldname];

		if( !empty( $upload['error'] ) ) {
			switch( $upload['error'] ) {
				case UPLOAD_ERR_INI_SIZE: # The uploaded file exceeds the upload_max_filesize directive in php.ini.
				case UPLOAD_ERR_FORM_SIZE: # The uploaded file exceeds the MAX_FILE_SIZE directive that was specified in the HTML form.
					return new WikiErrorMsg( 'importuploaderrorsize' );
				case UPLOAD_ERR_PARTIAL: # The uploaded file was only partially uploaded
					return new WikiErrorMsg( 'importuploaderrorpartial' );
				case UPLOAD_ERR_NO_TMP_DIR: # Missing a temporary folder. Introduced in PHP 4.3.10 and PHP 5.0.3.
					return new WikiErrorMsg( 'importuploaderrortemp' );
				# Any other error code falls through to the
				# is_uploaded_file() check below.
			}
		}

		$fname = $upload['tmp_name'];
		if( is_uploaded_file( $fname ) ) {
			return ImportStreamSource::newFromFile( $fname );
		}
		return new WikiErrorMsg( 'importnofile' );
	}

	/**
	 * Fetch a URL and use the response body as import source.
	 *
	 * The body is buffered in a temporary file that is rewound before the
	 * source is returned.
	 *
	 * @param $url string URL to fetch
	 * @param $method string HTTP method passed to Http::request()
	 * @return ImportStreamSource|WikiErrorMsg
	 */
	public static function newFromURL( $url, $method = 'GET' ) {
		wfDebug( __METHOD__ . ": opening $url\n" );
		# Use the standard HTTP fetch function; it times out
		# quicker and sorts out user-agent problems which might
		# otherwise prevent importing from large sites, such
		# as the Wikimedia cluster, etc.
		$data = Http::request( $method, $url );
		if( $data === false ) {
			return new WikiErrorMsg( 'importcantopen' );
		}

		$file = tmpfile();
		if( $file === false ) {
			# No temporary file could be created to buffer the response
			return new WikiErrorMsg( 'importcantopen' );
		}
		fwrite( $file, $data );
		fflush( $file );
		fseek( $file, 0 );
		return new ImportStreamSource( $file );
	}

	/**
	 * Import a page from another wiki through its Special:Export page.
	 *
	 * @param $interwiki string Interwiki prefix of the source wiki
	 * @param $page string Name of the page to export
	 * @param $history bool Adds history=1 to the export request
	 * @param $templates bool Adds templates=1 to the export request
	 * @param $pageLinkDepth int Sent as pagelink-depth when non-zero
	 * @return ImportStreamSource|WikiErrorMsg
	 */
	public static function newFromInterwiki( $interwiki, $page, $history = false, $templates = false, $pageLinkDepth = 0 ) {
		if( $page == '' ) {
			return new WikiErrorMsg( 'import-noarticle' );
		}
		$link = Title::newFromText( "$interwiki:Special:Export/$page" );
		if( is_null( $link ) || $link->getInterwiki() == '' ) {
			return new WikiErrorMsg( 'importbadinterwiki' );
		}

		$params = array();
		if( $history ) {
			$params['history'] = 1;
		}
		if( $templates ) {
			$params['templates'] = 1;
		}
		if( $pageLinkDepth ) {
			$params['pagelink-depth'] = $pageLinkDepth;
		}
		$url = $link->getFullUrl( $params );
		# For interwikis, use POST to avoid redirects.
		return ImportStreamSource::newFromURL( $url, "POST" );
	}
}