Merge "New hook for filters on Special:Contributions form"
[lhc/web/wiklou.git] / maintenance / importImages.php
1 <?php
2 /**
3 * Import one or more images from the local file system into the wiki without
4 * using the web-based interface.
5 *
6 * "Smart import" additions:
7 * - aim: preserve the essential metadata (user, description) when importing media
8 * files from an existing wiki.
9 * - process:
10 * - interface with the source wiki, don't use bare files only (see --source-wiki-url).
11 * - fetch metadata from source wiki for each file to import.
12 * - commit the fetched metadata to the destination wiki while submitting.
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License along
25 * with this program; if not, write to the Free Software Foundation, Inc.,
26 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27 * http://www.gnu.org/copyleft/gpl.html
28 *
29 * @file
30 * @ingroup Maintenance
31 * @author Rob Church <robchur@gmail.com>
32 * @author Mij <mij@bitchx.it>
33 */
34
# Names of the command line options that take a value. This is assigned before
# commandLine.inc is included; presumably that include consumes it while
# parsing the command line into $options and $args -- TODO confirm.
$optionsWithArgs = array(
	'extensions',
	'comment',
	'comment-file',
	'comment-ext',
	'summary',
	'user',
	'license',
	'sleep',
	'limit',
	'from',
	'source-wiki-url',
	'timestamp',
);
require_once __DIR__ . '/commandLine.inc';
require_once __DIR__ . '/importImages.inc';

# Running counters for this run
$processed = 0;
$added = 0;
$ignored = 0;
$skipped = 0;
$overwritten = 0;
$failed = 0;

echo "Import Images\n\n";

# The directory to import from is mandatory; showUsage() prints help and exits
if ( !count( $args ) ) {
	showUsage();
}
$dir = $args[0];

# Sanity-check the protection options before doing any work
if ( isset( $options['protect'] ) ) {
	# --protect and --unprotect contradict each other
	if ( isset( $options['unprotect'] ) ) {
		die( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	# A value of 1 is treated as "--protect given without a level"
	if ( $options['protect'] == 1 ) {
		die( "You must specify a protection option.\n" );
	}
}
60
# Prepare the list of allowed extensions; --extensions overrides the wiki's
# configured $wgFileExtensions.
global $wgFileExtensions;
$extensions = isset( $options['extensions'] )
	? explode( ',', strtolower( $options['extensions'] ) )
	: $wgFileExtensions;

# Search the path provided for candidates for import
$files = findFiles( $dir, $extensions, isset( $options['search-recursively'] ) );

# Initialise the user for this operation. Fall back to the system user when
# --user is absent or User::newFromName() does not return a User object.
$user = isset( $options['user'] )
	? User::newFromName( $options['user'] )
	: User::newSystemUser( 'Maintenance script', array( 'steal' => true ) );
if ( !$user instanceof User ) {
	$user = User::newSystemUser( 'Maintenance script', array( 'steal' => true ) );
}
$wgUser = $user;

# Get block check. If a value is given, this specifies how often the check is
# performed (once every N processed files). The interval is clamped to at least
# 1 so that an empty, zero or non-numeric value cannot silently disable a check
# that was explicitly requested.
if ( isset( $options['check-userblock'] ) ) {
	$checkUserBlock = max( 1, (int)$options['check-userblock'] );
} else {
	$checkUserBlock = false;
}

# Get --from (null when resuming is not requested)
$from = isset( $options['from'] ) ? $options['from'] : null;

# Get sleep time in seconds (0 means no pause between files)
$sleep = isset( $options['sleep'] ) ? (int)$options['sleep'] : 0;

# Get limit number (0 means unlimited)
$limit = isset( $options['limit'] ) ? (int)$options['limit'] : 0;

$timestamp = isset( $options['timestamp'] ) ? $options['timestamp'] : false;

# Get the upload comment. Provide a default one in case there's no comment given.
# --comment-file takes precedence over --comment.
$comment = 'Importing file';

if ( isset( $options['comment-file'] ) ) {
	$comment = file_get_contents( $options['comment-file'] );
	# file_get_contents() signals failure with false
	if ( $comment === false ) {
		die( "failed to read comment file: {$options['comment-file']}\n" );
	}
} elseif ( isset( $options['comment'] ) ) {
	$comment = $options['comment'];
}

# Extension of the optional per-file description files, or false if unused
$commentExt = isset( $options['comment-ext'] ) ? $options['comment-ext'] : false;

$summary = isset( $options['summary'] ) ? $options['summary'] : '';

# Get the license specifier
$license = isset( $options['license'] ) ? $options['license'] : '';
134 # Batch "upload" operation
# Batch "upload" operation: walk over every candidate file, publish it into the
# local file repository, record the upload and optionally change protection.
$count = count( $files );
if ( $count > 0 ) {

	foreach ( $files as $file ) {
		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !is_object( $title ) ) {
			echo "{$base} could not be imported; a valid title cannot be produced\n";
			continue;
		}

		# --from: ignore everything until the named file is reached. That file
		# itself is processed, and $from is cleared so later files pass through.
		if ( $from ) {
			if ( $from == $title->getDBkey() ) {
				$from = null;
			} else {
				$ignored++;
				continue;
			}
		}

		# Periodically re-check whether the importing user has been blocked
		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $user->isBlocked() ) {
				echo $user->getName() . " was blocked! Aborting.\n";
				break;
			}
		}

		# Check existence. $svar holds the NAME of the counter variable that
		# is incremented once this file has been handled.
		$image = wfLocalFile( $title );
		if ( $image->exists() ) {
			if ( isset( $options['overwrite'] ) ) {
				echo "{$base} exists, overwriting...";
				$svar = 'overwritten';
			} else {
				echo "{$base} exists, skipping\n";
				$skipped++;
				continue;
			}
		} else {
			if ( isset( $options['skip-dupes'] ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );

				$dupes = $repo->findBySha1( $sha1 );

				if ( $dupes ) {
					echo "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n";
					$skipped++;
					continue;
				}
			}

			echo "Importing {$base}...";
			$svar = 'added';
		}

		if ( isset( $options['source-wiki-url'] ) ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
			if ( $real_comment === false ) {
				$commentText = $comment;
			} else {
				$commentText = $real_comment;
			}

			/* find user directly from source wiki, through MW's API */
			$real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
			if ( $real_user === false ) {
				$wgUser = $user;
			} else {
				# Resolve into a local first so that the global $wgUser is
				# never left holding false when the lookup fails.
				$uploader = User::newFromName( $real_user );
				if ( $uploader === false ) {
					# user does not exist in target wiki; terminate the output
					# line and account for the file in the statistics.
					echo "failed: user '$real_user' does not exist in target wiki.\n";
					$failed++;
					continue;
				}
				$wgUser = $uploader;
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = findAuxFile( $file, $commentExt );
				if ( !$f ) {
					echo " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment. ";
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						echo " Failed to load comment file {$f}, using default comment. ";
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file. In a dry run nothing is published, so $archive and
		# $props stay unset; they are only read on the non-dry path below.
		if ( isset( $options['dry'] ) ) {
			echo " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... ";
		} else {
			$props = FSFile::getPropsFromPath( $file );
			$flags = 0;
			$publishOptions = array();
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getStreamHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = array();
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				echo "failed. (" .
					$archive->getWikiText() .
					")\n";
				$failed++;
				continue;
			}
		}

		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		# Without an explicit --summary the page text doubles as the summary
		if ( !isset( $options['summary'] ) ) {
			$summary = $commentText;
		}

		if ( isset( $options['dry'] ) ) {
			echo "done.\n";
		} elseif ( $image->recordUpload2(
			$archive->value,
			$summary,
			$commentText,
			$props,
			$timestamp
		) ) {
			# We're done!
			echo "done.\n";

			$doProtect = false;

			global $wgRestrictionLevels;

			$protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;

			# Only protect with a level listed in $wgRestrictionLevels
			if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
				$doProtect = true;
			}
			# --unprotect applies an empty restriction level
			if ( isset( $options['unprotect'] ) ) {
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				echo "\nWaiting for slaves...\n";
				// Wait for slaves.
				sleep( 2 ); # Why this sleep?
				wfWaitForSlaves();

				echo "\nSetting image restrictions ... ";

				$cascade = false;
				$restrictions = array();
				foreach ( $title->getRestrictionTypes() as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = WikiPage::factory( $title );
				$status = $page->doUpdateRestrictions( $restrictions, array(), $cascade, '', $user );
				echo ( $status->isOK() ? 'done' : 'failed' ) . "\n";
			}
		} else {
			echo "failed. (at recordUpload stage)\n";
			$svar = 'failed';
		}

		# Bump whichever counter $svar names (added / overwritten / failed)
		$$svar++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}

		if ( $sleep ) {
			sleep( $sleep );
		}
	}

	# Print out some statistics; only counters greater than zero are shown
	echo "\n";
	foreach (
		array(
			'count' => 'Found',
			'limit' => 'Limit',
			'ignored' => 'Ignored',
			'added' => 'Added',
			'skipped' => 'Skipped',
			'overwritten' => 'Overwritten',
			'failed' => 'Failed'
		) as $var => $desc
	) {
		if ( $$var > 0 ) {
			echo "{$desc}: {$$var}\n";
		}
	}
} else {
	echo "No suitable files could be found for import.\n";
}

exit( 0 );
350
/**
 * Print the command line help and terminate the script with exit status 1.
 *
 * @param string|bool $reason Optional message printed on its own line before
 *   the usage text; nothing extra is printed when it is falsy.
 */
function showUsage( $reason = false ) {
	if ( $reason ) {
		echo $reason . "\n";
	}

	echo <<<TEXT
Imports images and other media files into the wiki
USAGE: php importImages.php [options] <dir>

<dir> : Path to the directory containing images to be imported

Options:
--extensions=<exts>     Comma-separated list of allowable extensions, defaults
                        to \$wgFileExtensions.
--overwrite             Overwrite existing images with the same name (default
                        is to skip them).
--limit=<num>           Limit the number of images to process. Ignored or
                        skipped images are not counted.
--from=<name>           Ignore all files until the one with the given name.
                        Useful for resuming aborted imports. <name> should be
                        the file's canonical database form.
--skip-dupes            Skip images that were already uploaded under a different
                        name (check SHA1).
--search-recursively    Search recursively for files in subdirectories.
--sleep=<sec>           Sleep between files. Useful mostly for debugging.
--user=<username>       Set username of uploader, default 'Maintenance script'.
--check-userblock[=<n>] Check if the user got blocked during import. If <n> is
                        given, check only once every <n> processed files.
--comment=<text>        Set file description, default 'Importing file'.
--comment-file=<file>   Set description to the content of <file>.
--comment-ext=<ext>     Causes the description for each file to be loaded from a
                        file with the same name, but the extension <ext>. The
                        global description is used when no such file is found
                        or it is empty.
--license=<code>        Use an optional license template.
--dry                   Dry run, don't import anything.
--protect=<protect>     Specify the protect value (autoconfirmed,sysop).
--summary=<summary>     Upload summary, description will be used if not
                        provided.
--timestamp=<timestamp> Override upload time/date, all MediaWiki timestamp
                        formats are accepted.
--unprotect             Unprotects all uploaded images.
--source-wiki-url=<url> If specified, take User and Comment data for each
                        imported file from this URL. For example,
                        --source-wiki-url="http://en.wikipedia.org/."

TEXT;
	exit( 1 );
}