maintenance/importImages.php
<?php
/**
 * Import one or more images from the local file system into the wiki without
 * using the web-based interface.
 *
 * "Smart import" additions:
 * - aim: preserve the essential metadata (user, description) when importing media
 *   files from an existing wiki.
 * - process:
 *   - interface with the source wiki instead of relying on bare files only (see --source-wiki-url).
 *   - fetch metadata from the source wiki for each file to import.
 *   - commit the fetched metadata to the destination wiki while submitting.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance
 * @author Rob Church <robchur@gmail.com>
 * @author Mij <mij@bitchx.it>
 */
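
// Example invocation (the path, username and comment extension below are
// illustrative only, not defaults shipped with the script):
//   php importImages.php --user=ExampleBot --comment-ext=txt --search-recursively /tmp/media-import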

$optionsWithArgs = [
	'extensions', 'comment', 'comment-file', 'comment-ext', 'summary', 'user',
	'license', 'sleep', 'limit', 'from', 'source-wiki-url', 'timestamp',
];

$optionsWithoutArgs = [
	'protect', 'unprotect', 'search-recursively', 'check-userblock', 'overwrite',
	'skip-dupes', 'dry'
];
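
# The option lists above are consumed by commandLine.inc, which parses the
# command line into $options (named options) and $args (positional arguments).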

require_once __DIR__ . '/commandLine.inc';
require_once __DIR__ . '/importImages.inc';
$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;

echo "Import Images\n\n";

# Need a path
if ( count( $args ) == 0 ) {
	showUsage();
}

$dir = $args[0];

# Check protection options
if ( isset( $options['protect'] ) && isset( $options['unprotect'] ) ) {
	die( "Cannot specify both protect and unprotect. Only one is allowed.\n" );
}

if ( isset( $options['protect'] ) && $options['protect'] == 1 ) {
	die( "You must specify a protection option.\n" );
}

# Prepare the list of allowed extensions
global $wgFileExtensions;
$extensions = isset( $options['extensions'] )
	? explode( ',', strtolower( $options['extensions'] ) )
	: $wgFileExtensions;

# Search the path provided for candidates for import
$files = findFiles( $dir, $extensions, isset( $options['search-recursively'] ) );

# Initialise the user for this operation
$user = isset( $options['user'] )
	? User::newFromName( $options['user'] )
	: User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
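# If --user named an invalid or unusable username, fall back to the system account.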
if ( !$user instanceof User ) {
	$user = User::newSystemUser( 'Maintenance script', [ 'steal' => true ] );
}
$wgUser = $user;

# Get block check. If a value is given, this specifies how often the check is performed
if ( isset( $options['check-userblock'] ) ) {
	if ( !$options['check-userblock'] ) {
		$checkUserBlock = 1;
	} else {
		$checkUserBlock = (int)$options['check-userblock'];
	}
} else {
	$checkUserBlock = false;
}

# Get --from
MediaWiki\suppressWarnings();
$from = $options['from'];
MediaWiki\restoreWarnings();

# Get sleep time.
MediaWiki\suppressWarnings();
$sleep = $options['sleep'];
MediaWiki\restoreWarnings();

if ( $sleep ) {
	$sleep = (int)$sleep;
}

# Get limit number
MediaWiki\suppressWarnings();
$limit = $options['limit'];
MediaWiki\restoreWarnings();

if ( $limit ) {
	$limit = (int)$limit;
}

$timestamp = isset( $options['timestamp'] ) ? $options['timestamp'] : false;

# Get the upload comment. Provide a default one in case there's no comment given.
$comment = 'Importing file';

if ( isset( $options['comment-file'] ) ) {
	$comment = file_get_contents( $options['comment-file'] );
	if ( $comment === false || $comment === null ) {
		die( "failed to read comment file: {$options['comment-file']}\n" );
	}
} elseif ( isset( $options['comment'] ) ) {
	$comment = $options['comment'];
}

$commentExt = isset( $options['comment-ext'] ) ? $options['comment-ext'] : false;

$summary = isset( $options['summary'] ) ? $options['summary'] : '';

# Get the license specifier
$license = isset( $options['license'] ) ? $options['license'] : '';

# Batch "upload" operation
$count = count( $files );
if ( $count > 0 ) {

	foreach ( $files as $file ) {
		$base = UtfNormal\Validator::cleanUp( wfBaseName( $file ) );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !is_object( $title ) ) {
			echo "{$base} could not be imported; a valid title cannot be produced\n";
			continue;
		}

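		# --from support: ignore every file until the named one is reached,
		# then resume importing from that point on.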
		if ( $from ) {
			if ( $from == $title->getDBkey() ) {
				$from = null;
			} else {
				$ignored++;
				continue;
			}
		}

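		# With --check-userblock, periodically reload the user and stop if the
		# account has been blocked since the import started.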
		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $user->isBlocked() ) {
				echo $user->getName() . " was blocked! Aborting.\n";
				break;
			}
		}

		# Check existence
		$image = wfLocalFile( $title );
		if ( $image->exists() ) {
			if ( isset( $options['overwrite'] ) ) {
				echo "{$base} exists, overwriting...";
				$svar = 'overwritten';
			} else {
				echo "{$base} exists, skipping\n";
				$skipped++;
				continue;
			}
		} else {
			if ( isset( $options['skip-dupes'] ) ) {
				$repo = $image->getRepo();
				# XXX: we end up calculating this again when actually uploading. that sucks.
				$sha1 = FSFile::getSha1Base36FromPath( $file );

				$dupes = $repo->findBySha1( $sha1 );

				if ( $dupes ) {
					echo "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n";
					$skipped++;
					continue;
				}
			}

			echo "Importing {$base}...";
			$svar = 'added';
		}

		if ( isset( $options['source-wiki-url'] ) ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
			if ( $real_comment === false ) {
				$commentText = $comment;
			} else {
				$commentText = $real_comment;
			}

			/* find user directly from source wiki, through MW's API */
			$real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
			if ( $real_user === false ) {
				$wgUser = $user;
			} else {
				$wgUser = User::newFromName( $real_user );
				if ( $wgUser === false ) {
					# user does not exist in target wiki
					echo "failed: user '$real_user' does not exist in target wiki.\n";
					continue;
				}
			}
		} else {
			# Find comment text
			$commentText = false;

			if ( $commentExt ) {
				$f = findAuxFile( $file, $commentExt );
				if ( !$f ) {
					echo " No comment file with extension {$commentExt} found "
						. "for {$file}, using default comment. ";
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						echo " Failed to load comment file {$f}, using default comment. ";
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( isset( $options['dry'] ) ) {
			echo " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... ";
		} else {
			$mwProps = new MWFileProps( MimeMagic::singleton() );
			$props = $mwProps->getPropsFromPath( $file, true );
			$flags = 0;
			$publishOptions = [];
			$handler = MediaHandler::getHandler( $props['mime'] );
			if ( $handler ) {
				$publishOptions['headers'] = $handler->getStreamHeaders( $props['metadata'] );
			} else {
				$publishOptions['headers'] = [];
			}
			$archive = $image->publish( $file, $flags, $publishOptions );
			if ( !$archive->isGood() ) {
				echo "failed. (" .
					$archive->getWikiText( false, false, 'en' ) .
					")\n";
				$failed++;
				continue;
			}
		}

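		# Build the initial file page text from the description and license template;
		# unless --summary was given, it also serves as the upload summary.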
		$commentText = SpecialUpload::getInitialPageText( $commentText, $license );
		if ( !isset( $options['summary'] ) ) {
			$summary = $commentText;
		}

		if ( isset( $options['dry'] ) ) {
			echo "done.\n";
		} elseif ( $image->recordUpload2(
			$archive->value,
			$summary,
			$commentText,
			$props,
			$timestamp
		) ) {
			# We're done!
			echo "done.\n";

			$doProtect = false;

			global $wgRestrictionLevels;

			$protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;

			if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
				$doProtect = true;
			}
			if ( isset( $options['unprotect'] ) ) {
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				echo "\nWaiting for replica DBs...\n";
				sleep( 2 ); # Why this sleep?
				wfWaitForSlaves();

				echo "\nSetting image restrictions ... ";

				$cascade = false;
				$restrictions = [];
				foreach ( $title->getRestrictionTypes() as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = WikiPage::factory( $title );
				$status = $page->doUpdateRestrictions( $restrictions, [], $cascade, '', $user );
				echo ( $status->isOK() ? 'done' : 'failed' ) . "\n";
			}
		} else {
			echo "failed. (at recordUpload stage)\n";
			$svar = 'failed';
		}

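		# Bump whichever counter $svar currently names ('added', 'overwritten' or 'failed').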
		$$svar++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}

		if ( $sleep ) {
			sleep( $sleep );
		}
	}

	# Print out some statistics
	echo "\n";
	foreach (
		[
			'count' => 'Found',
			'limit' => 'Limit',
			'ignored' => 'Ignored',
			'added' => 'Added',
			'skipped' => 'Skipped',
			'overwritten' => 'Overwritten',
			'failed' => 'Failed'
		] as $var => $desc
	) {
		if ( $$var > 0 ) {
			echo "{$desc}: {$$var}\n";
		}
	}
} else {
	echo "No suitable files could be found for import.\n";
}

exit( 0 );

function showUsage( $reason = false ) {
	if ( $reason ) {
		echo $reason . "\n";
	}

	echo <<<TEXT
Imports images and other media files into the wiki.
USAGE: php importImages.php [options] <dir>

<dir> : Path to the directory containing images to be imported

Options:
--extensions=<exts>       Comma-separated list of allowable extensions, defaults
                          to \$wgFileExtensions.
--overwrite               Overwrite existing images with the same name (default
                          is to skip them).
--limit=<num>             Limit the number of images to process. Ignored or
                          skipped images are not counted.
--from=<name>             Ignore all files until the one with the given name.
                          Useful for resuming aborted imports. <name> should be
                          the file's canonical database form.
--skip-dupes              Skip images that were already uploaded under a different
                          name (check SHA1).
--search-recursively      Search recursively for files in subdirectories.
--sleep=<sec>             Sleep between files. Useful mostly for debugging.
--user=<username>         Set username of uploader, default 'Maintenance script'.
--check-userblock         Check if the user got blocked during import.
--comment=<text>          Set file description, default 'Importing file'.
--comment-file=<file>     Set description to the content of <file>.
--comment-ext=<ext>       Causes the description for each file to be loaded from a
                          file with the same name, but the extension <ext>. If a
                          global description is also given, it is appended.
--license=<code>          Use an optional license template.
--dry                     Dry run; don't import anything.
--protect=<protect>       Specify the protect value (autoconfirmed, sysop).
--summary=<summary>       Upload summary; the description will be used if not
                          provided.
--timestamp=<timestamp>   Override upload time/date; all MediaWiki timestamp
                          formats are accepted.
--unprotect               Unprotect all uploaded images.
--source-wiki-url=<url>   If specified, take user and comment data for each
                          imported file from this URL (via the source wiki's API).
                          For example, --source-wiki-url="http://en.wikipedia.org/"

TEXT;
	exit( 1 );
}