Apply patch from Karsten Düsterloh in Bug #28103.
[lhc/web/wiklou.git] / maintenance / importImages.php
1 <?php
2
3 /**
4 * Maintenance script to import one or more images from the local file system into
5 * the wiki without using the web-based interface.
6 *
7 * "Smart import" additions:
8 * - aim: preserve the essential metadata (user, description) when importing media files from an existing wiki
9 * - process:
10 * - interface with the source wiki, don't use bare files only (see --source-wiki-url).
11 * - fetch metadata from source wiki for each file to import.
12 * - commit the fetched metadata to the destination wiki while submitting.
13 *
14 * This program is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
18 *
19 * This program is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License along
25 * with this program; if not, write to the Free Software Foundation, Inc.,
26 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
27 * http://www.gnu.org/copyleft/gpl.html
28 *
29 * @file
30 * @ingroup Maintenance
31 * @author Rob Church <robchur@gmail.com>
32 * @author Mij <mij@bitchx.it>
33 */
34
# Options that expect a value on the command line.
# NOTE(review): presumably consumed by commandLine.inc while it fills $options/$args — confirm.
$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from', 'source-wiki-url' );
require_once( dirname( __FILE__ ) . '/commandLine.inc' );
require_once( dirname( __FILE__ ) . '/importImages.inc' );

# Per-run counters. They are addressed by name through variable variables
# ($$svar when counting, $$var when printing the statistics), so the names matter.
$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;

echo( "Import Images\n\n" );

# Need a path
if ( count( $args ) > 0 ) {

	$dir = $args[0];

	# Check Protection
	if ( isset( $options['protect'] ) && isset( $options['unprotect'] ) ) {
		die( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
	}

	# A value of 1 is treated as "--protect was given without a level"
	if ( isset( $options['protect'] ) && $options['protect'] == 1 ) {
		die( "You must specify a protection option.\n" );
	}

	# Prepare the list of allowed extensions
	global $wgFileExtensions;
	$extensions = isset( $options['extensions'] )
		? explode( ',', strtolower( $options['extensions'] ) )
		: $wgFileExtensions;

	# Search the path provided for candidates for import
	$files = findFiles( $dir, $extensions );

	# Initialise the user for this operation; fall back to the default
	# account when the requested name does not yield a User object
	$user = isset( $options['user'] )
		? User::newFromName( $options['user'] )
		: User::newFromName( 'Maintenance script' );
	if ( !( $user instanceof User ) ) {
		$user = User::newFromName( 'Maintenance script' );
	}
	$wgUser = $user;

	# Get block check. If a value is given, it specifies how often (every N
	# processed files) the check is performed; a bare flag means every file.
	if ( isset( $options['check-userblock'] ) ) {
		$checkUserBlock = $options['check-userblock']
			? (int)$options['check-userblock']
			: 1;
	} else {
		$checkUserBlock = false;
	}

	# Get --from
	$from = isset( $options['from'] ) ? $options['from'] : null;

	# Get sleep time.
	$sleep = isset( $options['sleep'] ) ? $options['sleep'] : null;
	if ( $sleep ) {
		$sleep = (int)$sleep;
	}

	# Get limit number
	$limit = isset( $options['limit'] ) ? $options['limit'] : null;
	if ( $limit ) {
		$limit = (int)$limit;
	}

	# Get the upload comment. Provide a default one in case there's no comment given.
	$comment = 'Importing image file';

	if ( isset( $options['comment-file'] ) ) {
		$comment = file_get_contents( $options['comment-file'] );
		if ( $comment === false || $comment === null ) {
			die( "failed to read comment file: {$options['comment-file']}\n" );
		}
	} elseif ( isset( $options['comment'] ) ) {
		$comment = $options['comment'];
	}

	$commentExt = isset( $options['comment-ext'] ) ? $options['comment-ext'] : false;

	# Get the license specifier
	$license = isset( $options['license'] ) ? $options['license'] : '';

	# Work out the protection settings once; they depend only on the
	# command line options and are therefore the same for every file.
	global $wgRestrictionLevels;

	$doProtect = false;
	$restrictions = array();
	$protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;

	if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
		$restrictions['move'] = $protectLevel;
		$restrictions['edit'] = $protectLevel;
		$doProtect = true;
	}
	if ( isset( $options['unprotect'] ) ) {
		$restrictions['move'] = '';
		$restrictions['edit'] = '';
		$doProtect = true;
	}

	# Batch "upload" operation
	if ( ( $count = count( $files ) ) > 0 ) {

		foreach ( $files as $file ) {
			$base = wfBaseName( $file );

			# Validate a title
			$title = Title::makeTitleSafe( NS_FILE, $base );
			if ( !is_object( $title ) ) {
				echo( "{$base} could not be imported; a valid title cannot be produced\n" );
				continue;
			}

			# --from: ignore everything until the named file is reached,
			# then clear $from so that all following files are handled
			if ( $from ) {
				if ( $from === $title->getDBkey() ) {
					$from = null;
				} else {
					$ignored++;
					continue;
				}
			}

			if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
				$user->clearInstanceCache( 'name' ); // reload from DB!
				if ( $user->isBlocked() ) {
					echo( $user->getName() . " was blocked! Aborting.\n" );
					break;
				}
			}

			# Check existence. $svar names the counter to bump once the
			# file has been handled ('overwritten', 'added' or 'failed').
			$image = wfLocalFile( $title );
			if ( $image->exists() ) {
				if ( isset( $options['overwrite'] ) ) {
					echo( "{$base} exists, overwriting..." );
					$svar = 'overwritten';
				} else {
					echo( "{$base} exists, skipping\n" );
					$skipped++;
					continue;
				}
			} else {
				if ( isset( $options['skip-dupes'] ) ) {
					$repo = $image->getRepo();
					$sha1 = File::sha1Base36( $file ); # XXX: we end up calculating this again when actually uploading. that sucks.

					$dupes = $repo->findBySha1( $sha1 );

					if ( $dupes ) {
						echo( "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n" );
						$skipped++;
						continue;
					}
				}

				echo( "Importing {$base}..." );
				$svar = 'added';
			}

			if ( isset( $options['source-wiki-url'] ) ) {
				/* find comment text directly from source wiki, through MW's API */
				$real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
				$commentText = ( $real_comment === false ) ? $comment : $real_comment;

				/* find user directly from source wiki, through MW's API */
				$real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
				if ( $real_user === false ) {
					$wgUser = $user;
				} else {
					$wgUser = User::newFromName( $real_user );
					# NOTE(review): only a literal false is rejected here; whether
					# that covers every "unknown user" case should be confirmed
					# against User::newFromName().
					if ( $wgUser === false ) {
						# user does not exist in target wiki
						echo( "failed: user '$real_user' does not exist in target wiki.\n" );
						$failed++;
						continue;
					}
				}
			} else {
				# Find comment text
				$commentText = false;

				if ( $commentExt ) {
					$f = findAuxFile( $file, $commentExt );
					if ( !$f ) {
						echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
					} else {
						$commentText = file_get_contents( $f );
						# Test the read result, not the (already known good) path
						if ( $commentText === false ) {
							echo( " Failed to load comment file {$f}, using default comment. " );
						}
					}
				}

				# No (or empty/unreadable) per-file comment: use the global one
				if ( !$commentText ) {
					$commentText = $comment;
				}
			}

			# Import the file
			if ( isset( $options['dry'] ) ) {
				echo( " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... " );
			} else {
				$archive = $image->publish( $file );
				if ( !$archive->isGood() ) {
					echo( "failed. (" .
						$archive->getWikiText() .
						")\n" );
					$failed++;
					continue;
				}
			}

			# $archive is only set outside of dry runs; the dry-run branch
			# below must therefore stay first.
			if ( isset( $options['dry'] ) ) {
				echo( "done.\n" );
			} elseif ( $image->recordUpload( $archive->value, $commentText, $license ) ) {
				# We're done!
				echo( "done.\n" );
				if ( $doProtect ) {
					# Protect the file
					$article = new Article( $title );
					echo "\nWaiting for slaves...\n";
					// Wait for slaves.
					sleep( 2 ); # Why this sleep?
					wfWaitForSlaves();

					echo( "\nSetting image restrictions ... " );
					if ( $article->updateRestrictions( $restrictions ) ) {
						echo( "done.\n" );
					} else {
						echo( "failed.\n" );
					}
				}

			} else {
				echo( "failed. (at recordUpload stage)\n" );
				$svar = 'failed';
			}

			# Bump the counter selected above ($added, $overwritten or $failed)
			$$svar++;
			$processed++;

			if ( $limit && $processed >= $limit ) {
				break;
			}

			if ( $sleep ) {
				sleep( $sleep );
			}
		}

		# Print out some statistics; only values greater than zero are shown
		echo( "\n" );
		foreach ( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored',
			'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten',
			'failed' => 'Failed' ) as $var => $desc ) {
			if ( $$var > 0 ) {
				echo( "{$desc}: {$$var}\n" );
			}
		}

	} else {
		echo( "No suitable files could be found for import.\n" );
	}

} else {
	showUsage();
}

exit( 0 );
295
/**
 * Print the command line help for this script and terminate.
 *
 * @param $reason Mixed: optional message printed on its own line before the
 *   usage text; nothing extra is printed when it is false (or otherwise empty).
 * @return void This function does not return; it ends the script with exit status 1.
 */
function showUsage( $reason = false ) {
	if ( $reason ) {
		echo( $reason . "\n" );
	}

	// The closing TEXT marker is kept at column 0 on purpose.
	echo <<<TEXT
Imports images and other media files into the wiki
USAGE: php importImages.php [options] <dir>

<dir> : Path to the directory containing images to be imported

Options:
--extensions=<exts>     Comma-separated list of allowable extensions, defaults to \$wgFileExtensions
--overwrite             Overwrite existing images with the same name (default is to skip them)
--limit=<num>           Limit the number of images to process. Ignored or skipped images are not counted.
--from=<name>           Ignore all files until the one with the given name. Useful for resuming
                        aborted imports. <name> should be the file's canonical database form.
--skip-dupes            Skip images that were already uploaded under a different name (check SHA1)
--sleep=<sec>           Sleep between files. Useful mostly for debugging.
--user=<username>       Set username of uploader, default 'Maintenance script'
--check-userblock       Check if the user got blocked during import.
--comment=<text>        Set upload summary comment, default 'Importing image file'.
--comment-file=<file>   Set upload summary comment to the content of <file>.
--comment-ext=<ext>     Causes the comment for each file to be loaded from a file with the same name
                        but the extension <ext>. If no such file is found, the global comment is used instead.
--license=<code>        Use an optional license template
--dry                   Dry run, don't import anything
--protect=<protect>     Specify the protect value (autoconfirmed,sysop)
--unprotect             Unprotects all uploaded images
--source-wiki-url       if specified, take User and Comment data for each imported file from this URL.
                        For example, --source-wiki-url="http://en.wikipedia.org/"

TEXT;
	exit( 1 );
}