Merge "Follow-up I0b781c11 (2a55449): use User::getAutomaticGroups()."
[lhc/web/wiklou.git] / maintenance / importImages.php
1 <?php
2 /**
3 * Import one or more images from the local file system into the wiki without
4 * using the web-based interface.
5 *
6 * "Smart import" additions:
7 * - aim: preserve the essential metadata (user, description) when importing medias from an existing wiki
8 * - process:
9 * - interface with the source wiki, don't use bare files only (see --source-wiki-url).
10 * - fetch metadata from source wiki for each file to import.
11 * - commit the fetched metadata to the destination wiki while submitting.
12 *
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2 of the License, or
16 * (at your option) any later version.
17 *
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License along
24 * with this program; if not, write to the Free Software Foundation, Inc.,
25 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
26 * http://www.gnu.org/copyleft/gpl.html
27 *
28 * @file
29 * @ingroup Maintenance
30 * @author Rob Church <robchur@gmail.com>
31 * @author Mij <mij@bitchx.it>
32 */
33
# Options that take a value. This must be set before commandLine.inc is
# included (presumably that include parses the command line into $options
# and $args -- confirm against commandLine.inc).
$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from', 'source-wiki-url' );
require_once( __DIR__ . '/commandLine.inc' );
require_once( __DIR__ . '/importImages.inc' );

# Per-run counters. The import loop bumps some of these through a variable
# variable and the statistics printout reads them by name, so the names matter.
$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;

echo( "Import Images\n\n" );

# Need a path
if ( count( $args ) == 0 ) {
	# showUsage() prints the help text and terminates the script.
	showUsage();
}

$dir = $args[0];

# Check Protection
if ( isset( $options['protect'] ) && isset( $options['unprotect'] ) ) {
	die( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" );
}

# A value of 1 here means --protect was given without a level.
if ( isset( $options['protect'] ) && $options['protect'] == 1 ) {
	die( "You must specify a protection option.\n" );
}

# Prepare the list of allowed extensions
global $wgFileExtensions;
$extensions = isset( $options['extensions'] )
	? explode( ',', strtolower( $options['extensions'] ) )
	: $wgFileExtensions;

# Search the path provided for candidates for import
$files = findFiles( $dir, $extensions );

# Initialise the user for this operation; fall back to the default
# maintenance account when the requested name does not yield a User object.
$user = isset( $options['user'] )
	? User::newFromName( $options['user'] )
	: User::newFromName( 'Maintenance script' );
if ( !( $user instanceof User ) ) {
	$user = User::newFromName( 'Maintenance script' );
}
$wgUser = $user;

# Get block check. If a value is given, it specifies how often (every N
# processed files) the check is performed. A bare flag, an empty value or
# zero means "check before every file". The interval is clamped to at least
# 1 because a non-numeric value casts to 0, which would otherwise silently
# disable a check the caller explicitly asked for.
if ( isset( $options['check-userblock'] ) ) {
	$checkUserBlock = max( 1, (int)$options['check-userblock'] );
} else {
	$checkUserBlock = false;
}

# Get --from (null when not given)
$from = isset( $options['from'] ) ? $options['from'] : null;

# Get sleep time in seconds (null when not given).
$sleep = isset( $options['sleep'] ) ? (int)$options['sleep'] : null;

# Get limit number (null when not given; 0 means no limit).
$limit = isset( $options['limit'] ) ? (int)$options['limit'] : null;

# Get the upload comment. Provide a default one in case there's no comment given.
$comment = 'Importing image file';

if ( isset( $options['comment-file'] ) ) {
	# --comment-file takes precedence over --comment.
	$comment = file_get_contents( $options['comment-file'] );
	if ( $comment === false || $comment === null ) {
		die( "failed to read comment file: {$options['comment-file']}\n" );
	}
} elseif ( isset( $options['comment'] ) ) {
	$comment = $options['comment'];
}

# Extension of per-file comment files, or false when --comment-ext is not used.
$commentExt = isset( $options['comment-ext'] ) ? $options['comment-ext'] : false;

# Get the license specifier
$license = isset( $options['license'] ) ? $options['license'] : '';
117
# Batch "upload" operation
$count = count( $files );
if ( $count > 0 ) {

	foreach ( $files as $file ) {
		$base = wfBaseName( $file );

		# Validate a title
		$title = Title::makeTitleSafe( NS_FILE, $base );
		if ( !is_object( $title ) ) {
			echo( "{$base} could not be imported; a valid title cannot be produced\n" );
			continue;
		}

		# --from: ignore every file until the one whose DB key matches, then
		# clear $from so this file and all later ones are processed.
		# Compare strictly as strings so numeric-looking names (e.g. "1e3"
		# versus "1000") are never treated as equal.
		if ( $from ) {
			if ( (string)$from === (string)$title->getDBkey() ) {
				$from = null;
			} else {
				$ignored++;
				continue;
			}
		}

		# Every $checkUserBlock processed files, re-read the uploader and
		# abort the whole run if the account has been blocked meanwhile.
		if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
			$user->clearInstanceCache( 'name' ); // reload from DB!
			if ( $user->isBlocked() ) {
				echo( $user->getName() . " was blocked! Aborting.\n" );
				break;
			}
		}

		# Check existence. $svar holds the NAME of the counter variable
		# that is incremented once this file has been handled.
		$image = wfLocalFile( $title );
		if ( $image->exists() ) {
			if ( isset( $options['overwrite'] ) ) {
				echo( "{$base} exists, overwriting..." );
				$svar = 'overwritten';
			} else {
				echo( "{$base} exists, skipping\n" );
				$skipped++;
				continue;
			}
		} else {
			if ( isset( $options['skip-dupes'] ) ) {
				$repo = $image->getRepo();
				$sha1 = File::sha1Base36( $file ); # XXX: we end up calculating this again when actually uploading. that sucks.

				$dupes = $repo->findBySha1( $sha1 );

				if ( $dupes ) {
					echo( "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n" );
					$skipped++;
					continue;
				}
			}

			echo( "Importing {$base}..." );
			$svar = 'added';
		}

		if ( isset( $options['source-wiki-url'] ) ) {
			/* find comment text directly from source wiki, through MW's API */
			$real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
			$commentText = ( $real_comment === false ) ? $comment : $real_comment;

			/* find user directly from source wiki, through MW's API */
			$real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
			if ( $real_user === false ) {
				$wgUser = $user;
			} else {
				$wgUser = User::newFromName( $real_user );
				if ( !( $wgUser instanceof User ) ) {
					# No usable User object for the source uploader. End the
					# pending "Importing ..." line, count the file as failed,
					# and restore the global so it never stays a non-object.
					echo( "failed: user '$real_user' does not exist in target wiki.\n" );
					$wgUser = $user;
					$failed++;
					continue;
				}
			}
		} else {
			# Find comment text: a per-file comment file if --comment-ext is
			# in use and one can be read, otherwise the global comment.
			$commentText = false;

			if ( $commentExt ) {
				$f = findAuxFile( $file, $commentExt );
				if ( !$f ) {
					echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
				} else {
					$commentText = file_get_contents( $f );
					if ( !$commentText ) {
						echo( " Failed to load comment file {$f}, using default comment. " );
					}
				}
			}

			if ( !$commentText ) {
				$commentText = $comment;
			}
		}

		# Import the file
		if ( isset( $options['dry'] ) ) {
			echo( " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... " );
		} else {
			$archive = $image->publish( $file );
			if ( !$archive->isGood() ) {
				echo( "failed. (" .
					$archive->getWikiText() .
					")\n" );
				$failed++;
				continue;
			}
		}

		if ( isset( $options['dry'] ) ) {
			echo( "done.\n" );
		} elseif ( $image->recordUpload( $archive->value, $commentText, $license ) ) {
			# We're done!
			echo( "done.\n" );

			$doProtect = false;

			global $wgRestrictionLevels;

			$protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;

			# Only apply a level the wiki actually knows about (strict match).
			if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels, true ) ) {
				$doProtect = true;
			}
			# --unprotect applies an empty restriction level.
			if ( isset( $options['unprotect'] ) ) {
				$protectLevel = '';
				$doProtect = true;
			}

			if ( $doProtect ) {
				# Protect the file
				echo "\nWaiting for slaves...\n";
				// Wait for slaves.
				sleep( 2 ); # Why this sleep?
				wfWaitForSlaves();

				echo( "\nSetting image restrictions ... " );

				# Apply the same level to every restriction type of the title.
				$cascade = false;
				$restrictions = array();
				foreach ( $title->getRestrictionTypes() as $type ) {
					$restrictions[$type] = $protectLevel;
				}

				$page = WikiPage::factory( $title );
				$status = $page->doUpdateRestrictions( $restrictions, array(), $cascade, '', $user );
				echo( ( $status->isOK() ? 'done' : 'failed' ) . "\n" );
			}

		} else {
			echo( "failed. (at recordUpload stage)\n" );
			$svar = 'failed';
		}

		# Bump the counter selected above ($added, $overwritten or $failed).
		$$svar++;
		$processed++;

		if ( $limit && $processed >= $limit ) {
			break;
		}

		if ( $sleep ) {
			sleep( $sleep );
		}
	}

	# Print out some statistics; only counters greater than zero are shown.
	echo( "\n" );
	foreach ( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored',
		'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten',
		'failed' => 'Failed' ) as $var => $desc ) {
		if ( $$var > 0 ) {
			echo( "{$desc}: {$$var}\n" );
		}
	}

} else {
	echo( "No suitable files could be found for import.\n" );
}

exit( 0 );
304
/**
 * Print the command-line help for this script and terminate with exit
 * status 1. This function never returns.
 *
 * @param string|bool $reason Optional message printed on its own line
 *   before the usage text; nothing extra is printed when it is falsy.
 */
function showUsage( $reason = false ) {
	if ( $reason ) {
		echo( $reason . "\n" );
	}

	// Heredoc: the closing marker must stay at column 0 for older PHP versions.
	echo <<<TEXT
Imports images and other media files into the wiki
USAGE: php importImages.php [options] <dir>

<dir> : Path to the directory containing images to be imported

Options:
--extensions=<exts> Comma-separated list of allowable extensions, defaults to \$wgFileExtensions
--overwrite Overwrite existing images with the same name (default is to skip them)
--limit=<num> Limit the number of images to process. Ignored or skipped images are not counted.
--from=<name> Ignore all files until the one with the given name. Useful for resuming
aborted imports. <name> should be the file's canonical database form.
--skip-dupes Skip images that were already uploaded under a different name (check SHA1)
--sleep=<sec> Sleep between files. Useful mostly for debugging.
--user=<username> Set username of uploader, default 'Maintenance script'
--check-userblock Check if the user got blocked during import.
--comment=<text> Set upload summary comment, default 'Importing image file'.
--comment-file=<file> Set upload summary comment to the content of <file>.
--comment-ext=<ext> Causes the comment for each file to be loaded from a file with the same name
but the extension <ext>. If no such file can be read, the global comment is used instead.
--license=<code> Use an optional license template
--dry Dry run, don't import anything
--protect=<protect> Specify the protect value (autoconfirmed,sysop)
--unprotect Unprotects all uploaded images
--source-wiki-url if specified, take User and Comment data for each imported file from this URL.
For example, --source-wiki-url="http://en.wikipedia.org/"

TEXT;
	exit( 1 );
}