Standardised file description headers:
[lhc/web/wiklou.git] / maintenance / importImages.php
index 925c64b..4364785 100644 (file)
 
 /**
  * Maintenance script to import one or more images from the local file system into
- * the wiki without using the web-based interface
+ * the wiki without using the web-based interface.
  *
- * @package MediaWiki
- * @subpackage Maintenance
+ * "Smart import" additions:
+ * - aim: preserve the essential metadata (user, description) when importing media from an existing wiki
+ * - process:
+ *      - interface with the source wiki, don't use bare files only (see --source-wiki-url).
+ *      - fetch metadata from source wiki for each file to import.
+ *      - commit the fetched metadata to the destination wiki while submitting.
+ *
+ * @file
+ * @ingroup Maintenance
  * @author Rob Church <robchur@gmail.com>
+ * @author Mij <mij@bitchx.it>
  */
 
-require_once( 'commandLine.inc' );
-require_once( 'importImages.inc.php' );
+$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from', 'source-wiki-url' );
+require_once( dirname( __FILE__ ) . '/commandLine.inc' );
+require_once( dirname( __FILE__ ) . '/importImages.inc' );
+$processed = $added = $ignored = $skipped = $overwritten = $failed = 0;
+
 echo( "Import Images\n\n" );
 
-# Need a directory and at least one extension
-if( count( $args ) > 1 ) {
+# Need a path
+if ( count( $args ) > 0 ) {
 
-       $dir = array_shift( $args );
+       $dir = $args[0];
 
-       # Check the allowed extensions
-       while( $ext = array_shift( $args ) )
-               $exts[] = ltrim( $ext, '.' );
-               
-       # Search the directory given and pull out suitable candidates
-       $files = findFiles( $dir, $exts );
+       # Check Protection
+       if ( isset( $options['protect'] ) && isset( $options['unprotect'] ) )
+                       die( "Cannot specify both protect and unprotect.  Only 1 is allowed.\n" );
+
+if ( isset( $options['protect'] ) && $options['protect'] == 1 )
+                       die( "You must specify a protection option.\n" );
+
+       # Prepare the list of allowed extensions
+       global $wgFileExtensions;
+       $extensions = isset( $options['extensions'] )
+               ? explode( ',', strtolower( $options['extensions'] ) )
+               : $wgFileExtensions;
+
+       # Search the path provided for candidates for import
+       $files = findFiles( $dir, $extensions );
+
+       # Initialise the user for this operation
+       $user = isset( $options['user'] )
+               ? User::newFromName( $options['user'] )
+               : User::newFromName( 'Maintenance script' );
+       if ( !$user instanceof User )
+               $user = User::newFromName( 'Maintenance script' );
+       $wgUser = $user;
+
+       # Get block check. If a value is given, this specifies how often the check is performed
+       if ( isset( $options['check-userblock'] ) ) {
+               if ( !$options['check-userblock'] ) $checkUserBlock = 1;
+               else $checkUserBlock = (int)$options['check-userblock'];
+       } else {
+               $checkUserBlock = false;
+       }
+
+       # Get --from 
+       $from = @$options['from'];
+
+       # Get sleep time. 
+       $sleep = @$options['sleep'];
+       if ( $sleep ) $sleep = (int)$sleep;
+
+       # Get limit number
+       $limit = @$options['limit'];
+       if ( $limit ) $limit = (int)$limit;
+
+       # Get the upload comment
+       $comment = NULL;
+
+       if ( isset( $options['comment-file'] ) ) {
+               $comment =  file_get_contents( $options['comment-file'] );
+               if ( $comment === false || $comment === NULL ) {
+                       die( "failed to read comment file: {$options['comment-file']}\n" );
+               }
+       }
+       else if ( isset( $options['comment'] ) ) {
+               $comment =  $options['comment'];
+       }
+
+       $commentExt = isset( $options['comment-ext'] ) ? $options['comment-ext'] : false;
+
+       # Get the license specifier
+       $license = isset( $options['license'] ) ? $options['license'] : '';
 
-       # Set up a fake user for this operation
-       $wgUser = User::newFromName( 'Image import script' );
-       $wgUser->setLoaded( true );
-       
        # Batch "upload" operation
-       foreach( $files as $file ) {
+       if ( ( $count = count( $files ) ) > 0 ) {
        
-               $base = basename( $file );
-               
-               # Validate a title
-               $title = Title::makeTitleSafe( NS_IMAGE, $base );
-               if( is_object( $title ) ) {
-                       
+               foreach ( $files as $file ) {
+                       $base = wfBaseName( $file );
+       
+                       # Validate a title
+                       $title = Title::makeTitleSafe( NS_FILE, $base );
+                       if ( !is_object( $title ) ) {
+                               echo( "{$base} could not be imported; a valid title cannot be produced\n" );
+                               continue;
+                       }
+       
+                       if ( $from ) {
+                               if ( $from == $title->getDBkey() ) {
+                                       $from = NULL;
+                               } else {
+                                       $ignored++;
+                                       continue;
+                               }
+                       }
+
+                       if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) {
+                               $user->clearInstanceCache( 'name' ); // reload from DB!
+                               if ( $user->isBlocked() ) {
+                                       echo( $user->getName() . " was blocked! Aborting.\n" );
+                                       break;
+                               }
+                       }
+
                        # Check existence
-                       $image = new Image( $title );
-                       if( !$image->exists() ) {
-                       
-                               global $wgUploadDirectory;
-                               
-                               # copy() doesn't create paths so if the hash path doesn't exist, we
-                               # have to create it
-                               makeHashPath( wfGetHashPath( $image->name ) );
-                               
-                               # Stash the file
-                               echo( "Saving {$base}..." );
-                               
-                               if( copy( $file, $image->getFullPath() ) ) {
-                               
-                                       echo( "importing..." );
-                               
-                                       # Grab the metadata
-                                       $image->loadFromFile();
-                                       
-                                       # Record the upload
-                                       if( $image->recordUpload( '', 'Importing image file' ) ) {
-                                       
-                                               # We're done!
-                                               echo( "done.\n" );
-                                               
-                                       } else {
-                                               echo( "failed.\n" );
-                                       }
-                               
+                       $image = wfLocalFile( $title );
+                       if ( $image->exists() ) {
+                               if ( isset( $options['overwrite'] ) ) {
+                                       echo( "{$base} exists, overwriting..." );
+                                       $svar = 'overwritten';
                                } else {
+                                       echo( "{$base} exists, skipping\n" );
+                                       $skipped++;
+                                       continue;
+                               }
+                       } else {
+                               if ( isset( $options['skip-dupes'] ) ) {
+                                       $repo = $image->getRepo();
+                                       $sha1 = File::sha1Base36( $file ); # XXX: we end up calculating this again when actually uploading. that sucks.
+
+                                       $dupes = $repo->findBySha1( $sha1 );
+
+                                       if ( $dupes ) {
+                                               echo( "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n" );
+                                               $skipped++;
+                                               continue;
+                                       }
+                               }
+
+                               echo( "Importing {$base}..." );
+                               $svar = 'added';
+                       }
+
+            if ( isset( $options['source-wiki-url'] ) ) {
+                /* find comment text directly from source wiki, through MW's API */
+                $real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base );
+                if ( $real_comment === false )
+                    $commentText = $comment;
+                else
+                    $commentText = $real_comment;
+
+                /* find user directly from source wiki, through MW's API */
+                $real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base );
+                if ( $real_user === false ) {
+                    $wgUser = $user;
+                } else {
+                    $wgUser = User::newFromName( $real_user );
+                    if ( $wgUser === false ) {
+                        # user does not exist in target wiki
+                        echo ( "failed: user '$real_user' does not exist in target wiki." );
+                        continue;
+                    }
+                }
+            } else {
+                # Find comment text
+                $commentText = false;
+
+                if ( $commentExt ) {
+                    $f = findAuxFile( $file, $commentExt );
+                    if ( !$f ) {
+                        echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " );
+                    } else {
+                        $commentText = file_get_contents( $f );
+                        if ( !$f ) {
+                            echo( " Failed to load comment file {$f}, using default comment. " );
+                        }
+                    }
+                }
+
+                if ( !$commentText ) {
+                    $commentText = $comment;
+                }
+            }
+
+
+                       # Import the file       
+                       if ( isset( $options['dry'] ) ) {
+                               echo( " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... " );
+                       } else {
+                               $archive = $image->publish( $file );
+                               if ( WikiError::isError( $archive ) || !$archive->isGood() ) {
                                        echo( "failed.\n" );
+                                       $failed++;
+                                       continue;
                                }
+                       }
+                       
+                       $doProtect = false;
+                       $restrictions = array();
+                       
+                       global $wgRestrictionLevels;
+                       
+                       $protectLevel = isset( $options['protect'] ) ? $options['protect'] : null;
                        
+                       if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) {
+                                       $restrictions['move'] = $protectLevel;
+                                       $restrictions['edit'] = $protectLevel;
+                                       $doProtect = true;
+                       }
+                       if ( isset( $options['unprotect'] ) ) {
+                                       $restrictions['move'] = '';
+                                       $restrictions['edit'] = '';
+                                       $doProtect = true;
+                       }
+
+
+                       if ( isset( $options['dry'] ) ) {
+                               echo( "done.\n" );
+                       } else if ( $image->recordUpload( $archive->value, $commentText, $license ) ) {
+                               # We're done!
+                               echo( "done.\n" );
+                               if ( $doProtect ) {
+                                               # Protect the file
+                                               $article = new Article( $title );
+                                               echo "\nWaiting for slaves...\n";
+                                               // Wait for slaves.
+                                               sleep( 2.0 );
+                                               wfWaitForSlaves( 1.0 );
+                                               
+                                               echo( "\nSetting image restrictions ... " );
+                                               if ( $article->updateRestrictions( $restrictions ) )
+                                                               echo( "done.\n" );
+                                               else
+                                                               echo( "failed.\n" );
+                               }
+
                        } else {
-                               echo( "{$base} could not be imported; a file with this name exists in the wiki\n" );
+                               echo( "failed.\n" );
+                               $svar = 'failed';
                        }
+                       
+                       $$svar++;
+                       $processed++;
+
+                       if ( $limit && $processed >= $limit )
+                               break;
+
+                       if ( $sleep )
+                               sleep( $sleep );
+               }
                
-               } else {
-                       echo( "{$base} could not be imported; a valid title cannot be produced\n" );
+               # Print out some statistics
+               echo( "\n" );
+               foreach ( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored',
+                       'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten',
+                       'failed' => 'Failed' ) as $var => $desc ) {
+                       if ( $$var > 0 )
+                               echo( "{$desc}: {$$var}\n" );
                }
                
+       } else {
+               echo( "No suitable files could be found for import.\n" );
        }
-       
 
 } else {
        showUsage();
 }
 
-exit();
+exit( 0 );
 
 function showUsage( $reason = false ) {
-       if( $reason )
+       if ( $reason ) {
                echo( $reason . "\n" );
-       echo( "USAGE: php importImages.php <dir> <ext1> <ext2>\n\n" );
-       echo( "<dir> : Path to the directory containing images to be imported\n" );
-       echo( "<ext1+> File extensions to import\n\n" );                
-       exit();
-}
+       }
 
-?>
\ No newline at end of file
+       echo <<<TEXT
+Imports images and other media files into the wiki
+USAGE: php importImages.php [options] <dir>
+
+<dir> : Path to the directory containing images to be imported
+
+Options:
+--extensions=<exts>    Comma-separated list of allowable extensions, defaults to \$wgFileExtensions
+--overwrite            Overwrite existing images with the same name (default is to skip them)
+--limit=<num>          Limit the number of images to process. Ignored or skipped images are not counted.
+--from=<name>          Ignore all files until the one with the given name. Useful for resuming
+                        aborted imports. <name> should be the file's canonical database form.
+--skip-dupes           Skip images that were already uploaded under a different name (check SHA1)
+--sleep=<sec>          Sleep between files. Useful mostly for debugging.
+--user=<username>      Set username of uploader, default 'Maintenance script'
+--check-userblock      Check if the user got blocked during import.
+--comment=<text>       Set upload summary comment, default 'Importing image file'.
+--comment-file=<file>          Set upload summary comment the the content of <file>.
+--comment-ext=<ext>    Causes the comment for each file to be loaded from a file with the same name
+                       but the extension <ext>. If a global comment is also given, it is appended.
+--license=<code>       Use an optional license template
+--dry                  Dry run, don't import anything
+--protect=<protect>     Specify the protect value (autoconfirmed,sysop)
+--unprotect             Unprotects all uploaded images
+--source-wiki-url   if specified, take User and Comment data for each imported file from this URL.
+                    For example, --source-wiki-url="http://en.wikipedia.org/"
+
+TEXT;
+       exit( 1 );
+}