X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=maintenance%2FimportImages.php;h=bd077ff959f1c69d90738917c2a552976b6d0a85;hb=e7001ff5742d7f9e481409fbae3fee1e56b9ca0c;hp=8302982cd46b1ce8084d84bcac8496e7754bd641;hpb=7fefaf95f91cca14d2175fb7bcaddec29d83f84a;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/importImages.php b/maintenance/importImages.php index 8302982cd4..bd077ff959 100644 --- a/maintenance/importImages.php +++ b/maintenance/importImages.php @@ -2,120 +2,339 @@ /** * Maintenance script to import one or more images from the local file system into - * the wiki without using the web-based interface + * the wiki without using the web-based interface. * - * @addtogroup Maintenance + * "Smart import" additions: + * - aim: preserve the essential metadata (user, description) when importing medias from an existing wiki + * - process: + * - interface with the source wiki, don't use bare files only (see --source-wiki-url). + * - fetch metadata from source wiki for each file to import. + * - commit the fetched metadata to the destination wiki while submitting. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * http://www.gnu.org/copyleft/gpl.html + * + * @file + * @ingroup Maintenance * @author Rob Church + * @author Mij */ -require_once( 'commandLine.inc' ); -require_once( 'importImages.inc.php' ); +$optionsWithArgs = array( 'extensions', 'comment', 'comment-file', 'comment-ext', 'user', 'license', 'sleep', 'limit', 'from', 'source-wiki-url' ); +require_once( dirname( __FILE__ ) . '/commandLine.inc' ); +require_once( dirname( __FILE__ ) . '/importImages.inc' ); +$processed = $added = $ignored = $skipped = $overwritten = $failed = 0; + echo( "Import Images\n\n" ); -# Need a directory and at least one extension -if( count( $args ) > 1 ) { - - $dir = array_shift( $args ); - - # Check the allowed extensions - while( $ext = array_shift( $args ) ) - $exts[] = ltrim( $ext, '.' ); - - # Search the directory given and pull out suitable candidates - $files = findFiles( $dir, $exts ); - - # Initialise the user for this operation - $user = isset( $options['user'] ) - ? User::newFromName( $options['user'] ) - : User::newFromName( 'Maintenance script' ); - if( !$user instanceof User ) - $user = User::newFromName( 'Maintenance script' ); - $wgUser = $user; - - # Get the upload comment - $comment = isset( $options['comment'] ) - ? $options['comment'] - : 'Importing image file'; - - # Get the license specifier - $license = isset( $options['license'] ) ? $options['license'] : ''; - - # Batch "upload" operation - foreach( $files as $file ) { - +# Need a path +if ( count( $args ) == 0 ) { + showUsage(); +} + +$dir = $args[0]; + +# Check Protection +if ( isset( $options['protect'] ) && isset( $options['unprotect'] ) ) { + die( "Cannot specify both protect and unprotect. Only 1 is allowed.\n" ); +} + +if ( isset( $options['protect'] ) && $options['protect'] == 1 ) { + die( "You must specify a protection option.\n" ); +} + +# Prepare the list of allowed extensions +global $wgFileExtensions; +$extensions = isset( $options['extensions'] ) + ? explode( ',', strtolower( $options['extensions'] ) ) + : $wgFileExtensions; + +# Search the path provided for candidates for import +$files = findFiles( $dir, $extensions ); + +# Initialise the user for this operation +$user = isset( $options['user'] ) + ? User::newFromName( $options['user'] ) + : User::newFromName( 'Maintenance script' ); +if ( !$user instanceof User ) { + $user = User::newFromName( 'Maintenance script' ); +} +$wgUser = $user; + +# Get block check. If a value is given, this specified how often the check is performed +if ( isset( $options['check-userblock'] ) ) { + if ( !$options['check-userblock'] ) { + $checkUserBlock = 1; + } else { + $checkUserBlock = (int)$options['check-userblock']; + } +} else { + $checkUserBlock = false; +} + +# Get --from +$from = @$options['from']; + +# Get sleep time. +$sleep = @$options['sleep']; +if ( $sleep ) { + $sleep = (int)$sleep; +} + +# Get limit number +$limit = @$options['limit']; +if ( $limit ) { + $limit = (int)$limit; +} + +# Get the upload comment. Provide a default one in case there's no comment given. +$comment = 'Importing image file'; + +if ( isset( $options['comment-file'] ) ) { + $comment = file_get_contents( $options['comment-file'] ); + if ( $comment === false || $comment === null ) { + die( "failed to read comment file: {$options['comment-file']}\n" ); + } +} elseif ( isset( $options['comment'] ) ) { + $comment = $options['comment']; +} + +$commentExt = isset( $options['comment-ext'] ) ? $options['comment-ext'] : false; + +# Get the license specifier +$license = isset( $options['license'] ) ? $options['license'] : ''; + +# Batch "upload" operation +$count = count( $files ); +if ( $count > 0 ) { + + foreach ( $files as $file ) { $base = wfBaseName( $file ); - + # Validate a title - $title = Title::makeTitleSafe( NS_IMAGE, $base ); - if( is_object( $title ) ) { - - # Check existence - $image = new Image( $title ); - if( !$image->exists() ) { - - global $wgUploadDirectory; - - # copy() doesn't create paths so if the hash path doesn't exist, we - # have to create it - makeHashPath( wfGetHashPath( $image->name ) ); - - # Stash the file - echo( "Saving {$base}..." ); - - if( copy( $file, $image->getFullPath() ) ) { - - echo( "importing..." ); - - # Grab the metadata - $image->loadFromFile(); - - # Record the upload - if( $image->recordUpload( '', $comment, $license ) ) { - - # We're done! - echo( "done.\n" ); - - } else { - echo( "failed.\n" ); - } - - } else { - echo( "failed.\n" ); + $title = Title::makeTitleSafe( NS_FILE, $base ); + if ( !is_object( $title ) ) { + echo( "{$base} could not be imported; a valid title cannot be produced\n" ); + continue; + } + + if ( $from ) { + if ( $from == $title->getDBkey() ) { + $from = null; + } else { + $ignored++; + continue; + } + } + + if ( $checkUserBlock && ( ( $processed % $checkUserBlock ) == 0 ) ) { + $user->clearInstanceCache( 'name' ); // reload from DB! + if ( $user->isBlocked() ) { + echo( $user->getName() . " was blocked! Aborting.\n" ); + break; + } + } + + # Check existence + $image = wfLocalFile( $title ); + if ( $image->exists() ) { + if ( isset( $options['overwrite'] ) ) { + echo( "{$base} exists, overwriting..." ); + $svar = 'overwritten'; + } else { + echo( "{$base} exists, skipping\n" ); + $skipped++; + continue; + } + } else { + if ( isset( $options['skip-dupes'] ) ) { + $repo = $image->getRepo(); + $sha1 = File::sha1Base36( $file ); # XXX: we end up calculating this again when actually uploading. that sucks. + + $dupes = $repo->findBySha1( $sha1 ); + + if ( $dupes ) { + echo( "{$base} already exists as " . $dupes[0]->getName() . ", skipping\n" ); + $skipped++; + continue; } - + } + + echo( "Importing {$base}..." ); + $svar = 'added'; + } + + if ( isset( $options['source-wiki-url'] ) ) { + /* find comment text directly from source wiki, through MW's API */ + $real_comment = getFileCommentFromSourceWiki( $options['source-wiki-url'], $base ); + if ( $real_comment === false ) + $commentText = $comment; + else + $commentText = $real_comment; + + /* find user directly from source wiki, through MW's API */ + $real_user = getFileUserFromSourceWiki( $options['source-wiki-url'], $base ); + if ( $real_user === false ) { + $wgUser = $user; } else { - echo( "{$base} could not be imported; a file with this name exists in the wiki\n" ); + $wgUser = User::newFromName( $real_user ); + if ( $wgUser === false ) { + # user does not exist in target wiki + echo ( "failed: user '$real_user' does not exist in target wiki." ); + continue; + } } - } else { - echo( "{$base} could not be imported; a valid title cannot be produced\n" ); + # Find comment text + $commentText = false; + + if ( $commentExt ) { + $f = findAuxFile( $file, $commentExt ); + if ( !$f ) { + echo( " No comment file with extension {$commentExt} found for {$file}, using default comment. " ); + } else { + $commentText = file_get_contents( $f ); + if ( !$commentText ) { + echo( " Failed to load comment file {$f}, using default comment. " ); + } + } + } + + if ( !$commentText ) { + $commentText = $comment; + } + } + + # Import the file + if ( isset( $options['dry'] ) ) { + echo( " publishing {$file} by '" . $wgUser->getName() . "', comment '$commentText'... " ); + } else { + $archive = $image->publish( $file ); + if ( !$archive->isGood() ) { + echo( "failed. (" . + $archive->getWikiText() . + ")\n" ); + $failed++; + continue; + } + } + + if ( isset( $options['dry'] ) ) { + echo( "done.\n" ); + } elseif ( $image->recordUpload( $archive->value, $commentText, $license ) ) { + # We're done! + echo( "done.\n" ); + + $doProtect = false; + + global $wgRestrictionLevels; + + $protectLevel = isset( $options['protect'] ) ? $options['protect'] : null; + + if ( $protectLevel && in_array( $protectLevel, $wgRestrictionLevels ) ) { + $doProtect = true; + } + if ( isset( $options['unprotect'] ) ) { + $protectLevel = ''; + $doProtect = true; + } + + if ( $doProtect ) { + # Protect the file + echo "\nWaiting for slaves...\n"; + // Wait for slaves. + sleep( 2.0 ); # Why this sleep? + wfWaitForSlaves(); + + echo( "\nSetting image restrictions ... " ); + + $cascade = false; + $restrictions = array(); + foreach( $title->getRestrictionTypes() as $type ) { + $restrictions[$type] = $protectLevel; + } + + $page = WikiPage::factory( $title ); + $status = $page->doUpdateRestrictions( $restrictions, array(), $cascade, '', $user ); + echo( ( $status->isOK() ? 'done' : 'failed' ) . "\n" ); + } + + } else { + echo( "failed. (at recordUpload stage)\n" ); + $svar = 'failed'; + } + + $$svar++; + $processed++; + + if ( $limit && $processed >= $limit ) { + break; + } + + if ( $sleep ) { + sleep( $sleep ); } - } - + + # Print out some statistics + echo( "\n" ); + foreach ( array( 'count' => 'Found', 'limit' => 'Limit', 'ignored' => 'Ignored', + 'added' => 'Added', 'skipped' => 'Skipped', 'overwritten' => 'Overwritten', + 'failed' => 'Failed' ) as $var => $desc ) { + if ( $$var > 0 ) + echo( "{$desc}: {$$var}\n" ); + } } else { - showUsage(); + echo( "No suitable files could be found for import.\n" ); } -exit(); +exit( 0 ); function showUsage( $reason = false ) { - if( $reason ) + if ( $reason ) { echo( $reason . "\n" ); - echo << ... + } + + echo << : Path to the directory containing images to be imported - File extensions to import Options: ---user= Set username of uploader, default 'Image import script' ---comment= Set upload summary comment, default 'Importing image file' ---license= Use an optional license template +--extensions= Comma-separated list of allowable extensions, defaults to \$wgFileExtensions +--overwrite Overwrite existing images with the same name (default is to skip them) +--limit= Limit the number of images to process. Ignored or skipped images are not counted. +--from= Ignore all files until the one with the given name. Useful for resuming + aborted imports. should be the file's canonical database form. +--skip-dupes Skip images that were already uploaded under a different name (check SHA1) +--sleep= Sleep between files. Useful mostly for debugging. +--user= Set username of uploader, default 'Maintenance script' +--check-userblock Check if the user got blocked during import. +--comment= Set upload summary comment, default 'Importing image file'. +--comment-file= Set upload summary comment the the content of . +--comment-ext= Causes the comment for each file to be loaded from a file with the same name + but the extension . If a global comment is also given, it is appended. +--license= Use an optional license template +--dry Dry run, don't import anything +--protect= Specify the protect value (autoconfirmed,sysop) +--unprotect Unprotects all uploaded images +--source-wiki-url if specified, take User and Comment data for each imported file from this URL. + For example, --source-wiki-url="http://en.wikipedia.org/" -END; - exit(); +TEXT; + exit( 1 ); } - -?>