addDescription( 'Does something with a dump' );
		$this->addOption( 'file', 'File with text to run.', false, true );
		$this->addOption( 'dump', 'XML dump to execute all revisions.', false, true );
		$this->addOption( 'from', 'Article from XML dump to start from.', false, true );
	}

	/**
	 * Run the script over either a single text file (--file) or an XML dump
	 * (--dump). Exactly one of the two options must be supplied.
	 *
	 * In file mode one revision is built from the file and passed to
	 * handleRevision(). In dump mode every revision read from stdin is passed
	 * to handleRevision(), then conclusions() is called and timing and memory
	 * statistics are reported.
	 */
	public function execute() {
		// XOR: reject both "neither option" and "both options".
		if ( !( $this->hasOption( 'file' ) ^ $this->hasOption( 'dump' ) ) ) {
			// NOTE(review): the second argument presumably makes error() terminate
			// the script - confirm against Maintenance::error().
			$this->error( "You must provide a file or dump", true );
		}

		$this->checkOptions();

		if ( $this->hasOption( 'file' ) ) {
			$path = $this->getOption( 'file' );

			// file_get_contents() returns false on failure; do not hand that to
			// the revision as if it were page text.
			$text = file_get_contents( $path );
			if ( $text === false ) {
				$this->error( "Could not read file {$path}", true );
				return;
			}

			$revision = new WikiRevision( $this->getConfig() );
			$revision->setText( $text );
			// The title is the file's base name without ".txt", URL-decoded.
			$revision->setTitle( Title::newFromText( rawurldecode( basename( $path, '.txt' ) ) ) );

			$this->handleRevision( $revision );

			return;
		}

		$this->startTime = microtime( true );

		if ( $this->getOption( 'dump' ) !== '-' ) {
			$this->error( "Sorry, I don't support dump filenames yet. "
				. "Use - and provide it on stdin on the meantime.", true );
			// Never continue without an import source, even if error() returned.
			return;
		}
		$source = new ImportStreamSource( $this->getStdin() );

		$importer = new WikiImporter( $source, $this->getConfig() );
		$importer->setRevisionCallback( [ $this, 'handleRevision' ] );

		$this->from = $this->getOption( 'from', null );
		$this->count = 0;
		$importer->doImport();

		$this->conclusions();

		$delta = microtime( true ) - $this->startTime;
		$this->error( "Done {$this->count} revisions in " . round( $delta, 2 ) . " seconds " );
		// Guard against division by zero on a zero-length run.
		if ( $delta > 0 ) {
			$this->error( round( $this->count / $delta, 2 ) . " pages/sec" );
		}

		// Perform the memory_get_peak_usage() when all the other data has been
		// output so there's no damage if it dies. It is only available since
		// 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
		$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
	}

	/**
	 * Adjust global configuration after the parent's final setup.
	 *
	 * When the script declares that it needs no database (DB_NONE), database
	 * messages are switched off, the localisation cache store class is set to
	 * 'LCStoreNull', and disableInterwikis() is registered as an
	 * InterwikiLoadPrefix hook handler.
	 */
	public function finalSetup() {
		parent::finalSetup();

		if ( $this->getDbType() == Maintenance::DB_NONE ) {
			global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;

			$wgUseDatabaseMessages = false;
			$wgLocalisationCacheConf['storeClass'] = 'LCStoreNull';
			$wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
		}
	}

	/**
	 * InterwikiLoadPrefix hook handler registered by finalSetup().
	 *
	 * @param mixed $prefix Unused
	 * @param mixed &$data Unused; never modified here
	 * @return bool Always false
	 */
	public static function disableInterwikis( $prefix, &$data ) {
		// Title::newFromText will check on each namespaced article if it's an interwiki.
		// We always answer that it is not.
		return false;
	}

	/**
	 * Callback function for each revision, child classes should override
	 * processRevision instead.
	 *
	 * Revisions without a title are reported and ignored. While a --from title
	 * is pending, revisions are counted but skipped until the title matches;
	 * from then on every revision is passed to processRevision().
	 *
	 * @param WikiRevision $rev
	 */
	public function handleRevision( $rev ) {
		$title = $rev->getTitle();
		if ( !$title ) {
			$this->error( "Got bogus revision with null title!" );
			return;
		}

		$this->count++;

		if ( isset( $this->from ) ) {
			// Compare as exact strings. A loose comparison treats numeric-looking
			// titles as numbers ("1" == "01", "1000" == "1e3") and could resume
			// at the wrong page.
			if ( $this->from !== (string)$title ) {
				return;
			}

			$this->output( "Skipped " . ( $this->count - 1 ) . " pages\n" );

			// Restart counting at the first processed revision and stop skipping.
			$this->count = 1;
			$this->from = null;
		}

		$this->processRevision( $rev );
	}

	/**
	 * Stub function for processing additional options.
	 * Called by execute() before any revision is handled.
	 */
	public function checkOptions() {
	}

	/**
	 * Stub function for giving data about what was computed.
	 * Called by execute() once a dump import has finished.
	 */
	public function conclusions() {
	}

	/**
	 * Core function which does whatever the maintenance script is designed to do.
	 *
	 * @param WikiRevision $rev Revision handed over by handleRevision()
	 */
	abstract public function processRevision( $rev );
}

/**
 * Maintenance script that runs a regex in the revisions from a dump.
 *
 * @ingroup Maintenance
 */
class SearchDump extends DumpIterator {

	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Runs a regex in the revisions from a dump' );
		$this->addOption( 'regex', 'Searching regex', true, true );
	}

	/**
	 * @return int Maintenance::DB_NONE
	 */
	public function getDbType() {
		return Maintenance::DB_NONE;
	}

	/**
	 * Print the title and timestamp of a revision whose search-index text
	 * matches the --regex option.
	 *
	 * @param WikiRevision $rev
	 */
	public function processRevision( $rev ) {
		$text = $rev->getContent()->getTextForSearchIndex();

		if ( preg_match( $this->getOption( 'regex' ), $text ) ) {
			$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );
		}
	}
}

$maintClass = "SearchDump";
require_once RUN_MAINTENANCE_IF_MAIN;