* Used as a base class for CompareParsers and PreprocessDump.
* We implement below the simple task of searching inside a dump.
*
- * Copyright (C) 2011 Platonides - http://www.mediawiki.org/
- *
+ * Copyright © 2011 Platonides
+ * http://www.mediawiki.org/
+ *
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* @file
* @ingroup Maintenance
*/
-
-require_once( dirname( __FILE__ ) . '/Maintenance.php' );
+require_once( __DIR__ . '/Maintenance.php' );
+
+/**
+ * Base class for iterating over a dump.
+ *
+ * @ingroup Maintenance
+ */
abstract class DumpIterator extends Maintenance {
private $count = 0;
if (! ( $this->hasOption('file') ^ $this->hasOption('dump') ) ) {
$this->error("You must provide a file or dump", true);
}
-
+
$this->checkOptions();
-
+
if ( $this->hasOption('file') ) {
$revision = new WikiRevision;
-
+
$revision->setText( file_get_contents( $this->getOption( 'file' ) ) );
$revision->setTitle( Title::newFromText( rawurldecode( basename( $this->getOption( 'file' ), '.txt' ) ) ) );
$this->handleRevision( $revision );
return;
}
-
- $this->startTime = wfTime();
+
+ $this->startTime = microtime( true );
if ( $this->getOption('dump') == '-' ) {
$source = new ImportStreamSource( $this->getStdin() );
$importer->setRevisionCallback(
array( &$this, 'handleRevision' ) );
-
+
$this->from = $this->getOption( 'from', null );
$this->count = 0;
$importer->doImport();
-
+
$this->conclusions();
-
- $delta = wfTime() - $this->startTime;
+
+ $delta = microtime( true ) - $this->startTime;
$this->error( "Done {$this->count} revisions in " . round($delta, 2) . " seconds " );
if ($delta > 0)
$this->error( round($this->count / $delta, 2) . " pages/sec" );
-
+
# Perform the memory_get_peak_usage() when all the other data has been output so there's no damage if it dies.
# It is only available since 5.2.0 (since 5.2.1 if you haven't compiled with --enable-memory-limit)
$this->error( "Memory peak usage of " . memory_get_peak_usage() . " bytes\n" );
}
-
- function stripParameters( $text ) {
- if ( !$this->stripParametersEnabled ) {
- return $text;
+
+ public function finalSetup() {
+ parent::finalSetup();
+ # NOTE(review): DB_NONE presumably means no database is available, so the settings below look meant to keep messages, the localisation cache and interwiki checks away from it - confirm.
+ if ( $this->getDbType() == Maintenance::DB_NONE ) {
+ global $wgUseDatabaseMessages, $wgLocalisationCacheConf, $wgHooks;
+ $wgUseDatabaseMessages = false;
+ $wgLocalisationCacheConf['storeClass'] = 'LCStore_Null';
+ $wgHooks['InterwikiLoadPrefix'][] = 'DumpIterator::disableInterwikis';
}
- return preg_replace( '/(<a) [^>]+>/', '$1>', $text );
}
-
+
+ static function disableInterwikis( $prefix, &$data ) {
+ # Handler that finalSetup() registers on the InterwikiLoadPrefix hook when getDbType() is DB_NONE.
+ # Title::newFromText will check on each namespaced article if it's an interwiki.
+ # We always answer that it is not: $prefix and $data are ignored and false is returned unconditionally.
+ return false;
+ }
+
/**
- * Callback function for each revision, child classes should override
+ * Callback function for each revision, child classes should override
* processRevision instead.
* @param $rev Revision
*/
$this->error( "Got bogus revision with null title!" );
return;
}
-
+
$this->count++;
if ( isset( $this->from ) ) {
if ( $this->from != $title )
return;
$this->output( "Skipped " . ($this->count - 1) . " pages\n" );
-
+
$this->count = 1;
$this->from = null;
}
-
+
$this->processRevision( $rev );
}
-
+
/* Stub function for processing additional options */
public function checkOptions() {
return;
}
-
+
/* Stub function for giving data about what was computed */
public function conclusions() {
return;
abstract public function processRevision( $rev );
}
+/**
+ * Maintenance script that runs a regex in the revisions from a dump.
+ *
+ * @ingroup Maintenance
+ */
class SearchDump extends DumpIterator {
-
+
public function __construct() {
parent::__construct();
$this->mDescription = "Runs a regex in the revisions from a dump";
$this->addOption( 'regex', 'Searching regex', true, true );
}
-
+
+ public function getDbType() {
+ return Maintenance::DB_NONE; # DumpIterator::finalSetup() compares getDbType() against this constant to apply its DB_NONE-only settings
+ }
+
+ /**
+ * @param $rev Revision
+ */
public function processRevision( $rev ) {
if ( preg_match( $this->getOption( 'regex' ), $rev->getText() ) ) {
$this->output( $rev->getTitle() . " matches at edit from " . $rev->getTimestamp() . "\n" );