From 77086dc2ef4cc909d18188a94b538677d42669ca Mon Sep 17 00:00:00 2001 From: Tim Starling Date: Thu, 24 Sep 2009 04:19:25 +0000 Subject: [PATCH] Fixes for TitleCleanup subclasses: * (r56711) Don't use unbufferred queries unless someone is holding a gun to your head, they cause no end of trouble. Use batched queries instead. Should fix the bug reported on bug 20741 comment 13. * Fixed a bug in cleanupTitles.php which caused it to fail to convert spaces in page_title to underscores, possibly caused by r6355. * Made cleanupWatchlist.php respect "--fix" as documented * Made runTable accept a parameter array instead of an ever-growing formal parameter list * Renamed processPage() to more accurate processRow(). Removed abstract function definition since the name of the function can be overridden. * Made a few things public instead of protected for easier testing via eval.php * Fixed missing newlines in progress messages in cleanupCaps.php --- maintenance/cleanupCaps.php | 20 ++++--- maintenance/cleanupImages.php | 10 +++- maintenance/cleanupTable.inc | 95 +++++++++++++++++++++++--------- maintenance/cleanupTitles.php | 13 +++-- maintenance/cleanupWatchlist.php | 28 ++++++++-- 5 files changed, 119 insertions(+), 47 deletions(-) diff --git a/maintenance/cleanupCaps.php b/maintenance/cleanupCaps.php index c0d1773aab..6a48ea832b 100644 --- a/maintenance/cleanupCaps.php +++ b/maintenance/cleanupCaps.php @@ -45,12 +45,14 @@ class CapsCleanup extends TableCleanup { if( $wgCapitalLinks ) $this->error( "\$wgCapitalLinks is on -- no need for caps links cleanup.", true ); - $this->runTable( $this->targetTable, - 'WHERE page_namespace=' . $this->namespace, - array( &$this, 'processPage' ) ); + $this->runTable( array( + 'table' => 'page', + 'conds' => array( 'page_namespace' => $this->namespace ), + 'index' => 'page_id', + 'callback' => 'processRow' ) ); } - protected function processPage( $row ) { + protected function processRow( $row ) { global $wgContLang; $current = Title::makeTitle( $row->page_namespace, $row->page_title ); @@ -58,23 +60,23 @@ class CapsCleanup extends TableCleanup { $upper = $row->page_title; $lower = $wgContLang->lcfirst( $row->page_title ); if( $upper == $lower ) { - $this->output( "\"$display\" already lowercase." ); + $this->output( "\"$display\" already lowercase.\n" ); return $this->progress( 0 ); } $target = Title::makeTitle( $row->page_namespace, $lower ); $targetDisplay = $target->getPrefixedText(); if( $target->exists() ) { - $this->output( "\"$display\" skipped; \"$targetDisplay\" already exists" ); + $this->output( "\"$display\" skipped; \"$targetDisplay\" already exists\n" ); return $this->progress( 0 ); } if( $this->dryrun ) { - $this->output( "\"$display\" -> \"$targetDisplay\": DRY RUN, NOT MOVED" ); + $this->output( "\"$display\" -> \"$targetDisplay\": DRY RUN, NOT MOVED\n" ); $ok = true; } else { $ok = $current->moveTo( $target, false, 'Converting page titles to lowercase' ); - $this->output( "\"$display\" -> \"$targetDisplay\": $ok" ); + $this->output( "\"$display\" -> \"$targetDisplay\": $ok\n" ); } if( $ok === true ) { $this->progress( 1 ); @@ -82,7 +84,7 @@ class CapsCleanup extends TableCleanup { $talk = $target->getTalkPage(); $row->page_namespace = $talk->getNamespace(); if( $talk->exists() ) { - return $this->processPage( $row ); + return $this->processRow( $row ); } } } else { diff --git a/maintenance/cleanupImages.php b/maintenance/cleanupImages.php index 96b1c5c0c3..d401094314 100644 --- a/maintenance/cleanupImages.php +++ b/maintenance/cleanupImages.php @@ -31,13 +31,19 @@ require_once( dirname(__FILE__) . '/cleanupTable.inc' ); class ImageCleanup extends TableCleanup { - protected $targetTable = 'image'; + protected $defaultParams = array( + 'table' => 'image', + 'conds' => array(), + 'index' => 'img_name', + 'callback' => 'processRow', + ); + public function __construct() { parent::__construct(); $this->mDescription = "Script to clean up broken, unparseable upload filenames"; } - protected function processPage( $row ) { + protected function processRow( $row ) { global $wgContLang; $source = $row->img_name; diff --git a/maintenance/cleanupTable.inc b/maintenance/cleanupTable.inc index f67b3b1b64..da0ff88d61 100644 --- a/maintenance/cleanupTable.inc +++ b/maintenance/cleanupTable.inc @@ -23,10 +23,18 @@ require_once( dirname(__FILE__) . '/Maintenance.php' ); -abstract class TableCleanup extends Maintenance { - protected $targetTable = 'page'; +class TableCleanup extends Maintenance { + protected $defaultParams = array( + 'table' => 'page', + 'conds' => array(), + 'index' => 'page_id', + 'callback' => 'processRow', + ); + protected $dryrun = false; protected $maxLag = 10; # if slaves are lagged more than 10 secs, wait + public $batchSize = 100; + public $reportInterval = 100; public function __construct() { parent::__construct(); @@ -42,9 +50,7 @@ abstract class TableCleanup extends Maintenance { } else { $this->output( "Checking and fixing bad titles...\n" ); } - $this->runTable( $this->targetTable, - '', //'WHERE page_namespace=0', - array( $this, 'processPage' ) ); + $this->runTable( $this->defaultParams ); } protected function init( $count, $table ) { @@ -58,7 +64,7 @@ abstract class TableCleanup extends Maintenance { protected function progress( $updated ) { $this->updated += $updated; $this->processed++; - if( $this->processed % 100 != 0 ) { + if( $this->processed % $this->reportInterval != 0 ) { return; } $portion = $this->processed / $this->count; @@ -85,34 +91,73 @@ abstract class TableCleanup extends Maintenance { flush(); } - protected function runTable( $table, $where, $callback ) { + public function runTable( $params ) { $dbr = wfGetDB( DB_SLAVE ); - - $count = $dbr->selectField( $table, 'count(*)', '', __METHOD__ ); + + if ( array_diff( array_keys( $params ), + array( 'table', 'conds', 'index', 'callback' ) ) ) + { + throw new MWException( __METHOD__.': Missing parameter ' . implode( ', ', $params ) ); + } + + $table = $params['table']; + $count = $dbr->selectField( $table, 'count(*)', $params['conds'], __METHOD__ ); $this->init( $count, $table ); - $this->output( "Processing $table..." ); + $this->output( "Processing $table...\n" ); + + + $index = (array)$params['index']; + $indexConds = array(); + $options = array( + 'ORDER BY' => implode( ',', $index ), + 'LIMIT' => $this->batchSize + ); + $callback = array( $this, $params['callback'] ); - // Unbuffered queries, avoids OOM - $dbr->bufferResults( false ); - - $tableName = $dbr->tableName( $table ); - $sql = "SELECT * FROM $tableName $where"; - $result = $dbr->query( $sql, __METHOD__ ); + while ( true ) { + $conds = array_merge( $params['conds'], $indexConds ); + $res = $dbr->select( $table, '*', $conds, __METHOD__, $options ); + if ( !$res->numRows() ) { + // Done + break; + } - foreach( $result as $row ) { - call_user_func( $callback, $row ); + foreach ( $res as $row ) { + call_user_func( $callback, $row ); + } + + if ( $res->numRows() < $this->batchSize ) { + // Done + break; + } + + // Update the conditions to select the next batch. + // Construct a condition string by starting with the least significant part + // of the index, and adding more significant parts progressively to the left + // of the string. + $nextCond = ''; + foreach ( array_reverse( $index ) as $field ) { + $encValue = $dbr->addQuotes( $row->$field ); + if ( $nextCond === '' ) { + $nextCond = "$field > $encValue"; + } else { + $nextCond = "$field > $encValue OR ($field = $encValue AND ($nextCond))"; + } + } + $indexConds = array( $nextCond ); } - + $this->output( "Finished $table... $this->updated of $this->processed rows updated\n" ); - - $result->free(); - - $dbr->bufferResults( true ); } protected function hexChar( $matches ) { return sprintf( "\\x%02x", ord( $matches[1] ) ); } - - abstract protected function processPage( $row ); } + +class TableCleanupTest extends TableCleanup { + function processRow( $row ) { + $this->progress( mt_rand( 0, 1 ) ); + } +} + diff --git a/maintenance/cleanupTitles.php b/maintenance/cleanupTitles.php index ea01ba7ea5..5025eebafe 100644 --- a/maintenance/cleanupTitles.php +++ b/maintenance/cleanupTitles.php @@ -36,15 +36,16 @@ class TitleCleanup extends TableCleanup { $this->mDescription = "Script to clean up broken, unparseable titles"; } - protected function processPage( $row ) { - $current = Title::makeTitle( $row->page_namespace, $row->page_title ); - $display = $current->getPrefixedText(); - + protected function processRow( $row ) { + $display = Title::makeName( $row->page_namespace, $row->page_title ); $verified = UtfNormal::cleanUp( $display ); - $title = Title::newFromText( $verified ); - if( !is_null( $title ) && $title->equals( $current ) && $title->canExist() ) { + if( !is_null( $title ) + && $title->canExist() + && $title->getNamespace() == $row->page_namespace + && $title->getDBkey() === $row->page_title ) + { return $this->progress( 0 ); // all is fine } diff --git a/maintenance/cleanupWatchlist.php b/maintenance/cleanupWatchlist.php index 7be48107f6..bc07d74b5a 100644 --- a/maintenance/cleanupWatchlist.php +++ b/maintenance/cleanupWatchlist.php @@ -31,13 +31,27 @@ require_once( dirname(__FILE__) . '/cleanupTable.inc' ); class WatchlistCleanup extends TableCleanup { - protected $targetTable = 'watchlist'; + protected $defaultParams = array( + 'table' => 'watchlist', + 'index' => array( 'wl_user', 'wl_namespace', 'wl_title' ), + 'conds' => array(), + 'callback' => 'processRow' + ); + public function __construct() { parent::__construct(); $this->mDescription = "Script to remove broken, unparseable titles in the Watchlist"; + $this->addOption( 'fix', 'Actually remove entries; without will only report.' ); + } + + function execute() { + if ( !$this->hasOption( 'fix' ) ) { + $this->output( "Dry run only: use --fix to enable updates\n" ); + } + parent::execute(); } - protected function processPage( $row ) { + protected function processRow( $row ) { $current = Title::makeTitle( $row->wl_namespace, $row->wl_title ); $display = $current->getPrefixedText(); $verified = UtfNormal::cleanUp( $display ); @@ -45,14 +59,15 @@ class WatchlistCleanup extends TableCleanup { if( $row->wl_user == 0 || is_null( $title ) || !$title->equals( $current ) ) { $this->output( "invalid watch by {$row->wl_user} for ({$row->wl_namespace}, \"{$row->wl_title}\")\n" ); - $this->removeWatch( $row ); - return $this->progress( 1 ); + $updated = $this->removeWatch( $row ); + $this->progress( $updated ); + return; } $this->progress( 0 ); } private function removeWatch( $row ) { - if( !$this->dryrun ) { + if( !$this->dryrun && $this->hasOption( 'fix' ) ) { $dbw = wfGetDB( DB_MASTER ); $dbw->delete( 'watchlist', array( 'wl_user' => $row->wl_user, @@ -60,6 +75,9 @@ class WatchlistCleanup extends TableCleanup { 'wl_title' => $row->wl_title ), __METHOD__ ); $this->output( "- removed\n" ); + return 1; + } else { + return 0; } } } -- 2.20.1