Use unbuffered queries in cleanupTable maintenance scripts. Prevents OOM issues.
[lhc/web/wiklou.git] / maintenance / cleanupTable.inc
1 <?php
2 /**
3 * Generic table cleanup class. Already subclasses maintenance
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @defgroup Wikimedia
21 * @ingroup Maintenance
22 */
23
24 require_once( dirname(__FILE__) . '/Maintenance.php' );
25
/**
 * Base class for maintenance scripts that walk every row of one table and
 * hand each row to a per-row handler implemented by the subclass.
 *
 * execute() streams all rows of $targetTable through processPage().
 * progress() keeps running totals and prints a status line every 100 rows;
 * nothing in this class calls it, so handlers are presumably expected to
 * call it once per row -- confirm against the subclasses.
 */
abstract class TableCleanup extends Maintenance {
	/** Name of the table that execute() scans. */
	protected $targetTable = 'page';

	/** True when the 'dry-run' option was passed; set in execute(). */
	protected $dryrun = false;

	# if slaves are lagged more than 10 secs, wait
	# NOTE(review): nothing in this class reads $maxLag, so no lag wait
	# actually happens here -- confirm whether subclasses use it.
	protected $maxLag = 10;

	/** Number of rows reported through progress() so far; reset by init(). */
	protected $processed;

	/** Running sum of the $updated values passed to progress(). */
	protected $updated;

	/** Row count of the table as measured when runTable() started. */
	protected $count;

	/** Value of wfTime() when init() ran; the base for rate and ETA. */
	protected $startTime;

	/** Table name shown in progress lines. */
	protected $table;

	/**
	 * Registers the 'dry-run' command line option on top of whatever the
	 * parent constructor sets up.
	 */
	public function __construct() {
		parent::__construct();
		$this->addOption( 'dry-run', 'Perform a dry run' );
	}

	/**
	 * Script entry point: renames the global user to 'Conversion script',
	 * records whether this is a dry run, and runs processPage() over every
	 * row of $targetTable.
	 */
	public function execute() {
		global $wgUser;
		$wgUser->setName( 'Conversion script' );
		$this->dryrun = $this->hasOption( 'dry-run' );
		if ( $this->dryrun ) {
			$this->output( "Checking for bad titles...\n" );
		} else {
			$this->output( "Checking and fixing bad titles...\n" );
		}
		// Empty WHERE fragment: the whole table is scanned.
		$this->runTable(
			$this->targetTable,
			'',
			array( $this, 'processPage' )
		);
	}

	/**
	 * Resets the progress counters before a table scan.
	 *
	 * @param $count Total number of rows expected (used for percentage/ETA)
	 * @param $table String: table name to show in progress lines
	 */
	protected function init( $count, $table ) {
		$this->processed = 0;
		$this->updated = 0;
		$this->count = $count;
		$this->startTime = wfTime();
		$this->table = $table;
	}

	/**
	 * Records one handled row and, on every 100th row, prints a status line
	 * with percentage done, ETA, throughput and the share of rows updated.
	 *
	 * @param $updated Integer: amount to add to the updated-rows total for
	 *   this row (presumably 0 or 1 -- confirm against callers)
	 */
	protected function progress( $updated ) {
		$this->updated += $updated;
		$this->processed++;
		if ( $this->processed % 100 != 0 ) {
			return;
		}

		// The total can be zero/false (empty table at count time, or a failed
		// count) and the clock may not have advanced; guard every division so
		// a status line can never abort the run.
		$portion = $this->count > 0 ? $this->processed / $this->count : 0;
		// $processed is a non-zero multiple of 100 here, so this is safe.
		$updateRate = $this->updated / $this->processed;

		$now = wfTime();
		$delta = $now - $this->startTime;
		$estimatedTotalTime = $portion > 0 ? $delta / $portion : 0;
		$eta = $this->startTime + $estimatedTotalTime;
		$rowsPerSecond = $delta > 0 ? $this->processed / $delta : 0;

		$this->output(
			sprintf( "%s %s: %6.2f%% done on %s; ETA %s [%d/%d] %.2f/sec <%.2f%% updated>\n",
				wfWikiID(),
				wfTimestamp( TS_DB, intval( $now ) ),
				$portion * 100.0,
				$this->table,
				wfTimestamp( TS_DB, intval( $eta ) ),
				$this->processed,
				$this->count,
				$rowsPerSecond,
				$updateRate * 100.0
			)
		);
		flush();
	}

	/**
	 * Streams every row of a table through a callback using an unbuffered
	 * query, so the full result set is never held in memory at once.
	 *
	 * NOTE(review): the rows are read from the DB_MASTER handle while the
	 * result is still streaming; if the callback writes through that same
	 * connection, some drivers reject a second query until the unbuffered
	 * result is drained -- verify against the subclasses' handlers.
	 *
	 * @param $table String: table name, without prefix
	 * @param $where String: raw SQL appended after the table name
	 *   (e.g. a complete "WHERE ..." clause), or '' for all rows
	 * @param $callback Callback invoked once per row with the row object
	 */
	protected function runTable( $table, $where, $callback ) {
		$dbw = wfGetDB( DB_MASTER );

		// Count first, while results are still buffered: it is a one-row
		// result, so streaming it saves nothing.
		$count = $dbw->selectField( $table, 'count(*)', '', __METHOD__ );
		$this->init( $count, $table );
		$this->output( "Processing $table...\n" );

		// Unbuffered queries, avoids OOM. Remember the previous mode so it
		// can be put back exactly as it was.
		$wasBuffered = $dbw->bufferResults( false );

		try {
			$tableName = $dbw->tableName( $table );
			$sql = "SELECT * FROM $tableName $where";
			$result = $dbw->query( $sql, __METHOD__ );

			foreach ( $result as $row ) {
				call_user_func( $callback, $row );
			}

			$this->output( "Finished $table... $this->updated of $this->processed rows updated\n" );

			$result->free();
		} catch ( Exception $e ) {
			// Do not leave the shared connection in unbuffered mode when the
			// query or a row handler fails.
			$dbw->bufferResults( $wasBuffered );
			throw $e;
		}

		$dbw->bufferResults( $wasBuffered );
	}

	/**
	 * Formats the first byte of a captured substring as a "\xNN" escape
	 * (two lowercase hex digits). The signature matches a
	 * preg_replace_callback() callback; it is not called within this class.
	 *
	 * @param $matches Array: regex match array; index 1 holds the character
	 * @return String
	 */
	protected function hexChar( $matches ) {
		return sprintf( "\\x%02x", ord( $matches[1] ) );
	}

	/**
	 * Per-row handler supplied by the concrete script; execute() passes it
	 * to runTable() as the callback.
	 *
	 * @param $row Row object from the scan of $targetTable
	 */
	abstract protected function processPage( $row );
}