rebuildrecentchanges: Allow rebuilding specified time range only
[lhc/web/wiklou.git] / maintenance / rebuildrecentchanges.php
1 <?php
2 /**
3 * Rebuild recent changes from scratch. This takes several hours,
4 * depending on the database size and server configuration.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup Maintenance
23 * @todo Document
24 */
25
26 require_once __DIR__ . '/Maintenance.php';
27
28 /**
29 * Maintenance script that rebuilds recent changes from scratch.
30 *
31 * @ingroup Maintenance
32 */
class RebuildRecentchanges extends Maintenance {
	/** @var int|string|null UNIX timestamp; only entries strictly newer than this are rebuilt */
	private $cutoffFrom;

	/** @var int|string|null UNIX timestamp; only entries strictly older than this are rebuilt */
	private $cutoffTo;

	/**
	 * Register the script description and the optional --from / --to
	 * arguments that restrict the rebuild to a time range.
	 */
	public function __construct() {
		parent::__construct();
		$this->addDescription( 'Rebuild recent changes' );

		$this->addOption(
			'from',
			"Don't empty the table, only insert rows in requested time range (in YYYYMMDDHHMMSS format)",
			false,
			true
		);
		$this->addOption(
			'to',
			"Don't empty the table, only insert rows in requested time range (in YYYYMMDDHHMMSS format)",
			false,
			true
		);
	}

	/**
	 * Entry point: run the five rebuild passes in order.
	 *
	 * The cached feeds are only purged for a full rebuild, i.e. when no
	 * --from / --to range was requested.
	 */
	public function execute() {
		$hasFrom = $this->hasOption( 'from' );
		$hasTo = $this->hasOption( 'to' );

		// A range needs both ends; exactly one of them being set is an error.
		if ( $hasFrom !== $hasTo ) {
			$this->error( "Both 'from' and 'to' must be given, or neither", 1 );
		}

		$this->rebuildRecentChangesTablePass1();
		$this->rebuildRecentChangesTablePass2();
		$this->rebuildRecentChangesTablePass3();
		$this->rebuildRecentChangesTablePass4();
		$this->rebuildRecentChangesTablePass5();
		if ( !( $hasFrom && $hasTo ) ) {
			$this->purgeFeeds();
		}
		$this->output( "Done.\n" );
	}

	/**
	 * Build the two timestamp conditions limiting $field to the open
	 * interval ( cutoffFrom, cutoffTo ).
	 *
	 * @param object $dbw Database handle as returned by getDB()
	 * @param string $field Name of the timestamp column to restrict
	 * @return string[] Conditions suitable for a conds array
	 */
	private function timestampRangeConds( $dbw, $field ) {
		return [
			"$field > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ),
			"$field < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ),
		];
	}

	/**
	 * Rebuild pass 1: Insert `recentchanges` entries for page revisions.
	 *
	 * Also determines the cutoff range used by all later passes. Without
	 * --from / --to the range is the last $wgRCMaxAge seconds and the table
	 * is emptied first. With a range the table is left untouched.
	 * NOTE(review): in range mode rows already present in that range are not
	 * removed before inserting, which looks like it can produce duplicates —
	 * confirm whether that is intended.
	 */
	private function rebuildRecentChangesTablePass1() {
		$dbw = $this->getDB( DB_MASTER );

		if ( $this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) {
			$from = wfTimestamp( TS_UNIX, $this->getOption( 'from' ) );
			$to = wfTimestamp( TS_UNIX, $this->getOption( 'to' ) );

			// wfTimestamp() yields false for input it cannot parse; refuse to
			// continue instead of silently rebuilding an unintended range.
			if ( $from === false || $to === false ) {
				$this->error( "Invalid timestamp given for 'from' or 'to' (expected YYYYMMDDHHMMSS)", 1 );
			}

			$this->cutoffFrom = $from;
			$this->cutoffTo = $to;

			$sec = $this->cutoffTo - $this->cutoffFrom;
			$days = $sec / 24 / 3600;
			$this->output( "Rebuilding range of $sec seconds ($days days)\n" );
		} else {
			global $wgRCMaxAge;

			$days = $wgRCMaxAge / 24 / 3600;
			$this->output( "Rebuilding \$wgRCMaxAge=$wgRCMaxAge seconds ($days days)\n" );

			$now = time();
			$this->cutoffFrom = $now - $wgRCMaxAge;
			$this->cutoffTo = $now;

			$this->output( "Clearing recentchanges table...\n" );
			$dbw->delete( 'recentchanges', '*', __METHOD__ );
		}

		$this->output( "Loading from page and revision tables...\n" );

		$conds = $this->timestampRangeConds( $dbw, 'rev_timestamp' );
		$conds[] = 'rev_page=page_id';

		$dbw->insertSelect( 'recentchanges', [ 'page', 'revision' ],
			[
				'rc_timestamp' => 'rev_timestamp',
				'rc_user' => 'rev_user',
				'rc_user_text' => 'rev_user_text',
				'rc_namespace' => 'page_namespace',
				'rc_title' => 'page_title',
				'rc_comment' => 'rev_comment',
				'rc_minor' => 'rev_minor_edit',
				'rc_bot' => 0,
				'rc_new' => 'page_is_new',
				'rc_cur_id' => 'page_id',
				'rc_this_oldid' => 'rev_id',
				'rc_last_oldid' => 0, // placeholder; filled in by pass 2
				'rc_type' => $dbw->conditional( 'page_is_new != 0', RC_NEW, RC_EDIT ),
				'rc_source' => $dbw->conditional(
					'page_is_new != 0',
					$dbw->addQuotes( RecentChange::SRC_NEW ),
					$dbw->addQuotes( RecentChange::SRC_EDIT )
				),
				'rc_deleted' => 'rev_deleted'
			],
			$conds,
			__METHOD__,
			[], // INSERT options
			[ 'ORDER BY' => 'rev_timestamp DESC', 'LIMIT' => 5000 ] // SELECT options
		);
	}

	/**
	 * Rebuild pass 2: Enhance entries for page revisions with references to the previous revision
	 * (rc_last_oldid, rc_new etc.) and size differences (rc_old_len, rc_new_len).
	 */
	private function rebuildRecentChangesTablePass2() {
		$dbw = $this->getDB( DB_MASTER );

		$this->output( "Updating links and size differences...\n" );

		# Walk the entries page by page, oldest first, so that each row's
		# predecessor is the row handled just before it.
		$res = $dbw->select(
			'recentchanges',
			[ 'rc_cur_id', 'rc_this_oldid', 'rc_timestamp' ],
			$this->timestampRangeConds( $dbw, 'rc_timestamp' ),
			__METHOD__,
			[ 'ORDER BY' => 'rc_cur_id,rc_timestamp' ]
		);

		$lastCurId = 0;
		$lastOldId = 0;
		$lastSize = null;
		foreach ( $res as $obj ) {
			$new = 0;
			if ( $obj->rc_cur_id != $lastCurId ) {
				# Switch! Look up the previous last edit, if any
				$lastCurId = intval( $obj->rc_cur_id );
				$row = $dbw->selectRow(
					'revision',
					[ 'rev_id', 'rev_len' ],
					[
						'rev_page' => $lastCurId,
						'rev_timestamp < ' . $dbw->addQuotes( $obj->rc_timestamp ),
					],
					__METHOD__,
					[ 'ORDER BY' => 'rev_timestamp DESC' ]
				);
				if ( $row ) {
					$lastOldId = intval( $row->rev_id );
					# Grab the last text size if available
					$lastSize = !is_null( $row->rev_len ) ? intval( $row->rev_len ) : null;
				} else {
					# No previous edit
					$lastOldId = 0;
					$lastSize = null;
					$new = 1; // probably true
				}
			}

			if ( $lastCurId == 0 ) {
				$this->output( "Uhhh, something wrong? No curid\n" );
				continue;
			}

			# Grab the entry's text size
			$size = $dbw->selectField(
				'revision',
				'rev_len',
				[ 'rev_id' => $obj->rc_this_oldid ],
				__METHOD__
			);

			$dbw->update( 'recentchanges',
				[
					'rc_last_oldid' => $lastOldId,
					'rc_new' => $new,
					'rc_type' => $new ? RC_NEW : RC_EDIT,
					'rc_source' => $new === 1 ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT,
					'rc_old_len' => $lastSize,
					'rc_new_len' => $size,
				], [
					'rc_cur_id' => $lastCurId,
					'rc_this_oldid' => $obj->rc_this_oldid,
				],
				__METHOD__
			);

			# This row is the predecessor of the next row of the same page
			$lastOldId = intval( $obj->rc_this_oldid );
			$lastSize = $size;
		}
	}

	/**
	 * Rebuild pass 3: Insert `recentchanges` entries for action logs.
	 */
	private function rebuildRecentChangesTablePass3() {
		global $wgLogTypes, $wgLogRestrictions;

		$dbw = $this->getDB( DB_MASTER );

		$this->output( "Loading from user, page, and logging tables...\n" );

		// Some logs don't go in RC: skip every log type that has a restriction
		$basicRCLogs = array_diff( $wgLogTypes, array_keys( $wgLogRestrictions ) );

		$conds = $this->timestampRangeConds( $dbw, 'log_timestamp' );
		$conds[] = 'log_user=user_id';
		$conds['log_type'] = $basicRCLogs;

		list( $logging, $page ) = $dbw->tableNamesN( 'logging', 'page' );
		$dbw->insertSelect(
			'recentchanges',
			[
				'user',
				"$logging LEFT JOIN $page ON (log_namespace=page_namespace AND log_title=page_title)"
			],
			[
				'rc_timestamp' => 'log_timestamp',
				'rc_user' => 'log_user',
				'rc_user_text' => 'user_name',
				'rc_namespace' => 'log_namespace',
				'rc_title' => 'log_title',
				'rc_comment' => 'log_comment',
				'rc_minor' => 0,
				'rc_bot' => 0,
				'rc_patrolled' => 1,
				'rc_new' => 0,
				'rc_this_oldid' => 0,
				'rc_last_oldid' => 0,
				'rc_type' => RC_LOG,
				'rc_source' => $dbw->addQuotes( RecentChange::SRC_LOG ),
				'rc_cur_id' => $dbw->cascadingDeletes() ? 'page_id' : 'COALESCE(page_id, 0)',
				'rc_log_type' => 'log_type',
				'rc_log_action' => 'log_action',
				'rc_logid' => 'log_id',
				'rc_params' => 'log_params',
				'rc_deleted' => 'log_deleted'
			],
			$conds,
			__METHOD__,
			[], // INSERT options
			[ 'ORDER BY' => 'log_timestamp DESC', 'LIMIT' => 5000 ] // SELECT options
		);
	}

	/**
	 * Rebuild pass 4: Mark bot and autopatrolled entries.
	 *
	 * Autopatrol flagging is skipped when $wgUseRCPatrol is off or
	 * $wgMiserMode is on.
	 */
	private function rebuildRecentChangesTablePass4() {
		global $wgUseRCPatrol, $wgMiserMode;

		$dbw = $this->getDB( DB_MASTER );

		$botgroups = User::getGroupsWithPermission( 'bot' );
		$autopatrolgroups = $wgUseRCPatrol ? User::getGroupsWithPermission( 'autopatrol' ) : [];

		# Flag our recent bot edits
		if ( !empty( $botgroups ) ) {
			$this->output( "Flagging bot account edits...\n" );
			$this->flagChangesByGroupMembers( $dbw, $botgroups, 'rc_bot' );
		}

		# Flag our recent autopatrolled edits
		if ( !$wgMiserMode && !empty( $autopatrolgroups ) ) {
			$this->output( "Flagging auto-patrolled edits...\n" );
			$this->flagChangesByGroupMembers( $dbw, $autopatrolgroups, 'rc_patrolled' );
		}
	}

	/**
	 * Set $field to 1 on every `recentchanges` row inside the cutoff range
	 * whose rc_user_text belongs to a member of one of the given groups.
	 *
	 * @param object $dbw Database handle as returned by getDB()
	 * @param string[] $groups Non-empty list of user group names
	 * @param string $field Flag column to set (rc_bot or rc_patrolled)
	 */
	private function flagChangesByGroupMembers( $dbw, array $groups, $field ) {
		# Find all users that are members of any of the groups
		$res = $dbw->select(
			[ 'user_groups', 'user' ],
			'user_name',
			[ 'ug_group' => $groups, 'user_id = ug_user' ],
			__METHOD__,
			[ 'DISTINCT' ]
		);

		$userNames = [];
		foreach ( $res as $row ) {
			$userNames[] = $row->user_name;
		}

		// Nothing to flag; an empty IN() list would not be valid either.
		if ( !$userNames ) {
			return;
		}

		$conds = $this->timestampRangeConds( $dbw, 'rc_timestamp' );
		$conds['rc_user_text'] = $userNames;

		$dbw->update( 'recentchanges', [ $field => 1 ], $conds, __METHOD__ );
	}

	/**
	 * Rebuild pass 5: Delete duplicate entries where we generate both a page revision and a log entry
	 * for a single action (upload only, at the moment, but potentially also move, protect, ...).
	 */
	private function rebuildRecentChangesTablePass5() {
		$dbw = $this->getDB( DB_MASTER );

		$this->output( "Removing duplicate revision and logging entries...\n" );

		$conds = $this->timestampRangeConds( $dbw, 'log_timestamp' );
		$conds[] = 'ls_log_id = log_id';
		$conds['ls_field'] = 'associated_rev_id';
		$conds['log_type'] = 'upload';

		$res = $dbw->select(
			[ 'logging', 'log_search' ],
			[ 'ls_value', 'ls_log_id' ],
			$conds,
			__METHOD__
		);
		foreach ( $res as $obj ) {
			$rev_id = $obj->ls_value;
			$log_id = $obj->ls_log_id;

			// Mark the logging row as having an associated rev id
			$dbw->update(
				'recentchanges',
				/*SET*/ [ 'rc_this_oldid' => $rev_id ],
				/*WHERE*/ [ 'rc_logid' => $log_id ],
				__METHOD__
			);

			// Delete the revision row; the log row updated above has a
			// non-zero rc_logid and is therefore kept
			$dbw->delete(
				'recentchanges',
				/*WHERE*/ [ 'rc_this_oldid' => $rev_id, 'rc_logid' => 0 ],
				__METHOD__
			);
		}
	}

	/**
	 * Purge cached feeds in $messageMemc
	 */
	private function purgeFeeds() {
		global $wgFeedClasses, $messageMemc;

		$this->output( "Deleting feed timestamps.\n" );

		// Only the feed names (array keys) are needed to build the cache keys
		foreach ( array_keys( $wgFeedClasses ) as $feed ) {
			$messageMemc->delete( wfMemcKey( 'rcfeed', $feed, 'timestamp' ) ); # Good enough for now.
		}
	}
}
377
// Tell the maintenance bootstrap which class to run, then hand over control.
$maintClass = RebuildRecentchanges::class;
require_once RUN_MAINTENANCE_IF_MAIN;