X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=maintenance%2Frebuildrecentchanges.php;h=6465bb32faab980769592b02018cec69dccc1fb1;hb=df2ef5194e88514081511302da45ebf19afcb248;hp=d61906cfaabd0e5acb6d48675814ab9326f54656;hpb=920c06e42c307fcaea47d9b65e09303aded2d1c9;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/rebuildrecentchanges.php b/maintenance/rebuildrecentchanges.php index d61906cfaa..6465bb32fa 100644 --- a/maintenance/rebuildrecentchanges.php +++ b/maintenance/rebuildrecentchanges.php @@ -31,18 +31,46 @@ require_once __DIR__ . '/Maintenance.php'; * @ingroup Maintenance */ class RebuildRecentchanges extends Maintenance { + /** @var integer UNIX timestamp */ + private $cutoffFrom; + /** @var integer UNIX timestamp */ + private $cutoffTo; + public function __construct() { parent::__construct(); $this->addDescription( 'Rebuild recent changes' ); + + $this->addOption( + 'from', + "Only rebuild rows in requested time range (in YYYYMMDDHHMMSS format)", + false, + true + ); + $this->addOption( + 'to', + "Only rebuild rows in requested time range (in YYYYMMDDHHMMSS format)", + false, + true + ); + $this->setBatchSize( 200 ); } public function execute() { + if ( + ( $this->hasOption( 'from' ) && !$this->hasOption( 'to' ) ) || + ( !$this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) + ) { + $this->error( "Both 'from' and 'to' must be given, or neither", 1 ); + } + $this->rebuildRecentChangesTablePass1(); $this->rebuildRecentChangesTablePass2(); $this->rebuildRecentChangesTablePass3(); $this->rebuildRecentChangesTablePass4(); $this->rebuildRecentChangesTablePass5(); - $this->purgeFeeds(); + if ( !( $this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) ) { + $this->purgeFeeds(); + } $this->output( "Done.\n" ); } @@ -52,51 +80,93 @@ class RebuildRecentchanges extends Maintenance { private function rebuildRecentChangesTablePass1() { $dbw = $this->getDB( DB_MASTER ); - $dbw->delete( 'recentchanges', '*' ); + if ( $this->hasOption( 'from' ) && $this->hasOption( 'to' ) ) { + $this->cutoffFrom = wfTimestamp( TS_UNIX, $this->getOption( 'from' ) ); + $this->cutoffTo = wfTimestamp( TS_UNIX, $this->getOption( 'to' ) ); - $this->output( "Loading from page and revision tables...\n" ); + $sec = $this->cutoffTo - $this->cutoffFrom; + $days = $sec / 24 / 3600; + $this->output( "Rebuilding range of $sec seconds ($days days)\n" ); + } else { + global $wgRCMaxAge; - global $wgRCMaxAge; + $days = $wgRCMaxAge / 24 / 3600; + $this->output( "Rebuilding \$wgRCMaxAge=$wgRCMaxAge seconds ($days days)\n" ); - $this->output( '$wgRCMaxAge=' . $wgRCMaxAge ); - $days = $wgRCMaxAge / 24 / 3600; - if ( intval( $days ) == $days ) { - $this->output( " (" . $days . " days)\n" ); - } else { - $this->output( " (approx. " . intval( $days ) . " days)\n" ); + $this->cutoffFrom = time() - $wgRCMaxAge; + $this->cutoffTo = time(); + } + + $this->output( "Clearing recentchanges table for time range...\n" ); + $rcids = $dbw->selectFieldValues( + 'recentchanges', + 'rc_id', + [ + 'rc_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ), + 'rc_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ) + ] + ); + foreach ( array_chunk( $rcids, $this->mBatchSize ) as $rcidBatch ) { + $dbw->delete( 'recentchanges', [ 'rc_id' => $rcidBatch ], __METHOD__ ); + wfGetLBFactory()->waitForReplication(); } - $cutoff = time() - $wgRCMaxAge; - $dbw->insertSelect( 'recentchanges', array( 'page', 'revision' ), - array( - 'rc_timestamp' => 'rev_timestamp', - 'rc_user' => 'rev_user', - 'rc_user_text' => 'rev_user_text', - 'rc_namespace' => 'page_namespace', - 'rc_title' => 'page_title', - 'rc_comment' => 'rev_comment', - 'rc_minor' => 'rev_minor_edit', - 'rc_bot' => 0, - 'rc_new' => 'page_is_new', - 'rc_cur_id' => 'page_id', - 'rc_this_oldid' => 'rev_id', - 'rc_last_oldid' => 0, // is this ok? - 'rc_type' => $dbw->conditional( 'page_is_new != 0', RC_NEW, RC_EDIT ), - 'rc_source' => $dbw->conditional( - 'page_is_new != 0', - $dbw->addQuotes( RecentChange::SRC_NEW ), - $dbw->addQuotes( RecentChange::SRC_EDIT ) - ), - 'rc_deleted' => 'rev_deleted' - ), - array( - 'rev_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $cutoff ) ), + $this->output( "Loading from page and revision tables...\n" ); + $res = $dbw->select( + [ 'page', 'revision' ], + [ + 'rev_timestamp', + 'rev_user', + 'rev_user_text', + 'rev_comment', + 'rev_minor_edit', + 'rev_id', + 'rev_deleted', + 'page_namespace', + 'page_title', + 'page_is_new', + 'page_id' + ], + [ + 'rev_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ), + 'rev_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ), 'rev_page=page_id' - ), + ], __METHOD__, - array(), // INSERT options - array( 'ORDER BY' => 'rev_timestamp DESC', 'LIMIT' => 5000 ) // SELECT options + [ 'ORDER BY' => 'rev_timestamp DESC' ] ); + + $this->output( "Inserting from page and revision tables...\n" ); + $inserted = 0; + foreach ( $res as $row ) { + $dbw->insert( + 'recentchanges', + [ + 'rc_timestamp' => $row->rev_timestamp, + 'rc_user' => $row->rev_user, + 'rc_user_text' => $row->rev_user_text, + 'rc_namespace' => $row->page_namespace, + 'rc_title' => $row->page_title, + 'rc_comment' => $row->rev_comment, + 'rc_minor' => $row->rev_minor_edit, + 'rc_bot' => 0, + 'rc_new' => $row->page_is_new, + 'rc_cur_id' => $row->page_id, + 'rc_this_oldid' => $row->rev_id, + 'rc_last_oldid' => 0, // is this ok? + 'rc_type' => $row->page_is_new ? RC_NEW : RC_EDIT, + 'rc_source' => $row->page_is_new + ? $dbw->addQuotes( RecentChange::SRC_NEW ) + : $dbw->addQuotes( RecentChange::SRC_EDIT ) + , + 'rc_deleted' => $row->rev_deleted + ], + __METHOD__ + ); + if ( ( ++$inserted % $this->mBatchSize ) == 0 ) { + wfGetLBFactory()->waitForReplication(); + } + } } /** @@ -105,29 +175,40 @@ class RebuildRecentchanges extends Maintenance { */ private function rebuildRecentChangesTablePass2() { $dbw = $this->getDB( DB_MASTER ); - list( $recentchanges, $revision ) = $dbw->tableNamesN( 'recentchanges', 'revision' ); $this->output( "Updating links and size differences...\n" ); # Fill in the rc_last_oldid field, which points to the previous edit - $sql = "SELECT rc_cur_id,rc_this_oldid,rc_timestamp FROM $recentchanges " . - "ORDER BY rc_cur_id,rc_timestamp"; - $res = $dbw->query( $sql, DB_MASTER ); + $res = $dbw->select( + 'recentchanges', + [ 'rc_cur_id', 'rc_this_oldid', 'rc_timestamp' ], + [ + "rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ), + "rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ) + ], + __METHOD__, + [ 'ORDER BY' => 'rc_cur_id,rc_timestamp' ] + ); $lastCurId = 0; $lastOldId = 0; + $lastSize = null; + $updated = 0; foreach ( $res as $obj ) { $new = 0; + if ( $obj->rc_cur_id != $lastCurId ) { # Switch! Look up the previous last edit, if any $lastCurId = intval( $obj->rc_cur_id ); $emit = $obj->rc_timestamp; - $sql2 = "SELECT rev_id,rev_len FROM $revision " . - "WHERE rev_page={$lastCurId} " . - "AND rev_timestamp<'{$emit}' ORDER BY rev_timestamp DESC"; - $sql2 = $dbw->limitResult( $sql2, 1, false ); - $res2 = $dbw->query( $sql2 ); - $row = $dbw->fetchObject( $res2 ); + + $row = $dbw->selectRow( + 'revision', + [ 'rev_id', 'rev_len' ], + [ 'rev_page' => $lastCurId, "rev_timestamp < " . $dbw->addQuotes( $emit ) ], + __METHOD__, + [ 'ORDER BY' => 'rev_timestamp DESC' ] + ); if ( $row ) { $lastOldId = intval( $row->rev_id ); # Grab the last text size if available @@ -139,29 +220,44 @@ class RebuildRecentchanges extends Maintenance { $new = 1; // probably true } } + if ( $lastCurId == 0 ) { $this->output( "Uhhh, something wrong? No curid\n" ); } else { # Grab the entry's text size - $size = $dbw->selectField( 'revision', 'rev_len', array( 'rev_id' => $obj->rc_this_oldid ) ); + $size = (int)$dbw->selectField( + 'revision', + 'rev_len', + [ 'rev_id' => $obj->rc_this_oldid ], + __METHOD__ + ); - $dbw->update( 'recentchanges', - array( + $dbw->update( + 'recentchanges', + [ 'rc_last_oldid' => $lastOldId, 'rc_new' => $new, - 'rc_type' => $new, - 'rc_source' => $new === 1 ? RecentChange::SRC_NEW : RecentChange::SRC_EDIT, + 'rc_type' => $new ? RC_NEW : RC_EDIT, + 'rc_source' => $new === 1 + ? $dbw->addQuotes( RecentChange::SRC_NEW ) + : $dbw->addQuotes( RecentChange::SRC_EDIT ), 'rc_old_len' => $lastSize, 'rc_new_len' => $size, - ), array( + ], + [ 'rc_cur_id' => $lastCurId, 'rc_this_oldid' => $obj->rc_this_oldid, - ), + 'rc_timestamp' => $obj->rc_timestamp // index usage + ], __METHOD__ ); $lastOldId = intval( $obj->rc_this_oldid ); $lastSize = $size; + + if ( ( ++$updated % $this->mBatchSize ) == 0 ) { + wfGetLBFactory()->waitForReplication(); + } } } } @@ -170,113 +266,162 @@ class RebuildRecentchanges extends Maintenance { * Rebuild pass 3: Insert `recentchanges` entries for action logs. */ private function rebuildRecentChangesTablePass3() { + global $wgLogTypes, $wgLogRestrictions; + $dbw = $this->getDB( DB_MASTER ); $this->output( "Loading from user, page, and logging tables...\n" ); - global $wgRCMaxAge, $wgLogTypes, $wgLogRestrictions; - // Some logs don't go in RC. This should check for that - $basicRCLogs = array_diff( $wgLogTypes, array_keys( $wgLogRestrictions ) ); - - $cutoff = time() - $wgRCMaxAge; - list( $logging, $page ) = $dbw->tableNamesN( 'logging', 'page' ); - $dbw->insertSelect( - 'recentchanges', - array( - 'user', - "$logging LEFT JOIN $page ON (log_namespace=page_namespace AND log_title=page_title)" - ), - array( - 'rc_timestamp' => 'log_timestamp', - 'rc_user' => 'log_user', - 'rc_user_text' => 'user_name', - 'rc_namespace' => 'log_namespace', - 'rc_title' => 'log_title', - 'rc_comment' => 'log_comment', - 'rc_minor' => 0, - 'rc_bot' => 0, - 'rc_patrolled' => 1, - 'rc_new' => 0, - 'rc_this_oldid' => 0, - 'rc_last_oldid' => 0, - 'rc_type' => RC_LOG, - 'rc_source' => $dbw->addQuotes( RecentChange::SRC_LOG ), - 'rc_cur_id' => $dbw->cascadingDeletes() ? 'page_id' : 'COALESCE(page_id, 0)', - 'rc_log_type' => 'log_type', - 'rc_log_action' => 'log_action', - 'rc_logid' => 'log_id', - 'rc_params' => 'log_params', - 'rc_deleted' => 'log_deleted' - ), - array( - 'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $cutoff ) ), + $res = $dbw->select( + [ 'user', 'logging', 'page' ], + [ + 'log_timestamp', + 'log_user', + 'user_name', + 'log_namespace', + 'log_title', + 'log_comment', + 'page_id', + 'log_type', + 'log_action', + 'log_id', + 'log_params', + 'log_deleted' + ], + [ + 'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ), + 'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ), 'log_user=user_id', - 'log_type' => $basicRCLogs, - ), + // Some logs don't go in RC since they are private. + // @FIXME: core/extensions also have spammy logs that don't go in RC. + 'log_type' => array_diff( $wgLogTypes, array_keys( $wgLogRestrictions ) ), + ], __METHOD__, - array(), // INSERT options - array( 'ORDER BY' => 'log_timestamp DESC', 'LIMIT' => 5000 ) // SELECT options + [ 'ORDER BY' => 'log_timestamp DESC' ], + [ + 'page' => + [ 'LEFT JOIN', [ 'log_namespace=page_namespace', 'log_title=page_title' ] ] + ] ); + + $inserted = 0; + foreach ( $res as $row ) { + $dbw->insert( + 'recentchanges', + [ + 'rc_timestamp' => $row->log_timestamp, + 'rc_user' => $row->log_user, + 'rc_user_text' => $row->user_name, + 'rc_namespace' => $row->log_namespace, + 'rc_title' => $row->log_title, + 'rc_comment' => $row->log_comment, + 'rc_minor' => 0, + 'rc_bot' => 0, + 'rc_patrolled' => 1, + 'rc_new' => 0, + 'rc_this_oldid' => 0, + 'rc_last_oldid' => 0, + 'rc_type' => RC_LOG, + 'rc_source' => $dbw->addQuotes( RecentChange::SRC_LOG ), + 'rc_cur_id' => $dbw->cascadingDeletes() + ? $row->page_id + : (int)$row->page_id, // NULL => 0, + 'rc_log_type' => $row->log_type, + 'rc_log_action' => $row->log_action, + 'rc_logid' => $row->log_id, + 'rc_params' => $row->log_params, + 'rc_deleted' => $row->log_deleted + ], + __METHOD__ + ); + + if ( ( ++$inserted % $this->mBatchSize ) == 0 ) { + wfGetLBFactory()->waitForReplication(); + } + } } /** * Rebuild pass 4: Mark bot and autopatrolled entries. */ private function rebuildRecentChangesTablePass4() { - global $wgUseRCPatrol; + global $wgUseRCPatrol, $wgMiserMode; $dbw = $this->getDB( DB_MASTER ); list( $recentchanges, $usergroups, $user ) = $dbw->tableNamesN( 'recentchanges', 'user_groups', 'user' ); - $botgroups = User::getGroupsWithPermission( 'bot' ); - $autopatrolgroups = $wgUseRCPatrol ? User::getGroupsWithPermission( 'autopatrol' ) : array(); + # @FIXME: recognize other bot account groups (not the same as users with 'bot' rights) + # @NOTE: users with 'bot' rights choose when edits are bot edits or not. That information + # may be lost at this point (aside from joining on the patrol log table entries). + $botgroups = [ 'bot' ]; + $autopatrolgroups = $wgUseRCPatrol ? User::getGroupsWithPermission( 'autopatrol' ) : []; + # Flag our recent bot edits - if ( !empty( $botgroups ) ) { + if ( $botgroups ) { $botwhere = $dbw->makeList( $botgroups ); - $botusers = array(); $this->output( "Flagging bot account edits...\n" ); # Find all users that are bots $sql = "SELECT DISTINCT user_name FROM $usergroups, $user " . "WHERE ug_group IN($botwhere) AND user_id = ug_user"; - $res = $dbw->query( $sql, DB_MASTER ); + $res = $dbw->query( $sql, __METHOD__ ); + $botusers = []; foreach ( $res as $obj ) { - $botusers[] = $dbw->addQuotes( $obj->user_name ); + $botusers[] = $obj->user_name; } + # Fill in the rc_bot field - if ( !empty( $botusers ) ) { - $botwhere = implode( ',', $botusers ); - $sql2 = "UPDATE $recentchanges SET rc_bot=1 " . - "WHERE rc_user_text IN($botwhere)"; - $dbw->query( $sql2 ); + if ( $botusers ) { + $rcids = $dbw->selectFieldValues( + 'recentchanges', + 'rc_id', + [ + 'rc_user_text' => $botusers, + "rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ), + "rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ) + ], + __METHOD__ + ); + + foreach ( array_chunk( $rcids, $this->mBatchSize ) as $rcidBatch ) { + $dbw->update( + 'recentchanges', + [ 'rc_bot' => 1 ], + [ 'rc_id' => $rcidBatch ], + __METHOD__ + ); + wfGetLBFactory()->waitForReplication(); + } } } - global $wgMiserMode; + # Flag our recent autopatrolled edits - if ( !$wgMiserMode && !empty( $autopatrolgroups ) ) { + if ( !$wgMiserMode && $autopatrolgroups ) { $patrolwhere = $dbw->makeList( $autopatrolgroups ); - $patrolusers = array(); + $patrolusers = []; $this->output( "Flagging auto-patrolled edits...\n" ); # Find all users in RC with autopatrol rights $sql = "SELECT DISTINCT user_name FROM $usergroups, $user " . "WHERE ug_group IN($patrolwhere) AND user_id = ug_user"; - $res = $dbw->query( $sql, DB_MASTER ); + $res = $dbw->query( $sql, __METHOD__ ); foreach ( $res as $obj ) { $patrolusers[] = $dbw->addQuotes( $obj->user_name ); } # Fill in the rc_patrolled field - if ( !empty( $patrolusers ) ) { + if ( $patrolusers ) { $patrolwhere = implode( ',', $patrolusers ); $sql2 = "UPDATE $recentchanges SET rc_patrolled=1 " . - "WHERE rc_user_text IN($patrolwhere)"; + "WHERE rc_user_text IN($patrolwhere) " . + "AND rc_timestamp > " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ) . ' ' . + "AND rc_timestamp < " . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ); $dbw->query( $sql2 ); } } @@ -292,15 +437,19 @@ class RebuildRecentchanges extends Maintenance { $this->output( "Removing duplicate revision and logging entries...\n" ); $res = $dbw->select( - array( 'logging', 'log_search' ), - array( 'ls_value', 'ls_log_id' ), - array( + [ 'logging', 'log_search' ], + [ 'ls_value', 'ls_log_id' ], + [ 'ls_log_id = log_id', 'ls_field' => 'associated_rev_id', 'log_type' => 'upload', - ), + 'log_timestamp > ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffFrom ) ), + 'log_timestamp < ' . $dbw->addQuotes( $dbw->timestamp( $this->cutoffTo ) ), + ], __METHOD__ ); + + $updates = 0; foreach ( $res as $obj ) { $rev_id = $obj->ls_value; $log_id = $obj->ls_log_id; @@ -308,17 +457,21 @@ class RebuildRecentchanges extends Maintenance { // Mark the logging row as having an associated rev id $dbw->update( 'recentchanges', - /*SET*/ array( 'rc_this_oldid' => $rev_id ), - /*WHERE*/ array( 'rc_logid' => $log_id ), + /*SET*/ [ 'rc_this_oldid' => $rev_id ], + /*WHERE*/ [ 'rc_logid' => $log_id ], __METHOD__ ); // Delete the revision row $dbw->delete( 'recentchanges', - /*WHERE*/ array( 'rc_this_oldid' => $rev_id, 'rc_logid' => 0 ), + /*WHERE*/ [ 'rc_this_oldid' => $rev_id, 'rc_logid' => 0 ], __METHOD__ ); + + if ( ( ++$updates % $this->mBatchSize ) == 0 ) { + wfGetLBFactory()->waitForReplication(); + } } }