From 2d17990a5d17498387128ae470406d47a2ccdfe9 Mon Sep 17 00:00:00 2001 From: Bill Pirkle Date: Tue, 2 Apr 2019 09:37:55 -0500 Subject: [PATCH] Remove references to field rev_text_id Field rev_text_id is being retired as part of MCR Schema Migration. Remove references to this field from maintenance scripts. Bug: T198341 Change-Id: I43ab93c44cbd0a2a2479baf848f50348e1e4762b --- maintenance/storage/checkStorage.php | 49 +++++++++++++++++++------ maintenance/storage/compressOld.php | 54 ++++++++++++---------------- maintenance/storage/trackBlobs.php | 45 +++++++++++++++++------ 3 files changed, 95 insertions(+), 53 deletions(-) diff --git a/maintenance/storage/checkStorage.php b/maintenance/storage/checkStorage.php index eed8019ec2..173d741be8 100644 --- a/maintenance/storage/checkStorage.php +++ b/maintenance/storage/checkStorage.php @@ -56,6 +56,8 @@ class CheckStorage { ]; function check( $fix = false, $xml = '' ) { + global $wgMultiContentRevisionSchemaMigrationStage; + $dbr = wfGetDB( DB_REPLICA ); if ( $fix ) { print "Checking, will fix errors if possible...\n"; @@ -79,13 +81,40 @@ class CheckStorage { $chunkEnd = $chunkStart + $chunkSize - 1; // print "$chunkStart of $maxRevId\n"; - // Fetch revision rows $this->oldIdMap = []; $dbr->ping(); - $res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ], - [ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ ); - foreach ( $res as $row ) { - $this->oldIdMap[$row->rev_id] = $row->rev_text_id; + + // Fetch revision rows + if ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_READ_OLD ) { + $res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ], + [ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ ); + foreach ( $res as $row ) { + if ( !isset( $this->oldIdMap[ $row->rev_text_id ] ) ) { + $this->oldIdMap[ $row->rev_text_id ] = [ $row->rev_id ]; + } elseif ( !in_array( $row->rev_id, $this->oldIdMap[ $row->rev_text_id ] ) ) { + $this->oldIdMap[ $row->rev_text_id ][] = $row->rev_id; + } + } + } else { + $res = $dbr->select( + [ 'slots', 'content' ], + [ 'slot_revision_id', 'content_address' ], + [ "slot_revision_id BETWEEN $chunkStart AND $chunkEnd" ], + __METHOD__, + [], + [ 'content' => [ 'INNER JOIN', [ 'content_id = slot_content_id' ] ] ] + ); + $blobStore = MediaWikiServices::getInstance()->getBlobStore(); + foreach ( $res as $row ) { + $textId = $blobStore->getTextIdFromAddress( $row->content_address ); + if ( $textId ) { + if ( !isset( $this->oldIdMap[$textId] ) ) { + $this->oldIdMap[ $textId ] = [ $row->slot_revision_id ]; + } elseif ( !in_array( $row->slot_revision_id, $this->oldIdMap[$textId] ) ) { + $this->oldIdMap[ $textId ][] = $row->slot_revision_id; + } + } + } } if ( !count( $this->oldIdMap ) ) { @@ -93,13 +122,13 @@ class CheckStorage { } // Fetch old_flags - $missingTextRows = array_flip( $this->oldIdMap ); + $missingTextRows = $this->oldIdMap; $externalRevs = []; $objectRevs = []; $res = $dbr->select( 'text', [ 'old_id', 'old_flags' ], - [ 'old_id' => $this->oldIdMap ], + [ 'old_id' => array_keys( $this->oldIdMap ) ], __METHOD__ ); foreach ( $res as $row ) { @@ -149,7 +178,7 @@ class CheckStorage { } // Output errors for any missing text rows - foreach ( $missingTextRows as $oldId => $revId ) { + foreach ( $missingTextRows as $oldId => $revIds ) { $this->addError( 'restore revision', "Error: missing text row", $oldId ); } @@ -371,13 +400,13 @@ class CheckStorage { if ( is_array( $ids ) ) { $revIds = []; foreach ( $ids as $id ) { - $revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) ); + $revIds = array_unique( array_merge( $revIds, $this->oldIdMap[$id] ) ); } print "$msg in text rows " . implode( ', ', $ids ) . ", revisions " . implode( ', ', $revIds ) . "\n"; } else { $id = $ids; - $revIds = array_keys( $this->oldIdMap, $id ); + $revIds = $this->oldIdMap[$id]; if ( count( $revIds ) == 1 ) { print "$msg in old_id $id, rev_id {$revIds[0]}\n"; } else { diff --git a/maintenance/storage/compressOld.php b/maintenance/storage/compressOld.php index ac4e1200fa..d3e9ce2cf6 100644 --- a/maintenance/storage/compressOld.php +++ b/maintenance/storage/compressOld.php @@ -40,6 +40,8 @@ * @file * @ingroup Maintenance ExternalStorage */ +use MediaWiki\MediaWikiServices; +use MediaWiki\Revision\SlotRecord; require_once __DIR__ . '/../Maintenance.php'; @@ -49,16 +51,6 @@ require_once __DIR__ . '/../Maintenance.php'; * @ingroup Maintenance ExternalStorage */ class CompressOld extends Maintenance { - /** - * Option to load each revision individually. - */ - const LS_INDIVIDUAL = 0; - - /** - * Option to load revisions in chunks. - */ - const LS_CHUNKED = 1; - public function __construct() { parent::__construct(); $this->addDescription( 'Compress the text of a wiki' ); @@ -233,7 +225,7 @@ class CompressOld extends Maintenance { private function compressWithConcat( $startId, $maxChunkSize, $beginDate, $endDate, $extdb = "", $maxPageId = false ) { - $loadStyle = self::LS_CHUNKED; + global $wgMultiContentRevisionSchemaMigrationStage; $dbr = $this->getDB( DB_REPLICA ); $dbw = $this->getDB( DB_MASTER ); @@ -288,17 +280,25 @@ class CompressOld extends Maintenance { } $conds[] = "rev_timestamp<'" . $endDate . "'"; } - if ( $loadStyle == self::LS_CHUNKED ) { + + if ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_READ_OLD ) { $tables = [ 'revision', 'text' ]; - $fields = [ 'rev_id', 'rev_text_id', 'old_flags', 'old_text' ]; $conds[] = 'rev_text_id=old_id'; - $revLoadOptions = 'FOR UPDATE'; } else { - $tables = [ 'revision' ]; - $fields = [ 'rev_id', 'rev_text_id' ]; - $revLoadOptions = []; + $slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore(); + $tables = [ 'revision', 'slots', 'content', 'text' ]; + $conds = array_merge( [ + 'rev_id=slot_revision_id', + 'slot_role_id=' . $slotRoleStore->getId( SlotRecord::MAIN ), + 'content_id=slot_content_id', + 'SUBSTRING(content_address, 1, 3)=' . $dbr->addQuotes( 'tt:' ), + 'SUBSTRING(content_address, 4)=old_id', + ], $conds ); } + $fields = [ 'rev_id', 'old_id', 'old_flags', 'old_text' ]; + $revLoadOptions = 'FOR UPDATE'; + # Don't work with current revisions # Don't lock the page table for update either -- TS 2006-04-04 # $tables[] = 'page'; @@ -359,24 +359,14 @@ class CompressOld extends Maintenance { $stubs = []; $this->beginTransaction( $dbw, __METHOD__ ); $usedChunk = false; - $primaryOldid = $revs[$i]->rev_text_id; + $primaryOldid = $revs[$i]->old_id; # Get the text of each revision and add it to the object for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy(); $j++ ) { - $oldid = $revs[$i + $j]->rev_text_id; + $oldid = $revs[$i + $j]->old_id; # Get text - if ( $loadStyle == self::LS_INDIVIDUAL ) { - $textRow = $dbw->selectRow( 'text', - [ 'old_flags', 'old_text' ], - [ 'old_id' => $oldid ], - __METHOD__, - 'FOR UPDATE' - ); - $text = Revision::getRevisionText( $textRow ); - } else { - $text = Revision::getRevisionText( $revs[$i + $j] ); - } + $text = Revision::getRevisionText( $revs[$i + $j] ); if ( $text === false ) { $this->error( "\nError, unable to get text in old_id $oldid" ); @@ -444,13 +434,13 @@ class CompressOld extends Maintenance { # Store the stub objects for ( $j = 1; $j < $thisChunkSize; $j++ ) { # Skip if not compressing and don't overwrite the first revision - if ( $stubs[$j] !== false && $revs[$i + $j]->rev_text_id != $primaryOldid ) { + if ( $stubs[$j] !== false && $revs[$i + $j]->old_id != $primaryOldid ) { $dbw->update( 'text', [ /* SET */ 'old_text' => serialize( $stubs[$j] ), 'old_flags' => 'object,utf-8', ], [ /* WHERE */ - 'old_id' => $revs[$i + $j]->rev_text_id + 'old_id' => $revs[$i + $j]->old_id ] ); } diff --git a/maintenance/storage/trackBlobs.php b/maintenance/storage/trackBlobs.php index 2dd48caf98..385ae6a5a0 100644 --- a/maintenance/storage/trackBlobs.php +++ b/maintenance/storage/trackBlobs.php @@ -23,6 +23,7 @@ */ use MediaWiki\MediaWikiServices; +use MediaWiki\Revision\SlotRecord; use Wikimedia\Rdbms\DBConnectionError; require __DIR__ . '/../commandLine.inc'; @@ -130,6 +131,8 @@ class TrackBlobs { * Scan the revision table for rows stored in the specified clusters */ function trackRevisions() { + global $wgMultiContentRevisionSchemaMigrationStage; + $dbw = wfGetDB( DB_MASTER ); $dbr = wfGetDB( DB_REPLICA ); @@ -141,20 +144,40 @@ class TrackBlobs { echo "Finding revisions...\n"; + $fields = [ 'rev_id', 'rev_page', 'old_id', 'old_flags', 'old_text' ]; + $options = [ + 'ORDER BY' => 'rev_id', + 'LIMIT' => $this->batchSize + ]; + $conds = [ + $textClause, + 'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ), + ]; + if ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_READ_OLD ) { + $tables = [ 'revision', 'text' ]; + $conds = array_merge( [ + 'rev_text_id=old_id', + ], $conds ); + } else { + $slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore(); + $tables = [ 'revision', 'slots', 'content', 'text' ]; + $conds = array_merge( [ + 'rev_id=slot_revision_id', + 'slot_role_id=' . $slotRoleStore->getId( SlotRecord::MAIN ), + 'content_id=slot_content_id', + 'SUBSTRING(content_address, 1, 3)=' . $dbr->addQuotes( 'tt:' ), + 'SUBSTRING(content_address, 4)=old_id', + ], $conds ); + } + while ( true ) { - $res = $dbr->select( [ 'revision', 'text' ], - [ 'rev_id', 'rev_page', 'old_id', 'old_flags', 'old_text' ], - [ + $res = $dbr->select( $tables, + $fields, + array_merge( [ 'rev_id > ' . $dbr->addQuotes( $startId ), - 'rev_text_id=old_id', - $textClause, - 'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ), - ], + ], $conds ), __METHOD__, - [ - 'ORDER BY' => 'rev_id', - 'LIMIT' => $this->batchSize - ] + $options ); if ( !$res->numRows() ) { break; -- 2.20.1