Remove references to field rev_text_id
author: Bill Pirkle <bpirkle@wikimedia.org>
Tue, 2 Apr 2019 14:37:55 +0000 (09:37 -0500)
committer: Bill Pirkle <bpirkle@wikimedia.org>
Mon, 22 Apr 2019 23:56:51 +0000 (18:56 -0500)
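The field rev_text_id is being retired as part of the Multi-Content Revisions (MCR) schema migration.
Remove unconditional references to this field from the maintenance scripts: it is now read only
when the migration stage includes SCHEMA_COMPAT_READ_OLD; otherwise the text ID is resolved
through the slots and content tables.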

Bug: T198341
Change-Id: I43ab93c44cbd0a2a2479baf848f50348e1e4762b

maintenance/storage/checkStorage.php
maintenance/storage/compressOld.php
maintenance/storage/trackBlobs.php

index eed8019..173d741 100644 (file)
@@ -56,6 +56,8 @@ class CheckStorage {
        ];
 
        function check( $fix = false, $xml = '' ) {
+               global $wgMultiContentRevisionSchemaMigrationStage;
+
                $dbr = wfGetDB( DB_REPLICA );
                if ( $fix ) {
                        print "Checking, will fix errors if possible...\n";
@@ -79,13 +81,40 @@ class CheckStorage {
                        $chunkEnd = $chunkStart + $chunkSize - 1;
                        // print "$chunkStart of $maxRevId\n";
 
-                       // Fetch revision rows
                        $this->oldIdMap = [];
                        $dbr->ping();
-                       $res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
-                               [ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
-                       foreach ( $res as $row ) {
-                               $this->oldIdMap[$row->rev_id] = $row->rev_text_id;
+
+                       // Fetch revision rows
+                       if ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_READ_OLD ) {
+                               $res = $dbr->select( 'revision', [ 'rev_id', 'rev_text_id' ],
+                                       [ "rev_id BETWEEN $chunkStart AND $chunkEnd" ], __METHOD__ );
+                               foreach ( $res as $row ) {
+                                       if ( !isset( $this->oldIdMap[ $row->rev_text_id ] ) ) {
+                                               $this->oldIdMap[ $row->rev_text_id ] = [ $row->rev_id ];
+                                       } elseif ( !in_array( $row->rev_id, $this->oldIdMap[ $row->rev_text_id ] ) ) {
+                                               $this->oldIdMap[ $row->rev_text_id ][] = $row->rev_id;
+                                       }
+                               }
+                       } else {
+                               $res = $dbr->select(
+                                       [ 'slots', 'content' ],
+                                       [ 'slot_revision_id', 'content_address' ],
+                                       [ "slot_revision_id BETWEEN $chunkStart AND $chunkEnd" ],
+                                       __METHOD__,
+                                       [],
+                                       [ 'content' => [ 'INNER JOIN', [ 'content_id = slot_content_id' ] ] ]
+                               );
+                               $blobStore = MediaWikiServices::getInstance()->getBlobStore();
+                               foreach ( $res as $row ) {
+                                       $textId = $blobStore->getTextIdFromAddress( $row->content_address );
+                                       if ( $textId ) {
+                                               if ( !isset( $this->oldIdMap[$textId] ) ) {
+                                                       $this->oldIdMap[ $textId ] = [ $row->slot_revision_id ];
+                                               } elseif ( !in_array( $row->slot_revision_id, $this->oldIdMap[$textId] ) ) {
+                                                       $this->oldIdMap[ $textId ][] = $row->slot_revision_id;
+                                               }
+                                       }
+                               }
                        }
 
                        if ( !count( $this->oldIdMap ) ) {
@@ -93,13 +122,13 @@ class CheckStorage {
                        }
 
                        // Fetch old_flags
-                       $missingTextRows = array_flip( $this->oldIdMap );
+                       $missingTextRows = $this->oldIdMap;
                        $externalRevs = [];
                        $objectRevs = [];
                        $res = $dbr->select(
                                'text',
                                [ 'old_id', 'old_flags' ],
-                               [ 'old_id' => $this->oldIdMap ],
+                               [ 'old_id' => array_keys( $this->oldIdMap ) ],
                                __METHOD__
                        );
                        foreach ( $res as $row ) {
@@ -149,7 +178,7 @@ class CheckStorage {
                        }
 
                        // Output errors for any missing text rows
-                       foreach ( $missingTextRows as $oldId => $revId ) {
+                       foreach ( $missingTextRows as $oldId => $revIds ) {
                                $this->addError( 'restore revision', "Error: missing text row", $oldId );
                        }
 
@@ -371,13 +400,13 @@ class CheckStorage {
                if ( is_array( $ids ) ) {
                        $revIds = [];
                        foreach ( $ids as $id ) {
-                               $revIds = array_merge( $revIds, array_keys( $this->oldIdMap, $id ) );
+                               $revIds = array_unique( array_merge( $revIds, $this->oldIdMap[$id] ) );
                        }
                        print "$msg in text rows " . implode( ', ', $ids ) .
                                ", revisions " . implode( ', ', $revIds ) . "\n";
                } else {
                        $id = $ids;
-                       $revIds = array_keys( $this->oldIdMap, $id );
+                       $revIds = $this->oldIdMap[$id];
                        if ( count( $revIds ) == 1 ) {
                                print "$msg in old_id $id, rev_id {$revIds[0]}\n";
                        } else {
index ac4e120..d3e9ce2 100644 (file)
@@ -40,6 +40,8 @@
  * @file
  * @ingroup Maintenance ExternalStorage
  */
+use MediaWiki\MediaWikiServices;
+use MediaWiki\Revision\SlotRecord;
 
 require_once __DIR__ . '/../Maintenance.php';
 
@@ -49,16 +51,6 @@ require_once __DIR__ . '/../Maintenance.php';
  * @ingroup Maintenance ExternalStorage
  */
 class CompressOld extends Maintenance {
-       /**
-        * Option to load each revision individually.
-        */
-       const LS_INDIVIDUAL = 0;
-
-       /**
-        * Option to load revisions in chunks.
-        */
-       const LS_CHUNKED = 1;
-
        public function __construct() {
                parent::__construct();
                $this->addDescription( 'Compress the text of a wiki' );
@@ -233,7 +225,7 @@ class CompressOld extends Maintenance {
        private function compressWithConcat( $startId, $maxChunkSize, $beginDate,
                $endDate, $extdb = "", $maxPageId = false
        ) {
-               $loadStyle = self::LS_CHUNKED;
+               global $wgMultiContentRevisionSchemaMigrationStage;
 
                $dbr = $this->getDB( DB_REPLICA );
                $dbw = $this->getDB( DB_MASTER );
@@ -288,17 +280,25 @@ class CompressOld extends Maintenance {
                        }
                        $conds[] = "rev_timestamp<'" . $endDate . "'";
                }
-               if ( $loadStyle == self::LS_CHUNKED ) {
+
+               if ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_READ_OLD ) {
                        $tables = [ 'revision', 'text' ];
-                       $fields = [ 'rev_id', 'rev_text_id', 'old_flags', 'old_text' ];
                        $conds[] = 'rev_text_id=old_id';
-                       $revLoadOptions = 'FOR UPDATE';
                } else {
-                       $tables = [ 'revision' ];
-                       $fields = [ 'rev_id', 'rev_text_id' ];
-                       $revLoadOptions = [];
+                       $slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();
+                       $tables = [ 'revision', 'slots', 'content', 'text' ];
+                       $conds = array_merge( [
+                               'rev_id=slot_revision_id',
+                               'slot_role_id=' . $slotRoleStore->getId( SlotRecord::MAIN ),
+                               'content_id=slot_content_id',
+                               'SUBSTRING(content_address, 1, 3)=' . $dbr->addQuotes( 'tt:' ),
+                               'SUBSTRING(content_address, 4)=old_id',
+                       ], $conds );
                }
 
+               $fields = [ 'rev_id', 'old_id', 'old_flags', 'old_text' ];
+               $revLoadOptions = 'FOR UPDATE';
+
                # Don't work with current revisions
                # Don't lock the page table for update either -- TS 2006-04-04
                # $tables[] = 'page';
@@ -359,24 +359,14 @@ class CompressOld extends Maintenance {
                                $stubs = [];
                                $this->beginTransaction( $dbw, __METHOD__ );
                                $usedChunk = false;
-                               $primaryOldid = $revs[$i]->rev_text_id;
+                               $primaryOldid = $revs[$i]->old_id;
 
                                # Get the text of each revision and add it to the object
                                for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy(); $j++ ) {
-                                       $oldid = $revs[$i + $j]->rev_text_id;
+                                       $oldid = $revs[$i + $j]->old_id;
 
                                        # Get text
-                                       if ( $loadStyle == self::LS_INDIVIDUAL ) {
-                                               $textRow = $dbw->selectRow( 'text',
-                                                       [ 'old_flags', 'old_text' ],
-                                                       [ 'old_id' => $oldid ],
-                                                       __METHOD__,
-                                                       'FOR UPDATE'
-                                               );
-                                               $text = Revision::getRevisionText( $textRow );
-                                       } else {
-                                               $text = Revision::getRevisionText( $revs[$i + $j] );
-                                       }
+                                       $text = Revision::getRevisionText( $revs[$i + $j] );
 
                                        if ( $text === false ) {
                                                $this->error( "\nError, unable to get text in old_id $oldid" );
@@ -444,13 +434,13 @@ class CompressOld extends Maintenance {
                                                # Store the stub objects
                                                for ( $j = 1; $j < $thisChunkSize; $j++ ) {
                                                        # Skip if not compressing and don't overwrite the first revision
-                                                       if ( $stubs[$j] !== false && $revs[$i + $j]->rev_text_id != $primaryOldid ) {
+                                                       if ( $stubs[$j] !== false && $revs[$i + $j]->old_id != $primaryOldid ) {
                                                                $dbw->update( 'text',
                                                                        [ /* SET */
                                                                                'old_text' => serialize( $stubs[$j] ),
                                                                                'old_flags' => 'object,utf-8',
                                                                        ], [ /* WHERE */
-                                                                               'old_id' => $revs[$i + $j]->rev_text_id
+                                                                               'old_id' => $revs[$i + $j]->old_id
                                                                        ]
                                                                );
                                                        }
index 2dd48ca..385ae6a 100644 (file)
@@ -23,6 +23,7 @@
  */
 
 use MediaWiki\MediaWikiServices;
+use MediaWiki\Revision\SlotRecord;
 use Wikimedia\Rdbms\DBConnectionError;
 
 require __DIR__ . '/../commandLine.inc';
@@ -130,6 +131,8 @@ class TrackBlobs {
         *  Scan the revision table for rows stored in the specified clusters
         */
        function trackRevisions() {
+               global $wgMultiContentRevisionSchemaMigrationStage;
+
                $dbw = wfGetDB( DB_MASTER );
                $dbr = wfGetDB( DB_REPLICA );
 
@@ -141,20 +144,40 @@ class TrackBlobs {
 
                echo "Finding revisions...\n";
 
+               $fields = [ 'rev_id', 'rev_page', 'old_id', 'old_flags', 'old_text' ];
+               $options = [
+                       'ORDER BY' => 'rev_id',
+                       'LIMIT' => $this->batchSize
+               ];
+               $conds = [
+                       $textClause,
+                       'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
+               ];
+               if ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_READ_OLD ) {
+                       $tables = [ 'revision', 'text' ];
+                       $conds = array_merge( [
+                               'rev_text_id=old_id',
+                       ], $conds );
+               } else {
+                       $slotRoleStore = MediaWikiServices::getInstance()->getSlotRoleStore();
+                       $tables = [ 'revision', 'slots', 'content', 'text' ];
+                       $conds = array_merge( [
+                               'rev_id=slot_revision_id',
+                               'slot_role_id=' . $slotRoleStore->getId( SlotRecord::MAIN ),
+                               'content_id=slot_content_id',
+                               'SUBSTRING(content_address, 1, 3)=' . $dbr->addQuotes( 'tt:' ),
+                               'SUBSTRING(content_address, 4)=old_id',
+                       ], $conds );
+               }
+
                while ( true ) {
-                       $res = $dbr->select( [ 'revision', 'text' ],
-                               [ 'rev_id', 'rev_page', 'old_id', 'old_flags', 'old_text' ],
-                               [
+                       $res = $dbr->select( $tables,
+                               $fields,
+                               array_merge( [
                                        'rev_id > ' . $dbr->addQuotes( $startId ),
-                                       'rev_text_id=old_id',
-                                       $textClause,
-                                       'old_flags ' . $dbr->buildLike( $dbr->anyString(), 'external', $dbr->anyString() ),
-                               ],
+                               ], $conds ),
                                __METHOD__,
-                               [
-                                       'ORDER BY' => 'rev_id',
-                                       'LIMIT' => $this->batchSize
-                               ]
+                               $options
                        );
                        if ( !$res->numRows() ) {
                                break;