From: jenkins-bot Date: Fri, 27 Sep 2019 18:38:09 +0000 (+0000) Subject: Merge "Remove Revision::getRevisionText and gated pre-MCR schema access" X-Git-Tag: 1.34.0-rc.0~70 X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=commitdiff_plain;h=72e141a5e33417a3f4bcb18cf8a862634bda9bee;hp=3caa34017f2dafa15b605502d3be74851ae8e8d4 Merge "Remove Revision::getRevisionText and gated pre-MCR schema access" --- diff --git a/autoload.php b/autoload.php index 55e5a7f089..dc57ff6ed9 100644 --- a/autoload.php +++ b/autoload.php @@ -532,7 +532,6 @@ $wgAutoloadLocalClasses = [ 'FixDefaultJsonContentPages' => __DIR__ . '/maintenance/fixDefaultJsonContentPages.php', 'FixDoubleRedirects' => __DIR__ . '/maintenance/fixDoubleRedirects.php', 'FixExtLinksProtocolRelative' => __DIR__ . '/maintenance/fixExtLinksProtocolRelative.php', - 'FixT22757' => __DIR__ . '/maintenance/storage/fixT22757.php', 'FixTimestamps' => __DIR__ . '/maintenance/fixTimestamps.php', 'FixUserRegistration' => __DIR__ . '/maintenance/fixUserRegistration.php', 'ForeignAPIFile' => __DIR__ . '/includes/filerepo/file/ForeignAPIFile.php', diff --git a/maintenance/storage/checkStorage.php b/maintenance/storage/checkStorage.php index 37625cf2f0..3741959d78 100644 --- a/maintenance/storage/checkStorage.php +++ b/maintenance/storage/checkStorage.php @@ -556,10 +556,27 @@ class CheckStorage { // Find text row again $dbr = wfGetDB( DB_REPLICA ); - $oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ ); + global $wgMultiContentRevisionSchemaMigrationStage; + if ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_READ_OLD ) { + $oldId = $dbr->selectField( 'revision', 'rev_text_id', [ 'rev_id' => $id ], __METHOD__ ); + } else { + $res = $dbr->selectRow( + [ 'slots', 'content' ], + [ 'content_address' ], + [ 'slot_revision_id' => $id ], + __METHOD__, + [], + [ 'content' => [ 'INNER JOIN', [ 'content_id = slot_content_id' ] ] ] + ); + // @phan-suppress-next-line PhanAccessMethodInternal + $blobStore = MediaWikiServices::getInstance() + ->getBlobStoreFactory() + ->newSqlBlobStore(); + $oldId = $blobStore->getTextIdFromAddress( $res->content_address ); + } + if ( !$oldId ) { echo "Missing revision row for rev_id $id\n"; - return; } diff --git a/maintenance/storage/compressOld.php b/maintenance/storage/compressOld.php index b6aa62623a..c4779b948a 100644 --- a/maintenance/storage/compressOld.php +++ b/maintenance/storage/compressOld.php @@ -239,6 +239,10 @@ class CompressOld extends Maintenance { /** @var ExternalStoreDB $storeObj */ $storeObj = $esFactory->getStore( 'DB' ); } + // @phan-suppress-next-line PhanAccessMethodInternal + $blobStore = MediaWikiServices::getInstance() + ->getBlobStoreFactory() + ->newSqlBlobStore(); # Get all articles by page_id if ( !$maxPageId ) { @@ -370,8 +374,12 @@ class CompressOld extends Maintenance { for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy(); $j++ ) { $oldid = $revs[$i + $j]->old_id; - # Get text - $text = Revision::getRevisionText( $revs[$i + $j] ); + # Get text. We do not need the full `extractBlob` since the query is built + # to fetch non-externalstore blobs. + $text = $blobStore->decompressData( + $revs[$i + $j]->old_text, + explode( ',', $revs[$i + $j]->old_flags ) + ); if ( $text === false ) { $this->error( "\nError, unable to get text in old_id $oldid" ); diff --git a/maintenance/storage/fixT22757.php b/maintenance/storage/fixT22757.php deleted file mode 100644 index 61f1177899..0000000000 --- a/maintenance/storage/fixT22757.php +++ /dev/null @@ -1,335 +0,0 @@ -addDescription( 'Script to fix T22757 assuming that blob_tracking is intact' ); - $this->addOption( 'dry-run', 'Report only' ); - $this->addOption( 'start', 'old_id to start at', false, true ); - } - - function execute() { - $dbr = $this->getDB( DB_REPLICA ); - $dbw = $this->getDB( DB_MASTER ); - - $dryRun = $this->getOption( 'dry-run' ); - if ( $dryRun ) { - print "Dry run only.\n"; - } - - $startId = $this->getOption( 'start', 0 ); - $numGood = 0; - $numFixed = 0; - $numBad = 0; - - $totalRevs = $dbr->selectField( 'text', 'MAX(old_id)', '', __METHOD__ ); - - // In MySQL 4.1+, the binary field old_text has a non-working LOWER() function - $lowerLeft = 'LOWER(CONVERT(LEFT(old_text,22) USING latin1))'; - - while ( true ) { - print "ID: $startId / $totalRevs\r"; - - $res = $dbr->select( - 'text', - [ 'old_id', 'old_flags', 'old_text' ], - [ - 'old_id > ' . intval( $startId ), - 'old_flags LIKE \'%object%\' AND old_flags NOT LIKE \'%external%\'', - "$lowerLeft = 'o:15:\"historyblobstub\"'", - ], - __METHOD__, - [ - 'ORDER BY' => 'old_id', - 'LIMIT' => $this->batchSize, - ] - ); - - if ( !$res->numRows() ) { - break; - } - - $secondaryIds = []; - $stubs = []; - - foreach ( $res as $row ) { - $startId = $row->old_id; - - // Basic sanity checks - $obj = unserialize( $row->old_text ); - if ( $obj === false ) { - print "{$row->old_id}: unrecoverable: cannot unserialize\n"; - ++$numBad; - continue; - } - - if ( !is_object( $obj ) ) { - print "{$row->old_id}: unrecoverable: unserialized to type " . - gettype( $obj ) . ", possible double-serialization\n"; - ++$numBad; - continue; - } - - if ( strtolower( get_class( $obj ) ) !== 'historyblobstub' ) { - print "{$row->old_id}: unrecoverable: unexpected object class " . - get_class( $obj ) . "\n"; - ++$numBad; - continue; - } - - // Process flags - $flags = explode( ',', $row->old_flags ); - if ( in_array( 'utf-8', $flags ) || in_array( 'utf8', $flags ) ) { - $legacyEncoding = false; - } else { - $legacyEncoding = true; - } - - // Queue the stub for future batch processing - $id = intval( $obj->mOldId ); - $secondaryIds[] = $id; - $stubs[$row->old_id] = [ - 'legacyEncoding' => $legacyEncoding, - 'secondaryId' => $id, - 'hash' => $obj->mHash, - ]; - } - - $secondaryIds = array_unique( $secondaryIds ); - - if ( !count( $secondaryIds ) ) { - continue; - } - - // Run the batch query on blob_tracking - $res = $dbr->select( - 'blob_tracking', - '*', - [ - 'bt_text_id' => $secondaryIds, - ], - __METHOD__ - ); - $trackedBlobs = []; - foreach ( $res as $row ) { - $trackedBlobs[$row->bt_text_id] = $row; - } - - // Process the stubs - foreach ( $stubs as $primaryId => $stub ) { - $secondaryId = $stub['secondaryId']; - if ( !isset( $trackedBlobs[$secondaryId] ) ) { - // No tracked blob. Work out what went wrong - $secondaryRow = $dbr->selectRow( - 'text', - [ 'old_flags', 'old_text' ], - [ 'old_id' => $secondaryId ], - __METHOD__ - ); - if ( !$secondaryRow ) { - print "$primaryId: unrecoverable: secondary row is missing\n"; - ++$numBad; - } elseif ( $this->isUnbrokenStub( $stub, $secondaryRow ) ) { - // Not broken yet, and not in the tracked clusters so it won't get - // broken by the current RCT run. - ++$numGood; - } elseif ( strpos( $secondaryRow->old_flags, 'external' ) !== false ) { - print "$primaryId: unrecoverable: secondary gone to {$secondaryRow->old_text}\n"; - ++$numBad; - } else { - print "$primaryId: unrecoverable: miscellaneous corruption of secondary row\n"; - ++$numBad; - } - unset( $stubs[$primaryId] ); - continue; - } - $trackRow = $trackedBlobs[$secondaryId]; - - // Check that the specified text really is available in the tracked source row - $url = "DB://{$trackRow->bt_cluster}/{$trackRow->bt_blob_id}/{$stub['hash']}"; - $text = ExternalStore::fetchFromURL( $url ); - if ( $text === false ) { - print "$primaryId: unrecoverable: source text missing\n"; - ++$numBad; - unset( $stubs[$primaryId] ); - continue; - } - if ( md5( $text ) !== $stub['hash'] ) { - print "$primaryId: unrecoverable: content hashes do not match\n"; - ++$numBad; - unset( $stubs[$primaryId] ); - continue; - } - - // Find the page_id and rev_id - // The page is probably the same as the page of the secondary row - $pageId = intval( $trackRow->bt_page ); - if ( !$pageId ) { - $revId = $pageId = 0; - } else { - $revId = $this->findTextIdInPage( $pageId, $primaryId ); - if ( !$revId ) { - // Actually an orphan - $pageId = $revId = 0; - } - } - - $newFlags = $stub['legacyEncoding'] ? 'external' : 'external,utf-8'; - - if ( !$dryRun ) { - // Reset the text row to point to the original copy - $this->beginTransaction( $dbw, __METHOD__ ); - $dbw->update( - 'text', - // SET - [ - 'old_flags' => $newFlags, - 'old_text' => $url - ], - // WHERE - [ 'old_id' => $primaryId ], - __METHOD__ - ); - - // Add a blob_tracking row so that the new reference can be recompressed - // without needing to run trackBlobs.php again - $dbw->insert( 'blob_tracking', - [ - 'bt_page' => $pageId, - 'bt_rev_id' => $revId, - 'bt_text_id' => $primaryId, - 'bt_cluster' => $trackRow->bt_cluster, - 'bt_blob_id' => $trackRow->bt_blob_id, - 'bt_cgz_hash' => $stub['hash'], - 'bt_new_url' => null, - 'bt_moved' => 0, - ], - __METHOD__ - ); - $this->commitTransaction( $dbw, __METHOD__ ); - } - - print "$primaryId: resolved to $url\n"; - ++$numFixed; - } - } - - print "\n"; - print "Fixed: $numFixed\n"; - print "Unrecoverable: $numBad\n"; - print "Good stubs: $numGood\n"; - } - - function findTextIdInPage( $pageId, $textId ) { - $ids = $this->getRevTextMap( $pageId ); - return $ids[$textId] ?? null; - } - - function getRevTextMap( $pageId ) { - if ( !isset( $this->mapCache[$pageId] ) ) { - // Limit cache size - while ( $this->mapCacheSize > $this->maxMapCacheSize ) { - $key = key( $this->mapCache ); - $this->mapCacheSize -= count( $this->mapCache[$key] ); - unset( $this->mapCache[$key] ); - } - - $dbr = $this->getDB( DB_REPLICA ); - $map = []; - $res = $dbr->select( 'revision', - [ 'rev_id', 'rev_text_id' ], - [ 'rev_page' => $pageId ], - __METHOD__ - ); - foreach ( $res as $row ) { - $map[$row->rev_text_id] = $row->rev_id; - } - $this->mapCache[$pageId] = $map; - $this->mapCacheSize += count( $map ); - } - - return $this->mapCache[$pageId]; - } - - /** - * This is based on part of HistoryBlobStub::getText(). - * Determine if the text can be retrieved from the row in the normal way. - * @param array $stub - * @param stdClass $secondaryRow - * @return bool - */ - function isUnbrokenStub( $stub, $secondaryRow ) { - $flags = explode( ',', $secondaryRow->old_flags ); - $text = $secondaryRow->old_text; - if ( in_array( 'external', $flags ) ) { - $url = $text; - Wikimedia\suppressWarnings(); - list( /* $proto */, $path ) = explode( '://', $url, 2 ); - Wikimedia\restoreWarnings(); - - if ( $path == "" ) { - return false; - } - $text = ExternalStore::fetchFromURL( $url ); - } - if ( !in_array( 'object', $flags ) ) { - return false; - } - - if ( in_array( 'gzip', $flags ) ) { - $obj = unserialize( gzinflate( $text ) ); - } else { - $obj = unserialize( $text ); - } - - if ( !is_object( $obj ) ) { - // Correct for old double-serialization bug. - $obj = unserialize( $obj ); - } - - if ( !is_object( $obj ) ) { - return false; - } - - $obj->uncompress(); - $text = $obj->getItem( $stub['hash'] ); - - return $text !== false; - } -} - -$maintClass = FixT22757::class; -require_once RUN_MAINTENANCE_IF_MAIN; diff --git a/maintenance/storage/recompressTracked.php b/maintenance/storage/recompressTracked.php index 4c75de8d27..00680462c6 100644 --- a/maintenance/storage/recompressTracked.php +++ b/maintenance/storage/recompressTracked.php @@ -22,6 +22,7 @@ * @ingroup Maintenance ExternalStorage */ +use MediaWiki\Storage\SqlBlobStore; use Wikimedia\Rdbms\IMaintainableDatabase; use MediaWiki\Logger\LegacyLogger; use MediaWiki\MediaWikiServices; @@ -71,6 +72,8 @@ class RecompressTracked { public $debugLog, $infoLog, $criticalLog; /** @var ExternalStoreDB */ public $store; + /** @var SqlBlobStore */ + private $blobStore; private static $optionsWithArgs = [ 'procs', @@ -120,6 +123,10 @@ class RecompressTracked { $this->pageBlobClass = function_exists( 'xdiff_string_bdiff' ) ? DiffHistoryBlob::class : ConcatenatedGzipHistoryBlob::class; $this->orphanBlobClass = ConcatenatedGzipHistoryBlob::class; + // @phan-suppress-next-line PhanAccessMethodInternal + $this->blobStore = MediaWikiServices::getInstance() + ->getBlobStoreFactory() + ->newSqlBlobStore(); } function debug( $msg ) { @@ -531,7 +538,7 @@ class RecompressTracked { } $lastTextId = $row->bt_text_id; // Load the text - $text = Revision::getRevisionText( $row ); + $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags ); if ( $text === false ) { $this->critical( "Error loading {$row->bt_rev_id}/{$row->bt_text_id}" ); continue; @@ -685,7 +692,7 @@ class RecompressTracked { ); foreach ( $res as $row ) { - $text = Revision::getRevisionText( $row ); + $text = $this->blobStore->expandBlob( $row->old_text, $row->old_flags ); if ( $text === false ) { $this->critical( "Error: cannot load revision text for old_id={$row->old_id}" ); continue;