Make sure we don't try to use a deleted rev ID.
authorBrad Jorsch <bjorsch@wikimedia.org>
Wed, 22 Aug 2018 15:59:56 +0000 (11:59 -0400)
committerBrad Jorsch <bjorsch@wikimedia.org>
Wed, 22 Aug 2018 19:32:16 +0000 (15:32 -0400)
MySQL until 8.0 and MariaDB until some version after 10.1.34 don't save
the auto-increment value to disk, so on server restart it might reuse
IDs from deleted revisions. We can fix that with an insert with an
explicit rev_id value, if necessary.

Bug: T202032
Change-Id: I14e5b6c8ec8b5f1aeb2305fae4b103665f8f6686

includes/Storage/RevisionStore.php
maintenance/deduplicateArchiveRevId.php
maintenance/populateArchiveRevId.php
tests/phpunit/includes/Storage/McrRevisionStoreDbTest.php
tests/phpunit/includes/Storage/RevisionStoreDbTestBase.php

index 5769527..4ab4570 100644 (file)
@@ -746,6 +746,76 @@ class RevisionStore
                if ( !isset( $revisionRow['rev_id'] ) ) {
                        // only if auto-increment was used
                        $revisionRow['rev_id'] = intval( $dbw->insertId() );
+
+                       if ( $dbw->getType() === 'mysql' ) {
+                               // (T202032) MySQL until 8.0 and MariaDB until some version after 10.1.34 don't save the
+                               // auto-increment value to disk, so on server restart it might reuse IDs from deleted
+                               // revisions. We can fix that with an insert with an explicit rev_id value, if necessary.
+
+                               $maxRevId = intval( $dbw->selectField( 'archive', 'MAX(ar_rev_id)', '', __METHOD__ ) );
+                               $table = 'archive';
+                               if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_NEW ) ) {
+                                       $maxRevId2 = intval( $dbw->selectField( 'slots', 'MAX(slot_revision_id)', '', __METHOD__ ) );
+                                       if ( $maxRevId2 >= $maxRevId ) {
+                                               $maxRevId = $maxRevId2;
+                                               $table = 'slots';
+                                       }
+                               }
+
+                               if ( $maxRevId >= $revisionRow['rev_id'] ) {
+                                       $this->logger->debug(
+                                               '__METHOD__: Inserted revision {revid} but {table} has revisions up to {maxrevid}.'
+                                                       . ' Trying to fix it.',
+                                               [
+                                                       'revid' => $revisionRow['rev_id'],
+                                                       'table' => $table,
+                                                       'maxrevid' => $maxRevId,
+                                               ]
+                                       );
+
+                                       if ( !$dbw->lock( 'fix-for-T202032', __METHOD__ ) ) {
+                                               throw new MWException( 'Failed to get database lock for T202032' );
+                                       }
+                                       $fname = __METHOD__;
+                                       $dbw->onTransactionResolution( function ( $trigger, $dbw ) use ( $fname ) {
+                                               $dbw->unlock( 'fix-for-T202032', $fname );
+                                       } );
+
+                                       $dbw->delete( 'revision', [ 'rev_id' => $revisionRow['rev_id'] ], __METHOD__ );
+
+                                       // The locking here is mostly to make MySQL bypass the REPEATABLE-READ transaction
+                                       // isolation (weird MySQL "feature"). It does seem to block concurrent auto-incrementing
+                                       // inserts too, though, at least on MariaDB 10.1.29.
+                                       //
+                                       // Don't try to lock `revision` in this way, it'll deadlock if there are concurrent
+                                       // transactions in this code path thanks to the row lock from the original ->insert() above.
+                                       //
+                                       // And we have to use raw SQL to bypass the "aggregation used with a locking SELECT" warning
+                                       // that's for non-MySQL DBs.
+                                       $row1 = $dbw->query(
+                                               $dbw->selectSqlText( 'archive', [ 'v' => "MAX(ar_rev_id)" ], '', __METHOD__ ) . ' FOR UPDATE'
+                                       )->fetchObject();
+                                       if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_NEW ) ) {
+                                               $row2 = $dbw->query(
+                                                       $dbw->selectSqlText( 'slots', [ 'v' => "MAX(slot_revision_id)" ], '', __METHOD__ )
+                                                               . ' FOR UPDATE'
+                                               )->fetchObject();
+                                       } else {
+                                               $row2 = null;
+                                       }
+                                       $maxRevId = max(
+                                               $maxRevId,
+                                               $row1 ? intval( $row1->v ) : 0,
+                                               $row2 ? intval( $row2->v ) : 0
+                                       );
+
+                                       // If we don't have SCHEMA_COMPAT_WRITE_NEW, all except the first of any concurrent
+                                       // transactions will throw a duplicate key error here. It doesn't seem worth trying
+                                       // to avoid that.
+                                       $revisionRow['rev_id'] = $maxRevId + 1;
+                                       $dbw->insert( 'revision', $revisionRow, __METHOD__ );
+                               }
+                       }
                }
 
                $commentCallback( $revisionRow['rev_id'] );
index dad79b0..2442caa 100644 (file)
@@ -35,6 +35,7 @@ class DeduplicateArchiveRevId extends LoggedUpdateMaintenance {
                $this->output( "Deduplicating ar_rev_id...\n" );
 
                $dbw = $this->getDB( DB_MASTER );
+               PopulateArchiveRevId::checkMysqlAutoIncrementBug( $dbw );
 
                $minId = $dbw->selectField( 'archive', 'MIN(ar_rev_id)', [], __METHOD__ );
                $maxId = $dbw->selectField( 'archive', 'MAX(ar_rev_id)', [], __METHOD__ );
index e493506..60f5e8a 100644 (file)
@@ -21,6 +21,7 @@
  * @ingroup Maintenance
  */
 
+use Wikimedia\Rdbms\DBQueryError;
 use Wikimedia\Rdbms\IDatabase;
 
 require_once __DIR__ . '/Maintenance.php';
@@ -49,6 +50,7 @@ class PopulateArchiveRevId extends LoggedUpdateMaintenance {
        protected function doDBUpdates() {
                $this->output( "Populating ar_rev_id...\n" );
                $dbw = $this->getDB( DB_MASTER );
+               self::checkMysqlAutoIncrementBug( $dbw );
 
                // Quick exit if there are no rows needing updates.
                $any = $dbw->selectField(
@@ -86,6 +88,53 @@ class PopulateArchiveRevId extends LoggedUpdateMaintenance {
                }
        }
 
+       /**
+        * Check for (and work around) a MySQL auto-increment bug
+        *
+        * (T202032) MySQL until 8.0 and MariaDB until some version after 10.1.34
+        * don't save the auto-increment value to disk, so on server restart it
+        * might reuse IDs from deleted revisions. We can fix that with an insert
+        * with an explicit rev_id value, if necessary.
+        *
+        * @param IDatabase $dbw
+        */
+       public static function checkMysqlAutoIncrementBug( IDatabase $dbw ) {
+               if ( $dbw->getType() !== 'mysql' ) {
+                       return;
+               }
+
+               if ( !self::$dummyRev ) {
+                       self::$dummyRev = self::makeDummyRevisionRow( $dbw );
+               }
+
+               $ok = false;
+               while ( !$ok ) {
+                       try {
+                               $dbw->doAtomicSection( __METHOD__, function ( $dbw, $fname ) {
+                                       $dbw->insert( 'revision', self::$dummyRev, $fname );
+                                       $id = $dbw->insertId();
+                                       $toDelete[] = $id;
+
+                                       $maxId = max(
+                                               (int)$dbw->selectField( 'archive', 'MAX(ar_rev_id)', [], __METHOD__ ),
+                                               (int)$dbw->selectField( 'slots', 'MAX(slot_revision_id)', [], __METHOD__ )
+                                       );
+                                       if ( $id <= $maxId ) {
+                                               $dbw->insert( 'revision', [ 'rev_id' => $maxId + 1 ] + self::$dummyRev, $fname );
+                                               $toDelete[] = $maxId + 1;
+                                       }
+
+                                       $dbw->delete( 'revision', [ 'rev_id' => $toDelete ], $fname );
+                               } );
+                               $ok = true;
+                       } catch ( DBQueryError $e ) {
+                               if ( $e->errno != 1062 ) { // 1062 is "duplicate entry", ignore it and retry
+                                       throw $e;
+                               }
+                       }
+               }
+       }
+
        /**
         * Assign new ar_rev_ids to a set of ar_ids.
         * @param IDatabase $dbw
index 9a118d7..649e692 100644 (file)
@@ -3,6 +3,7 @@ namespace MediaWiki\Tests\Storage;
 
 use CommentStoreComment;
 use MediaWiki\MediaWikiServices;
+use MediaWiki\Storage\MutableRevisionRecord;
 use MediaWiki\Storage\RevisionRecord;
 use MediaWiki\Storage\SlotRecord;
 use TextContent;
@@ -239,4 +240,56 @@ class McrRevisionStoreDbTest extends RevisionStoreDbTestBase {
                ];
        }
 
+       /**
+        * @covers \MediaWiki\Storage\RevisionStore::insertRevisionOn
+        * @covers \MediaWiki\Storage\RevisionStore::insertSlotRowOn
+        * @covers \MediaWiki\Storage\RevisionStore::insertContentRowOn
+        */
+       public function testInsertRevisionOn_T202032() {
+               // This test only makes sense for MySQL
+               if ( $this->db->getType() !== 'mysql' ) {
+                       $this->assertTrue( true );
+                       return;
+               }
+
+               // NOTE: must be done before checking MAX(rev_id)
+               $page = $this->getTestPage();
+
+               $maxRevId = $this->db->selectField( 'revision', 'MAX(rev_id)' );
+
+               // Construct a slot row that will conflict with the insertion of the next revision ID,
+               // to emulate the failure mode described in T202032. Nothing will ever read this row,
+               // we just need it to trigger a primary key conflict.
+               $this->db->insert( 'slots', [
+                       'slot_revision_id' => $maxRevId + 1,
+                       'slot_role_id' => 1,
+                       'slot_content_id' => 0,
+                       'slot_origin' => 0
+               ], __METHOD__ );
+
+               $rev = new MutableRevisionRecord( $page->getTitle() );
+               $rev->setTimestamp( '20180101000000' );
+               $rev->setComment( CommentStoreComment::newUnsavedComment( 'test' ) );
+               $rev->setUser( $this->getTestUser()->getUser() );
+               $rev->setContent( 'main', new WikitextContent( 'Text' ) );
+               $rev->setPageId( $page->getId() );
+
+               $store = MediaWikiServices::getInstance()->getRevisionStore();
+               $return = $store->insertRevisionOn( $rev, $this->db );
+
+               $this->assertSame( $maxRevId + 2, $return->getId() );
+
+               // is the new revision correct?
+               $this->assertRevisionCompleteness( $return );
+               $this->assertRevisionRecordsEqual( $rev, $return );
+
+               // can we find it directly in the database?
+               $this->assertRevisionExistsInDatabase( $return );
+
+               // can we load it from the store?
+               $loaded = $store->getRevisionById( $return->getId() );
+               $this->assertRevisionCompleteness( $loaded );
+               $this->assertRevisionRecordsEqual( $return, $loaded );
+       }
+
 }
index ad1e013..8137b27 100644 (file)
@@ -244,14 +244,14 @@ abstract class RevisionStoreDbTestBase extends MediaWikiTestCase {
                $this->assertSame( 0, $count );
        }
 
-       private function assertLinkTargetsEqual( LinkTarget $l1, LinkTarget $l2 ) {
+       protected function assertLinkTargetsEqual( LinkTarget $l1, LinkTarget $l2 ) {
                $this->assertEquals( $l1->getDBkey(), $l2->getDBkey() );
                $this->assertEquals( $l1->getNamespace(), $l2->getNamespace() );
                $this->assertEquals( $l1->getFragment(), $l2->getFragment() );
                $this->assertEquals( $l1->getInterwiki(), $l2->getInterwiki() );
        }
 
-       private function assertRevisionRecordsEqual( RevisionRecord $r1, RevisionRecord $r2 ) {
+       protected function assertRevisionRecordsEqual( RevisionRecord $r1, RevisionRecord $r2 ) {
                $this->assertEquals(
                        $r1->getPageAsLinkTarget()->getNamespace(),
                        $r2->getPageAsLinkTarget()->getNamespace()
@@ -291,7 +291,7 @@ abstract class RevisionStoreDbTestBase extends MediaWikiTestCase {
                }
        }
 
-       private function assertSlotRecordsEqual( SlotRecord $s1, SlotRecord $s2 ) {
+       protected function assertSlotRecordsEqual( SlotRecord $s1, SlotRecord $s2 ) {
                $this->assertSame( $s1->getRole(), $s2->getRole() );
                $this->assertSame( $s1->getModel(), $s2->getModel() );
                $this->assertSame( $s1->getFormat(), $s2->getFormat() );
@@ -303,7 +303,7 @@ abstract class RevisionStoreDbTestBase extends MediaWikiTestCase {
                $s1->hasAddress() ? $this->assertSame( $s1->hasAddress(), $s2->hasAddress() ) : null;
        }
 
-       private function assertRevisionCompleteness( RevisionRecord $r ) {
+       protected function assertRevisionCompleteness( RevisionRecord $r ) {
                $this->assertTrue( $r->hasSlot( 'main' ) );
                $this->assertInstanceOf( SlotRecord::class, $r->getSlot( 'main' ) );
                $this->assertInstanceOf( Content::class, $r->getContent( 'main' ) );
@@ -313,7 +313,7 @@ abstract class RevisionStoreDbTestBase extends MediaWikiTestCase {
                }
        }
 
-       private function assertSlotCompleteness( RevisionRecord $r, SlotRecord $slot ) {
+       protected function assertSlotCompleteness( RevisionRecord $r, SlotRecord $slot ) {
                $this->assertTrue( $slot->hasAddress() );
                $this->assertSame( $r->getId(), $slot->getRevision() );