Fix undeletion write-both/read-old mode.
authordaniel <daniel.kinzler@wikimedia.de>
Mon, 27 Aug 2018 19:45:40 +0000 (21:45 +0200)
committerDaniel Kinzler <daniel.kinzler@wikimedia.de>
Mon, 10 Sep 2018 19:36:27 +0000 (19:36 +0000)
With the new schema, undeletion does not create now slot rows.
However, when in read-old mode, we may still have un-migrated
archive rows. When undeletion based on such a row, we do need
to insert a slot row.

This also changes the return value of SlotRecord for
SCHEMA_COMPAT_READ_OLD mode from null to the negative value of
rev_text_id, for consistency. Conceptually, the emulated slots
in SCHEMA_COMPAT_OLD now have a "virtual" content ID, hich is
 however distinct from any real content ID that may exist in
the future. Such virtual content IDs are however not assigned
in SCHEMA_COMPAT_WRITE_BOTH mode. In that mode, unmigrated
rows can be detected by calling hasContentId() on the main
slot. Migrated rows will return true here and provide the
id of the associated content row, even in
SCHEMA_COMPAT_READ_OLD mode. This is particularly essential
for undeletion, which needs to maintain the association
between revision and slot rows even in SCHEMA_COMPAT_READ_OLD
mode.

Bug: T174024
Bug: T194015
Bug: T183488
Change-Id: I88ee9809b9752e1e72d635d62e82008315402901

includes/Storage/RevisionStore.php
includes/Storage/SlotRecord.php
tests/phpunit/includes/Storage/McrWriteBothRevisionStoreDbTest.php
tests/phpunit/includes/Storage/RevisionStoreDbTestBase.php

index d219267..ce8a088 100644 (file)
@@ -566,9 +566,14 @@ class RevisionStore
                foreach ( $slotRoles as $role ) {
                        $slot = $rev->getSlot( $role, RevisionRecord::RAW );
 
-                       if ( $slot->hasRevision() ) {
-                               // If the SlotRecord already has a revision ID set, this means it already exists
-                               // in the database, and should already belong to the current revision.
+                       // If the SlotRecord already has a revision ID set, this means it already exists
+                       // in the database, and should already belong to the current revision.
+                       // However, a slot may already have a revision, but no content ID, if the slot
+                       // is emulated based on the archive table, because we are in SCHEMA_COMPAT_READ_OLD
+                       // mode, and the respective archive row was not yet migrated to the new schema.
+                       // In that case, a new slot row (and content row) must be inserted even during
+                       // undeletion.
+                       if ( $slot->hasRevision() && $slot->hasContentId() ) {
                                // TODO: properly abort transaction if the assertion fails!
                                Assert::parameter(
                                        $slot->getRevision() === $revisionId,
@@ -612,6 +617,8 @@ class RevisionStore
         * @param IDatabase $dbw
         * @param int $revisionId
         * @param string &$blobAddress (may change!)
+        *
+        * @return int the text row id
         */
        private function updateRevisionTextId( IDatabase $dbw, $revisionId, &$blobAddress ) {
                $textId = $this->blobStore->getTextIdFromAddress( $blobAddress );
@@ -631,6 +638,8 @@ class RevisionStore
                        [ 'rev_id' => $revisionId ],
                        __METHOD__
                );
+
+               return $textId;
        }
 
        /**
@@ -654,11 +663,16 @@ class RevisionStore
                        $blobAddress = $this->storeContentBlob( $protoSlot, $title, $blobHints );
                }
 
+               $contentId = null;
+
                // Write the main slot's text ID to the revision table for backwards compatibility
                if ( $protoSlot->getRole() === 'main'
                        && $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_OLD )
                ) {
-                       $this->updateRevisionTextId( $dbw, $revisionId, $blobAddress );
+                       // If SCHEMA_COMPAT_WRITE_NEW is also set, the fake content ID is overwritten
+                       // with the real content ID below.
+                       $textId = $this->updateRevisionTextId( $dbw, $revisionId, $blobAddress );
+                       $contentId = $this->emulateContentId( $textId );
                }
 
                if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_NEW ) ) {
@@ -669,8 +683,6 @@ class RevisionStore
                        }
 
                        $this->insertSlotRowOn( $protoSlot, $dbw, $revisionId, $contentId );
-               } else {
-                       $contentId = null;
                }
 
                $savedSlot = SlotRecord::newSaved(
@@ -1194,6 +1206,7 @@ class RevisionStore
                $mainSlotRow->role_name = 'main';
                $mainSlotRow->model_name = null;
                $mainSlotRow->slot_revision_id = null;
+               $mainSlotRow->slot_content_id = null;
                $mainSlotRow->content_address = null;
 
                $content = null;
@@ -1244,6 +1257,12 @@ class RevisionStore
                        $mainSlotRow->format_name = isset( $row->rev_content_format )
                                ? strval( $row->rev_content_format )
                                : null;
+
+                       if ( isset( $row->rev_text_id ) && intval( $row->rev_text_id ) > 0 ) {
+                               // Overwritten below for SCHEMA_COMPAT_WRITE_NEW
+                               $mainSlotRow->slot_content_id
+                                       = $this->emulateContentId( intval( $row->rev_text_id ) );
+                       }
                } elseif ( is_array( $row ) ) {
                        $mainSlotRow->slot_revision_id = isset( $row['id'] ) ? intval( $row['id'] ) : null;
 
@@ -1283,6 +1302,12 @@ class RevisionStore
                                        $mainSlotRow->format_name = $handler->getDefaultFormat();
                                }
                        }
+
+                       if ( isset( $row['text_id'] ) && intval( $row['text_id'] ) > 0 ) {
+                               // Overwritten below for SCHEMA_COMPAT_WRITE_NEW
+                               $mainSlotRow->slot_content_id
+                                       = $this->emulateContentId( intval( $row['text_id'] ) );
+                       }
                } else {
                        throw new MWException( 'Revision constructor passed invalid row format.' );
                }
@@ -1320,18 +1345,38 @@ class RevisionStore
                        };
                }
 
-               // NOTE: this callback will be looped through RevisionSlot::newInherited(), allowing
-               // the inherited slot to have the same content_id as the original slot. In that case,
-               // $slot will be the inherited slot, while $mainSlotRow still refers to the original slot.
-               $mainSlotRow->slot_content_id =
-                       function ( SlotRecord $slot ) use ( $queryFlags, $mainSlotRow ) {
-                               $db = $this->getDBConnectionRefForQueryFlags( $queryFlags );
-                               return $this->findSlotContentId( $db, $mainSlotRow->slot_revision_id, 'main' );
-                       };
+               if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_NEW ) ) {
+                       // NOTE: this callback will be looped through RevisionSlot::newInherited(), allowing
+                       // the inherited slot to have the same content_id as the original slot. In that case,
+                       // $slot will be the inherited slot, while $mainSlotRow still refers to the original slot.
+                       $mainSlotRow->slot_content_id =
+                               function ( SlotRecord $slot ) use ( $queryFlags, $mainSlotRow ) {
+                                       $db = $this->getDBConnectionRefForQueryFlags( $queryFlags );
+                                       return $this->findSlotContentId( $db, $mainSlotRow->slot_revision_id, 'main' );
+                               };
+               }
 
                return new SlotRecord( $mainSlotRow, $content );
        }
 
+       /**
+        * Provides a content ID to use with emulated SlotRecords in SCHEMA_COMPAT_OLD mode,
+        * based on the revision's text ID (rev_text_id or ar_text_id, respectively).
+        * Note that in SCHEMA_COMPAT_WRITE_BOTH, a callback to findSlotContentId() should be used
+        * instead, since in that mode, some revision rows may already have a real content ID,
+        * while other's don't - and for the ones that don't, we should indicate that it
+        * is missing and cause SlotRecords::hasContentId() to return false.
+        *
+        * @param int $textId
+        * @return int The emulated content ID
+        */
+       private function emulateContentId( $textId ) {
+               // Return a negative number to ensure the ID is distinct from any real content IDs
+               // that will be assigned in SCHEMA_COMPAT_WRITE_NEW mode and read in SCHEMA_COMPAT_READ_NEW
+               // mode.
+               return -$textId;
+       }
+
        /**
         * Loads a Content object based on a slot row.
         *
index dff4b03..c7eb735 100644 (file)
@@ -451,6 +451,16 @@ class SlotRecord {
         * content has been stored in the content table. While building a new revision,
         * SlotRecords will not have an ID associated.
         *
+        * Also, during schema migration, hasContentId() may return false when encountering an
+        * un-migrated database entry in SCHEMA_COMPAT_WRITE_BOTH mode.
+        * It will however always return true for saved revisions on SCHEMA_COMPAT_READ_NEW mode,
+        * or without SCHEMA_COMPAT_WRITE_NEW mode. In the latter case, an emulated content ID
+        * is used, derived from the revision's text ID.
+        *
+        * Note that hasContentId() returning false while hasRevision() returns true always
+        * indicates an unmigrated row in SCHEMA_COMPAT_WRITE_BOTH mode, as described above.
+        * For an unsaved slot, both these methods would return false.
+        *
         * @since 1.32
         *
         * @return bool
@@ -494,6 +504,9 @@ class SlotRecord {
         * This information should be irrelevant to application logic, it is here to allow
         * the construction of a full row for the revision table.
         *
+        * Note that this method may return an emulated value during schema migration in
+        * SCHEMA_COMPAT_WRITE_OLD mode. See RevisionStore::emulateContentId for more information.
+        *
         * @return int
         */
        public function getContentId() {
index f05016a..10c20b9 100644 (file)
@@ -2,6 +2,7 @@
 namespace MediaWiki\Tests\Storage;
 
 use InvalidArgumentException;
+use MediaWiki\MediaWikiServices;
 use MediaWiki\Storage\RevisionRecord;
 use MediaWiki\Storage\SlotRecord;
 use Revision;
@@ -117,4 +118,68 @@ class McrWriteBothRevisionStoreDbTest extends RevisionStoreDbTestBase {
                ];
        }
 
+       /**
+        * @covers \MediaWiki\Storage\RevisionStore::newRevisionFromArchiveRow
+        * @covers \MediaWiki\Storage\RevisionStore::insertRevisionOn
+        */
+       public function testInsertRevisionFromArchiveRow_unmigratedArchiveRow() {
+               // The main purpose of this test is to assert that after reading an archive
+               // row using the old schema it can be inserted into the revision table,
+               // and a slot row is created based on slot emulated from the old-style archive row,
+               // when none such slot row exists yet.
+
+               $title = $this->getTestPage()->getTitle();
+
+               $this->db->insert(
+                       'text',
+                       [ 'old_text' => 'Just a test', 'old_flags' => 'utf-8' ],
+                       __METHOD__
+               );
+
+               $textId = $this->db->insertId();
+
+               $row = (object)[
+                       'ar_minor_edit' => '0',
+                       'ar_user' => '0',
+                       'ar_user_text' => '127.0.0.1',
+                       'ar_actor' => null,
+                       'ar_len' => '11',
+                       'ar_deleted' => '0',
+                       'ar_rev_id' => 112277,
+                       'ar_timestamp' => $this->db->timestamp( '20180101000000' ),
+                       'ar_sha1' => 'deadbeef',
+                       'ar_page_id' => $title->getArticleID(),
+                       'ar_comment_text' => 'just a test',
+                       'ar_comment_data' => null,
+                       'ar_comment_cid' => null,
+                       'ar_content_format' => null,
+                       'ar_content_model' => null,
+                       'ts_tags' => null,
+                       'ar_id' => 17,
+                       'ar_namespace' => $title->getNamespace(),
+                       'ar_title' => $title->getDBkey(),
+                       'ar_text_id' => $textId,
+                       'ar_parent_id' => 112211,
+               ];
+
+               $store = MediaWikiServices::getInstance()->getRevisionStore();
+               $rev = $store->newRevisionFromArchiveRow( $row );
+
+               // re-insert archived revision
+               $return = $store->insertRevisionOn( $rev, $this->db );
+
+               // is the new revision correct?
+               $this->assertRevisionCompleteness( $return );
+               $this->assertRevisionRecordsEqual( $rev, $return );
+
+               // can we load it from the store?
+               $loaded = $store->getRevisionById( $return->getId() );
+               $this->assertNotNull( $loaded );
+               $this->assertRevisionCompleteness( $loaded );
+               $this->assertRevisionRecordsEqual( $return, $loaded );
+
+               // can we find it directly in the database?
+               $this->assertRevisionExistsInDatabase( $return );
+       }
+
 }
index b129a98..58fd4e9 100644 (file)
@@ -836,7 +836,7 @@ abstract class RevisionStoreDbTestBase extends MediaWikiTestCase {
                return (object)$fields;
        }
 
-       private function assertRevisionRecordMatchesRevision(
+       protected function assertRevisionRecordMatchesRevision(
                Revision $rev,
                RevisionRecord $record
        ) {
@@ -1049,9 +1049,13 @@ abstract class RevisionStoreDbTestBase extends MediaWikiTestCase {
        }
 
        /**
+        * @covers \MediaWiki\Storage\RevisionStore::newRevisionFromArchiveRow
         * @covers \MediaWiki\Storage\RevisionStore::insertRevisionOn
         */
        public function testInsertRevisionOn_archive() {
+               // This is a round trip test for deletion and undeletion of a
+               // revision row via the archive table.
+
                $store = MediaWikiServices::getInstance()->getRevisionStore();
                $title = Title::newFromText( __METHOD__ );
 
@@ -1063,6 +1067,9 @@ abstract class RevisionStoreDbTestBase extends MediaWikiTestCase {
                $orig = $origRev->getRevisionRecord();
                $page->doDeleteArticle( __METHOD__ );
 
+               // re-create page, so we can later load revisions for it
+               $page->doEditContent( new WikitextContent( 'Two' ), __METHOD__ );
+
                $db = wfGetDB( DB_MASTER );
                $arQuery = $store->getArchiveQueryInfo();
                $row = $db->selectRow(
@@ -1070,34 +1077,37 @@ abstract class RevisionStoreDbTestBase extends MediaWikiTestCase {
                        __METHOD__, [], $arQuery['joins']
                );
 
-               $record = $store->newRevisionFromArchiveRow( $row );
+               $this->assertNotFalse( $row, 'query failed' );
+
+               $record = $store->newRevisionFromArchiveRow(
+                       $row,
+                       0,
+                       $title,
+                       [ 'page_id' => $title->getArticleID() ]
+               );
 
                $restored = $store->insertRevisionOn( $record, $db );
-               $this->assertSame( $orig->getPageId(), $restored->getPageId() );
-               $this->assertSame( $orig->getId(), $restored->getId() );
-               $this->assertSame( $orig->getComment()->text, $restored->getComment()->text );
 
-               $origMain = $orig->getSlot( 'main' );
-               $restoredMain = $restored->getSlot( 'main' );
-               $this->assertSame(
-                       $origMain->getOrigin(),
-                       $restoredMain->getOrigin()
-               );
+               // is the new revision correct?
+               $this->assertRevisionCompleteness( $restored );
+               $this->assertRevisionRecordsEqual( $record, $restored );
 
-               if ( $origMain->hasContentId() ) {
-                       $this->assertSame(
-                               $origMain->getContentId(),
-                               $restoredMain->getContentId()
-                       );
-               }
+               // does the new revision use the original slot?
+               $recMain = $record->getSlot( 'main' );
+               $restMain = $restored->getSlot( 'main' );
+               $this->assertSame( $recMain->getAddress(), $restMain->getAddress() );
+               $this->assertSame( $recMain->getContentId(), $restMain->getContentId() );
+               $this->assertSame( $recMain->getOrigin(), $restMain->getOrigin() );
+               $this->assertSame( 'Foo', $restMain->getContent()->serialize() );
 
-               // NOTE: we didn't restore the page row, so we can't use RevisionStore::getRevisionById
-               $this->assertSelect(
-                       'revision',
-                       [ 'rev_id' ],
-                       [ 'rev_id' => $orig->getId() ],
-                       [ [ $orig->getId() ] ]
-               );
+               // can we load it from the store?
+               $loaded = $store->getRevisionById( $restored->getId() );
+               $this->assertNotNull( $loaded );
+               $this->assertRevisionCompleteness( $loaded );
+               $this->assertRevisionRecordsEqual( $restored, $loaded );
+
+               // can we find it directly in the database?
+               $this->assertRevisionExistsInDatabase( $restored );
        }
 
        /**