X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=blobdiff_plain;f=includes%2FStorage%2FRevisionStore.php;h=879f3229d3d0a082ac9575dc899025fd572d31cb;hp=576952779600c7960dc46a93245da8d89d446760;hb=fe94275c8fcfc248a5eae857dde7c5772d993ab5;hpb=9171317645de4d31c61c570a9e4fa765e09ecc4d diff --git a/includes/Storage/RevisionStore.php b/includes/Storage/RevisionStore.php index 5769527796..879f3229d3 100644 --- a/includes/Storage/RevisionStore.php +++ b/includes/Storage/RevisionStore.php @@ -466,6 +466,12 @@ class RevisionStore $this->failOnNull( $user->getId(), 'user field' ); $this->failOnEmpty( $user->getName(), 'user_text field' ); + if ( !$rev->isReadyForInsertion() ) { + // This is here for future-proofing. At the time this check being added, it + // was redundant to the individual checks above. + throw new IncompleteRevisionException( 'Revision is incomplete' ); + } + // TODO: we shouldn't need an actual Title here. $title = Title::newFromLinkTarget( $rev->getPageAsLinkTarget() ); $pageId = $this->failOnEmpty( $rev->getPageId(), 'rev_page field' ); // check this early @@ -519,11 +525,11 @@ class RevisionStore $slot = $rev->getSlot( $role, RevisionRecord::RAW ); Assert::postcondition( $slot->getContent() !== null, - $role . ' slot must have content' + $role . ' slot must have content' ); Assert::postcondition( $slot->hasRevision(), - $role . ' slot must have a revision associated' + $role . ' slot must have a revision associated' ); } @@ -566,9 +572,14 @@ class RevisionStore foreach ( $slotRoles as $role ) { $slot = $rev->getSlot( $role, RevisionRecord::RAW ); - if ( $slot->hasRevision() ) { - // If the SlotRecord already has a revision ID set, this means it already exists - // in the database, and should already belong to the current revision. + // If the SlotRecord already has a revision ID set, this means it already exists + // in the database, and should already belong to the current revision. + // However, a slot may already have a revision, but no content ID, if the slot + // is emulated based on the archive table, because we are in SCHEMA_COMPAT_READ_OLD + // mode, and the respective archive row was not yet migrated to the new schema. + // In that case, a new slot row (and content row) must be inserted even during + // undeletion. + if ( $slot->hasRevision() && $slot->hasContentId() ) { // TODO: properly abort transaction if the assertion fails! Assert::parameter( $slot->getRevision() === $revisionId, @@ -612,6 +623,8 @@ class RevisionStore * @param IDatabase $dbw * @param int $revisionId * @param string &$blobAddress (may change!) + * + * @return int the text row id */ private function updateRevisionTextId( IDatabase $dbw, $revisionId, &$blobAddress ) { $textId = $this->blobStore->getTextIdFromAddress( $blobAddress ); @@ -631,6 +644,8 @@ class RevisionStore [ 'rev_id' => $revisionId ], __METHOD__ ); + + return $textId; } /** @@ -654,11 +669,16 @@ class RevisionStore $blobAddress = $this->storeContentBlob( $protoSlot, $title, $blobHints ); } + $contentId = null; + // Write the main slot's text ID to the revision table for backwards compatibility if ( $protoSlot->getRole() === 'main' && $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_OLD ) ) { - $this->updateRevisionTextId( $dbw, $revisionId, $blobAddress ); + // If SCHEMA_COMPAT_WRITE_NEW is also set, the fake content ID is overwritten + // with the real content ID below. + $textId = $this->updateRevisionTextId( $dbw, $revisionId, $blobAddress ); + $contentId = $this->emulateContentId( $textId ); } if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_NEW ) ) { @@ -669,8 +689,6 @@ class RevisionStore } $this->insertSlotRowOn( $protoSlot, $dbw, $revisionId, $contentId ); - } else { - $contentId = null; } $savedSlot = SlotRecord::newSaved( @@ -746,6 +764,76 @@ class RevisionStore if ( !isset( $revisionRow['rev_id'] ) ) { // only if auto-increment was used $revisionRow['rev_id'] = intval( $dbw->insertId() ); + + if ( $dbw->getType() === 'mysql' ) { + // (T202032) MySQL until 8.0 and MariaDB until some version after 10.1.34 don't save the + // auto-increment value to disk, so on server restart it might reuse IDs from deleted + // revisions. We can fix that with an insert with an explicit rev_id value, if necessary. + + $maxRevId = intval( $dbw->selectField( 'archive', 'MAX(ar_rev_id)', '', __METHOD__ ) ); + $table = 'archive'; + if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_NEW ) ) { + $maxRevId2 = intval( $dbw->selectField( 'slots', 'MAX(slot_revision_id)', '', __METHOD__ ) ); + if ( $maxRevId2 >= $maxRevId ) { + $maxRevId = $maxRevId2; + $table = 'slots'; + } + } + + if ( $maxRevId >= $revisionRow['rev_id'] ) { + $this->logger->debug( + '__METHOD__: Inserted revision {revid} but {table} has revisions up to {maxrevid}.' + . ' Trying to fix it.', + [ + 'revid' => $revisionRow['rev_id'], + 'table' => $table, + 'maxrevid' => $maxRevId, + ] + ); + + if ( !$dbw->lock( 'fix-for-T202032', __METHOD__ ) ) { + throw new MWException( 'Failed to get database lock for T202032' ); + } + $fname = __METHOD__; + $dbw->onTransactionResolution( function ( $trigger, $dbw ) use ( $fname ) { + $dbw->unlock( 'fix-for-T202032', $fname ); + } ); + + $dbw->delete( 'revision', [ 'rev_id' => $revisionRow['rev_id'] ], __METHOD__ ); + + // The locking here is mostly to make MySQL bypass the REPEATABLE-READ transaction + // isolation (weird MySQL "feature"). It does seem to block concurrent auto-incrementing + // inserts too, though, at least on MariaDB 10.1.29. + // + // Don't try to lock `revision` in this way, it'll deadlock if there are concurrent + // transactions in this code path thanks to the row lock from the original ->insert() above. + // + // And we have to use raw SQL to bypass the "aggregation used with a locking SELECT" warning + // that's for non-MySQL DBs. + $row1 = $dbw->query( + $dbw->selectSqlText( 'archive', [ 'v' => "MAX(ar_rev_id)" ], '', __METHOD__ ) . ' FOR UPDATE' + )->fetchObject(); + if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_NEW ) ) { + $row2 = $dbw->query( + $dbw->selectSqlText( 'slots', [ 'v' => "MAX(slot_revision_id)" ], '', __METHOD__ ) + . ' FOR UPDATE' + )->fetchObject(); + } else { + $row2 = null; + } + $maxRevId = max( + $maxRevId, + $row1 ? intval( $row1->v ) : 0, + $row2 ? intval( $row2->v ) : 0 + ); + + // If we don't have SCHEMA_COMPAT_WRITE_NEW, all except the first of any concurrent + // transactions will throw a duplicate key error here. It doesn't seem worth trying + // to avoid that. + $revisionRow['rev_id'] = $maxRevId + 1; + $dbw->insert( 'revision', $revisionRow, __METHOD__ ); + } + } } $commentCallback( $revisionRow['rev_id'] ); @@ -1124,6 +1212,7 @@ class RevisionStore $mainSlotRow->role_name = 'main'; $mainSlotRow->model_name = null; $mainSlotRow->slot_revision_id = null; + $mainSlotRow->slot_content_id = null; $mainSlotRow->content_address = null; $content = null; @@ -1174,6 +1263,12 @@ class RevisionStore $mainSlotRow->format_name = isset( $row->rev_content_format ) ? strval( $row->rev_content_format ) : null; + + if ( isset( $row->rev_text_id ) && intval( $row->rev_text_id ) > 0 ) { + // Overwritten below for SCHEMA_COMPAT_WRITE_NEW + $mainSlotRow->slot_content_id + = $this->emulateContentId( intval( $row->rev_text_id ) ); + } } elseif ( is_array( $row ) ) { $mainSlotRow->slot_revision_id = isset( $row['id'] ) ? intval( $row['id'] ) : null; @@ -1213,6 +1308,12 @@ class RevisionStore $mainSlotRow->format_name = $handler->getDefaultFormat(); } } + + if ( isset( $row['text_id'] ) && intval( $row['text_id'] ) > 0 ) { + // Overwritten below for SCHEMA_COMPAT_WRITE_NEW + $mainSlotRow->slot_content_id + = $this->emulateContentId( intval( $row['text_id'] ) ); + } } else { throw new MWException( 'Revision constructor passed invalid row format.' ); } @@ -1250,18 +1351,38 @@ class RevisionStore }; } - // NOTE: this callback will be looped through RevisionSlot::newInherited(), allowing - // the inherited slot to have the same content_id as the original slot. In that case, - // $slot will be the inherited slot, while $mainSlotRow still refers to the original slot. - $mainSlotRow->slot_content_id = - function ( SlotRecord $slot ) use ( $queryFlags, $mainSlotRow ) { - $db = $this->getDBConnectionRefForQueryFlags( $queryFlags ); - return $this->findSlotContentId( $db, $mainSlotRow->slot_revision_id, 'main' ); - }; + if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_WRITE_NEW ) ) { + // NOTE: this callback will be looped through RevisionSlot::newInherited(), allowing + // the inherited slot to have the same content_id as the original slot. In that case, + // $slot will be the inherited slot, while $mainSlotRow still refers to the original slot. + $mainSlotRow->slot_content_id = + function ( SlotRecord $slot ) use ( $queryFlags, $mainSlotRow ) { + $db = $this->getDBConnectionRefForQueryFlags( $queryFlags ); + return $this->findSlotContentId( $db, $mainSlotRow->slot_revision_id, 'main' ); + }; + } return new SlotRecord( $mainSlotRow, $content ); } + /** + * Provides a content ID to use with emulated SlotRecords in SCHEMA_COMPAT_OLD mode, + * based on the revision's text ID (rev_text_id or ar_text_id, respectively). + * Note that in SCHEMA_COMPAT_WRITE_BOTH, a callback to findSlotContentId() should be used + * instead, since in that mode, some revision rows may already have a real content ID, + * while other's don't - and for the ones that don't, we should indicate that it + * is missing and cause SlotRecords::hasContentId() to return false. + * + * @param int $textId + * @return int The emulated content ID + */ + private function emulateContentId( $textId ) { + // Return a negative number to ensure the ID is distinct from any real content IDs + // that will be assigned in SCHEMA_COMPAT_WRITE_NEW mode and read in SCHEMA_COMPAT_READ_NEW + // mode. + return -$textId; + } + /** * Loads a Content object based on a slot row. * @@ -1477,6 +1598,10 @@ class RevisionStore $slots = []; foreach ( $res as $row ) { + // resolve role names and model names from in-memory cache, instead of joining. + $row->role_name = $this->slotRoleStore->getName( (int)$row->slot_role_id ); + $row->model_name = $this->contentModelStore->getName( (int)$row->content_model ); + $contentCallback = function ( SlotRecord $slot ) use ( $queryFlags, $row ) { return $this->loadSlotContent( $slot, null, null, null, $queryFlags ); }; @@ -2174,7 +2299,9 @@ class RevisionStore // NOTE: even when this class is set to not read from the old schema, callers // should still be able to join against the text table, as long as we are still // writing the old schema for compatibility. - wfDeprecated( __METHOD__ . ' with `text` option', '1.32' ); + // TODO: This should trigger a deprecation warning eventually (T200918), but not + // before all known usages are removed (see T198341 and T201164). + // wfDeprecated( __METHOD__ . ' with `text` option', '1.32' ); } $ret['tables'][] = 'text'; @@ -2196,6 +2323,9 @@ class RevisionStore * * @param array $options Any combination of the following strings * - 'content': Join with the content table, and select content meta-data fields + * - 'model': Join with the content_models table, and select the model_name field. + * Only applicable if 'content' is also set. + * - 'role': Join with the slot_roles table, and select the role_name field * * @return array With three keys: * - tables: (string[]) to include in the `$table` to `IDatabase->select()` @@ -2232,26 +2362,39 @@ class RevisionStore } } else { $ret['tables'][] = 'slots'; - $ret['tables'][] = 'slot_roles'; $ret['fields'] = array_merge( $ret['fields'], [ 'slot_revision_id', 'slot_content_id', 'slot_origin', - 'role_name' + 'slot_role_id', ] ); - $ret['joins']['slot_roles'] = [ 'INNER JOIN', [ 'slot_role_id = role_id' ] ]; + + if ( in_array( 'role', $options, true ) ) { + // Use left join to attach role name, so we still find the revision row even + // if the role name is missing. This triggers a more obvious failure mode. + $ret['tables'][] = 'slot_roles'; + $ret['joins']['slot_roles'] = [ 'LEFT JOIN', [ 'slot_role_id = role_id' ] ]; + $ret['fields'][] = 'role_name'; + } if ( in_array( 'content', $options, true ) ) { $ret['tables'][] = 'content'; - $ret['tables'][] = 'content_models'; $ret['fields'] = array_merge( $ret['fields'], [ 'content_size', 'content_sha1', 'content_address', - 'model_name' + 'content_model', ] ); $ret['joins']['content'] = [ 'INNER JOIN', [ 'slot_content_id = content_id' ] ]; - $ret['joins']['content_models'] = [ 'INNER JOIN', [ 'content_model = model_id' ] ]; + + if ( in_array( 'model', $options, true ) ) { + // Use left join to attach model name, so we still find the revision row even + // if the model name is missing. This triggers a more obvious failure mode. + $ret['tables'][] = 'content_models'; + $ret['joins']['content_models'] = [ 'LEFT JOIN', [ 'content_model = model_id' ] ]; + $ret['fields'][] = 'model_name'; + } + } }