X-Git-Url: https://git.heureux-cyclage.org/?p=lhc%2Fweb%2Fwiklou.git;a=blobdiff_plain;f=includes%2FStorage%2FDerivedPageDataUpdater.php;h=e34e406f1dd2a24f68510dd0446eb4acd2f45345;hp=2df1670c886dfe81b041b4d9a23a91bfe9d1af65;hb=74d04edec385aa86ee01943b9a27475d79f74e78;hpb=a5b521730095f56aef4916f73e39213f1d9b1d16 diff --git a/includes/Storage/DerivedPageDataUpdater.php b/includes/Storage/DerivedPageDataUpdater.php index 2df1670c88..e34e406f1d 100644 --- a/includes/Storage/DerivedPageDataUpdater.php +++ b/includes/Storage/DerivedPageDataUpdater.php @@ -27,12 +27,14 @@ use CategoryMembershipChangeJob; use Content; use ContentHandler; use DataUpdate; +use DeferrableUpdate; use DeferredUpdates; use Hooks; use IDBAccessObject; use InvalidArgumentException; use JobQueueGroup; use Language; +use LinksDeletionUpdate; use LinksUpdate; use LogicException; use MediaWiki\Edit\PreparedEdit; @@ -51,6 +53,7 @@ use SiteStatsUpdate; use Title; use User; use Wikimedia\Assert\Assert; +use Wikimedia\Rdbms\LBFactory; use WikiPage; /** @@ -121,6 +124,11 @@ class DerivedPageDataUpdater implements IDBAccessObject { */ private $messageCache; + /** + * @var LBFactory + */ + private $loadbalancerFactory; + /** * @var string see $wgArticleCountMethod */ @@ -132,15 +140,22 @@ class DerivedPageDataUpdater implements IDBAccessObject { private $rcWatchCategoryMembership = false; /** - * See $options on prepareUpdate. + * Stores (most of) the $options parameter of prepareUpdate(). + * @see prepareUpdate() */ private $options = [ 'changed' => true, 'created' => false, 'moved' => false, 'restored' => false, + 'oldrevision' => null, 'oldcountable' => null, 'oldredirect' => null, + 'triggeringUser' => null, + // causeAction/causeAgent default to 'unknown' but that's handled where it's read, + // to make the life of prepareUpdate() callers easier. 
+ 'causeAction' => null, + 'causeAgent' => null, ]; /** @@ -152,8 +167,7 @@ class DerivedPageDataUpdater implements IDBAccessObject { * * Contains the following fields: * - oldRevision (RevisionRecord|null): the revision that was current before the change - * associated with this update. Might not be set, use getOldRevision() instead of direct - * access. + * associated with this update. Might not be set, use getParentRevision(). * - oldId (int|null): the id of the above revision. 0 if there is no such revision (the change * was about creating a new page); null if not known (that should not happen). * - oldIsRedirect (bool|null): whether the page was a redirect before the change. Lazy-loaded, @@ -170,6 +184,11 @@ class DerivedPageDataUpdater implements IDBAccessObject { */ private $slotsUpdate = null; + /** + * @var RevisionRecord|null + */ + private $parentRevision = null; + /** * @var RevisionRecord|null */ @@ -235,6 +254,7 @@ class DerivedPageDataUpdater implements IDBAccessObject { * @param JobQueueGroup $jobQueueGroup * @param MessageCache $messageCache * @param Language $contLang + * @param LBFactory $loadbalancerFactory */ public function __construct( WikiPage $wikiPage, @@ -243,7 +263,8 @@ class DerivedPageDataUpdater implements IDBAccessObject { ParserCache $parserCache, JobQueueGroup $jobQueueGroup, MessageCache $messageCache, - Language $contLang + Language $contLang, + LBFactory $loadbalancerFactory ) { $this->wikiPage = $wikiPage; @@ -253,6 +274,9 @@ class DerivedPageDataUpdater implements IDBAccessObject { $this->jobQueueGroup = $jobQueueGroup; $this->messageCache = $messageCache; $this->contLang = $contLang; + // XXX only needed for waiting for slaves to catch up; there should be a narrower + // interface for that. + $this->loadbalancerFactory = $loadbalancerFactory; } /** @@ -438,29 +462,34 @@ class DerivedPageDataUpdater implements IDBAccessObject { } /** - * Returns the revision that was current before the edit. 
This would be null if the edit - * created the page, or the revision's parent for a regular edit, or the revision itself - * for a null-edit. - * Only defined after calling grabCurrentRevision() or prepareContent() or prepareUpdate()! + * Returns the parent revision of the new revision wrapped by this update. + * If the update is a null-edit, this will return the parent of the current (and new) revision. + * This will return null if the revision wrapped by this update created the page. + * Only defined after calling prepareContent() or prepareUpdate()! * - * @return RevisionRecord|null the revision that was current before the edit, or null if - * the edit created the page. + * @return RevisionRecord|null the parent revision of the new revision, or null if + * the update created the page. */ - private function getOldRevision() { - $this->assertHasPageState( __METHOD__ ); + private function getParentRevision() { + $this->assertPrepared( __METHOD__ ); - // If 'oldRevision' is not set, load it! - // Useful if $this->oldPageState is initialized by prepareUpdate. - if ( !array_key_exists( 'oldRevision', $this->pageState ) ) { - /** @var int $oldId */ - $oldId = $this->pageState['oldId']; - $flags = $this->useMaster() ? RevisionStore::READ_LATEST : 0; - $this->pageState['oldRevision'] = $oldId - ? $this->revisionStore->getRevisionById( $oldId, $flags ) - : null; + if ( $this->parentRevision ) { + return $this->parentRevision; } - return $this->pageState['oldRevision']; + if ( !$this->pageState['oldId'] ) { + // If there was no current revision, there is no parent revision, + // since the page didn't exist. + return null; + } + + $oldId = $this->revision->getParentId(); + $flags = $this->useMaster() ? RevisionStore::READ_LATEST : 0; + $this->parentRevision = $oldId + ? 
$this->revisionStore->getRevisionById( $oldId, $flags ) + : null; + + return $this->parentRevision; } /** @@ -477,8 +506,8 @@ class DerivedPageDataUpdater implements IDBAccessObject { * @note After prepareUpdate() was called, grabCurrentRevision() will throw an exception * to avoid confusion, since the page's current revision is then the new revision after * the edit, which was presumably passed to prepareUpdate() as the $revision parameter. - * Use getOldRevision() instead to access the revision that used to be current before the - * edit. + * Use getParentRevision() instead to access the revision that is the parent of the + * new revision. * * @return RevisionRecord|null the page's current revision, or null if the page does not * yet exist. @@ -539,14 +568,6 @@ class DerivedPageDataUpdater implements IDBAccessObject { return $this->wikiPage->getId(); } - /** - * @return string - */ - private function getTimestampNow() { - // TODO: allow an override to be injected for testing - return wfTimestampNow(); - } - /** * Whether the content is deleted and thus not visible to the public. * @@ -759,6 +780,24 @@ class DerivedPageDataUpdater implements IDBAccessObject { $this->revision = new MutableRevisionRecord( $title ); } + // NOTE: user and timestamp must be set, so they can be used for + // {{subst:REVISIONUSER}} and {{subst:REVISIONTIMESTAMP}} in PST! 
+ $this->revision->setTimestamp( wfTimestampNow() ); + $this->revision->setUser( $user ); + + // Set up ParserOptions to operate on the new revision + $oldCallback = $userPopts->getCurrentRevisionCallback(); + $userPopts->setCurrentRevisionCallback( + function ( Title $parserTitle, $parser = false ) use ( $title, $oldCallback ) { + if ( $parserTitle->equals( $title ) ) { + $legacyRevision = new Revision( $this->revision ); + return $legacyRevision; + } else { + return call_user_func( $oldCallback, $parserTitle, $parser ); + } + } + ); + $pstContentSlots = $this->revision->getSlots(); foreach ( $slotsUpdate->getModifiedRoles() as $role ) { @@ -807,7 +846,7 @@ class DerivedPageDataUpdater implements IDBAccessObject { // prepareUpdate() is redundant for null-edits $this->doTransition( 'has-revision' ); } else { - $this->revision->setUser( $user ); + $this->parentRevision = $parentRevision; } } @@ -868,6 +907,14 @@ class DerivedPageDataUpdater implements IDBAccessObject { } } + private function assertHasRevision( $method ) { + if ( !$this->revision->getId() ) { + throw new LogicException( + 'Must call prepareUpdate() before calling ' . $method + ); + } + } + /** * Whether the edit creates the page. * @@ -935,7 +982,7 @@ class DerivedPageDataUpdater implements IDBAccessObject { $this->assertPrepared( __METHOD__ ); if ( !$this->slotsUpdate ) { - $old = $this->getOldRevision(); + $old = $this->getParentRevision(); $this->slotsUpdate = RevisionSlotsUpdate::newFromRevisionSlots( $this->revision->getSlots(), $old ? 
$old->getSlots() : null @@ -998,7 +1045,8 @@ class DerivedPageDataUpdater implements IDBAccessObject { * - moved: bool, whether the page was moved (default false) * - restored: bool, whether the page was undeleted (default false) * - oldrevision: Revision object for the pre-update revision (default null) - * - triggeringuser: The user triggering the update (UserIdentity, default null) + * - triggeringUser: The user triggering the update (UserIdentity, defaults to the + * user who created the revision) * - oldredirect: bool, null, or string 'no-change' (default null): * - bool: whether the page was counted as a redirect before that * revision, only used in changed is true and created is false @@ -1010,6 +1058,10 @@ class DerivedPageDataUpdater implements IDBAccessObject { * is true, do update the article count * - 'no-change': don't update the article count, ever * When set to null, pageState['oldCountable'] will be used instead if available. + * - causeAction: an arbitrary string identifying the reason for the update. + * See DataUpdate::getCauseAction(). (default 'unknown') + * - causeAgent: name of the user who caused the update. See DataUpdate::getCauseAgent(). + * (string, default 'unknown') */ public function prepareUpdate( RevisionRecord $revision, array $options = [] ) { Assert::parameter( @@ -1020,9 +1072,9 @@ class DerivedPageDataUpdater implements IDBAccessObject { 'must be a RevisionRecord (or Revision)' ); Assert::parameter( - !isset( $options['triggeringuser'] ) - || $options['triggeringuser'] instanceof UserIdentity, - '$options["triggeringuser"]', + !isset( $options['triggeringUser'] ) + || $options['triggeringUser'] instanceof UserIdentity, + '$options["triggeringUser"]', 'must be a UserIdentity' ); @@ -1038,7 +1090,7 @@ class DerivedPageDataUpdater implements IDBAccessObject { } else { throw new LogicException( 'Trying to re-use DerivedPageDataUpdater with revision ' - .$revision->getId() + . $revision->getId() . 
', but it\'s already bound to revision ' . $this->revision->getId() ); @@ -1099,7 +1151,7 @@ class DerivedPageDataUpdater implements IDBAccessObject { if ( !$this->user->equals( $user ) ) { throw new LogicException( 'The Revision provided has a mismatching actor: expected ' - .$this->user->getName() + . $this->user->getName() . ', got ' . $user->getName() ); @@ -1213,34 +1265,103 @@ class DerivedPageDataUpdater implements IDBAccessObject { /** * @param bool $recursive * - * @return DataUpdate[] + * @return DeferrableUpdate[] */ public function getSecondaryDataUpdates( $recursive = false ) { - // TODO: MCR: getSecondaryDataUpdates() needs a complete overhaul to avoid DataUpdates - // from different slots overwriting each other in the database. Plan: - // * replace direct calls to Content::getSecondaryDataUpdates() with calls to this method - // * Construct LinksUpdate here, on the combined ParserOutput, instead of in AbstractContent - // for each slot. - // * Pass $slot into getSecondaryDataUpdates() - probably be introducing a new duplicate - // version of this function in ContentHandler. - // * The new method gets the PreparedEdit, but no $recursive flag (that's for LinksUpdate) - // * Hack: call both the old and the new getSecondaryDataUpdates method here; Pass - // the per-slot ParserOutput to the old method, for B/C. - // * Hack: If there is more than one slot, filter LinksUpdate from the DataUpdates - // returned by getSecondaryDataUpdates, and use a LinksUpdated for the combined output - // instead. - // * Call the SecondaryDataUpdates hook here (or kill it - its signature doesn't make sense) - - $content = $this->getSlots()->getContent( 'main' ); - - // NOTE: $output is the combined output, to be shown in the default view. + if ( $this->isContentDeleted() ) { + // This shouldn't happen, since the current content is always public, + // and DataUpdates are only needed for current content. 
+ return []; + } + $output = $this->getCanonicalParserOutput(); - $updates = $content->getSecondaryDataUpdates( - $this->getTitle(), null, $recursive, $output + // Construct a LinksUpdate for the combined canonical output. + $linksUpdate = new LinksUpdate( + $this->getTitle(), + $output, + $recursive ); - return $updates; + $allUpdates = [ $linksUpdate ]; + + // NOTE: Run updates for all slots, not just the modified slots! Otherwise, + // info for an inherited slot may end up being removed. This is also needed + // to ensure that purges are effective. + $renderedRevision = $this->getRenderedRevision(); + foreach ( $this->getSlots()->getSlotRoles() as $role ) { + $slot = $this->getRawSlot( $role ); + $content = $slot->getContent(); + $handler = $content->getContentHandler(); + + $updates = $handler->getSecondaryDataUpdates( + $this->getTitle(), + $content, + $role, + $renderedRevision + ); + $allUpdates = array_merge( $allUpdates, $updates ); + + // TODO: remove B/C hack in 1.32! + // NOTE: we assume that the combined output contains all relevant meta-data for + // all slots! + $legacyUpdates = $content->getSecondaryDataUpdates( + $this->getTitle(), + null, + $recursive, + $output + ); + + // HACK: filter out redundant and incomplete LinksUpdates + $legacyUpdates = array_filter( $legacyUpdates, function ( $update ) { + return !( $update instanceof LinksUpdate ); + } ); + + $allUpdates = array_merge( $allUpdates, $legacyUpdates ); + } + + // XXX: if a slot was removed by an earlier edit, but deletion updates failed to run at + // that time, we don't know for which slots to run deletion updates when purging a page. + // We'd have to examine the entire history of the page to determine that. Perhaps there + // could be a "try extra hard" mode for that case that would run a DB query to find all + // roles/models ever used on the page. On the other hand, removing slots should be quite + // rare, so perhaps this isn't worth the trouble. 
+ + // TODO: consolidate with similar logic in WikiPage::getDeletionUpdates() + $wikiPage = $this->getWikiPage(); + $parentRevision = $this->getParentRevision(); + foreach ( $this->getRemovedSlotRoles() as $role ) { + // HACK: we should get the content model of the removed slot from a SlotRoleHandler! + // For now, find the slot in the parent revision - if the slot was removed, it should + // always exist in the parent revision. + $parentSlot = $parentRevision->getSlot( $role, RevisionRecord::RAW ); + $content = $parentSlot->getContent(); + $handler = $content->getContentHandler(); + + $updates = $handler->getDeletionUpdates( + $this->getTitle(), + $role + ); + $allUpdates = array_merge( $allUpdates, $updates ); + + // TODO: remove B/C hack in 1.32! + $legacyUpdates = $content->getDeletionUpdates( $wikiPage ); + + // HACK: filter out redundant and incomplete LinksDeletionUpdate + $legacyUpdates = array_filter( $legacyUpdates, function ( $update ) { + return !( $update instanceof LinksDeletionUpdate ); + } ); + + $allUpdates = array_merge( $allUpdates, $legacyUpdates ); + } + + // TODO: hard deprecate SecondaryDataUpdates in favor of RevisionDataUpdates in 1.33! + Hooks::run( + 'RevisionDataUpdates', + [ $this->getTitle(), $renderedRevision, &$allUpdates ] + ); + + return $allUpdates; } /** @@ -1260,40 +1381,16 @@ class DerivedPageDataUpdater implements IDBAccessObject { $wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks! - // NOTE: this may trigger the first parsing of the new content after an edit (when not - // using pre-generated stashed output). - // XXX: we may want to use the PoolCounter here. This would perhaps allow the initial parse - // to be perform post-send. The client could already follow a HTTP redirect to the - // page view, but would then have to wait for a response until rendering is complete. - $output = $this->getCanonicalParserOutput(); - - // Save it to the parser cache. 
- // Make sure the cache time matches page_touched to avoid double parsing. - $this->parserCache->save( - $output, $wikiPage, $this->getCanonicalParserOptions(), - $this->revision->getTimestamp(), $this->revision->getId() - ); - $legacyUser = User::newFromIdentity( $this->user ); $legacyRevision = new Revision( $this->revision ); - // Update the links tables and other secondary data - $recursive = $this->options['changed']; // T52785 - $updates = $this->getSecondaryDataUpdates( $recursive ); + $this->doParserCacheUpdate(); - $triggeringUser = $this->options['triggeringuser'] ?? $this->user; - if ( !$triggeringUser instanceof User ) { - $triggeringUser = User::newFromIdentity( $triggeringUser ); - } - foreach ( $updates as $update ) { - // TODO: make an $option field for the cause - $update->setCause( 'edit-page', $triggeringUser->getName() ); - if ( $update instanceof LinksUpdate ) { - $update->setRevision( $legacyRevision ); - $update->setTriggeringUser( $triggeringUser ); - } - DeferredUpdates::addUpdate( $update ); - } + $this->doSecondaryDataUpdates( [ + // T52785 do not update any other pages on a null edit + 'recursive' => $this->options['changed'], + 'defer' => DeferredUpdates::POSTSEND, + ] ); // TODO: MCR: check if *any* changed slot supports categories! if ( $this->rcWatchCategoryMembership @@ -1410,7 +1507,7 @@ class DerivedPageDataUpdater implements IDBAccessObject { WikiPage::onArticleEdit( $title, $legacyRevision, $this->getTouchedSlotRoles() ); } - $oldRevision = $this->getOldRevision(); + $oldRevision = $this->getParentRevision(); $oldLegacyRevision = $oldRevision ? new Revision( $oldRevision ) : null; // TODO: In the wiring, register a listener for this on the new PageEventEmitter @@ -1421,4 +1518,93 @@ class DerivedPageDataUpdater implements IDBAccessObject { $this->doTransition( 'done' ); } + /** + * Do secondary data updates (such as updating link tables). + * + * MCR note: this method is temporarily exposed via WikiPage::doSecondaryDataUpdates. 
+ * + * @param array $options + * - recursive: make the update recursive, i.e. also update pages which transclude the + * current page or otherwise depend on it (default: false) + * - defer: one of the DeferredUpdates constants, or false to run immediately after waiting + * for replication of the changes from the SecondaryDataUpdates hooks (default: false) + * - transactionTicket: a transaction ticket from LBFactory::getEmptyTransactionTicket(), + * only when defer is false (default: null) + * @since 1.32 + */ + public function doSecondaryDataUpdates( array $options = [] ) { + $this->assertHasRevision( __METHOD__ ); + $options += [ + 'recursive' => false, + 'defer' => false, + 'transactionTicket' => null, + ]; + $deferValues = [ false, DeferredUpdates::PRESEND, DeferredUpdates::POSTSEND ]; + if ( !in_array( $options['defer'], $deferValues, true ) ) { + throw new InvalidArgumentException( 'invalid value for defer: ' . $options['defer'] ); + } + Assert::parameterType( 'integer|null', $options['transactionTicket'], + '$options[\'transactionTicket\']' ); + + $updates = $this->getSecondaryDataUpdates( $options['recursive'] ); + + $triggeringUser = $this->options['triggeringUser'] ?? $this->user; + if ( !$triggeringUser instanceof User ) { + $triggeringUser = User::newFromIdentity( $triggeringUser ); + } + $causeAction = $this->options['causeAction'] ?? 'unknown'; + $causeAgent = $this->options['causeAgent'] ?? 'unknown'; + $legacyRevision = new Revision( $this->revision ); + + if ( $options['defer'] === false && $options['transactionTicket'] !== null ) { + // For legacy hook handlers doing updates via LinksUpdateConstructed, make sure + // any pending writes they made get flushed before the doUpdate() calls below. + // This avoids snapshot-clearing errors in LinksUpdate::acquirePageLock(). 
+ $this->loadbalancerFactory->commitAndWaitForReplication( + __METHOD__, $options['transactionTicket'] + ); + } + + foreach ( $updates as $update ) { + if ( $update instanceof DataUpdate ) { + $update->setCause( $causeAction, $causeAgent ); + } + if ( $update instanceof LinksUpdate ) { + $update->setRevision( $legacyRevision ); + $update->setTriggeringUser( $triggeringUser ); + } + if ( $options['defer'] === false ) { + if ( $options['transactionTicket'] !== null ) { + $update->setTransactionTicket( $options['transactionTicket'] ); + } + $update->doUpdate(); + } else { + DeferredUpdates::addUpdate( $update, $options['defer'] ); + } + } + } + + public function doParserCacheUpdate() { + $this->assertHasRevision( __METHOD__ ); + + $wikiPage = $this->getWikiPage(); // TODO: ParserCache should accept a RevisionRecord instead + + // NOTE: this may trigger the first parsing of the new content after an edit (when not + // using pre-generated stashed output). + // XXX: we may want to use the PoolCounter here. This would perhaps allow the initial parse + // to be performed post-send. The client could already follow a HTTP redirect to the + // page view, but would then have to wait for a response until rendering is complete. + $output = $this->getCanonicalParserOutput(); + + // Save it to the parser cache. Use the revision timestamp in the case of a + // freshly saved edit, as that matches page_touched and a mismatch would trigger an + // unnecessary reparse. + $timestamp = $this->options['changed'] ? $this->revision->getTimestamp() + : $output->getTimestamp(); + $this->parserCache->save( + $output, $wikiPage, $this->getCanonicalParserOptions(), + $timestamp, $this->revision->getId() + ); + } + }