3 * A handle for managing updates for derived page data on edit, import, purge, etc.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 namespace MediaWiki\Storage
;
26 use CategoryMembershipChangeJob
;
34 use InvalidArgumentException
;
37 use LinksDeletionUpdate
;
40 use MediaWiki\Edit\PreparedEdit
;
41 use MediaWiki\Revision\MutableRevisionRecord
;
42 use MediaWiki\Revision\RenderedRevision
;
43 use MediaWiki\Revision\RevisionRecord
;
44 use MediaWiki\Revision\RevisionRenderer
;
45 use MediaWiki\Revision\RevisionSlots
;
46 use MediaWiki\Revision\RevisionStore
;
47 use MediaWiki\Revision\SlotRecord
;
48 use MediaWiki\User\UserIdentity
;
53 use RecentChangesUpdateJob
;
54 use ResourceLoaderWikiModule
;
60 use Wikimedia\Assert\Assert
;
61 use Wikimedia\Rdbms\LBFactory
;
65 * A handle for managing updates for derived page data on edit, import, purge, etc.
67 * @note Avoid direct usage of DerivedPageDataUpdater.
69 * @todo Define interfaces for the different use cases of DerivedPageDataUpdater, particularly
70 * providing access to post-PST content and ParserOutput to callbacks during revision creation,
71 * which currently use WikiPage::prepareContentForEdit, and allowing updates to be triggered on
72 * purge, import, and undeletion, which currently use WikiPage::doEditUpdates() and
73 * Content::getSecondaryDataUpdates().
75 * DerivedPageDataUpdater instances are designed to be cached inside a WikiPage instance,
76 * and re-used by callback code over the course of an update operation. It's a stepping stone
77 * on the way to a more complete refactoring of WikiPage.
79 * When using a DerivedPageDataUpdater, the following life cycle must be observed:
80 * grabCurrentRevision (optional), prepareContent (optional), prepareUpdate (required
81 * for doUpdates). getCanonicalParserOutput, getSlots, and getSecondaryDataUpdates
82 * require prepareContent or prepareUpdate to have been called first, to initialize the
83 * DerivedPageDataUpdater.
85 * @see docs/pageupdater.txt for more information.
87 * MCR migration note: this replaces the relevant methods in WikiPage, and covers the use cases
95 class DerivedPageDataUpdater
implements IDBAccessObject
{
98 * @var UserIdentity|null
// Acting user; compared by name against callers' users in isReusableFor() and prepareContent().
100 private $user = null;
// The following collaborators are assigned from the constructor arguments of the same name.
110 private $parserCache;
115 private $revisionStore;
125 private $jobQueueGroup;
130 private $messageCache;
// Per the constructor's note, only needed for waiting for replicas to catch up.
135 private $loadbalancerFactory;
138 * @var string see $wgArticleCountMethod
// Set through setArticleCountMethod(); isCountable() checks it against 'link'.
140 private $articleCountMethod;
143 * @var boolean see $wgRCWatchCategoryMembership
// Set through setRcWatchCategoryMembership(); defaults to false.
145 private $rcWatchCategoryMembership = false;
148 * Stores (most of) the $options parameter of prepareUpdate().
149 * @see prepareUpdate()
// Default option values. prepareUpdate() overlays caller-supplied options on top of these,
// keeping only keys that are already defined here (array_intersect_key(...) + defaults).
156 'oldrevision' => null,
157 'oldcountable' => null,
158 'oldredirect' => null,
159 'triggeringUser' => null,
160 // causeAction/causeAgent default to 'unknown' but that's handled where it's read,
161 // to make the life of prepareUpdate() callers easier.
162 'causeAction' => null,
163 'causeAgent' => null,
167 * The state of the relevant row in page table before the edit.
168 * This is determined by the first call to grabCurrentRevision, prepareContent,
169 * or prepareUpdate (so it is only accessible in 'knows-current' or a later stage).
170 * If pageState was not initialized when prepareUpdate() is called, prepareUpdate() will
171 * attempt to emulate the state of the page table before the edit.
173 * Contains the following fields:
174 * - oldRevision (RevisionRecord|null): the revision that was current before the change
175 * associated with this update. Might not be set, use getParentRevision().
176 * - oldId (int|null): the id of the above revision. 0 if there is no such revision (the change
177 * was about creating a new page); null if not known (that should not happen).
178 * - oldIsRedirect (bool|null): whether the page was a redirect before the change. Lazy-loaded,
179 * can be null; use wasRedirect() instead of direct access.
180 * - oldCountable (bool|null): whether the page was countable before the change (or null
181 * if we don't have that information)
// Null until grabCurrentRevision() or prepareUpdate() populates it; assertHasPageState() guards reads.
185 private $pageState = null;
188 * @var RevisionSlotsUpdate|null
// Set by prepareContent(), or lazily derived in getRevisionSlotsUpdate().
190 private $slotsUpdate = null;
193 * @var RevisionRecord|null
// Cached by getParentRevision(); also set by prepareContent().
195 private $parentRevision = null;
198 * @var RevisionRecord|null
// The target revision; set by prepareContent() (unsaved) or prepareUpdate() (saved).
200 private $revision = null;
203 * @var RenderedRevision
// Lazily created by getRenderedRevision().
205 private $renderedRevision = null;
208 * @var RevisionRenderer
// Assigned in the constructor; used by getRenderedRevision().
210 private $revisionRenderer;
213 * A stage identifier for managing the life cycle of this instance.
214 * Possible stages are 'new', 'knows-current', 'has-content', 'has-revision', and 'done'.
216 * @see docs/pageupdater.txt for documentation of the life cycle.
// Only changed through doTransition(), which validates the move against self::$transitions.
220 private $stage = 'new';
223 * Transition table for managing the life cycle of DerivedPageDataUpdater instances.
225 * XXX: Overkill. This is a linear order, we could just count. Names are nice though,
226 * and constants are also overkill...
228 * @see docs/pageupdater.txt for documentation of the life cycle.
// Two-level lookup used by assertTransition(): self::$transitions[<current stage>][<new stage>].
// A missing or empty entry means the transition is rejected with a LogicException.
// NOTE(review): the outer keys (source stages) are not visible in this excerpt; each run of
// entries below is the set of allowed target stages for one source stage — confirm the keys.
232 private static $transitions = [
235 'knows-current' => true,
236 'has-content' => true,
237 'has-revision' => true,
240 'knows-current' => true,
241 'has-content' => true,
242 'has-revision' => true,
245 'has-content' => true,
246 'has-revision' => true,
249 'has-revision' => true,
255 * @param WikiPage $wikiPage
256 * @param RevisionStore $revisionStore
257 * @param RevisionRenderer $revisionRenderer
258 * @param ParserCache $parserCache
259 * @param JobQueueGroup $jobQueueGroup
260 * @param MessageCache $messageCache
261 * @param Language $contLang
262 * @param LBFactory $loadbalancerFactory
// Constructor: stores each injected collaborator on the instance. No other work is done in
// the visible body.
// NOTE(review): $wikiPage and $contLang are assigned below, but their parameter declarations
// are not visible in this excerpt — confirm against the full signature.
264 public function __construct(
266 RevisionStore
$revisionStore,
267 RevisionRenderer
$revisionRenderer,
268 ParserCache
$parserCache,
269 JobQueueGroup
$jobQueueGroup,
270 MessageCache
$messageCache,
272 LBFactory
$loadbalancerFactory
274 $this->wikiPage
= $wikiPage;
276 $this->parserCache
= $parserCache;
277 $this->revisionStore
= $revisionStore;
278 $this->revisionRenderer
= $revisionRenderer;
279 $this->jobQueueGroup
= $jobQueueGroup;
280 $this->messageCache
= $messageCache;
// Content language; later passed to ParserOptions::newFromUserAndLang() in prepareContent().
281 $this->contLang
= $contLang;
282 // XXX only needed for waiting for replicas to catch up; there should be a narrower
283 // interface for that.
284 $this->loadbalancerFactory
= $loadbalancerFactory;
288 * Transition function for managing the life cycle of this instance.
290 * @see docs/pageupdater.txt for documentation of the life cycle.
292 * @param string $newStage the new stage
293 * @return string the previous stage
295 * @throws LogicException If a transition to the given stage is not possible in the current
// Moves this instance to $newStage after validating the move.
298 private function doTransition( $newStage ) {
// Throws LogicException if the move from the current stage is not allowed.
299 $this->assertTransition( $newStage );
// Remember the stage we are leaving before overwriting it.
// NOTE(review): the statement returning $oldStage is not visible in this excerpt.
301 $oldStage = $this->stage
;
302 $this->stage
= $newStage;
308 * Asserts that a transition to the given stage is possible, without performing it.
310 * @see docs/pageupdater.txt for documentation of the life cycle.
312 * @param string $newStage the new stage
314 * @throws LogicException If this instance is not in the expected stage
// Checks, without changing state, that $newStage is reachable from the current stage.
316 private function assertTransition( $newStage ) {
// empty() treats both a missing table entry and a falsy one as "not allowed".
317 if ( empty( self
::$transitions[$this->stage
][$newStage] ) ) {
318 throw new LogicException( "Cannot transition from {$this->stage} to $newStage" );
323 * @return bool|string
// Returns the wiki ID (bool|string per the doc comment above).
// NOTE(review): the return statement is not visible in this excerpt.
325 private function getWikiId() {
326 // TODO: get from RevisionStore
331 * Checks whether this DerivedPageDataUpdater can be re-used for running updates targeting
332 * the given revision.
334 * @param UserIdentity|null $user The user creating the revision in question
335 * @param RevisionRecord|null $revision New revision (after save, if already saved)
336 * @param RevisionSlotsUpdate|null $slotsUpdate New content (before PST)
337 * @param null|int $parentId Parent revision of the edit (use 0 for page creation)
// Decides whether this updater's accumulated state is compatible with the given user,
// revision, slots update and parent ID. All parameters are optional; a check is skipped
// when the corresponding argument or the corresponding internal state is not set.
// NOTE(review): the $parentId parameter declaration and the result of each failed check
// (presumably `return false`) are not visible in this excerpt — confirm.
341 public function isReusableFor(
342 UserIdentity
$user = null,
343 RevisionRecord
$revision = null,
344 RevisionSlotsUpdate
$slotsUpdate = null,
// Argument sanity check: an explicit $parentId must agree with $revision's own parent.
349 && $revision->getParentId() !== $parentId
351 throw new InvalidArgumentException( '$parentId should match the parent of $revision' );
354 // NOTE: For null revisions, $user may be different from $this->revision->getUser
355 // and also from $revision->getUser.
356 // But $user should always match $this->user.
// Users are compared by name only.
357 if ( $user && $this->user
&& $user->getName() !== $this->user
->getName() ) {
// Only a revision that already has an ID (i.e. was bound via a saved revision) is compared.
361 if ( $revision && $this->revision
&& $this->revision
->getId()
362 && $this->revision
->getId() !== $revision->getId()
// The remembered pre-edit revision ID must match the parent of the given revision...
367 if ( $this->pageState
369 && $revision->getParentId() !== null
370 && $this->pageState
['oldId'] !== $revision->getParentId()
// ...and must match an explicitly supplied $parentId.
375 if ( $this->pageState
376 && $parentId !== null
377 && $this->pageState
['oldId'] !== $parentId
382 // NOTE: this check is the primary reason for having the $this->slotsUpdate field!
383 if ( $this->slotsUpdate
385 && !$this->slotsUpdate
->hasSameUpdates( $slotsUpdate )
// Content of the bound revision must equal the content of the given revision.
392 && !$this->revision
->getSlots()->hasSameContent( $revision->getSlots() )
401 * @param string $articleCountMethod "any" or "link".
402 * @see $wgArticleCountMethod
// Setter for the article count method; the value is stored as given, without validation.
// isCountable() only treats the value 'link' specially.
404 public function setArticleCountMethod( $articleCountMethod ) {
405 $this->articleCountMethod
= $articleCountMethod;
409 * @param bool $rcWatchCategoryMembership
410 * @see $wgRCWatchCategoryMembership
// Setter for the category-membership flag; the value is stored as given, without validation.
412 public function setRcWatchCategoryMembership( $rcWatchCategoryMembership ) {
413 $this->rcWatchCategoryMembership
= $rcWatchCategoryMembership;
// Returns the title of the wrapped WikiPage.
419 private function getTitle() {
420 // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
421 return $this->wikiPage
->getTitle();
// Returns the WikiPage that was passed to the constructor.
427 private function getWikiPage() {
428 // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
429 return $this->wikiPage
;
433 * Determines whether the page being edited already existed.
434 * Only defined after calling grabCurrentRevision() or prepareContent() or prepareUpdate()!
437 * @throws LogicException if called before grabCurrentRevision
// Reports whether a current revision existed before this update.
439 public function pageExisted() {
// Throws LogicException if page state has not been captured yet.
440 $this->assertHasPageState( __METHOD__
);
// oldId is 0 when there was no prior revision (see grabCurrentRevision()).
442 return $this->pageState
['oldId'] > 0;
446 * Returns the parent revision of the new revision wrapped by this update.
447 * If the update is a null-edit, this will return the parent of the current (and new) revision.
448 * This will return null if the revision wrapped by this update created the page.
449 * Only defined after calling prepareContent() or prepareUpdate()!
451 * @return RevisionRecord|null the parent revision of the new revision, or null if
452 * the update created the page.
// Returns the parent of the target revision, loading it once and caching it in
// $this->parentRevision.
454 private function getParentRevision() {
// Requires $this->revision to be set; throws LogicException otherwise.
455 $this->assertPrepared( __METHOD__
);
// Fast path: already resolved (or supplied by prepareContent()).
457 if ( $this->parentRevision
) {
458 return $this->parentRevision
;
// NOTE(review): the body of this branch is not visible in this excerpt; per the comment
// below and the method's doc comment it presumably returns null — confirm.
461 if ( !$this->pageState
['oldId'] ) {
462 // If there was no current revision, there is no parent revision,
463 // since the page didn't exist.
467 $oldId = $this->revision
->getParentId();
// Read from the master DB only if the page itself was loaded with READ_LATEST.
468 $flags = $this->useMaster() ? RevisionStore
::READ_LATEST
: 0;
// NOTE(review): the else-arm of this ternary is not visible in this excerpt.
469 $this->parentRevision
= $oldId
470 ?
$this->revisionStore
->getRevisionById( $oldId, $flags )
473 return $this->parentRevision
;
477 * Returns the revision that was the page's current revision when grabCurrentRevision()
480 * During an edit, that revision will act as the logical parent of the new revision.
482 * Some updates are performed based on the difference between the database state at the
483 * moment this method is first called, and the state after the edit.
485 * @see docs/pageupdater.txt for more information on when this method can and should be called.
487 * @note After prepareUpdate() was called, grabCurrentRevision() will throw an exception
488 * to avoid confusion, since the page's current revision is then the new revision after
489 * the edit, which was presumably passed to prepareUpdate() as the $revision parameter.
490 * Use getParentRevision() instead to access the revision that is the parent of the
493 * @return RevisionRecord|null the page's current revision, or null if the page does not
// Captures the page's pre-edit state on first call and returns the then-current revision;
// later calls return the remembered revision without touching the page again.
496 public function grabCurrentRevision() {
// Already captured: return the remembered value.
497 if ( $this->pageState
) {
498 return $this->pageState
['oldRevision'];
// Throws LogicException if the life cycle has already moved past 'knows-current'.
501 $this->assertTransition( 'knows-current' );
503 // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
504 $wikiPage = $this->getWikiPage();
506 // Do not call WikiPage::clear(), since the caller may already have caused page data
507 // to be loaded with SELECT FOR UPDATE. Just assert it's loaded now.
508 $wikiPage->loadPageData( self
::READ_LATEST
);
// $rev is falsy when the page has no current revision; $current is then null.
509 $rev = $wikiPage->getRevision();
510 $current = $rev ?
$rev->getRevisionRecord() : null;
// NOTE(review): the line opening this array (presumably the assignment to $this->pageState)
// is not visible in this excerpt.
513 'oldRevision' => $current,
// 0 marks "no prior revision"; pageExisted() relies on this.
514 'oldId' => $rev ?
$rev->getId() : 0,
515 'oldIsRedirect' => $wikiPage->isRedirect(), // NOTE: uses page table
516 'oldCountable' => $wikiPage->isCountable(), // NOTE: uses pagelinks table
519 $this->doTransition( 'knows-current' );
521 return $this->pageState
['oldRevision'];
525 * Whether prepareUpdate() or prepareContent() have been called on this instance.
// True once a target revision object exists, whether or not it has been saved.
529 public function isContentPrepared() {
530 return $this->revision
!== null;
534 * Whether prepareUpdate() has been called on this instance.
536 * @note will also return true in case of a null-edit!
// True once the target revision exists and has an ID. prepareContent() copies the parent's
// ID onto the revision for unchanged content, so this also holds after a null-edit.
540 public function isUpdatePrepared() {
541 return $this->revision
!== null && $this->revision
->getId() !== null;
// Returns the ID of the wrapped WikiPage.
547 private function getPageId() {
548 // NOTE: eventually, we won't get a WikiPage passed into the constructor any more
549 return $this->wikiPage
->getId();
553 * Whether the content is deleted and thus not visible to the public.
// Reports whether the target revision's text is flagged as deleted.
557 public function isContentDeleted() {
// With a revision at hand, defer to its DELETED_TEXT visibility bit.
558 if ( $this->revision
) {
559 // XXX: if that revision is the current revision, this should be skipped
560 return $this->revision
->isDeleted( RevisionRecord
::DELETED_TEXT
);
// NOTE(review): the fallback return for the no-revision case is not visible in this excerpt;
// per the comment below it is presumably `false` — confirm.
562 // If the content has not been saved yet, it cannot have been deleted yet.
568 * Returns the slot, modified or inherited, after PST, with no audience checks applied.
570 * @param string $role slot role name
572 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
// Looks up the slot for $role in the target revision's slots (see getSlots()).
576 public function getRawSlot( $role ) {
577 return $this->getSlots()->getSlot( $role );
581 * Returns the content of the given slot, with no audience checks.
583 * @throws PageUpdateException If the slot is neither set for update nor inherited from the
585 * @param string $role slot role name
// Returns the content object of the slot for $role, via getRawSlot().
588 public function getRawContent( $role ) {
589 return $this->getRawSlot( $role )->getContent();
593 * Returns the content model of the given slot
595 * @param string $role slot role name
// Returns the content model of the slot for $role, via getRawSlot().
598 private function getContentModel( $role ) {
599 return $this->getRawSlot( $role )->getModel();
603 * @param string $role slot role name
604 * @return ContentHandler
// Resolves the ContentHandler for the content model of the slot for $role.
606 private function getContentHandler( $role ) {
607 // TODO: inject something like a ContentHandlerRegistry
608 return ContentHandler
::getForModelID( $this->getContentModel( $role ) );
// Whether follow-up reads should use READ_LATEST: true if the wrapped WikiPage reports
// it was itself loaded with READ_LATEST.
611 private function useMaster() {
612 // TODO: can we just set a flag to true in prepareContent()?
613 return $this->wikiPage
->wasLoadedFrom( self
::READ_LATEST
);
// Decides whether the target revision makes the page count as an article.
// NOTE(review): the bodies of the three guard branches below (presumably `return false`)
// and any default initialisation of $hasLinks are not visible in this excerpt — confirm.
619 public function isCountable() {
620 // NOTE: Keep in sync with WikiPage::isCountable.
// Guard: page is not a content page.
622 if ( !$this->getTitle()->isContentPage() ) {
// Guard: content is flagged as deleted.
626 if ( $this->isContentDeleted() ) {
627 // This should be irrelevant: countability only applies to the current revision,
628 // and the current revision is never suppressed.
// Guard: the main slot is a redirect.
632 if ( $this->isRedirect() ) {
// Link information is only computed when the count method is 'link'.
638 if ( $this->articleCountMethod
=== 'link' ) {
639 $hasLinks = (bool)count( $this->getCanonicalParserOutput()->getLinks() );
642 // TODO: MCR: ask all slots if they have links [SlotHandler/PageTypeHandler]
// The final decision is delegated to the main slot's content object.
643 $mainContent = $this->getRawContent( SlotRecord
::MAIN
);
644 return $mainContent->isCountable( $hasLinks );
// Whether the target revision's main slot content is a redirect.
650 public function isRedirect() {
651 // NOTE: main slot determines redirect status
652 $mainContent = $this->getRawContent( SlotRecord
::MAIN
);
654 return $mainContent->isRedirect();
658 * @param RevisionRecord $rev
// Whether the main slot content of the given revision is a redirect.
// The content is fetched with RevisionRecord::RAW, i.e. without audience checks.
662 private function revisionIsRedirect( RevisionRecord
$rev ) {
663 // NOTE: main slot determines redirect status
664 $mainContent = $rev->getContent( SlotRecord
::MAIN
, RevisionRecord
::RAW
);
666 return $mainContent->isRedirect();
670 * Prepare updates based on an update which has not yet been saved.
672 * This may be used to create derived data that is needed when creating a new revision;
673 * particularly, this makes available the slots of the new revision via the getSlots()
674 * method, after applying PST and slot inheritance.
676 * The derived data prepared for revision creation may then later be re-used by doUpdates(),
677 * without the need to re-calculate.
679 * @see docs/pageupdater.txt for more information on when this method can and should be called.
681 * @note Calling this method more than once with the same $slotsUpdate
682 * has no effect. Calling this method multiple times with different content will cause
685 * @note Calling this method after prepareUpdate() has been called will cause an exception.
687 * @param User $user The user to act as context for pre-save transformation (PST).
688 * Type hint should be reduced to UserIdentity at some point.
689 * @param RevisionSlotsUpdate $slotsUpdate The new content of the slots to be updated
690 * by this edit, before PST.
691 * @param bool $useStash Whether to use stashed ParserOutput
// Builds the unsaved target revision from $slotsUpdate: captures the current revision,
// applies pre-save transformation (PST) per modified slot (or reuses a stashed edit for the
// main slot), applies slot removals, and records whether the edit creates/changes the page.
// Repeated calls with the same user and the same slot updates are no-ops; mismatching
// repeats throw LogicException.
// NOTE(review): the $user and $useStash parameter declarations, several closing lines, and
// the assignment of $this->user are not visible in this excerpt.
693 public function prepareContent(
695 RevisionSlotsUpdate
$slotsUpdate,
// Re-entry path: $this->slotsUpdate being set means a previous call completed.
698 if ( $this->slotsUpdate
) {
699 if ( !$this->user
) {
700 throw new LogicException(
701 'Unexpected state: $this->slotsUpdate was initialized, '
702 . 'but $this->user was not.'
// Users are compared by name only.
706 if ( $this->user
->getName() !== $user->getName() ) {
707 throw new LogicException( 'Can\'t call prepareContent() again for different user! '
708 . 'Expected ' . $this->user
->getName() . ', got ' . $user->getName()
712 if ( !$this->slotsUpdate
->hasSameUpdates( $slotsUpdate ) ) {
713 throw new LogicException(
714 'Can\'t call prepareContent() again with different slot content!'
718 return; // prepareContent() already done, nothing to do
// First call: verify the life cycle allows moving to 'has-content' before doing any work.
721 $this->assertTransition( 'has-content' );
723 $wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!
724 $title = $this->getTitle();
// Captures page state if not yet captured; null means the page does not exist yet.
726 $parentRevision = $this->grabCurrentRevision();
// Reset any previously computed output.
728 $this->slotsOutput
= [];
729 $this->canonicalParserOutput
= null;
731 // The edit may have already been prepared via api.php?action=stashedit
732 $stashedEdit = false;
734 // TODO: MCR: allow output for all slots to be stashed.
// Stash lookup is only attempted when requested and when the main slot is being modified.
735 if ( $useStash && $slotsUpdate->isModifiedSlot( SlotRecord
::MAIN
) ) {
736 $mainContent = $slotsUpdate->getModifiedSlot( SlotRecord
::MAIN
)->getContent();
737 $legacyUser = User
::newFromIdentity( $user );
738 $stashedEdit = ApiStashEdit
::checkCache( $title, $mainContent, $legacyUser );
// On a stash hit, adopt the stashed ParserOutput as the canonical output.
741 if ( $stashedEdit ) {
742 /** @var ParserOutput $output */
743 $output = $stashedEdit->output
;
745 // TODO: this should happen when stashing the ParserOutput, not now!
746 $output->setCacheTime( $stashedEdit->timestamp
);
748 // TODO: MCR: allow output for all slots to be stashed.
749 $this->canonicalParserOutput
= $output;
// Parser options for PST; the hook receives the WikiPage and these options.
752 $userPopts = ParserOptions
::newFromUserAndLang( $user, $this->contLang
);
753 Hooks
::run( 'ArticlePrepareTextForEdit', [ $wikiPage, $userPopts ] );
// From here on, a repeated call takes the re-entry path at the top.
756 $this->slotsUpdate
= $slotsUpdate;
// Start from the parent's slots when there is a parent, otherwise from an empty revision.
758 if ( $parentRevision ) {
759 $this->revision
= MutableRevisionRecord
::newFromParentRevision( $parentRevision );
761 $this->revision
= new MutableRevisionRecord( $title );
764 // NOTE: user and timestamp must be set, so they can be used for
765 // {{subst:REVISIONUSER}} and {{subst:REVISIONTIMESTAMP}} in PST!
766 $this->revision
->setTimestamp( wfTimestampNow() );
767 $this->revision
->setUser( $user );
769 // Set up ParserOptions to operate on the new revision
// For the page being edited, the parser is handed the in-progress revision; for any other
// title the previously installed callback is used.
770 $oldCallback = $userPopts->getCurrentRevisionCallback();
771 $userPopts->setCurrentRevisionCallback(
772 function ( Title
$parserTitle, $parser = false ) use ( $title, $oldCallback ) {
773 if ( $parserTitle->equals( $title ) ) {
774 $legacyRevision = new Revision( $this->revision
);
775 return $legacyRevision;
777 return call_user_func( $oldCallback, $parserTitle, $parser );
// Slots container of the new revision; modified in place by the loops below.
782 $pstContentSlots = $this->revision
->getSlots();
784 foreach ( $slotsUpdate->getModifiedRoles() as $role ) {
785 $slot = $slotsUpdate->getModifiedSlot( $role );
// NOTE(review): the statement assigning $pstSlot for the inherited case is not visible in
// this excerpt.
787 if ( $slot->isInherited() ) {
788 // No PST for inherited slots! Note that "modified" slots may still be inherited
789 // from an earlier version, e.g. for rollbacks.
791 } elseif ( $role === SlotRecord
::MAIN
&& $stashedEdit ) {
792 // TODO: MCR: allow PST content for all slots to be stashed.
793 $pstSlot = SlotRecord
::newUnsaved( $role, $stashedEdit->pstContent
);
// Default case: run PST on the submitted content. Note this passes $this->user, not $user.
795 $content = $slot->getContent();
796 $pstContent = $content->preSaveTransform( $title, $this->user
, $userPopts );
797 $pstSlot = SlotRecord
::newUnsaved( $role, $pstContent );
800 $pstContentSlots->setSlot( $pstSlot );
803 foreach ( $slotsUpdate->getRemovedRoles() as $role ) {
804 $pstContentSlots->removeSlot( $role );
// 'created': there was no current revision. 'changed': new page, or content differs from parent.
807 $this->options
['created'] = ( $parentRevision === null );
808 $this->options
['changed'] = ( $parentRevision === null
809 ||
!$pstContentSlots->hasSameContent( $parentRevision->getSlots() ) );
811 $this->doTransition( 'has-content' );
// Null-edit: make the target revision mirror the parent so it is treated as already saved.
813 if ( !$this->options
['changed'] ) {
816 // TODO: move this into MutableRevisionRecord
817 // TODO: This needs to behave differently for a forced dummy edit!
818 $this->revision
->setId( $parentRevision->getId() );
819 $this->revision
->setTimestamp( $parentRevision->getTimestamp() );
820 $this->revision
->setPageId( $parentRevision->getPageId() );
821 $this->revision
->setParentId( $parentRevision->getParentId() );
822 $this->revision
->setUser( $parentRevision->getUser( RevisionRecord
::RAW
) );
823 $this->revision
->setComment( $parentRevision->getComment( RevisionRecord
::RAW
) );
824 $this->revision
->setMinorEdit( $parentRevision->isMinor() );
825 $this->revision
->setVisibility( $parentRevision->getVisibility() );
827 // prepareUpdate() is redundant for null-edits
828 $this->doTransition( 'has-revision' );
// NOTE(review): whether this assignment sits in an else-branch is not visible in this excerpt.
830 $this->parentRevision
= $parentRevision;
835 * Returns the update's target revision - that is, the revision that will be the current
836 * revision after the update.
838 * @note Callers must treat the returned RevisionRecord's content as immutable, even
839 * if it is a MutableRevisionRecord instance. Other aspects of a MutableRevisionRecord
840 * returned from here, such as the user or the comment, may be changed, but may not
841 * be reflected in ParserOutput until after prepareUpdate() has been called.
843 * @todo This is currently used by PageUpdater::makeNewRevision() to construct an unsaved
844 * MutableRevisionRecord instance. Introduce something like an UnsavedRevisionFactory service
845 * for that purpose instead!
847 * @return RevisionRecord
// Returns the target revision; throws LogicException if none has been prepared.
849 public function getRevision() {
850 $this->assertPrepared( __METHOD__
);
851 return $this->revision
;
855 * @return RenderedRevision
// Returns the RenderedRevision for the target revision, creating it on first use and
// caching it in $this->renderedRevision.
857 public function getRenderedRevision() {
858 if ( !$this->renderedRevision
) {
// Requires a prepared revision; throws LogicException otherwise.
859 $this->assertPrepared( __METHOD__
);
861 // NOTE: we want a canonical rendering, so don't pass $this->user or ParserOptions
862 // NOTE: the revision is either new or current, so we can bypass audience checks.
// NOTE(review): the leading arguments of this call are not visible in this excerpt; only
// the hints array (use-master, audience) is shown.
863 $this->renderedRevision
= $this->revisionRenderer
->getRenderedRevision(
867 [ 'use-master' => $this->useMaster(), 'audience' => RevisionRecord
::RAW
]
871 return $this->renderedRevision
;
// Guard: throws LogicException unless $this->pageState has been populated.
// $method is the caller's name, appended to the error message.
874 private function assertHasPageState( $method ) {
875 if ( !$this->pageState
) {
876 throw new LogicException(
877 'Must call grabCurrentRevision() or prepareContent() '
878 . 'or prepareUpdate() before calling ' . $method
// Guard: throws LogicException unless a target revision ($this->revision) exists.
// $method is the caller's name, appended to the error message.
883 private function assertPrepared( $method ) {
884 if ( !$this->revision
) {
885 throw new LogicException(
886 'Must call prepareContent() or prepareUpdate() before calling ' . $method
// Guard: throws LogicException unless the target revision exists and has an ID.
// $method is the caller's name, appended to the error message.
// The null check comes first so that a call made before any revision was prepared raises
// the intended LogicException rather than a fatal "member function on null" error.
891 private function assertHasRevision( $method ) {
892 if ( !$this->revision || !$this->revision->getId() ) {
893 throw new LogicException(
894 'Must call prepareUpdate() before calling ' . $method
900 * Whether the edit creates the page.
// Returns the 'created' option, which prepareContent() and prepareUpdate() set.
// Throws LogicException if no revision has been prepared.
904 public function isCreation() {
905 $this->assertPrepared( __METHOD__
);
906 return $this->options
['created'];
910 * Whether the edit created, or should create, a new revision (that is, it's not a null-edit).
912 * @warning at present, "null-revisions" that do not change content but do have a revision
913 * record would return false after prepareContent(), but true after prepareUpdate()!
914 * This should probably be fixed.
// Returns the 'changed' option, which prepareContent() and prepareUpdate() set.
// Throws LogicException if no revision has been prepared.
918 public function isChange() {
919 $this->assertPrepared( __METHOD__
);
920 return $this->options
['changed'];
924 * Whether the page was a redirect before the edit.
// Returns whether the page was a redirect before the edit, computing the answer on demand
// and caching it in $this->pageState['oldIsRedirect'].
928 public function wasRedirect() {
// Throws LogicException if page state has not been captured yet.
929 $this->assertHasPageState( __METHOD__
);
// null means "not determined yet".
931 if ( $this->pageState
['oldIsRedirect'] === null ) {
932 /** @var RevisionRecord $rev */
933 $rev = $this->pageState
['oldRevision'];
// NOTE(review): the condition choosing between the next two assignments (presumably
// whether $rev is set) is not visible in this excerpt — confirm.
935 $this->pageState
['oldIsRedirect'] = $this->revisionIsRedirect( $rev );
937 $this->pageState
['oldIsRedirect'] = false;
941 return $this->pageState
['oldIsRedirect'];
945 * Returns the slots of the target revision, after PST.
947 * @note Callers must treat the returned RevisionSlots instance as immutable, even
948 * if it is a MutableRevisionSlots instance.
950 * @return RevisionSlots
// Returns the slots of the target revision; throws LogicException if none has been prepared.
952 public function getSlots() {
953 $this->assertPrepared( __METHOD__
);
954 return $this->revision
->getSlots();
958 * Returns the RevisionSlotsUpdate for this updater.
960 * @return RevisionSlotsUpdate
// Returns the slots update for the target revision. If prepareContent() did not supply one,
// it is derived once from the target revision's slots and the parent's slots, then cached.
962 private function getRevisionSlotsUpdate() {
// Throws LogicException if no revision has been prepared.
963 $this->assertPrepared( __METHOD__
);
965 if ( !$this->slotsUpdate
) {
// null parent (page creation) is passed through as null.
966 $old = $this->getParentRevision();
967 $this->slotsUpdate
= RevisionSlotsUpdate
::newFromRevisionSlots(
968 $this->revision
->getSlots(),
969 $old ?
$old->getSlots() : null
972 return $this->slotsUpdate
;
976 * Returns the role names of the slots touched by the new revision,
977 * including removed roles.
// Delegates to getTouchedRoles() on the slots update from getRevisionSlotsUpdate().
981 public function getTouchedSlotRoles() {
982 return $this->getRevisionSlotsUpdate()->getTouchedRoles();
986 * Returns the role names of the slots modified by the new revision,
987 * not including removed roles.
// Delegates to getModifiedRoles() on the slots update from getRevisionSlotsUpdate().
991 public function getModifiedSlotRoles() {
992 return $this->getRevisionSlotsUpdate()->getModifiedRoles();
996 * Returns the role names of the slots removed by the new revision.
// Delegates to getRemovedRoles() on the slots update from getRevisionSlotsUpdate().
1000 public function getRemovedSlotRoles() {
1001 return $this->getRevisionSlotsUpdate()->getRemovedRoles();
1005 * Prepare derived data updates targeting the given Revision.
1007 * Calling this method requires the given revision to be present in the database.
1008 * This may be right after a new revision has been created, or when re-generating
1009 * derived data e.g. in ApiPurge, RefreshLinksJob, and the refreshLinks
1012 * @see docs/pageupdater.txt for more information on when this method can and should be called.
1014 * @note Calling this method more than once with the same revision has no effect.
1015 * $options are only used for the first call. Calling this method multiple times with
1016 * different revisions will cause an exception.
1018 * @note If grabCurrentRevision() (or prepareContent()) has been called before
1019 * calling this method, $revision->getParentRevision() has to refer to the revision that
1020 * was the current revision at the time grabCurrentRevision() was called.
1022 * @param RevisionRecord $revision
1023 * @param array $options Array of options, following indexes are used:
1024 * - changed: bool, whether the revision changed the content (default true)
1025 * - created: bool, whether the revision created the page (default false)
1026 * - moved: bool, whether the page was moved (default false)
1027 * - restored: bool, whether the page was undeleted (default false)
1028 * - oldrevision: Revision object for the pre-update revision (default null)
1029 * - triggeringUser: The user triggering the update (UserIdentity, defaults to the
1030 * user who created the revision)
1031 * - oldredirect: bool, null, or string 'no-change' (default null):
1032 * - bool: whether the page was counted as a redirect before that
1033 * revision, only used in changed is true and created is false
1034 * - null or 'no-change': don't update the redirect status.
1035 * - oldcountable: bool, null, or string 'no-change' (default null):
1036 * - bool: whether the page was counted as an article before that
1037 * revision, only used in changed is true and created is false
1038 * - null: if created is false, don't update the article count; if created
1039 * is true, do update the article count
1040 * - 'no-change': don't update the article count, ever
1041 * When set to null, pageState['oldCountable'] will be used instead if available.
1042 * - causeAction: an arbitrary string identifying the reason for the update.
1043 * See DataUpdate::getCauseAction(). (default 'unknown')
1044 * - causeAgent: name of the user who caused the update. See DataUpdate::getCauseAgent().
1045 * (string, default 'unknown')
1047 public function prepareUpdate( RevisionRecord
$revision, array $options = [] ) {
1049 !isset( $options['oldrevision'] )
1050 ||
$options['oldrevision'] instanceof Revision
1051 ||
$options['oldrevision'] instanceof RevisionRecord
,
1052 '$options["oldrevision"]',
1053 'must be a RevisionRecord (or Revision)'
1056 !isset( $options['triggeringUser'] )
1057 ||
$options['triggeringUser'] instanceof UserIdentity
,
1058 '$options["triggeringUser"]',
1059 'must be a UserIdentity'
1062 if ( !$revision->getId() ) {
1063 throw new InvalidArgumentException(
1064 'Revision must have an ID set for it to be used with prepareUpdate()!'
1068 if ( $this->revision
&& $this->revision
->getId() ) {
1069 if ( $this->revision
->getId() === $revision->getId() ) {
1070 return; // nothing to do!
1072 throw new LogicException(
1073 'Trying to re-use DerivedPageDataUpdater with revision '
1074 . $revision->getId()
1075 . ', but it\'s already bound to revision '
1076 . $this->revision
->getId()
1081 if ( $this->revision
1082 && !$this->revision
->getSlots()->hasSameContent( $revision->getSlots() )
1084 throw new LogicException(
1085 'The Revision provided has mismatching content!'
1089 // Override fields defined in $this->options with values from $options.
1090 $this->options
= array_intersect_key( $options, $this->options
) +
$this->options
;
1092 if ( isset( $this->pageState
['oldId'] ) ) {
1093 $oldId = $this->pageState
['oldId'];
1094 } elseif ( isset( $this->options
['oldrevision'] ) ) {
1095 /** @var Revision|RevisionRecord $oldRev */
1096 $oldRev = $this->options
['oldrevision'];
1097 $oldId = $oldRev->getId();
1099 $oldId = $revision->getParentId();
1102 if ( $oldId !== null ) {
1103 // XXX: what if $options['changed'] disagrees?
1104 // MovePage creates a dummy revision with changed = false!
1105 // We may want to explicitly distinguish between "no new revision" (null-edit)
1106 // and "new revision without new content" (dummy revision).
1108 if ( $oldId === $revision->getParentId() ) {
1109 // NOTE: this may still be a NullRevision!
1111 $this->options
['changed'] = true;
1112 } elseif ( $oldId === $revision->getId() ) {
1114 $this->options
['changed'] = false;
1116 // This indicates that calling code has given us the wrong Revision object
1117 throw new LogicException(
1118 'The Revision mismatches old revision ID: '
1119 . 'Old ID is ' . $oldId
1120 . ', parent ID is ' . $revision->getParentId()
1121 . ', revision ID is ' . $revision->getId()
1126 // If prepareContent() was used to generate the PST content (which is indicated by
1127 // $this->slotsUpdate being set), and this is not a null-edit, then the given
1128 // revision must have the acting user as the revision author. Otherwise, user
1129 // signatures generated by PST would mismatch the user in the revision record.
1130 if ( $this->user
!== null && $this->options
['changed'] && $this->slotsUpdate
) {
1131 $user = $revision->getUser();
1132 if ( !$this->user
->equals( $user ) ) {
1133 throw new LogicException(
1134 'The Revision provided has a mismatching actor: expected '
1135 . $this->user
->getName()
1142 // If $this->pageState was not yet initialized by grabCurrentRevision or prepareContent,
1143 // emulate the state of the page table before the edit, as good as we can.
1144 if ( !$this->pageState
) {
1145 $this->pageState
= [
1146 'oldIsRedirect' => isset( $this->options
['oldredirect'] )
1147 && is_bool( $this->options
['oldredirect'] )
1148 ?
$this->options
['oldredirect']
1150 'oldCountable' => isset( $this->options
['oldcountable'] )
1151 && is_bool( $this->options
['oldcountable'] )
1152 ?
$this->options
['oldcountable']
1156 if ( $this->options
['changed'] ) {
1157 // The edit created a new revision
1158 $this->pageState
['oldId'] = $revision->getParentId();
1160 if ( isset( $this->options
['oldrevision'] ) ) {
1161 $rev = $this->options
['oldrevision'];
1162 $this->pageState
['oldRevision'] = $rev instanceof Revision
1163 ?
$rev->getRevisionRecord()
1167 // This is a null-edit, so the old revision IS the new revision!
1168 $this->pageState
['oldId'] = $revision->getId();
1169 $this->pageState
['oldRevision'] = $revision;
1173 // "created" is forced here
1174 $this->options
['created'] = ( $this->pageState
['oldId'] === 0 );
1176 $this->revision
= $revision;
1178 $this->doTransition( 'has-revision' );
1180 // NOTE: in case we have a User object, don't override with a UserIdentity.
1181 // We already checked that $revision->getUser() matches $this->user;
1182 if ( !$this->user
) {
1183 $this->user
= $revision->getUser( RevisionRecord
::RAW
);
1186 // Prune any output that depends on the revision ID.
1187 if ( $this->renderedRevision
) {
1188 $this->renderedRevision
->updateRevision( $revision );
1191 // TODO: optionally get ParserOutput from the ParserCache here.
1192 // Move the logic used by RefreshLinksJob here!
1196 * @deprecated This only exists for B/C, use the getters on DerivedPageDataUpdater directly!
1197 * @return PreparedEdit
1199 public function getPreparedEdit() {
// Builds a legacy PreparedEdit value object and fills its public fields from this
// updater's getters (parser options, parser output, main slot content, revision ID).
1200 $this->assertPrepared( __METHOD__
);
1202 $slotsUpdate = $this->getRevisionSlotsUpdate();
1203 $preparedEdit = new PreparedEdit();
1205 $preparedEdit->popts
= $this->getCanonicalParserOptions();
1206 $preparedEdit->output
= $this->getCanonicalParserOutput();
1207 $preparedEdit->pstContent
= $this->revision
->getContent( SlotRecord
::MAIN
);
// newContent: the modified main slot's content if the slots update touches the main
// slot, otherwise the same main slot content that was used for pstContent.
1208 $preparedEdit->newContent
=
1209 $slotsUpdate->isModifiedSlot( SlotRecord
::MAIN
)
1210 ?
$slotsUpdate->getModifiedSlot( SlotRecord
::MAIN
)->getContent()
1211 : $this->revision
->getContent( SlotRecord
::MAIN
); // XXX: can we just remove this?
1212 $preparedEdit->oldContent
= null; // unused. // XXX: could get this from the parent revision
// NOTE(review): $this->revision is already dereferenced without a null check above
// (pstContent), so the null guard below cannot be what protects against a missing revision.
1213 $preparedEdit->revid
= $this->revision ?
$this->revision
->getId() : null;
// Timestamp and format are derived from the fields assigned above.
1214 $preparedEdit->timestamp
= $preparedEdit->output
->getCacheTime();
1215 $preparedEdit->format
= $preparedEdit->pstContent
->getDefaultFormat();
1217 return $preparedEdit;
1221 * @param string $role
1222 * @param bool $generateHtml
1223 * @return ParserOutput
1225 public function getSlotParserOutput( $role, $generateHtml = true ) {
// Delegates to the object returned by getRenderedRevision(); $generateHtml is forwarded
// as the 'generate-html' entry of the hints array.
1226 return $this->getRenderedRevision()->getSlotParserOutput(
1228 [ 'generate-html' => $generateHtml ]
1233 * @return ParserOutput
1235 public function getCanonicalParserOutput() {
// The canonical output is the revision-level ParserOutput of the rendered revision,
// as opposed to the per-slot output returned by getSlotParserOutput().
1236 return $this->getRenderedRevision()->getRevisionParserOutput();
1240 * @return ParserOptions
1242 public function getCanonicalParserOptions() {
// Returns the options object held by the rendered revision.
1243 return $this->getRenderedRevision()->getOptions();
1247 * @param bool $recursive
1249 * @return DeferrableUpdate[]
1251 public function getSecondaryDataUpdates( $recursive = false ) {
// Collects the updates to run for the current revision, in this order:
//  1. one LinksUpdate for the combined canonical ParserOutput,
//  2. per-slot updates from each slot's content handler, plus legacy updates from the
//     Content object itself (with LinksUpdate instances filtered out),
//  3. deletion updates for every role reported by getRemovedSlotRoles(), again from
//     both the handler and the legacy Content method.
// The collected list is then offered to the 'RevisionDataUpdates' hook.
1252 if ( $this->isContentDeleted() ) {
1253 // This shouldn't happen, since the current content is always public,
1254 // and DataUpdates are only needed for current content.
1258 $output = $this->getCanonicalParserOutput();
1260 // Construct a LinksUpdate for the combined canonical output.
1261 $linksUpdate = new LinksUpdate(
1267 $allUpdates = [ $linksUpdate ];
1269 // NOTE: Run updates for all slots, not just the modified slots! Otherwise,
1270 // info for an inherited slot may end up being removed. This is also needed
1271 // to ensure that purges are effective.
1272 $renderedRevision = $this->getRenderedRevision();
1273 foreach ( $this->getSlots()->getSlotRoles() as $role ) {
1274 $slot = $this->getRawSlot( $role );
1275 $content = $slot->getContent();
1276 $handler = $content->getContentHandler();
1278 $updates = $handler->getSecondaryDataUpdates(
1284 $allUpdates = array_merge( $allUpdates, $updates );
1286 // TODO: remove B/C hack in 1.32!
1287 // NOTE: we assume that the combined output contains all relevant meta-data for
1289 $legacyUpdates = $content->getSecondaryDataUpdates(
1296 // HACK: filter out redundant and incomplete LinksUpdates
// array_filter() preserves the original keys; the array_merge() that follows
// renumbers integer keys, so the gaps left by filtering do not matter.
1297 $legacyUpdates = array_filter( $legacyUpdates, function ( $update ) {
1298 return !( $update instanceof LinksUpdate
);
1301 $allUpdates = array_merge( $allUpdates, $legacyUpdates );
1304 // XXX: if a slot was removed by an earlier edit, but deletion updates failed to run at
1305 // that time, we don't know for which slots to run deletion updates when purging a page.
1306 // We'd have to examine the entire history of the page to determine that. Perhaps there
1307 // could be a "try extra hard" mode for that case that would run a DB query to find all
1308 // roles/models ever used on the page. On the other hand, removing slots should be quite
1309 // rare, so perhaps this isn't worth the trouble.
1311 // TODO: consolidate with similar logic in WikiPage::getDeletionUpdates()
1312 $wikiPage = $this->getWikiPage();
1313 $parentRevision = $this->getParentRevision();
1314 foreach ( $this->getRemovedSlotRoles() as $role ) {
1315 // HACK: we should get the content model of the removed slot from a SlotRoleHandler!
1316 // For now, find the slot in the parent revision - if the slot was removed, it should
1317 // always exist in the parent revision.
// RevisionRecord::RAW is passed as the audience argument of getSlot().
1318 $parentSlot = $parentRevision->getSlot( $role, RevisionRecord
::RAW
);
1319 $content = $parentSlot->getContent();
1320 $handler = $content->getContentHandler();
1322 $updates = $handler->getDeletionUpdates(
1326 $allUpdates = array_merge( $allUpdates, $updates );
1328 // TODO: remove B/C hack in 1.32!
1329 $legacyUpdates = $content->getDeletionUpdates( $wikiPage );
1331 // HACK: filter out redundant and incomplete LinksDeletionUpdate
1332 $legacyUpdates = array_filter( $legacyUpdates, function ( $update ) {
1333 return !( $update instanceof LinksDeletionUpdate
);
1336 $allUpdates = array_merge( $allUpdates, $legacyUpdates );
1339 // TODO: hard deprecate SecondaryDataUpdates in favor of RevisionDataUpdates in 1.33!
// $allUpdates is handed over by reference, so hook handlers can add to or change the list.
// NOTE(review): the line that invokes the hook is not visible in this excerpt.
1341 'RevisionDataUpdates',
1342 [ $this->getTitle(), $renderedRevision, &$allUpdates ]
1349 * Do standard updates after page edit, purge, or import.
1350 * Update links tables, site stats, search index, title cache, message cache, etc.
1351 * Purges pages that depend on this page when appropriate.
1352 * With a 10% chance, triggers pruning the recent changes table.
1354 * @note prepareUpdate() must be called before calling this method!
1356 * MCR migration note: this replaces WikiPage::doEditUpdates.
1358 public function doUpdates() {
// Overview of what this method triggers, in order: parser cache save, secondary data
// updates (deferred to POSTSEND), an optional CategoryMembershipChangeJob, the legacy
// ArticleEditUpdates hooks, an occasional recentchanges purge job, a SiteStatsUpdate,
// a SearchUpdate, the newtalk notification, the message cache override, the
// WikiPage::onArticleCreate/onArticleEdit callbacks and the ResourceLoader module
// cache invalidation. It ends by transitioning to the 'done' stage.
1359 $this->assertTransition( 'done' );
1361 // TODO: move logic into a PageEventEmitter service
1363 $wikiPage = $this->getWikiPage(); // TODO: use only for legacy hooks!
// Legacy User and Revision wrappers; used further down for the newtalk checks,
// setNewtalk(), WikiPage::onArticleEdit() and the ResourceLoader invalidation.
1365 $legacyUser = User
::newFromIdentity( $this->user
);
1366 $legacyRevision = new Revision( $this->revision
);
1368 $this->doParserCacheUpdate();
1370 $this->doSecondaryDataUpdates( [
1371 // T52785 do not update any other pages on a null edit
1372 'recursive' => $this->options
['changed'],
1373 'defer' => DeferredUpdates
::POSTSEND
,
1376 // TODO: MCR: check if *any* changed slot supports categories!
// The category membership job is only queued when the feature flag is set, the main
// slot's content handler supports categories, the page was changed or created, and
// the 'restored' option is not set.
1377 if ( $this->rcWatchCategoryMembership
1378 && $this->getContentHandler( SlotRecord
::MAIN
)->supportsCategories() === true
1379 && ( $this->options
['changed'] ||
$this->options
['created'] )
1380 && !$this->options
['restored']
1382 // Note: jobs are pushed after deferred updates, so the job should be able to see
1383 // the recent change entry (also done via deferred updates) and carry over any
1384 // bot/deletion/IP flags, etc.
1385 $this->jobQueueGroup
->lazyPush(
1386 new CategoryMembershipChangeJob(
1389 'pageId' => $this->getPageId(),
1390 'revTimestamp' => $this->revision
->getTimestamp(),
1396 // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
1397 $editInfo = $this->getPreparedEdit();
1398 Hooks
::run( 'ArticleEditUpdates', [ &$wikiPage, &$editInfo, $this->options
['changed'] ] );
1400 // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
1401 if ( Hooks
::run( 'ArticleEditUpdatesDeleteFromRecentchanges', [ &$wikiPage ] ) ) {
1402 // Flush old entries from the `recentchanges` table
// mt_rand( 0, 9 ) returns one of ten values, so the purge job is queued for roughly
// one in ten calls.
1403 if ( mt_rand( 0, 9 ) == 0 ) {
1404 $this->jobQueueGroup
->lazyPush( RecentChangesUpdateJob
::newPurgeJob() );
1408 $id = $this->getPageId();
1409 $title = $this->getTitle();
1410 $dbKey = $title->getPrefixedDBkey();
1411 $shortTitle = $title->getDBkey();
1413 if ( !$title->exists() ) {
// NOTE(review): presumably the method returns right after the transition below; the
// return statement is not visible in this excerpt.
1414 wfDebug( __METHOD__
. ": Page doesn't exist any more, bailing out\n" );
1416 $this->doTransition( 'done' );
// $good is the delta reported as 'articles' to SiteStatsUpdate: for a newly created
// page it is the new countable state, otherwise the new countable state minus the old
// one, taken from the 'oldcountable' option or, failing that, from the page state.
1420 if ( $this->options
['oldcountable'] === 'no-change' ||
1421 ( !$this->options
['changed'] && !$this->options
['moved'] )
1424 } elseif ( $this->options
['created'] ) {
1425 $good = (int)$this->isCountable();
1426 } elseif ( $this->options
['oldcountable'] !== null ) {
1427 $good = (int)$this->isCountable()
1428 - (int)$this->options
['oldcountable'];
1429 } elseif ( isset( $this->pageState
['oldCountable'] ) ) {
1430 $good = (int)$this->isCountable()
1431 - (int)$this->pageState
['oldCountable'];
// One edit is counted when the page changed, one page when it was created.
1435 $edits = $this->options
['changed'] ?
1 : 0;
1436 $pages = $this->options
['created'] ?
1 : 0;
1438 DeferredUpdates
::addUpdate( SiteStatsUpdate
::factory(
1439 [ 'edits' => $edits, 'articles' => $good, 'pages' => $pages ]
1442 // TODO: make search infrastructure aware of slots!
// The search index is only refreshed when the main slot is not inherited and the
// content is not deleted.
1443 $mainSlot = $this->revision
->getSlot( SlotRecord
::MAIN
);
1444 if ( !$mainSlot->isInherited() && !$this->isContentDeleted() ) {
1445 DeferredUpdates
::addUpdate( new SearchUpdate( $id, $dbKey, $mainSlot->getContent() ) );
1448 // If this is another user's talk page, update newtalk.
1449 // Don't do this if $options['changed'] = false (null-edits) nor if
1450 // it's a minor edit and the user making the edit doesn't generate notifications for those.
1451 if ( $this->options
['changed']
1452 && $title->getNamespace() == NS_USER_TALK
1453 && $shortTitle != $legacyUser->getTitleKey()
1454 && !( $this->revision
->isMinor() && $legacyUser->isAllowed( 'nominornewtalk' ) )
// The recipient is looked up from the talk page's DB key; false is passed as the
// validation argument of User::newFromName().
1456 $recipient = User
::newFromName( $shortTitle, false );
1457 if ( !$recipient ) {
1458 wfDebug( __METHOD__
. ": invalid username\n" );
1460 // Allow extensions to prevent user notification
1461 // when a new message is added to their talk page
1462 // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
1463 if ( Hooks
::run( 'ArticleEditUpdateNewTalk', [ &$wikiPage, $recipient ] ) ) {
// Both the IP branch and the logged-in branch make the same setNewtalk() call; only a
// recipient that is neither gets the debug message instead.
1464 if ( User
::isIP( $shortTitle ) ) {
1465 // An anonymous user
1466 $recipient->setNewtalk( true, $legacyRevision );
1467 } elseif ( $recipient->isLoggedIn() ) {
1468 $recipient->setNewtalk( true, $legacyRevision );
1470 wfDebug( __METHOD__
. ": don't need to notify a nonexistent user\n" );
// Pages in the MediaWiki namespace whose main slot was modified update the message
// cache override; null is passed instead of the content when the content is deleted.
1476 if ( $title->getNamespace() == NS_MEDIAWIKI
1477 && $this->getRevisionSlotsUpdate()->isModifiedSlot( SlotRecord
::MAIN
)
1479 $mainContent = $this->isContentDeleted() ?
null : $this->getRawContent( SlotRecord
::MAIN
);
1481 $this->messageCache
->updateMessageOverride( $title, $mainContent );
1484 // TODO: move onArticleCreate and onArticle into a PageEventEmitter service
1485 if ( $this->options
['created'] ) {
1486 WikiPage
::onArticleCreate( $title );
1487 } elseif ( $this->options
['changed'] ) { // T52785
1488 WikiPage
::onArticleEdit( $title, $legacyRevision, $this->getTouchedSlotRoles() );
// The parent revision may be absent, in which case null is passed as the old revision.
1491 $oldRevision = $this->getParentRevision();
1492 $oldLegacyRevision = $oldRevision ?
new Revision( $oldRevision ) : null;
1494 // TODO: In the wiring, register a listener for this on the new PageEventEmitter
// Falls back to wfWikiID() when getWikiId() returns a falsy value.
1495 ResourceLoaderWikiModule
::invalidateModuleCache(
1496 $title, $oldLegacyRevision, $legacyRevision, $this->getWikiId() ?
: wfWikiID()
1499 $this->doTransition( 'done' );
1503 * Do secondary data updates (such as updating link tables).
1505 * MCR note: this method is temporarily exposed via WikiPage::doSecondaryDataUpdates.
1507 * @param array $options
1508 * - recursive: make the update recursive, i.e. also update pages which transclude the
1509 * current page or otherwise depend on it (default: false)
1510 * - defer: one of the DeferredUpdates constants, or false to run immediately after waiting
1511 * for replication of the changes from the SecondaryDataUpdates hooks (default: false)
1512 * - transactionTicket: a transaction ticket from LBFactory::getEmptyTransactionTicket(),
1513 * only when defer is false (default: null)
1516 public function doSecondaryDataUpdates( array $options = [] ) {
// Fetches the updates from getSecondaryDataUpdates(), annotates them (cause, revision,
// triggering user, transaction ticket) and then either runs each one immediately or
// queues it with DeferredUpdates, depending on $options['defer'].
1517 $this->assertHasRevision( __METHOD__
);
// NOTE(review): the entries below look like the default option values; the lines that
// merge them into $options are not visible in this excerpt.
1519 'recursive' => false,
1521 'transactionTicket' => null,
// in_array() is called in strict mode here, so 'defer' must be exactly false or one of
// the two DeferredUpdates constants.
1523 $deferValues = [ false, DeferredUpdates
::PRESEND
, DeferredUpdates
::POSTSEND
];
1524 if ( !in_array( $options['defer'], $deferValues, true ) ) {
1525 throw new InvalidArgumentException( 'invalid value for defer: ' . $options['defer'] );
1527 Assert
::parameterType( 'integer|null', $options['transactionTicket'],
1528 '$options[\'transactionTicket\']' );
1530 $updates = $this->getSecondaryDataUpdates( $options['recursive'] );
// The triggering user comes from the 'triggeringUser' option, falling back to
// $this->user; anything that is not already a User is converted via newFromIdentity().
1532 $triggeringUser = $this->options
['triggeringUser'] ??
$this->user
;
1533 if ( !$triggeringUser instanceof User
) {
1534 $triggeringUser = User
::newFromIdentity( $triggeringUser );
// Cause action and agent default to the string 'unknown' when the options are unset.
1536 $causeAction = $this->options
['causeAction'] ??
'unknown';
1537 $causeAgent = $this->options
['causeAgent'] ??
'unknown';
1538 $legacyRevision = new Revision( $this->revision
);
1540 if ( $options['defer'] === false && $options['transactionTicket'] !== null ) {
1541 // For legacy hook handlers doing updates via LinksUpdateConstructed, make sure
1542 // any pending writes they made get flushed before the doUpdate() calls below.
1543 // This avoids snapshot-clearing errors in LinksUpdate::acquirePageLock().
1544 $this->loadbalancerFactory
->commitAndWaitForReplication(
1545 __METHOD__
, $options['transactionTicket']
1549 foreach ( $updates as $update ) {
// The cause is set on every DataUpdate; revision and triggering user only on LinksUpdate.
1550 if ( $update instanceof DataUpdate
) {
1551 $update->setCause( $causeAction, $causeAgent );
1553 if ( $update instanceof LinksUpdate
) {
1554 $update->setRevision( $legacyRevision );
1555 $update->setTriggeringUser( $triggeringUser );
// defer === false: run the update right away, handing over the transaction ticket when
// one was given. Otherwise the update is queued with the requested 'defer' value.
1557 if ( $options['defer'] === false ) {
1558 if ( $options['transactionTicket'] !== null ) {
1559 $update->setTransactionTicket( $options['transactionTicket'] );
1561 $update->doUpdate();
1563 DeferredUpdates
::addUpdate( $update, $options['defer'] );
1568 public function doParserCacheUpdate() {
// Saves the canonical ParserOutput of the current revision to the parser cache, keyed
// by the WikiPage and the canonical parser options.
1569 $this->assertHasRevision( __METHOD__
);
1571 $wikiPage = $this->getWikiPage(); // TODO: ParserCache should accept a RevisionRecord instead
1573 // NOTE: this may trigger the first parsing of the new content after an edit (when not
1574 // using pre-generated stashed output).
1575 // XXX: we may want to use the PoolCounter here. This would perhaps allow the initial parse
1576 // to be performed post-send. The client could already follow a HTTP redirect to the
1577 // page view, but would then have to wait for a response until rendering is complete.
1578 $output = $this->getCanonicalParserOutput();
1580 // Save it to the parser cache. Use the revision timestamp in the case of a
1581 // freshly saved edit, as that matches page_touched and a mismatch would trigger an
1582 // unnecessary reparse.
// When the 'changed' option is false, the output's own timestamp is used instead.
1583 $timestamp = $this->options
['changed'] ?
$this->revision
->getTimestamp()
1584 : $output->getTimestamp();
// The revision ID is passed as the last visible argument of save().
1585 $this->parserCache
->save(
1586 $output, $wikiPage, $this->getCanonicalParserOptions(),
1587 $timestamp, $this->revision
->getId()