Avoid extra parse/save delay for users with non-canonical parser options
authorAaron Schulz <aschulz@wikimedia.org>
Fri, 26 Oct 2018 22:42:26 +0000 (15:42 -0700)
committerKrinkle <krinklemail@gmail.com>
Sun, 14 Apr 2019 02:00:16 +0000 (02:00 +0000)
If {{REVISIONID}} results in a re-parse, that re-parse will be post-send
unless the user has canonical parser options and will need the output for
page views anyway (e.g. the refresh after editing).

Also make getPreparedEdit() allow lazy-loading of the parser output via
a callback. A magic __get() method handles objects created the new way
but accessed by other code the old way.

Bug: T216306
Change-Id: I2012437c45dd605a6c0868dea47cf43dc67061d8

includes/Storage/DerivedPageDataUpdater.php
includes/edit/PreparedEdit.php
includes/parser/ParserOptions.php
tests/phpunit/includes/Storage/DerivedPageDataUpdaterTest.php
tests/phpunit/includes/Storage/PreparedEditTest.php [new file with mode: 0644]
tests/phpunit/includes/parser/ParserOptionsTest.php

index c4aec13..3dbe0a8 100644 (file)
@@ -1239,7 +1239,7 @@ class DerivedPageDataUpdater implements IDBAccessObject {
                $preparedEdit = new PreparedEdit();
 
                $preparedEdit->popts = $this->getCanonicalParserOptions();
-               $preparedEdit->output = $this->getCanonicalParserOutput();
+               $preparedEdit->parserOutputCallback = [ $this, 'getCanonicalParserOutput' ];
                $preparedEdit->pstContent = $this->revision->getContent( SlotRecord::MAIN );
                $preparedEdit->newContent =
                        $slotsUpdate->isModifiedSlot( SlotRecord::MAIN )
@@ -1401,13 +1401,31 @@ class DerivedPageDataUpdater implements IDBAccessObject {
                $legacyUser = User::newFromIdentity( $this->user );
                $legacyRevision = new Revision( $this->revision );
 
-               $this->doParserCacheUpdate();
+               $userParserOptions = ParserOptions::newFromUser( $legacyUser );
+               // Decide whether to save the final canonical parser ouput based on the fact that
+               // users are typically redirected to viewing pages right after they edit those pages.
+               // Due to vary-revision-id, getting/saving that output here might require a reparse.
+               if ( $userParserOptions->matchesForCacheKey( $this->getCanonicalParserOptions() ) ) {
+                       // Whether getting the final output requires a reparse or not, the user will
+                       // need canonical output anyway, since that is what their parser options use.
+                       // A reparse now at least has the benefit of various warm process caches.
+                       $this->doParserCacheUpdate();
+               } else {
+                       // If the user does not have canonical parse options, then don't risk another parse
+                       // to make output they cannot use on the page refresh that typically occurs after
+                       // editing. Doing the parser output save post-send will still benefit *other* users.
+                       DeferredUpdates::addCallableUpdate( function () {
+                               $this->doParserCacheUpdate();
+                       } );
+               }
 
-               $this->doSecondaryDataUpdates( [
-                       // T52785 do not update any other pages on a null edit
-                       'recursive' => $this->options['changed'],
-                       'defer' => DeferredUpdates::POSTSEND,
-               ] );
+               // Defer the getCannonicalParserOutput() call triggered by getSecondaryDataUpdates()
+               DeferredUpdates::addCallableUpdate( function () {
+                       $this->doSecondaryDataUpdates( [
+                               // T52785 do not update any other pages on a null edit
+                               'recursive' => $this->options['changed']
+                       ] );
+               } );
 
                // TODO: MCR: check if *any* changed slot supports categories!
                if ( $this->rcWatchCategoryMembership
@@ -1427,8 +1445,11 @@ class DerivedPageDataUpdater implements IDBAccessObject {
                }
 
                // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
+               // @note: Extensions should *avoid* calling getCannonicalParserOutput() when using
+               // this hook whenever possible in order to avoid unnecessary additional parses.
                $editInfo = $this->getPreparedEdit();
-               Hooks::run( 'ArticleEditUpdates', [ &$wikiPage, &$editInfo, $this->options['changed'] ] );
+               Hooks::run( 'ArticleEditUpdates',
+                       [ &$wikiPage, &$editInfo, $this->options['changed'] ] );
 
                // TODO: replace legacy hook! Use a listener on PageEventEmitter instead!
                if ( Hooks::run( 'ArticleEditUpdatesDeleteFromRecentchanges', [ &$wikiPage ] ) ) {
index 7007316..88eae36 100644 (file)
@@ -22,6 +22,7 @@ namespace MediaWiki\Edit;
 
 use Content;
 use ParserOptions;
+use RuntimeException;
 use ParserOutput;
 
 /**
@@ -32,7 +33,6 @@ use ParserOutput;
  * @since 1.30
  */
 class PreparedEdit {
-
        /**
         * Time this prepared edit was made
         *
@@ -73,7 +73,7 @@ class PreparedEdit {
         *
         * @var ParserOutput|null
         */
-       public $output;
+       private $canonicalOutput;
 
        /**
         * Content that is being saved (before PST)
@@ -89,4 +89,36 @@ class PreparedEdit {
         */
        public $oldContent;
 
+       /**
+        * Lazy-loading callback to get canonical ParserOutput object
+        *
+        * @var callable
+        */
+       public $parserOutputCallback;
+
+       /**
+        * @return ParserOutput Canonical parser output
+        */
+       public function getOutput() {
+               if ( !$this->canonicalOutput ) {
+                       $this->canonicalOutput = call_user_func( $this->parserOutputCallback );
+               }
+
+               return $this->canonicalOutput;
+       }
+
+       /**
+        * Fetch the ParserOutput via a lazy-loaded callback (for backwards compatibility).
+        *
+        * @deprecated since 1.33
+        * @param string $name
+        * @return mixed
+        */
+       function __get( $name ) {
+               if ( $name === 'output' ) {
+                       return $this->getOutput();
+               }
+
+               throw new RuntimeException( "Undefined field $name." );
+       }
 }
index bdca848..66b1612 100644 (file)
@@ -123,7 +123,7 @@ class ParserOptions {
         */
 
        /**
-        * Fetch an option, generically
+        * Fetch an option and track that is was accessed
         * @since 1.30
         * @param string $name Option name
         * @return mixed
@@ -133,15 +133,22 @@ class ParserOptions {
                        throw new InvalidArgumentException( "Unknown parser option $name" );
                }
 
-               if ( isset( self::$lazyOptions[$name] ) && $this->options[$name] === null ) {
-                       $this->options[$name] = call_user_func( self::$lazyOptions[$name], $this, $name );
-               }
+               $this->lazyLoadOption( $name );
                if ( !empty( self::$inCacheKey[$name] ) ) {
                        $this->optionUsed( $name );
                }
                return $this->options[$name];
        }
 
+       /**
+        * @param string $name Lazy load option without tracking usage
+        */
+       private function lazyLoadOption( $name ) {
+               if ( isset( self::$lazyOptions[$name] ) && $this->options[$name] === null ) {
+                       $this->options[$name] = call_user_func( self::$lazyOptions[$name], $this, $name );
+               }
+       }
+
        /**
         * Set an option, generically
         * @since 1.30
@@ -1197,22 +1204,16 @@ class ParserOptions {
         * @since 1.25
         */
        public function matches( ParserOptions $other ) {
-               // Populate lazy options
-               foreach ( self::$lazyOptions as $name => $callback ) {
-                       if ( $this->options[$name] === null ) {
-                               $this->options[$name] = call_user_func( $callback, $this, $name );
-                       }
-                       if ( $other->options[$name] === null ) {
-                               $other->options[$name] = call_user_func( $callback, $other, $name );
-                       }
-               }
-
                // Compare most options
                $options = array_keys( $this->options );
                $options = array_diff( $options, [
                        'enableLimitReport', // only affects HTML comments
                ] );
                foreach ( $options as $option ) {
+                       // Resolve any lazy options
+                       $this->lazyLoadOption( $option );
+                       $other->lazyLoadOption( $option );
+
                        $o1 = $this->optionToString( $this->options[$option] );
                        $o2 = $this->optionToString( $other->options[$option] );
                        if ( $o1 !== $o2 ) {
@@ -1238,6 +1239,27 @@ class ParserOptions {
                return true;
        }
 
+       /**
+        * @param ParserOptions $other
+        * @return bool Whether the cache key relevant options match those of $other
+        * @since 1.33
+        */
+       public function matchesForCacheKey( ParserOptions $other ) {
+               foreach ( self::allCacheVaryingOptions() as $option ) {
+                       // Populate any lazy options
+                       $this->lazyLoadOption( $option );
+                       $other->lazyLoadOption( $option );
+
+                       $o1 = $this->optionToString( $this->options[$option] );
+                       $o2 = $this->optionToString( $other->options[$option] );
+                       if ( $o1 !== $o2 ) {
+                               return false;
+                       }
+               }
+
+               return true;
+       }
+
        /**
         * Registers a callback for tracking which ParserOptions which are used.
         * This is a private API with the parser.
@@ -1314,10 +1336,9 @@ class ParserOptions {
                $inCacheKey = self::allCacheVaryingOptions();
 
                // Resolve any lazy options
-               foreach ( array_intersect( $forOptions, $inCacheKey, array_keys( self::$lazyOptions ) ) as $k ) {
-                       if ( $this->options[$k] === null ) {
-                               $this->options[$k] = call_user_func( self::$lazyOptions[$k], $this, $k );
-                       }
+               $lazyOpts = array_intersect( $forOptions, $inCacheKey, array_keys( self::$lazyOptions ) );
+               foreach ( $lazyOpts as $k ) {
+                       $this->lazyLoadOption( $k );
                }
 
                $options = $this->options;
index 92c6f62..cd19cca 100644 (file)
@@ -24,6 +24,7 @@ use User;
 use Wikimedia\TestingAccessWrapper;
 use WikiPage;
 use WikitextContent;
+use DeferredUpdates;
 
 /**
  * @group Database
@@ -60,16 +61,20 @@ class DerivedPageDataUpdaterTest extends MediaWikiTestCase {
 
        /**
         * @param string|Title|WikiPage $page
+        * @param RevisionRecord|null $rec
+        * @param User|null $user
         *
         * @return DerivedPageDataUpdater
         */
-       private function getDerivedPageDataUpdater( $page, RevisionRecord $rec = null ) {
+       private function getDerivedPageDataUpdater(
+               $page, RevisionRecord $rec = null, User $user = null
+       ) {
                if ( is_string( $page ) || $page instanceof Title ) {
                        $page = $this->getPage( $page );
                }
 
                $page = TestingAccessWrapper::newFromObject( $page );
-               return $page->getDerivedDataUpdater( null, $rec );
+               return $page->getDerivedDataUpdater( $user, $rec );
        }
 
        /**
@@ -78,11 +83,12 @@ class DerivedPageDataUpdaterTest extends MediaWikiTestCase {
         * @param WikiPage $page
         * @param string|Message|CommentStoreComment $summary
         * @param null|string|Content $content
+        * @param User|null $user
         *
         * @return RevisionRecord|null
         */
-       private function createRevision( WikiPage $page, $summary, $content = null ) {
-               $user = $this->getTestUser()->getUser();
+       private function createRevision( WikiPage $page, $summary, $content = null, $user = null ) {
+               $user = $user ?: $this->getTestUser()->getUser();
                $comment = CommentStoreComment::newUnsavedComment( $summary );
 
                if ( $content === null || is_string( $content ) ) {
@@ -945,6 +951,68 @@ class DerivedPageDataUpdaterTest extends MediaWikiTestCase {
                // TODO: test category membership update (with setRcWatchCategoryMembership())
        }
 
+       /**
+        * @covers \MediaWiki\Storage\DerivedPageDataUpdater::doUpdates()
+        * @covers \MediaWiki\Storage\DerivedPageDataUpdater::doSecondaryDataUpdates()
+        * @covers \MediaWiki\Storage\DerivedPageDataUpdater::doParserCacheUpdate()
+        */
+       public function testDoUpdatesCacheSaveDeferral_canonical() {
+               $page = $this->getPage( __METHOD__ );
+
+               // Case where user has canonical parser options
+               $content = [ 'main' => new WikitextContent( 'rev ID ver #1: {{REVISIONID}}' ) ];
+               $rev = $this->createRevision( $page, 'first', $content );
+               $pcache = MediaWikiServices::getInstance()->getParserCache();
+               $pcache->deleteOptionsKey( $page );
+
+               $this->db->startAtomic( __METHOD__ ); // let deferred updates queue up
+
+               $updater = $this->getDerivedPageDataUpdater( $page, $rev );
+               $updater->prepareUpdate( $rev, [] );
+               $updater->doUpdates();
+
+               $this->assertGreaterThan( 0, DeferredUpdates::pendingUpdatesCount(), 'Pending updates' );
+               $this->assertNotFalse( $pcache->get( $page, $updater->getCanonicalParserOptions() ) );
+
+               $this->db->endAtomic( __METHOD__ ); // run deferred updates
+
+               $this->assertEquals( 0, DeferredUpdates::pendingUpdatesCount(), 'No pending updates' );
+       }
+
+       /**
+        * @covers \MediaWiki\Storage\DerivedPageDataUpdater::doUpdates()
+        * @covers \MediaWiki\Storage\DerivedPageDataUpdater::doSecondaryDataUpdates()
+        * @covers \MediaWiki\Storage\DerivedPageDataUpdater::doParserCacheUpdate()
+        */
+       public function testDoUpdatesCacheSaveDeferral_noncanonical() {
+               $page = $this->getPage( __METHOD__ );
+
+               // Case where user does not have canonical parser options
+               $user = $this->getMutableTestUser()->getUser();
+               $user->setOption(
+                       'thumbsize',
+                       $user->getOption( 'thumbsize' ) + 1
+               );
+               $content = [ 'main' => new WikitextContent( 'rev ID ver #2: {{REVISIONID}}' ) ];
+               $rev = $this->createRevision( $page, 'first', $content, $user );
+               $pcache = MediaWikiServices::getInstance()->getParserCache();
+               $pcache->deleteOptionsKey( $page );
+
+               $this->db->startAtomic( __METHOD__ ); // let deferred updates queue up
+
+               $updater = $this->getDerivedPageDataUpdater( $page, $rev, $user );
+               $updater->prepareUpdate( $rev, [] );
+               $updater->doUpdates();
+
+               $this->assertGreaterThan( 1, DeferredUpdates::pendingUpdatesCount(), 'Pending updates' );
+               $this->assertFalse( $pcache->get( $page, $updater->getCanonicalParserOptions() ) );
+
+               $this->db->endAtomic( __METHOD__ ); // run deferred updates
+
+               $this->assertEquals( 0, DeferredUpdates::pendingUpdatesCount(), 'No pending updates' );
+               $this->assertNotFalse( $pcache->get( $page, $updater->getCanonicalParserOptions() ) );
+       }
+
        /**
         * @covers \MediaWiki\Storage\DerivedPageDataUpdater::doParserCacheUpdate()
         */
diff --git a/tests/phpunit/includes/Storage/PreparedEditTest.php b/tests/phpunit/includes/Storage/PreparedEditTest.php
new file mode 100644 (file)
index 0000000..29999ee
--- /dev/null
@@ -0,0 +1,22 @@
+<?php
+
+namespace MediaWiki\Edit;
+
+use ParserOutput;
+use MediaWikiTestCase;
+
+/**
+ * @covers \MediaWiki\Edit\PreparedEdit
+ */
+class PreparedEditTest extends MediaWikiTestCase {
+       function testCallback() {
+               $output = new ParserOutput();
+               $edit = new PreparedEdit();
+               $edit->parserOutputCallback = function () {
+                       return new ParserOutput();
+               };
+
+               $this->assertEquals( $output, $edit->getOutput() );
+               $this->assertEquals( $output, $edit->output );
+       }
+}
index 6413ddd..01fde35 100644 (file)
@@ -284,6 +284,31 @@ class ParserOptionsTest extends MediaWikiTestCase {
                ScopedCallback::consume( $reset );
        }
 
+       public function testMatchesForCacheKey() {
+               $cOpts = ParserOptions::newCanonical( null, 'en' );
+
+               $uOpts = ParserOptions::newFromAnon();
+               $this->assertTrue( $cOpts->matchesForCacheKey( $uOpts ) );
+
+               $user = new User();
+               $uOpts = ParserOptions::newFromUser( $user );
+               $this->assertTrue( $cOpts->matchesForCacheKey( $uOpts ) );
+
+               $user = new User();
+               $user->setOption( 'thumbsize', 251 );
+               $uOpts = ParserOptions::newFromUser( $user );
+               $this->assertFalse( $cOpts->matchesForCacheKey( $uOpts ) );
+
+               $user = new User();
+               $user->setOption( 'stubthreshold', 800 );
+               $uOpts = ParserOptions::newFromUser( $user );
+               $this->assertFalse( $cOpts->matchesForCacheKey( $uOpts ) );
+
+               $user = new User();
+               $uOpts = ParserOptions::newFromUserAndLang( $user, Language::factory( 'zh' ) );
+               $this->assertFalse( $cOpts->matchesForCacheKey( $uOpts ) );
+       }
+
        public function testAllCacheVaryingOptions() {
                $this->setTemporaryHook( 'ParserOptionsRegister', null );
                $this->assertSame( [