Make RefreshLinksJob MCR compliant.
authordaniel <dkinzler@wikimedia.org>
Mon, 8 Oct 2018 12:32:03 +0000 (14:32 +0200)
committerdaniel <dkinzler@wikimedia.org>
Mon, 8 Oct 2018 17:20:13 +0000 (19:20 +0200)
So far, RefreshLinksJob re-generated the ParserOutput ba calling
Content::getParserOutput. This only works for the main slot. It
needs to instead get the parser output for all slots combiend,
by using RevisionRenderer and RenderedRevision.

Bug: T174035
Change-Id: I253dda26bfa5aefa15f8b1dcc59e69fc7e9d0cb7

includes/jobqueue/jobs/RefreshLinksJob.php
tests/phpunit/includes/jobqueue/jobs/RefreshLinksJobTest.php [new file with mode: 0644]

index 0b6d307..66f54b9 100644 (file)
@@ -21,6 +21,7 @@
  * @ingroup JobQueue
  */
 use MediaWiki\MediaWikiServices;
+use MediaWiki\Storage\RevisionRecord;
 
 /**
  * Job to update link tables for pages
@@ -134,6 +135,8 @@ class RefreshLinksJob extends Job {
                $services = MediaWikiServices::getInstance();
                $stats = $services->getStatsdDataFactory();
                $lbFactory = $services->getDBLoadBalancerFactory();
+               $revisionStore = $services->getRevisionStore();
+               $renderer = $services->getRevisionRenderer();
                $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
 
                $page = WikiPage::factory( $title );
@@ -156,20 +159,20 @@ class RefreshLinksJob extends Job {
                if ( !empty( $this->params['triggeringRevisionId'] ) ) {
                        // Fetch the specified revision; lockAndGetLatest() below detects if the page
                        // was edited since and aborts in order to avoid corrupting the link tables
-                       $revision = Revision::newFromId(
-                               $this->params['triggeringRevisionId'],
+                       $revision = $revisionStore->getRevisionById(
+                               (int)$this->params['triggeringRevisionId'],
                                Revision::READ_LATEST
                        );
                } else {
                        // Fetch current revision; READ_LATEST reduces lockAndGetLatest() check failures
-                       $revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
+                       $revision = $revisionStore->getRevisionByTitle( $title, 0, Revision::READ_LATEST );
                }
 
                if ( !$revision ) {
                        $stats->increment( 'refreshlinks.rev_not_found' );
                        $this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
                        return false; // just deleted?
-               } elseif ( $revision->getId() != $latest || $revision->getPage() !== $page->getId() ) {
+               } elseif ( $revision->getId() != $latest || $revision->getPageId() !== $page->getId() ) {
                        // Do not clobber over newer updates with older ones. If all jobs where FIFO and
                        // serialized, it would be OK to update links based on older revisions since it
                        // would eventually get to the latest. Since that is not the case (by design),
@@ -179,12 +182,6 @@ class RefreshLinksJob extends Job {
                        return false;
                }
 
-               $content = $revision->getContent( Revision::RAW );
-               if ( !$content ) {
-                       // If there is no content, pretend the content is empty
-                       $content = $revision->getContentHandler()->makeEmptyContent();
-               }
-
                $parserOutput = false;
                $parserOptions = $page->makeParserOptions( 'canonical' );
                // If page_touched changed after this root job, then it is likely that
@@ -228,17 +225,34 @@ class RefreshLinksJob extends Job {
                        $stats->increment( 'refreshlinks.parser_cached' );
                } else {
                        $start = microtime( true );
+
+                       $checkCache = $page->shouldCheckParserCache( $parserOptions, $revision->getId() );
+
                        // Revision ID must be passed to the parser output to get revision variables correct
-                       $parserOutput = $content->getParserOutput(
-                               $title, $revision->getId(), $parserOptions, false );
-                       $elapsed = microtime( true ) - $start;
+                       $renderedRevision = $renderer->getRenderedRevision(
+                               $revision,
+                               $parserOptions,
+                               null,
+                               [
+                                       // use master, for consistency with the getRevisionByTitle call above.
+                                       'use-master' => true,
+                                       // bypass audience checks, since we know that this is the current revision.
+                                       'audience' => RevisionRecord::RAW
+                               ]
+                       );
+                       $parserOutput = $renderedRevision->getRevisionParserOutput(
+                               // HTML is only needed if the output is to be placed in the parser cache
+                               [ 'generate-html' => $checkCache ]
+                       );
+
                        // If it took a long time to render, then save this back to the cache to avoid
                        // wasted CPU by other apaches or job runners. We don't want to always save to
                        // cache as this can cause high cache I/O and LRU churn when a template changes.
-                       if ( $elapsed >= self::PARSE_THRESHOLD_SEC
-                               && $page->shouldCheckParserCache( $parserOptions, $revision->getId() )
-                               && $parserOutput->isCacheable()
-                       ) {
+                       $elapsed = microtime( true ) - $start;
+
+                       $parseThreshold = $this->params['parseThreshold'] ?? self::PARSE_THRESHOLD_SEC;
+
+                       if ( $checkCache && $elapsed >= $parseThreshold && $parserOutput->isCacheable() ) {
                                $ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
                                $services->getParserCache()->save(
                                        $parserOutput, $page, $parserOptions, $ctime, $revision->getId()
diff --git a/tests/phpunit/includes/jobqueue/jobs/RefreshLinksJobTest.php b/tests/phpunit/includes/jobqueue/jobs/RefreshLinksJobTest.php
new file mode 100644 (file)
index 0000000..7665b78
--- /dev/null
@@ -0,0 +1,89 @@
+<?php
+use MediaWiki\MediaWikiServices;
+
+/**
+ * @covers RefreshLinksJob
+ *
+ * @group JobQueue
+ * @group Database
+ *
+ * @license GPL-2.0-or-later
+ * @author Addshore
+ */
+class RefreshLinksJobTest extends MediaWikiTestCase {
+
+       public function setUp() {
+               parent::setUp();
+
+               $this->tablesUsed[] = 'page';
+               $this->tablesUsed[] = 'revision';
+
+               $this->tablesUsed[] = 'pagelinks';
+               $this->tablesUsed[] = 'categorylinks';
+       }
+
+       /**
+        * @param string $name
+        * @param Content[] $content
+        *
+        * @return WikiPage
+        */
+       private function createPage( $name, array $content ) {
+               $title = Title::makeTitle( $this->getDefaultWikitextNS(), $name );
+               $page = WikiPage::factory( $title );
+
+               $updater = $page->newPageUpdater( $this->getTestUser()->getUser() );
+
+               foreach ( $content as $slot => $cnt ) {
+                       $updater->setContent( $slot, $cnt );
+               }
+
+               $updater->saveRevision( CommentStoreComment::newUnsavedComment( 'Test' ) );
+
+               return $page;
+       }
+
+       // TODO: test multi-page
+       // TODO: test recursive
+       // TODO: test partition
+
+       public function testRunForSinglePage() {
+               $mainContent = new WikitextContent( 'MAIN [[Kittens]]' );
+               $auxContent = new WikitextContent( 'AUX [[Category:Goats]]' );
+               $page = $this->createPage( __METHOD__, [ 'main' => $mainContent, 'aux' => $auxContent ] );
+
+               // clear state
+               $parserCache = MediaWikiServices::getInstance()->getParserCache();
+               $parserCache->deleteOptionsKey( $page );
+
+               $this->db->delete( 'pagelinks', '*', __METHOD__ );
+               $this->db->delete( 'categorylinks', '*', __METHOD__ );
+
+               // run job
+               $job = new RefreshLinksJob( $page->getTitle(), [ 'parseThreshold' => 0 ] );
+               $job->run();
+
+               // assert state
+               $options = ParserOptions::newCanonical( 'canonical' );
+               $out = $parserCache->get( $page, $options );
+               $this->assertNotFalse( $out, 'parser cache entry' );
+
+               $text = $out->getText();
+               $this->assertContains( 'MAIN', $text );
+               $this->assertContains( 'AUX', $text );
+
+               $this->assertSelect(
+                       'pagelinks',
+                       'pl_title',
+                       [ 'pl_from' => $page->getId() ],
+                       [ [ 'Kittens' ] ]
+               );
+               $this->assertSelect(
+                       'categorylinks',
+                       'cl_to',
+                       [ 'cl_from' => $page->getId() ],
+                       [ [ 'Goats' ] ]
+               );
+       }
+
+}