From: daniel
Date: Mon, 8 Oct 2018 12:32:03 +0000 (+0200)
Subject: Make RefreshLinksJob MCR compliant.
X-Git-Tag: 1.34.0-rc.0~3860^2
X-Git-Url: http://git.heureux-cyclage.org/?a=commitdiff_plain;h=6248556b5e9d0e2e257cf46366a85c9ff80d6c83;p=lhc%2Fweb%2Fwiklou.git

Make RefreshLinksJob MCR compliant.

So far, RefreshLinksJob re-generated the ParserOutput by calling
Content::getParserOutput. This only works for the main slot. It needs
to instead get the parser output for all slots combined, by using
RevisionRenderer and RenderedRevision.

Bug: T174035
Change-Id: I253dda26bfa5aefa15f8b1dcc59e69fc7e9d0cb7
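For readers new to the MCR rendering flow: the pattern adopted below looks
roughly as follows. This is a minimal sketch, not part of the patch; $title
and $page are assumed to be a Title and its WikiPage, and the calls shown
are the ones the patch itself introduces.

	use MediaWiki\MediaWikiServices;
	use MediaWiki\Storage\RevisionRecord;

	// Render the combined ParserOutput of all slots of the latest revision,
	// instead of calling Content::getParserOutput on the main slot only.
	$services = MediaWikiServices::getInstance();
	$revision = $services->getRevisionStore()
		->getRevisionByTitle( $title, 0, Revision::READ_LATEST );

	$renderedRevision = $services->getRevisionRenderer()->getRenderedRevision(
		$revision,
		$page->makeParserOptions( 'canonical' ),
		null, // no User object needed; audience checks are bypassed below
		[ 'audience' => RevisionRecord::RAW ]
	);

	// One ParserOutput combining the main slot and any auxiliary slots.
	$parserOutput = $renderedRevision->getRevisionParserOutput();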
---

diff --git a/includes/jobqueue/jobs/RefreshLinksJob.php b/includes/jobqueue/jobs/RefreshLinksJob.php
index 0b6d30776b..66f54b9420 100644
--- a/includes/jobqueue/jobs/RefreshLinksJob.php
+++ b/includes/jobqueue/jobs/RefreshLinksJob.php
@@ -21,6 +21,7 @@
  * @ingroup JobQueue
  */
 use MediaWiki\MediaWikiServices;
+use MediaWiki\Storage\RevisionRecord;
 
 /**
  * Job to update link tables for pages
@@ -134,6 +135,8 @@ class RefreshLinksJob extends Job {
 		$services = MediaWikiServices::getInstance();
 		$stats = $services->getStatsdDataFactory();
 		$lbFactory = $services->getDBLoadBalancerFactory();
+		$revisionStore = $services->getRevisionStore();
+		$renderer = $services->getRevisionRenderer();
 		$ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
 
 		$page = WikiPage::factory( $title );
@@ -156,20 +159,20 @@ class RefreshLinksJob extends Job {
 		if ( !empty( $this->params['triggeringRevisionId'] ) ) {
 			// Fetch the specified revision; lockAndGetLatest() below detects if the page
 			// was edited since and aborts in order to avoid corrupting the link tables
-			$revision = Revision::newFromId(
-				$this->params['triggeringRevisionId'],
+			$revision = $revisionStore->getRevisionById(
+				(int)$this->params['triggeringRevisionId'],
 				Revision::READ_LATEST
 			);
 		} else {
 			// Fetch current revision; READ_LATEST reduces lockAndGetLatest() check failures
-			$revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
+			$revision = $revisionStore->getRevisionByTitle( $title, 0, Revision::READ_LATEST );
 		}
 
 		if ( !$revision ) {
 			$stats->increment( 'refreshlinks.rev_not_found' );
 			$this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
 			return false; // just deleted?
-		} elseif ( $revision->getId() != $latest || $revision->getPage() !== $page->getId() ) {
+		} elseif ( $revision->getId() != $latest || $revision->getPageId() !== $page->getId() ) {
 			// Do not clobber over newer updates with older ones. If all jobs where FIFO and
 			// serialized, it would be OK to update links based on older revisions since it
 			// would eventually get to the latest. Since that is not the case (by design),
@@ -179,12 +182,6 @@ class RefreshLinksJob extends Job {
 			return false;
 		}
 
-		$content = $revision->getContent( Revision::RAW );
-		if ( !$content ) {
-			// If there is no content, pretend the content is empty
-			$content = $revision->getContentHandler()->makeEmptyContent();
-		}
-
 		$parserOutput = false;
 		$parserOptions = $page->makeParserOptions( 'canonical' );
 		// If page_touched changed after this root job, then it is likely that
@@ -228,17 +225,34 @@ class RefreshLinksJob extends Job {
 			$stats->increment( 'refreshlinks.parser_cached' );
 		} else {
 			$start = microtime( true );
+
+			$checkCache = $page->shouldCheckParserCache( $parserOptions, $revision->getId() );
+
 			// Revision ID must be passed to the parser output to get revision variables correct
-			$parserOutput = $content->getParserOutput(
-				$title, $revision->getId(), $parserOptions, false );
-			$elapsed = microtime( true ) - $start;
+			$renderedRevision = $renderer->getRenderedRevision(
+				$revision,
+				$parserOptions,
+				null,
+				[
+					// use master, for consistency with the getRevisionByTitle call above.
+					'use-master' => true,
+					// bypass audience checks, since we know that this is the current revision.
+					'audience' => RevisionRecord::RAW
+				]
+			);
+			$parserOutput = $renderedRevision->getRevisionParserOutput(
+				// HTML is only needed if the output is to be placed in the parser cache
+				[ 'generate-html' => $checkCache ]
+			);
+
 			// If it took a long time to render, then save this back to the cache to avoid
 			// wasted CPU by other apaches or job runners. We don't want to always save to
 			// cache as this can cause high cache I/O and LRU churn when a template changes.
-			if ( $elapsed >= self::PARSE_THRESHOLD_SEC
-				&& $page->shouldCheckParserCache( $parserOptions, $revision->getId() )
-				&& $parserOutput->isCacheable()
-			) {
+			$elapsed = microtime( true ) - $start;
+
+			$parseThreshold = $this->params['parseThreshold'] ?? self::PARSE_THRESHOLD_SEC;
+
+			if ( $checkCache && $elapsed >= $parseThreshold && $parserOutput->isCacheable() ) {
 				$ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
 				$services->getParserCache()->save(
 					$parserOutput, $page, $parserOptions, $ctime, $revision->getId()
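The job now also honors an optional parseThreshold parameter, falling back
to PARSE_THRESHOLD_SEC when it is absent. A hypothetical caller can use it
to force the rendered output into the parser cache, which is what the new
test below does:

	// Hypothetical usage sketch: with a threshold of zero, the output is
	// written back to the parser cache whenever it is cacheable.
	$job = new RefreshLinksJob( $title, [ 'parseThreshold' => 0 ] );
	JobQueueGroup::singleton()->push( $job );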
diff --git a/tests/phpunit/includes/jobqueue/jobs/RefreshLinksJobTest.php b/tests/phpunit/includes/jobqueue/jobs/RefreshLinksJobTest.php
new file mode 100644
index 0000000000..7665b788fd
--- /dev/null
+++ b/tests/phpunit/includes/jobqueue/jobs/RefreshLinksJobTest.php
@@ -0,0 +1,89 @@
+<?php
+
+use MediaWiki\MediaWikiServices;
+
+/**
+ * @covers RefreshLinksJob
+ *
+ * @group JobQueue
+ * @group Database
+ */
+class RefreshLinksJobTest extends MediaWikiTestCase {
+
+	protected function setUp() {
+		parent::setUp();
+
+		$this->tablesUsed[] = 'page';
+		$this->tablesUsed[] = 'revision';
+
+		$this->tablesUsed[] = 'pagelinks';
+		$this->tablesUsed[] = 'categorylinks';
+	}
+
+	/**
+	 * @param string $name
+	 * @param Content[] $content
+	 *
+	 * @return WikiPage
+	 */
+	private function createPage( $name, array $content ) {
+		$title = Title::makeTitle( $this->getDefaultWikitextNS(), $name );
+		$page = WikiPage::factory( $title );
+
+		$updater = $page->newPageUpdater( $this->getTestUser()->getUser() );
+
+		foreach ( $content as $slot => $cnt ) {
+			$updater->setContent( $slot, $cnt );
+		}
+
+		$updater->saveRevision( CommentStoreComment::newUnsavedComment( 'Test' ) );
+
+		return $page;
+	}
+
+	// TODO: test multi-page
+	// TODO: test recursive
+	// TODO: test partition
+
+	public function testRunForSinglePage() {
+		$mainContent = new WikitextContent( 'MAIN [[Kittens]]' );
+		$auxContent = new WikitextContent( 'AUX [[Category:Goats]]' );
+		$page = $this->createPage( __METHOD__, [ 'main' => $mainContent, 'aux' => $auxContent ] );
+
+		// clear state
+		$parserCache = MediaWikiServices::getInstance()->getParserCache();
+		$parserCache->deleteOptionsKey( $page );
+
+		$this->db->delete( 'pagelinks', '*', __METHOD__ );
+		$this->db->delete( 'categorylinks', '*', __METHOD__ );
+
+		// run job
+		$job = new RefreshLinksJob( $page->getTitle(), [ 'parseThreshold' => 0 ] );
+		$job->run();
+
+		// assert state
+		$options = ParserOptions::newCanonical( 'canonical' );
+		$out = $parserCache->get( $page, $options );
+		$this->assertNotFalse( $out, 'parser cache entry' );
+
+		$text = $out->getText();
+		$this->assertContains( 'MAIN', $text );
+		$this->assertContains( 'AUX', $text );
+
+		$this->assertSelect(
+			'pagelinks',
+			'pl_title',
+			[ 'pl_from' => $page->getId() ],
+			[ [ 'Kittens' ] ]
+		);
+		$this->assertSelect(
+			'categorylinks',
+			'cl_to',
+			[ 'cl_from' => $page->getId() ],
+			[ [ 'Goats' ] ]
+		);
+	}
+
+}
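Since the point of the change is that every slot now contributes to the link
tables, note that RenderedRevision can also render slots individually. A
sketch, reusing the $renderedRevision from the example above:

	// The combined output (what the job uses) carries links from all slots;
	// a single slot can still be rendered on its own if needed.
	$combined = $renderedRevision->getRevisionParserOutput();
	$mainOnly = $renderedRevision->getSlotParserOutput( 'main' );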