Various fixes and simplifications to RefreshLinksJob::runForTitle()
author: Aaron Schulz <aschulz@wikimedia.org>
Tue, 2 Apr 2019 08:25:48 +0000 (01:25 -0700)
committer: Aaron Schulz <aschulz@wikimedia.org>
Thu, 11 Jul 2019 06:06:02 +0000 (06:06 +0000)
* Remove logic for saving slow-to-render parser output. This has
  not worked ever since DerivedPageDataUpdater was introduced.
* Make the logic to use cached output actually work. This was
  also broken since DerivedPageDataUpdater was added. In order
  to pass the output, add a known-revision-output parameter
  to both WikiPage::doSecondaryUpdates() and
  DerivedPageDataUpdater::prepareUpdate().
* Also factor out some helper methods from runForTitle() in
  RefreshLinksJob to make it more readable and avoid the need
  for multiple transaction round commit calls. This makes the
  case of multiple-title jobs less likely to break again.
* Make use of RefreshLinksJob::runForTitle() return value.
* Add unit tests for multiple-title job case.

Change-Id: I0cd13c424a87653b5a7253c42cd48fe43befd692

includes/Storage/DerivedPageDataUpdater.php
includes/deferred/LinksUpdate.php
includes/jobqueue/jobs/RefreshLinksJob.php
includes/page/WikiPage.php
tests/phpunit/includes/jobqueue/jobs/RefreshLinksJobTest.php

index b4d6f05..2cf3cee 100644 (file)
@@ -1082,6 +1082,11 @@ class DerivedPageDataUpdater implements IDBAccessObject, LoggerAwareInterface {
         *    See DataUpdate::getCauseAction(). (default 'unknown')
         *  - causeAgent: name of the user who caused the update. See DataUpdate::getCauseAgent().
         *    (string, default 'unknown')
+        *  - known-revision-output: a combined canonical ParserOutput for the revision, perhaps
+        *    from some cache. The caller is responsible for ensuring that the ParserOutput indeed
+        *    matches the $revision and $options. This mechanism is intended as a temporary stop-gap,
+        *    for the time until caches have been changed to store RenderedRevision states instead
+        *    of ParserOutput objects. (default: null) (since 1.33)
         */
        public function prepareUpdate( RevisionRecord $revision, array $options = [] ) {
                Assert::parameter(
@@ -1228,14 +1233,17 @@ class DerivedPageDataUpdater implements IDBAccessObject, LoggerAwareInterface {
                if ( $this->renderedRevision ) {
                        $this->renderedRevision->updateRevision( $revision );
                } else {
-
                        // NOTE: we want a canonical rendering, so don't pass $this->user or ParserOptions
                        // NOTE: the revision is either new or current, so we can bypass audience checks.
                        $this->renderedRevision = $this->revisionRenderer->getRenderedRevision(
                                $this->revision,
                                null,
                                null,
-                               [ 'use-master' => $this->useMaster(), 'audience' => RevisionRecord::RAW ]
+                               [
+                                       'use-master' => $this->useMaster(),
+                                       'audience' => RevisionRecord::RAW,
+                                       'known-revision-output' => $options['known-revision-output'] ?? null
+                               ]
                        );
 
                        // XXX: Since we presumably are dealing with the current revision,
index 266d768..5b68ff8 100644 (file)
@@ -203,7 +203,7 @@ class LinksUpdate extends DataUpdate implements EnqueueableDataUpdate {
        }
 
        /**
-        * Acquire a lock for performing link table updates for a page on a DB
+        * Acquire a session-level lock for performing link table updates for a page on a DB
         *
         * @param IDatabase $dbw
         * @param int $pageId
index 89ecb0e..3179a2f 100644 (file)
@@ -22,6 +22,8 @@
  */
 use MediaWiki\MediaWikiServices;
 use MediaWiki\Revision\RevisionRecord;
+use MediaWiki\Revision\RevisionRenderer;
+use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
 
 /**
  * Job to update link tables for pages
@@ -37,10 +39,8 @@ use MediaWiki\Revision\RevisionRecord;
  * @ingroup JobQueue
  */
 class RefreshLinksJob extends Job {
-       /** @var float Cache parser output when it takes this long to render */
-       const PARSE_THRESHOLD_SEC = 1.0;
        /** @var int Lag safety margin when comparing root job times to last-refresh times */
-       const CLOCK_FUDGE = 10;
+       const NORMAL_MAX_LAG = 10;
        /** @var int How many seconds to wait for replica DBs to catch up */
        const LAG_WAIT_TIMEOUT = 15;
 
@@ -54,7 +54,9 @@ class RefreshLinksJob extends Job {
                        !( isset( $params['pages'] ) && count( $params['pages'] ) != 1 )
                );
                $this->params += [ 'causeAction' => 'unknown', 'causeAgent' => 'unknown' ];
-               // This will control transaction rounds in order to run DataUpdates
+               // Tell JobRunner to not automatically wrap run() in a transaction round.
+               // Each runForTitle() call will manage its own rounds in order to run DataUpdates
+               // and to avoid contention as well.
                $this->executionFlags |= self::JOB_NO_EXPLICIT_TRX_ROUND;
        }
 
@@ -83,21 +85,21 @@ class RefreshLinksJob extends Job {
        }
 
        function run() {
-               global $wgUpdateRowsPerJob;
-
                $ok = true;
+
                // Job to update all (or a range of) backlink pages for a page
                if ( !empty( $this->params['recursive'] ) ) {
+                       $services = MediaWikiServices::getInstance();
                        // When the base job branches, wait for the replica DBs to catch up to the master.
                        // From then on, we know that any template changes at the time the base job was
                        // enqueued will be reflected in backlink page parses when the leaf jobs run.
                        if ( !isset( $this->params['range'] ) ) {
-                               $lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
+                               $lbFactory = $services->getDBLoadBalancerFactory();
                                if ( !$lbFactory->waitForReplication( [
-                                               'domain'  => $lbFactory->getLocalDomainID(),
-                                               'timeout' => self::LAG_WAIT_TIMEOUT
+                                       'domain'  => $lbFactory->getLocalDomainID(),
+                                       'timeout' => self::LAG_WAIT_TIMEOUT
                                ] ) ) { // only try so hard
-                                       $stats = MediaWikiServices::getInstance()->getStatsdDataFactory();
+                                       $stats = $services->getStatsdDataFactory();
                                        $stats->increment( 'refreshlinks.lag_wait_failed' );
                                }
                        }
@@ -111,7 +113,7 @@ class RefreshLinksJob extends Job {
                        // jobs and possibly a recursive RefreshLinks job for the rest of the backlinks
                        $jobs = BacklinkJobUtils::partitionBacklinkJob(
                                $this,
-                               $wgUpdateRowsPerJob,
+                               $services->getMainConfig()->get( 'UpdateRowsPerJob' ),
                                1, // job-per-title
                                [ 'params' => $extraParams ]
                        );
@@ -121,7 +123,7 @@ class RefreshLinksJob extends Job {
                        foreach ( $this->params['pages'] as list( $ns, $dbKey ) ) {
                                $title = Title::makeTitleSafe( $ns, $dbKey );
                                if ( $title ) {
-                                       $this->runForTitle( $title );
+                                       $ok = $this->runForTitle( $title ) && $ok;
                                } else {
                                        $ok = false;
                                        $this->setLastError( "Invalid title ($ns,$dbKey)." );
@@ -129,7 +131,7 @@ class RefreshLinksJob extends Job {
                        }
                // Job to update link tables for a given title
                } else {
-                       $this->runForTitle( $this->title );
+                       $ok = $this->runForTitle( $this->title );
                }
 
                return $ok;
@@ -142,139 +144,233 @@ class RefreshLinksJob extends Job {
        protected function runForTitle( Title $title ) {
                $services = MediaWikiServices::getInstance();
                $stats = $services->getStatsdDataFactory();
-               $lbFactory = $services->getDBLoadBalancerFactory();
-               $revisionStore = $services->getRevisionStore();
                $renderer = $services->getRevisionRenderer();
+               $parserCache = $services->getParserCache();
+               $lbFactory = $services->getDBLoadBalancerFactory();
                $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
 
-               $lbFactory->beginMasterChanges( __METHOD__ );
-
+               // Load the page from the master DB
                $page = WikiPage::factory( $title );
                $page->loadPageData( WikiPage::READ_LATEST );
 
-               // Serialize links updates by page ID so they see each others' changes
+               // Serialize link update jobs by page ID so they see each other's changes.
+               // The page ID and latest revision ID will be queried again after the lock
+               // is acquired to bail if they are changed from that of loadPageData() above.
                $dbw = $lbFactory->getMainLB()->getConnection( DB_MASTER );
-               /** @noinspection PhpUnusedLocalVariableInspection */
                $scopedLock = LinksUpdate::acquirePageLock( $dbw, $page->getId(), 'job' );
                if ( $scopedLock === null ) {
-                       $lbFactory->commitMasterChanges( __METHOD__ );
-                       // Another job is already updating the page, likely for an older revision (T170596).
+                       // Another job is already updating the page, likely for a prior revision (T170596)
                        $this->setLastError( 'LinksUpdate already running for this page, try again later.' );
+                       $stats->increment( 'refreshlinks.lock_failure' );
+
+                       return false;
+               }
+
+               if ( $this->isAlreadyRefreshed( $page ) ) {
+                       $stats->increment( 'refreshlinks.update_skipped' );
+
+                       return true;
+               }
+
+               // Parse during a fresh transaction round for better read consistency
+               $lbFactory->beginMasterChanges( __METHOD__ );
+               $output = $this->getParserOutput( $renderer, $parserCache, $page, $stats );
+               $options = $this->getDataUpdateOptions();
+               $lbFactory->commitMasterChanges( __METHOD__ );
+
+               if ( !$output ) {
+                       return false; // raced out?
+               }
+
+               // Tell DerivedPageDataUpdater to use this parser output
+               $options['known-revision-output'] = $output;
+               // Execute corresponding DataUpdates immediately
+               $page->doSecondaryDataUpdates( $options );
+               InfoAction::invalidateCache( $title );
+
+               // Commit any writes here in case this method is called in a loop.
+               // In that case, the scoped lock will fail to be acquired.
+               $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
+
+               return true;
+       }
+
+       /**
+        * @param WikiPage $page
+        * @return bool Whether something updated the backlinks with data newer than this job
+        */
+       private function isAlreadyRefreshed( WikiPage $page ) {
+               // Get the timestamp of the change that triggered this job
+               $rootTimestamp = $this->params['rootJobTimestamp'] ?? null;
+               if ( $rootTimestamp === null ) {
                        return false;
                }
-               // Get the latest ID *after* acquirePageLock() flushed the transaction.
+
+               if ( !empty( $this->params['isOpportunistic'] ) ) {
+                       // Neither clock skew nor DB snapshot/replica DB lag matter much for
+                       // such updates; focus on reusing the (often recently updated) cache
+                       $lagAwareTimestamp = $rootTimestamp;
+               } else {
+                       // For transclusion updates, the template changes must be reflected
+                       $lagAwareTimestamp = wfTimestamp(
+                               TS_MW,
+                               wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
+                       );
+               }
+
+               return ( $page->getLinksTimestamp() > $lagAwareTimestamp );
+       }
+
+       /**
+        * Get the parser output if the page is unchanged from what was loaded in $page
+        *
+        * @param RevisionRenderer $renderer
+        * @param ParserCache $parserCache
+        * @param WikiPage $page Page already loaded with READ_LATEST
+        * @param StatsdDataFactoryInterface $stats
+        * @return ParserOutput|null Combined output for all slots; might only contain metadata
+        */
+       private function getParserOutput(
+               RevisionRenderer $renderer,
+               ParserCache $parserCache,
+               WikiPage $page,
+               StatsdDataFactoryInterface $stats
+       ) {
+               $revision = $this->getCurrentRevisionIfUnchanged( $page, $stats );
+               if ( !$revision ) {
+                       return null; // race condition?
+               }
+
+               $cachedOutput = $this->getParserOutputFromCache( $parserCache, $page, $revision, $stats );
+               if ( $cachedOutput ) {
+                       return $cachedOutput;
+               }
+
+               $renderedRevision = $renderer->getRenderedRevision(
+                       $revision,
+                       $page->makeParserOptions( 'canonical' ),
+                       null,
+                       [ 'audience' => $revision::RAW ]
+               );
+
+               $parseTimestamp = wfTimestampNow(); // timestamp that parsing started
+               $output = $renderedRevision->getRevisionParserOutput( [ 'generate-html' => false ] );
+               $output->setCacheTime( $parseTimestamp ); // notify LinksUpdate::doUpdate()
+
+               return $output;
+       }
+
+       /**
+        * Get the current revision record if it is unchanged from what was loaded in $page
+        *
+        * @param WikiPage $page Page already loaded with READ_LATEST
+        * @param StatsdDataFactoryInterface $stats
+        * @return RevisionRecord|null The same instance that $page->getRevisionRecord() uses
+        */
+       private function getCurrentRevisionIfUnchanged(
+               WikiPage $page,
+               StatsdDataFactoryInterface $stats
+       ) {
+               $title = $page->getTitle();
+               // Get the latest ID since acquirePageLock() in runForTitle() flushed the transaction.
                // This is used to detect edits/moves after loadPageData() but before the scope lock.
-               // The works around the chicken/egg problem of determining the scope lock key.
+               // This works around the chicken/egg problem of determining the scope lock key name.
                $latest = $title->getLatestRevID( Title::GAID_FOR_UPDATE );
 
-               if ( !empty( $this->params['triggeringRevisionId'] ) ) {
-                       // Fetch the specified revision; lockAndGetLatest() below detects if the page
-                       // was edited since and aborts in order to avoid corrupting the link tables
-                       $revision = $revisionStore->getRevisionById(
-                               (int)$this->params['triggeringRevisionId'],
-                               Revision::READ_LATEST
-                       );
-               } else {
-                       // Fetch current revision; READ_LATEST reduces lockAndGetLatest() check failures
-                       $revision = $revisionStore->getRevisionByTitle( $title, 0, Revision::READ_LATEST );
+               $triggeringRevisionId = $this->params['triggeringRevisionId'] ?? null;
+               if ( $triggeringRevisionId && $triggeringRevisionId !== $latest ) {
+                       // This job is obsolete and one for the latest revision will handle updates
+                       $stats->increment( 'refreshlinks.rev_not_current' );
+                       $this->setLastError( "Revision $triggeringRevisionId is not current" );
+
+                       return null;
                }
 
+               // Load the current revision. Note that $page should have loaded with READ_LATEST.
+               // This instance will be reused in WikiPage::doSecondaryDataUpdates() later on.
+               $revision = $page->getRevisionRecord();
                if ( !$revision ) {
-                       $lbFactory->commitMasterChanges( __METHOD__ );
                        $stats->increment( 'refreshlinks.rev_not_found' );
                        $this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
-                       return false; // just deleted?
-               } elseif ( $revision->getId() != $latest || $revision->getPageId() !== $page->getId() ) {
-                       $lbFactory->commitMasterChanges( __METHOD__ );
+
+                       return null; // just deleted?
+               } elseif ( $revision->getId() !== $latest || $revision->getPageId() !== $page->getId() ) {
                        // Do not clobber over newer updates with older ones. If all jobs where FIFO and
                        // serialized, it would be OK to update links based on older revisions since it
                        // would eventually get to the latest. Since that is not the case (by design),
                        // only update the link tables to a state matching the current revision's output.
                        $stats->increment( 'refreshlinks.rev_not_current' );
                        $this->setLastError( "Revision {$revision->getId()} is not current" );
-                       return false;
+
+                       return null;
                }
 
-               $parserOutput = false;
-               $parserOptions = $page->makeParserOptions( 'canonical' );
+               return $revision;
+       }
+
+       /**
+        * Get the parser output from cache if it reflects the change that triggered this job
+        *
+        * @param ParserCache $parserCache
+        * @param WikiPage $page
+        * @param RevisionRecord $currentRevision
+        * @param StatsdDataFactoryInterface $stats
+        * @return ParserOutput|null
+        */
+       private function getParserOutputFromCache(
+               ParserCache $parserCache,
+               WikiPage $page,
+               RevisionRecord $currentRevision,
+               StatsdDataFactoryInterface $stats
+       ) {
+               $cachedOutput = null;
                // If page_touched changed after this root job, then it is likely that
                // any views of the pages already resulted in re-parses which are now in
                // cache. The cache can be reused to avoid expensive parsing in some cases.
-               if ( isset( $this->params['rootJobTimestamp'] ) ) {
+               $rootTimestamp = $this->params['rootJobTimestamp'] ?? null;
+               if ( $rootTimestamp !== null ) {
                        $opportunistic = !empty( $this->params['isOpportunistic'] );
-
-                       $skewedTimestamp = $this->params['rootJobTimestamp'];
                        if ( $opportunistic ) {
-                               // Neither clock skew nor DB snapshot/replica DB lag matter much for such
-                               // updates; focus on reusing the (often recently updated) cache
+                               // Neither clock skew nor DB snapshot/replica DB lag matter much for
+                               // such updates; focus on reusing the (often recently updated) cache
+                               $lagAwareTimestamp = $rootTimestamp;
                        } else {
                                // For transclusion updates, the template changes must be reflected
-                               $skewedTimestamp = wfTimestamp( TS_MW,
-                                       wfTimestamp( TS_UNIX, $skewedTimestamp ) + self::CLOCK_FUDGE
+                               $lagAwareTimestamp = wfTimestamp(
+                                       TS_MW,
+                                       wfTimestamp( TS_UNIX, $rootTimestamp ) + self::NORMAL_MAX_LAG
                                );
                        }
 
-                       if ( $page->getLinksTimestamp() > $skewedTimestamp ) {
-                               $lbFactory->commitMasterChanges( __METHOD__ );
-                               // Something already updated the backlinks since this job was made
-                               $stats->increment( 'refreshlinks.update_skipped' );
-                               return true;
-                       }
-
-                       if ( $page->getTouched() >= $this->params['rootJobTimestamp'] || $opportunistic ) {
-                               // Cache is suspected to be up-to-date. As long as the cache rev ID matches
-                               // and it reflects the job's triggering change, then it is usable.
-                               $parserOutput = $services->getParserCache()->getDirty( $page, $parserOptions );
-                               if ( !$parserOutput
-                                       || $parserOutput->getCacheRevisionId() != $revision->getId()
-                                       || $parserOutput->getCacheTime() < $skewedTimestamp
+                       if ( $page->getTouched() >= $rootTimestamp || $opportunistic ) {
+                               // Cache is suspected to be up-to-date so it's worth the I/O of checking.
+                               // As long as the cache rev ID matches the current rev ID and it reflects
+                               // the job's triggering change, then it is usable.
+                               $parserOptions = $page->makeParserOptions( 'canonical' );
+                               $output = $parserCache->getDirty( $page, $parserOptions );
+                               if (
+                                       $output &&
+                                       $output->getCacheRevisionId() == $currentRevision->getId() &&
+                                       $output->getCacheTime() >= $lagAwareTimestamp
                                ) {
-                                       $parserOutput = false; // too stale
+                                       $cachedOutput = $output;
                                }
                        }
                }
 
-               // Fetch the current revision and parse it if necessary...
-               if ( $parserOutput ) {
+               if ( $cachedOutput ) {
                        $stats->increment( 'refreshlinks.parser_cached' );
                } else {
-                       $start = microtime( true );
-
-                       $checkCache = $page->shouldCheckParserCache( $parserOptions, $revision->getId() );
-
-                       // Revision ID must be passed to the parser output to get revision variables correct
-                       $renderedRevision = $renderer->getRenderedRevision(
-                               $revision,
-                               $parserOptions,
-                               null,
-                               [
-                                       // use master, for consistency with the getRevisionByTitle call above.
-                                       'use-master' => true,
-                                       // bypass audience checks, since we know that this is the current revision.
-                                       'audience' => RevisionRecord::RAW
-                               ]
-                       );
-                       $parserOutput = $renderedRevision->getRevisionParserOutput(
-                               // HTML is only needed if the output is to be placed in the parser cache
-                               [ 'generate-html' => $checkCache ]
-                       );
-
-                       // If it took a long time to render, then save this back to the cache to avoid
-                       // wasted CPU by other apaches or job runners. We don't want to always save to
-                       // cache as this can cause high cache I/O and LRU churn when a template changes.
-                       $elapsed = microtime( true ) - $start;
-
-                       $parseThreshold = $this->params['parseThreshold'] ?? self::PARSE_THRESHOLD_SEC;
-
-                       if ( $checkCache && $elapsed >= $parseThreshold && $parserOutput->isCacheable() ) {
-                               $ctime = wfTimestamp( TS_MW, (int)$start ); // cache time
-                               $services->getParserCache()->save(
-                                       $parserOutput, $page, $parserOptions, $ctime, $revision->getId()
-                               );
-                       }
                        $stats->increment( 'refreshlinks.parser_uncached' );
                }
 
+               return $cachedOutput;
+       }
+
+       /**
+        * @return array
+        */
+       private function getDataUpdateOptions() {
                $options = [
                        'recursive' => !empty( $this->params['useRecursiveLinksUpdate'] ),
                        // Carry over cause so the update can do extra logging
@@ -291,17 +387,7 @@ class RefreshLinksJob extends Job {
                        }
                }
 
-               $lbFactory->commitMasterChanges( __METHOD__ );
-
-               $page->doSecondaryDataUpdates( $options );
-
-               InfoAction::invalidateCache( $title );
-
-               // Commit any writes here in case this method is called in a loop.
-               // In that case, the scoped lock will fail to be acquired.
-               $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
-
-               return true;
+               return $options;
        }
 
        public function getDeduplicationInfo() {
index 9e80cf4..fa01ce4 100644 (file)
@@ -2111,6 +2111,11 @@ class WikiPage implements Page, IDBAccessObject {
         *   - defer: one of the DeferredUpdates constants, or false to run immediately (default: false).
         *     Note that even when this is set to false, some updates might still get deferred (as
         *     some update might directly add child updates to DeferredUpdates).
+        *   - known-revision-output: a combined canonical ParserOutput for the revision, perhaps
+        *     from some cache. The caller is responsible for ensuring that the ParserOutput indeed
+        *     matches the $rev and $options. This mechanism is intended as a temporary stop-gap,
+        *     for the time until caches have been changed to store RenderedRevision states instead
+        *     of ParserOutput objects. (default: null) (since 1.33)
         * @since 1.32
         */
        public function doSecondaryDataUpdates( array $options = [] ) {
index 50d5177..24ec2e4 100644 (file)
@@ -69,15 +69,6 @@ class RefreshLinksJobTest extends MediaWikiTestCase {
                $job = new RefreshLinksJob( $page->getTitle(), [ 'parseThreshold' => 0 ] );
                $job->run();
 
-               // assert state
-               $options = ParserOptions::newCanonical( 'canonical' );
-               $out = $parserCache->get( $page, $options );
-               $this->assertNotFalse( $out, 'parser cache entry' );
-
-               $text = $out->getText();
-               $this->assertContains( 'MAIN', $text );
-               $this->assertContains( 'AUX', $text );
-
                $this->assertSelect(
                        'pagelinks',
                        'pl_title',
@@ -92,4 +83,60 @@ class RefreshLinksJobTest extends MediaWikiTestCase {
                );
        }
 
+       public function testRunForMultiPage() {
+               MediaWikiServices::getInstance()->getSlotRoleRegistry()->defineRoleWithModel(
+                       'aux',
+                       CONTENT_MODEL_WIKITEXT
+               );
+
+               $fname = __METHOD__;
+
+               $mainContent = new WikitextContent( 'MAIN [[Kittens]]' );
+               $auxContent = new WikitextContent( 'AUX [[Category:Goats]]' );
+               $page1 = $this->createPage( "$fname-1", [ 'main' => $mainContent, 'aux' => $auxContent ] );
+
+               $mainContent = new WikitextContent( 'MAIN [[Dogs]]' );
+               $auxContent = new WikitextContent( 'AUX [[Category:Hamsters]]' );
+               $page2 = $this->createPage( "$fname-2", [ 'main' => $mainContent, 'aux' => $auxContent ] );
+
+               // clear state
+               $parserCache = MediaWikiServices::getInstance()->getParserCache();
+               $parserCache->deleteOptionsKey( $page1 );
+               $parserCache->deleteOptionsKey( $page2 );
+
+               $this->db->delete( 'pagelinks', '*', __METHOD__ );
+               $this->db->delete( 'categorylinks', '*', __METHOD__ );
+
+               // run job
+               $job = new RefreshLinksJob(
+                       Title::newMainPage(),
+                       [ 'pages' => [ [ 0, "$fname-1" ], [ 0, "$fname-2" ] ] ]
+               );
+               $job->run();
+
+               $this->assertSelect(
+                       'pagelinks',
+                       'pl_title',
+                       [ 'pl_from' => $page1->getId() ],
+                       [ [ 'Kittens' ] ]
+               );
+               $this->assertSelect(
+                       'categorylinks',
+                       'cl_to',
+                       [ 'cl_from' => $page1->getId() ],
+                       [ [ 'Goats' ] ]
+               );
+               $this->assertSelect(
+                       'pagelinks',
+                       'pl_title',
+                       [ 'pl_from' => $page2->getId() ],
+                       [ [ 'Dogs' ] ]
+               );
+               $this->assertSelect(
+                       'categorylinks',
+                       'cl_to',
+                       [ 'cl_from' => $page2->getId() ],
+                       [ [ 'Hamsters' ] ]
+               );
+       }
 }