* @ingroup JobQueue
*/
class RefreshLinksJob extends Job {
+ /** @var float Cache parser output when it takes this long to render */
const PARSE_THRESHOLD_SEC = 1.0;
-
+ /** @var int Lag safety margin when comparing root job times to last-refresh times */
const CLOCK_FUDGE = 10;
function __construct( Title $title, array $params ) {
parent::__construct( 'refreshLinks', $title, $params );
- // Base backlink update jobs and per-title update jobs can be de-duplicated.
- // If template A changes twice before any jobs run, a clean queue will have:
- // (A base, A base)
- // The second job is ignored by the queue on insertion.
- // Suppose, many pages use template A, and that template itself uses template B.
- // An edit to both will first create two base jobs. A clean FIFO queue will have:
- // (A base, B base)
- // When these jobs run, the queue will have per-title and remnant partition jobs:
- // (titleX,titleY,titleZ,...,A remnant,titleM,titleN,titleO,...,B remnant)
- // Some these jobs will be the same, and will automatically be ignored by
- // the queue upon insertion. Some title jobs will run before the duplicate is
- // inserted, so the work will still be done twice in those cases. More titles
- // can be de-duplicated as the remnant jobs continue to be broken down. This
- // works best when $wgUpdateRowsPerJob, and either the pages have few backlinks
- // and/or the backlink sets for pages A and B are almost identical.
- $this->removeDuplicates = !isset( $params['range'] )
- && ( !isset( $params['pages'] ) || count( $params['pages'] ) == 1 );
+ // Avoid the overhead of de-duplication when it would be pointless
+ $this->removeDuplicates = (
+ // Master positions won't match
+ !isset( $params['masterPos'] ) &&
+ // Ranges rarely will line up
+ !isset( $params['range'] ) &&
+ // Multiple pages per job make matches unlikely
+ !( isset( $params['pages'] ) && count( $params['pages'] ) != 1 )
+ );
}
/**
JobQueueGroup::singleton()->push( $jobs );
// Job to update link tables for a set of titles
} elseif ( isset( $this->params['pages'] ) ) {
+ $this->waitForMasterPosition();
foreach ( $this->params['pages'] as $pageId => $nsAndKey ) {
list( $ns, $dbKey ) = $nsAndKey;
$this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
}
// Job to update link tables for a given title
} else {
+ $this->waitForMasterPosition();
$this->runForTitle( $this->title );
}
return true;
}
+ protected function waitForMasterPosition() {
+ if ( !empty( $this->params['masterPos'] ) && wfGetLB()->getServerCount() > 1 ) {
+ // Wait for the current/next slave DB handle to catch up to the master.
+ // This way, we get the correct page_latest for templates or files that just
+ // changed milliseconds ago, having triggered this job to begin with.
+ wfGetLB()->waitFor( $this->params['masterPos'] );
+ }
+ }
+
/**
* @param Title $title
* @return bool
*/
protected function runForTitle( Title $title ) {
- // Wait for the DB of the current/next slave DB handle to catch up to the master.
- // This way, we get the correct page_latest for templates or files that just changed
- // milliseconds ago, having triggered this job to begin with.
- if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
- wfGetLB()->waitFor( $this->params['masterPos'] );
+ $page = WikiPage::factory( $title );
+ if ( !empty( $this->params['triggeringRevisionId'] ) ) {
+ // Fetch the specified revision; lockAndGetLatest() below detects if the page
+ // was edited since and aborts in order to avoid corrupting the link tables
+ $revision = Revision::newFromId(
+ $this->params['triggeringRevisionId'],
+ Revision::READ_LATEST
+ );
+ } else {
+ // Fetch current revision; READ_LATEST reduces lockAndGetLatest() check failures
+ $revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
}
- // Clear out title cache data from prior job transaction snapshots
- $linkCache = LinkCache::singleton();
- $linkCache->clear();
-
- // Fetch the current page and revision...
- $page = WikiPage::factory( $title );
- $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
if ( !$revision ) {
- $this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
- return false; // XXX: what if it was just deleted?
+ $this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
+ return false; // just deleted?
}
$content = $revision->getContent( Revision::RAW );
}
$updates = $content->getSecondaryDataUpdates(
- $title, null, !empty( $this->params['useRecursiveLinksUpdate'] ), $parserOutput );
+ $title,
+ null,
+ !empty( $this->params['useRecursiveLinksUpdate'] ),
+ $parserOutput
+ );
+
foreach ( $updates as $key => $update ) {
+ // FIXME: This code probably shouldn't be here?
+ // Needed by things like Echo notifications which need
+ // to know which user caused the links update
if ( $update instanceof LinksUpdate ) {
- if ( !empty( $this->params['triggeredRecursive'] ) ) {
- $update->setTriggeredRecursive();
- }
if ( !empty( $this->params['triggeringUser'] ) ) {
$userInfo = $this->params['triggeringUser'];
if ( $userInfo['userId'] ) {
}
$update->setTriggeringUser( $user );
}
- if ( !empty( $this->params['triggeringRevisionId'] ) ) {
- $revision = Revision::newFromId( $this->params['triggeringRevisionId'] );
- if ( $revision === null ) {
- $revision = Revision::newFromId(
- $this->params['triggeringRevisionId'],
- Revision::READ_LATEST
- );
- }
- $update->setRevision( $revision );
- }
}
}
+ $latestNow = $page->lockAndGetLatest();
+ if ( !$latestNow || $revision->getId() != $latestNow ) {
+ // Do not clobber over newer updates with older ones. If all jobs were FIFO and
+ // serialized, it would be OK to update links based on older revisions since it
+ // would eventually get to the latest. Since that is not the case (by design),
+ // only update the link tables to a state matching the current revision's output.
+ $this->setLastError( "page_latest changed from {$revision->getId()} to $latestNow" );
+ return false;
+ }
+
DataUpdate::runUpdates( $updates );
InfoAction::invalidateCache( $title );
public function getDeduplicationInfo() {
$info = parent::getDeduplicationInfo();
if ( is_array( $info['params'] ) ) {
- // Don't let highly unique "masterPos" values ruin duplicate detection
- unset( $info['params']['masterPos'] );
// For per-pages jobs, the job title is that of the template that changed
// (or similar), so remove that since it ruins duplicate detection
if ( isset( $info['pages'] ) ) {