Race condition fixes for refreshLinks jobs
authorAaron Schulz <aschulz@wikimedia.org>
Sat, 14 Nov 2015 01:44:49 +0000 (17:44 -0800)
committerAaron Schulz <aschulz@wikimedia.org>
Mon, 16 Nov 2015 21:21:05 +0000 (13:21 -0800)
* Use READ_LATEST when needed to distinguish slave lag
  affecting new pages from page deletions that happened
  after the job was pushed. Run-of-the-mill mass backlink
  updates still typically use "masterPos" and READ_NORMAL.
* Search for the expected revision (via READ_LATEST)
  for jobs triggered by direct page edits. This avoids lag
  problems for edits to existing pages.
* Added a CAS-style check to avoid letting jobs clobber
  the work of other jobs that saw a newer page version.
* Rename and expose WikiPage::lock() method.
* Split out position wait logic to a separate protected
  method and made sure it only got called once instead of
  per-title (which didn't do anything). Note that there is
  normally 1 title per job in any case.
* Add FIXME about a related race-conditions.

Bug: T117332
Change-Id: Ib3fa0fc77040646b9a4e5e4b3dc9ae3c51ac29b3

includes/jobqueue/jobs/RefreshLinksJob.php
includes/page/WikiPage.php

index c42a4c6..fa3278d 100644 (file)
@@ -115,36 +115,50 @@ class RefreshLinksJob extends Job {
                        JobQueueGroup::singleton()->push( $jobs );
                // Job to update link tables for a set of titles
                } elseif ( isset( $this->params['pages'] ) ) {
+                       $this->waitForMasterPosition();
                        foreach ( $this->params['pages'] as $pageId => $nsAndKey ) {
                                list( $ns, $dbKey ) = $nsAndKey;
                                $this->runForTitle( Title::makeTitleSafe( $ns, $dbKey ) );
                        }
                // Job to update link tables for a given title
                } else {
+                       $this->waitForMasterPosition();
                        $this->runForTitle( $this->title );
                }
 
                return true;
        }
 
+       protected function waitForMasterPosition() {
+               if ( !empty( $this->params['masterPos'] ) && wfGetLB()->getServerCount() > 1 ) {
+                       // Wait for the current/next slave DB handle to catch up to the master.
+                       // This way, we get the correct page_latest for templates or files that just
+                       // changed milliseconds ago, having triggered this job to begin with.
+                       wfGetLB()->waitFor( $this->params['masterPos'] );
+               }
+       }
+
        /**
         * @param Title $title
         * @return bool
         */
        protected function runForTitle( Title $title ) {
-               // Wait for the DB of the current/next slave DB handle to catch up to the master.
-               // This way, we get the correct page_latest for templates or files that just changed
-               // milliseconds ago, having triggered this job to begin with.
-               if ( isset( $this->params['masterPos'] ) && $this->params['masterPos'] !== false ) {
-                       wfGetLB()->waitFor( $this->params['masterPos'] );
+               $page = WikiPage::factory( $title );
+               if ( !empty( $this->params['triggeringRevisionId'] ) ) {
+                       // Fetch the specified revision; lockAndGetLatest() below detects if the page
+                       // was edited since and aborts in order to avoid corrupting the link tables
+                       $revision = Revision::newFromId(
+                               $this->params['triggeringRevisionId'],
+                               Revision::READ_LATEST
+                       );
+               } else {
+                       // Fetch current revision; READ_LATEST reduces lockAndGetLatest() check failures
+                       $revision = Revision::newFromTitle( $title, false, Revision::READ_LATEST );
                }
 
-               // Fetch the current page and revision...
-               $page = WikiPage::factory( $title );
-               $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
                if ( !$revision ) {
-                       $this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
-                       return false; // XXX: what if it was just deleted?
+                       $this->setLastError( "Revision not found for {$title->getPrefixedDBkey()}" );
+                       return false; // just deleted?
                }
 
                $content = $revision->getContent( Revision::RAW );
@@ -209,8 +223,16 @@ class RefreshLinksJob extends Job {
                }
 
                $updates = $content->getSecondaryDataUpdates(
-                       $title, null, !empty( $this->params['useRecursiveLinksUpdate'] ), $parserOutput );
+                       $title,
+                       null,
+                       !empty( $this->params['useRecursiveLinksUpdate'] ),
+                       $parserOutput
+               );
+
                foreach ( $updates as $key => $update ) {
+                       // FIXME: move category change RC stuff to a separate update.
+                       // RC entry addition aborts if edits where since made, which is not necessary.
+                       // It's also an SoC violation for links update code to care about RC.
                        if ( $update instanceof LinksUpdate ) {
                                if ( !empty( $this->params['triggeredRecursive'] ) ) {
                                        $update->setTriggeredRecursive();
@@ -226,18 +248,21 @@ class RefreshLinksJob extends Job {
                                        $update->setTriggeringUser( $user );
                                }
                                if ( !empty( $this->params['triggeringRevisionId'] ) ) {
-                                       $revision = Revision::newFromId( $this->params['triggeringRevisionId'] );
-                                       if ( $revision === null ) {
-                                               $revision = Revision::newFromId(
-                                                       $this->params['triggeringRevisionId'],
-                                                       Revision::READ_LATEST
-                                               );
-                                       }
                                        $update->setRevision( $revision );
                                }
                        }
                }
 
+               $latestNow = $page->lockAndGetLatest();
+               if ( !$latestNow || $revision->getId() != $latestNow ) {
+                       // Do not clobber over newer updates with older ones. If all jobs where FIFO and
+                       // serialized, it would be OK to update links based on older revisions since it
+                       // would eventually get to the latest. Since that is not the case (by design),
+                       // only update the link tables to a state matching the current revision's output.
+                       $this->setLastError( "page_latest changed from {$revision->getId()} to $latestNow" );
+                       return false;
+               }
+
                DataUpdate::runUpdates( $updates );
 
                InfoAction::invalidateCache( $title );
index f7a4161..d7adaf0 100644 (file)
@@ -1815,7 +1815,7 @@ class WikiPage implements Page, IDBAccessObject {
                                // Get the latest page_latest value while locking it.
                                // Do a CAS style check to see if it's the same as when this method
                                // started. If it changed then bail out before touching the DB.
-                               $latestNow = $this->lock();
+                               $latestNow = $this->lockAndGetLatest();
                                if ( $latestNow != $oldid ) {
                                        $dbw->commit( __METHOD__ );
                                        // Page updated or deleted in the mean time
@@ -2794,7 +2794,7 @@ class WikiPage implements Page, IDBAccessObject {
                // WikiPage::READ_LOCKING as that will carry over the FOR UPDATE to
                // the revisions queries (which also JOIN on user). Only lock the page
                // row and CAS check on page_latest to see if the trx snapshot matches.
-               $lockedLatest = $this->lock();
+               $lockedLatest = $this->lockAndGetLatest();
                if ( $id == 0 || $this->getLatest() != $lockedLatest ) {
                        $dbw->endAtomic( __METHOD__ );
                        // Page not there or trx snapshot is stale
@@ -2916,8 +2916,9 @@ class WikiPage implements Page, IDBAccessObject {
         * Lock the page row for this title+id and return page_latest (or 0)
         *
         * @return integer Returns 0 if no row was found with this title+id
+        * @since 1.27
         */
-       protected function lock() {
+       public function lockAndGetLatest() {
                return (int)wfGetDB( DB_MASTER )->selectField(
                        'page',
                        'page_latest',