Avoid parsing more in refreshLinksJobs
author Aaron Schulz <aschulz@wikimedia.org>
Thu, 28 Nov 2013 06:43:00 +0000 (22:43 -0800)
committer Aaron Schulz <aschulz@wikimedia.org>
Thu, 28 Nov 2013 06:43:00 +0000 (22:43 -0800)
* This reuses the parser cache in some cases when possible
* Clarified the return value of CacheTime::getCacheTime()
* A few documentation tweaks

Change-Id: I80b7c6404b3f8c48b53c3bba96115dbf94d80873

includes/job/Job.php
includes/job/jobs/RefreshLinksJob.php
includes/parser/CacheTime.php

index 3f44a91..e33baf5 100644 (file)
@@ -241,7 +241,9 @@ abstract class Job {
        /**
         * @see JobQueue::deduplicateRootJob()
         * @param string $key A key that identifies the task
-        * @return array
+        * @return array Map of:
+        *   - rootJobSignature : hash (e.g. SHA1) that identifies the task
+        *   - rootJobTimestamp : TS_MW timestamp of this instance of the task
         * @since 1.21
         */
        public static function newRootJobParams( $key ) {
index 0372d85..ea1d596 100644 (file)
@@ -119,20 +119,38 @@ class RefreshLinksJob extends Job {
                        wfGetLB()->waitFor( $this->params['masterPos'] );
                }
 
-               $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
-               if ( !$revision ) {
-                       $this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
-                       return false; // XXX: what if it was just deleted?
+               $parserOutput = false;
+               // If page_touched changed after this root job (with a good slave lag skew factor),
+               // then it is likely that any views of the pages already resulted in re-parses which
+               // are now in cache. This can be reused to avoid expensive parsing in some cases.
+               if ( isset( $this->params['rootJobTimestamp'] ) ) {
+                       $page = WikiPage::factory( $title );
+                       $skewedTimestamp = wfTimestamp( TS_UNIX, $this->params['rootJobTimestamp'] ) + 5;
+                       if ( $page->getTouched() > wfTimestamp( TS_MW, $skewedTimestamp ) ) {
+                               $parserOptions = $page->makeParserOptions( 'canonical' );
+                               $parserOutput = ParserCache::singleton()->getDirty( $page, $parserOptions );
+                               if ( $parserOutput->getCacheTime() <= $skewedTimestamp ) {
+                                       $parserOutput = false; // too stale
+                               }
+                       }
                }
+               // Fetch the current revision and parse it if necessary...
+               if ( $parserOutput == false ) {
+                       $revision = Revision::newFromTitle( $title, false, Revision::READ_NORMAL );
+                       if ( !$revision ) {
+                               $this->setLastError( "refreshLinks: Article not found {$title->getPrefixedDBkey()}" );
+                               return false; // XXX: what if it was just deleted?
+                       }
 
-               $content = $revision->getContent( Revision::RAW );
-               if ( !$content ) {
-                       // If there is no content, pretend the content is empty
-                       $content = $revision->getContentHandler()->makeEmptyContent();
-               }
+                       $content = $revision->getContent( Revision::RAW );
+                       if ( !$content ) {
+                               // If there is no content, pretend the content is empty
+                               $content = $revision->getContentHandler()->makeEmptyContent();
+                       }
 
-               // Revision ID must be passed to the parser output to get revision variables correct
-               $parserOutput = $content->getParserOutput( $title, $revision->getId(), null, false );
+                       // Revision ID must be passed to the parser output to get revision variables correct
+                       $parserOutput = $content->getParserOutput( $title, $revision->getId(), null, false );
+               }
 
                $updates = $content->getSecondaryDataUpdates( $title, null, false, $parserOutput );
                DataUpdate::runUpdates( $updates );
index a4203b0..7b8935a 100644 (file)
@@ -37,14 +37,17 @@ class CacheTime {
                $mCacheExpiry = null,         # Seconds after which the object should expire, use 0 for uncachable. Used in ParserCache.
                $mContainsOldMagic;           # Boolean variable indicating if the input contained variables like {{CURRENTDAY}}
 
-       function getCacheTime()              { return $this->mCacheTime; }
+       /**
+        * @return string TS_MW timestamp
+        */
+       function getCacheTime()              { return wfTimestamp( TS_MW, $this->mCacheTime ); }
 
        function containsOldMagic()          { return $this->mContainsOldMagic; }
        function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); }
 
        /**
         * setCacheTime() sets the timestamp expressing when the page has been rendered.
-        * This doesn not control expiry, see updateCacheExpiry() for that!
+        * This does not control expiry, see updateCacheExpiry() for that!
         * @param $t string
         * @return string
         */