De-duplicate HTMLCacheUpdate jobs with a page array of size 1
author Aaron Schulz <aschulz@wikimedia.org>
Thu, 14 Sep 2017 09:25:23 +0000 (11:25 +0200)
committer Aaron Schulz <aschulz@wikimedia.org>
Thu, 14 Sep 2017 09:34:24 +0000 (11:34 +0200)
BacklinkJobUtils consistently uses the "pages" field in leaf jobs, even
when there is only one page per leaf job. Previously, any job carrying
a "pages" field was excluded from de-duplication, so these single-page
leaf jobs were never marked as removable duplicates.

RefreshLinksJob already has this logic for de-duplication.
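
A minimal standalone sketch of the new eligibility check, for
illustration only: shouldRemoveDuplicates() is a hypothetical helper,
and the example parameter arrays are assumptions, not copied from
BacklinkJobUtils.

<?php
// Restates the condition added to the HTMLCacheUpdateJob constructor.
function shouldRemoveDuplicates( array $params ) {
	return !isset( $params['range'] ) &&
		!( isset( $params['pages'] ) && count( $params['pages'] ) != 1 );
}

// Base backlink purge job: de-duplicated, as before.
var_dump( shouldRemoveDuplicates( [ 'table' => 'templatelinks' ] ) ); // bool(true)

// Leaf job carrying exactly one page: now also de-duplicated.
var_dump( shouldRemoveDuplicates( [ 'pages' => [ 123 => [ 0, 'Some_page' ] ] ] ) ); // bool(true)

// Range jobs and multi-page leaf jobs rarely collide, so skip the overhead.
var_dump( shouldRemoveDuplicates( [ 'range' => [ 'start' => 1, 'end' => 100 ] ] ) ); // bool(false)
var_dump( shouldRemoveDuplicates( [ 'pages' => [ 1 => [ 0, 'A' ], 2 => [ 0, 'B' ] ] ] ) ); // bool(false)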

Change-Id: Ia189bbc9df44f2161cfed4192c23b2ac3cfa65ce

includes/jobqueue/jobs/HTMLCacheUpdateJob.php

index 4c16d7f..0aa33ca 100644
@@ -38,8 +38,15 @@ use MediaWiki\MediaWikiServices;
 class HTMLCacheUpdateJob extends Job {
        function __construct( Title $title, array $params ) {
                parent::__construct( 'htmlCacheUpdate', $title, $params );
-               // Base backlink purge jobs can be de-duplicated
-               $this->removeDuplicates = ( !isset( $params['range'] ) && !isset( $params['pages'] ) );
+               // Avoid the overhead of de-duplication when it would be pointless.
+               // Note that these jobs always set page_touched to the current time,
+               // so letting the older existing job "win" is still correct.
+               $this->removeDuplicates = (
+                       // Ranges will rarely line up
+                       !isset( $params['range'] ) &&
+                       // Multiple pages per job make matches unlikely
+                       !( isset( $params['pages'] ) && count( $params['pages'] ) != 1 )
+               );
        }
 
        /**
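
For context, a simplified, queue-agnostic model of what letting the
older existing job "win" means here. This only illustrates the
behaviour described in the constructor comment above; it is not
MediaWiki's JobQueue code, and the array-based job representation is
an assumption.

<?php
// Toy queue: an incoming job marked removeDuplicates is dropped when an
// identical job is already waiting. Keeping the older copy is safe for
// htmlCacheUpdate because whichever copy eventually runs sets page_touched
// to the time of execution, yielding an equally fresh timestamp.
function pushJob( array &$queue, array $job ) {
	if ( $job['removeDuplicates'] ) {
		foreach ( $queue as $queued ) {
			if ( $queued['signature'] === $job['signature'] ) {
				return; // duplicate found: let the older queued job win
			}
		}
	}
	$queue[] = $job;
}

$queue = [];
pushJob( $queue, [ 'signature' => 'htmlCacheUpdate:Main_Page', 'removeDuplicates' => true ] );
pushJob( $queue, [ 'signature' => 'htmlCacheUpdate:Main_Page', 'removeDuplicates' => true ] );
var_dump( count( $queue ) ); // int(1)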