Make PurgeJobUtils avoid creating DB replication lag
author Aaron Schulz <aschulz@wikimedia.org>
Sun, 11 Sep 2016 23:14:44 +0000 (16:14 -0700)
committer Aaron Schulz <aschulz@wikimedia.org>
Sun, 11 Sep 2016 23:59:12 +0000 (16:59 -0700)
Large affected-row counts were being reported in the DBPerformance logs.

Change-Id: Ia5504aa4fbd27473771c65688f0b9e78e3a5caae

includes/jobqueue/utils/PurgeJobUtils.php

index 329bc23..5eafcb3 100644 (file)
@@ -20,6 +20,8 @@
  *
  * @file
  */
+use MediaWiki\MediaWikiServices;
+
 class PurgeJobUtils {
        /**
         * Invalidate the cache of a list of pages from a single namespace.
@@ -34,7 +36,9 @@ class PurgeJobUtils {
                        return;
                }
 
-               $dbw->onTransactionPreCommitOrIdle( function() use ( $dbw, $namespace, $dbkeys ) {
+               $dbw->onTransactionIdle( function() use ( $dbw, $namespace, $dbkeys ) {
+                       $services = MediaWikiServices::getInstance();
+                       $lbFactory = $services->getDBLoadBalancerFactory();
                        // Determine which pages need to be updated.
                        // This is necessary to prevent the job queue from smashing the DB with
                        // large numbers of concurrent invalidations of the same page.
@@ -50,22 +54,24 @@ class PurgeJobUtils {
                                __METHOD__
                        );
 
-                       if ( $ids === [] ) {
+                       if ( !$ids ) {
                                return;
                        }
 
-                       // Do the update.
-                       // We still need the page_touched condition, in case the row has changed since
-                       // the non-locking select above.
-                       $dbw->update(
-                               'page',
-                               [ 'page_touched' => $now ],
-                               [
-                                       'page_id' => $ids,
-                                       'page_touched < ' . $dbw->addQuotes( $now )
-                               ],
-                               __METHOD__
-                       );
+                       $batchSize = $services->getMainConfig()->get( 'UpdateRowsPerQuery' );
+                       $ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
+                       foreach ( array_chunk( $ids, $batchSize ) as $idBatch ) {
+                               $dbw->update(
+                                       'page',
+                                       [ 'page_touched' => $now ],
+                                       [
+                                               'page_id' => $idBatch,
+                                               'page_touched < ' . $dbw->addQuotes( $now ) // handle races
+                                       ],
+                                       __METHOD__
+                               );
+                               $lbFactory->commitAndWaitForReplication( __METHOD__, $ticket );
+                       }
                } );
        }
 }