Merge "Type hint against LinkTarget in WatchedItemStore"
[lhc/web/wiklou.git] / includes / deferred / DeferredUpdates.php
1 <?php
2 /**
3 * Interface and manager for deferred updates.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
24 use MediaWiki\Logger\LoggerFactory;
25 use Psr\Log\LoggerInterface;
26 use Wikimedia\Rdbms\IDatabase;
27 use MediaWiki\MediaWikiServices;
28 use Wikimedia\Rdbms\LBFactory;
29 use Wikimedia\Rdbms\ILBFactory;
30 use Wikimedia\Rdbms\LoadBalancer;
31 use Wikimedia\Rdbms\DBTransactionError;
32
33 /**
34 * Class for managing the deferred updates
35 *
36 * In web request mode, deferred updates can be run at the end of the request, either before or
37 * after the HTTP response has been sent. In either case, they run after the DB commit step. If
38 * an update runs after the response is sent, it will not block clients. If sent before, it will
39 * run synchronously. These two modes are defined via PRESEND and POSTSEND constants, the latter
40 * being the default for addUpdate() and addCallableUpdate().
41 *
42 * Updates that work through this system will be more likely to complete by the time the client
43 * makes their next request after this one than with the JobQueue system.
44 *
45 * In CLI mode, deferred updates will run:
46 * - a) During DeferredUpdates::addUpdate if no LBFactory DB handles have writes pending
47 * - b) On commit of an LBFactory DB handle if no other such handles have writes pending
48 * - c) During an LBFactory::waitForReplication call if no LBFactory DBs have writes pending
49 * - d) When the queue is large and an LBFactory DB handle commits (EnqueueableDataUpdate only)
50 * - e) At the completion of Maintenance::execute()
51 *
52 * @see Maintenance::setLBFactoryTriggers
53 *
54 * When updates are deferred, they go into one of two FIFO "top-queues" (one for pre-send and one
55 * for post-send). Updates enqueued *during* doUpdate() of a "top" update go into the "sub-queue"
56 * for that update. After that method finishes, the sub-queue is run until drained. This continues
57 * for each top-queue job until the entire top queue is drained. This happens for the pre-send
58 * top-queue, and later on, the post-send top-queue, in execute().
59 *
60 * @since 1.19
61 */
62 class DeferredUpdates {
/**
 * Stage selector meaning "all updates"; in web requests, use only after
 * flushing the output buffer
 */
const ALL = 0;
/** Stage for updates that should run before flushing the output buffer */
const PRESEND = 1;
/** Stage for updates that should run after flushing the output buffer */
const POSTSEND = 2;

/**
 * Number of pending updates at which tryOpportunisticExecute() starts pushing
 * EnqueueableDataUpdate items to the job queue while DB transactions are active
 */
const BIG_QUEUE_SIZE = 100;

/** @var DeferrableUpdate[] Updates to be deferred until before request end */
private static $preSendUpdates = [];
/** @var DeferrableUpdate[] Updates to be deferred until after request end */
private static $postSendUpdates = [];
/**
 * @var array|null Map of ('stage' => int, 'subqueue' => DeferrableUpdate[]) set by
 *  handleUpdateQueue() while it is running an update, or null if nothing is running
 */
private static $executeContext;
76
/**
 * Queue an update so that it gets run later on by doUpdates()
 *
 * If an update is currently being executed, the new update may instead be attached
 * to the sub-queue of that running update (see the inline comments below).
 *
 * In CLI mode ($wgCommandLineMode), tryOpportunisticExecute() is invoked right after
 * the update was placed in a top-queue.
 *
 * @param DeferrableUpdate $update Some object that implements doUpdate()
 * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27)
 */
public static function addUpdate( DeferrableUpdate $update, $stage = self::POSTSEND ) {
	global $wgCommandLineMode;

	$context = self::$executeContext;
	$isSubUpdate = $context
		&& $context['stage'] >= $stage
		&& !( $update instanceof MergeableUpdate );
	if ( $isSubUpdate ) {
		// The update was added while another one is running, so it is a child of that
		// parent update and gets run right after it. This also routes "pre-send" updates
		// added during the post-send stage into the active round, so that they are not
		// left behind. MergeableUpdate instances are excluded and always go to a top-queue.
		self::$executeContext['subqueue'][] = $update;

		return;
	}

	// Anything that is not explicitly pre-send lands in the post-send top-queue
	if ( $stage !== self::PRESEND ) {
		self::push( self::$postSendUpdates, $update );
	} else {
		self::push( self::$preSendUpdates, $update );
	}

	// In CLI mode, attempt to run the pending updates immediately. This helps scripts
	// that rarely (or never) touch the DB but still make updates to other stores.
	if ( $wgCommandLineMode ) {
		self::tryOpportunisticExecute( 'run' );
	}
}
113
/**
 * Queue a plain callback/closure as an update
 *
 * This is a convenience wrapper for the common case where defining a dedicated
 * DeferrableUpdate class is not worth it; the callable gets wrapped in a
 * MWCallableUpdate and handed to addUpdate().
 *
 * @see MWCallableUpdate::__construct()
 *
 * @param callable $callable
 * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27)
 * @param IDatabase|IDatabase[]|null $dbw Abort if this DB is rolled back [optional] (since 1.28)
 */
public static function addCallableUpdate(
	$callable, $stage = self::POSTSEND, $dbw = null
) {
	// NOTE(review): wfGetCaller() presumably inspects the call stack, so it is invoked
	// directly in this method body (not from a helper) to keep the reported origin stable.
	$origin = wfGetCaller();
	$update = new MWCallableUpdate( $callable, $origin, $dbw );

	self::addUpdate( $update, $stage );
}
129
/**
 * Run (or enqueue) the pending updates of the given stage and empty the queue(s)
 *
 * With $stage set to self::ALL, the PRESEND queue is processed first and the
 * POSTSEND queue afterwards; both are handled as POSTSEND-stage executions.
 *
 * @param string $mode Use "enqueue" to use the job queue when possible [Default: "run"]
 * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL) (since 1.27)
 */
public static function doUpdates( $mode = 'run', $stage = self::ALL ) {
	$runAll = ( $stage === self::ALL );
	// ALL has no execution stage of its own; it executes as POSTSEND
	$stageEffective = $runAll ? self::POSTSEND : $stage;

	$runPreSend = $runAll || $stage === self::PRESEND;
	// NOTE(review): this comparison is loose, unlike the strict checks around it;
	// kept as-is to preserve behavior -- confirm whether that is intentional.
	$runPostSend = $runAll || $stage == self::POSTSEND;

	// In ALL mode, repeat while PRESEND updates keep appearing along the way. Such
	// updates normally end up in a sub-queue, but MergeableUpdate items do not.
	do {
		if ( $runPreSend ) {
			self::handleUpdateQueue( self::$preSendUpdates, $mode, $stageEffective );
		}

		if ( $runPostSend ) {
			self::handleUpdateQueue( self::$postSendUpdates, $mode, $stageEffective );
		}
	} while ( $runAll && self::$preSendUpdates );
}
153
/**
 * Append an update to a queue, merging MergeableUpdate instances of the same class
 *
 * Regular updates are appended under the next numeric key. A MergeableUpdate is
 * stored under its class name, so that there is at most one instance per class.
 *
 * @param DeferrableUpdate[] &$queue Queue to modify
 * @param DeferrableUpdate $update
 */
private static function push( array &$queue, DeferrableUpdate $update ) {
	if ( !( $update instanceof MergeableUpdate ) ) {
		$queue[] = $update;

		return;
	}

	$key = get_class( $update ); // fully-qualified class
	/** @var MergeableUpdate|null $target */
	$target = $queue[$key] ?? null;
	if ( $target !== null ) {
		$target->merge( $update );
		// Drop the old slot so that the re-insertion below moves the update to the
		// end of the queue; this handles things like mergeable purge updates that
		// might depend on the prior updates in the queue running
		unset( $queue[$key] );
	} else {
		$target = $update;
	}
	$queue[$key] = $target;
}
176
/**
 * Immediately run or enqueue all updates of a queue until the queue stays empty
 *
 * The queue is processed in rounds: a snapshot is taken, the queue is emptied, and
 * the snapshot is worked through (DataUpdate instances first, everything else after).
 * Each executed update is followed by the updates of its own sub-queue. Rounds
 * repeat for as long as new entries show up in the queue.
 *
 * @param DeferrableUpdate[] &$queue List of DeferrableUpdate objects
 * @param string $mode Either "run" or "enqueue" (to use the job queue when possible)
 * @param int $stage Class constant (PRESEND, POSTSEND) (since 1.28)
 * @throws ErrorPageError The first such error returned by run(), but only if $stage
 *  is PRESEND and no headers were sent yet
 * @throws Exception Anything that escapes run() or jobify()
 */
protected static function handleUpdateQueue( array &$queue, $mode, $stage ) {
	$services = MediaWikiServices::getInstance();
	$stats = $services->getStatsdDataFactory();
	$lbf = $services->getDBLoadBalancerFactory();
	$logger = LoggerFactory::getInstance( 'DeferredUpdates' );
	if ( $services->getMainConfig()->get( 'CommandLineMode' ) ) {
		$httpMethod = 'cli';
	} else {
		$httpMethod = strtolower( RequestContext::getMain()->getRequest()->getMethod() );
	}

	/** @var ErrorPageError|null $firstGuiError */
	$firstGuiError = null;

	// Each iteration works on a snapshot of the queue; stop once a round added nothing
	for ( $batch = $queue; $batch; $batch = $queue ) {
		$queue = []; // clear the queue

		// Partition the snapshot so that all DataUpdate instances run first
		$dataUpdates = [];
		$otherUpdates = [];
		foreach ( $batch as $item ) {
			if ( $item instanceof DataUpdate ) {
				$dataUpdates[] = $item;
			} else {
				$otherUpdates[] = $item;
			}
		}

		foreach ( array_merge( $dataUpdates, $otherUpdates ) as $task ) {
			// Hand the task over to the job queue system instead if applicable
			if ( $mode === 'enqueue' && $task instanceof EnqueueableDataUpdate ) {
				self::jobify( $task, $lbf, $logger, $stats, $httpMethod );
				continue;
			}

			// Otherwise, execute the task followed by any subtasks that it spawns
			self::$executeContext = [ 'stage' => $stage, 'subqueue' => [] ];
			try {
				$error = self::run( $task, $lbf, $logger, $stats, $httpMethod );
				if ( !$firstGuiError && $error instanceof ErrorPageError ) {
					$firstGuiError = $error;
				}
				// Drain the sub-queue in FIFO order; children may append more entries
				while ( self::$executeContext['subqueue'] ) {
					$child = array_shift( self::$executeContext['subqueue'] );

					$error = self::run( $child, $lbf, $logger, $stats, $httpMethod );
					if ( !$firstGuiError && $error instanceof ErrorPageError ) {
						$firstGuiError = $error;
					}
				}
			} finally {
				// Always reset the context, even while the stack is unwinding.
				// Losing updates while rewinding the stack is acceptable,
				// losing updates that are added later is not.
				self::$executeContext = null;
			}
		}
	}

	// Surface the first GUI error, but only in the HTTP pre-send context. Note that
	// callers should check permissions *before* enqueueing updates. If the main
	// transaction round actions succeed but some deferred updates fail due to
	// permissions errors then there is a risk that some secondary data was not
	// properly updated.
	if ( $firstGuiError && $stage === self::PRESEND && !headers_sent() ) {
		throw $firstGuiError;
	}
}
256
/**
 * Execute a single update, logging (rather than propagating) any failure
 *
 * On failure, the error is logged, the master changes of the LBFactory are rolled
 * back, and the caught error is returned. When MW_PHPUNIT_TEST is defined, the
 * error is rethrown instead of being returned.
 *
 * @param DeferrableUpdate $update
 * @param LBFactory $lbFactory
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param string $httpMethod
 * @return Exception|Throwable|null The caught error, or null if the update succeeded
 */
private static function run(
	DeferrableUpdate $update,
	LBFactory $lbFactory,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	$httpMethod
) {
	// Label used for both the stats key and the log entry: the class name,
	// plus the origin for callback-style updates
	$type = get_class( $update );
	if ( $update instanceof DeferrableCallback ) {
		$type .= "_{$update->getOrigin()}";
	}
	$stats->increment( "deferred_updates.$httpMethod.$type" );

	$failure = null;
	try {
		self::attemptUpdate( $update, $lbFactory );
	} catch ( Exception $failure ) {
		// handled after the try block
	} catch ( Throwable $failure ) {
		// handled after the try block
	}

	if ( $failure === null ) {
		return null;
	}

	$logger->error(
		"Deferred update {type} failed: {message}",
		[
			'type' => $type,
			'message' => $failure->getMessage(),
			'trace' => $failure->getTraceAsString()
		]
	);
	$lbFactory->rollbackMasterChanges( __METHOD__ );
	// VW-style hack to work around T190178, so we can make sure
	// PageMetaDataUpdater doesn't throw exceptions.
	if ( defined( 'MW_PHPUNIT_TEST' ) ) {
		throw $failure;
	}

	return $failure;
}
304
/**
 * Convert an update into a job and push it to the job queue, logging any failure
 *
 * On failure, the error is logged and the master changes of the LBFactory are
 * rolled back; the error itself is not propagated.
 *
 * @param EnqueueableDataUpdate $update
 * @param LBFactory $lbFactory
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param string $httpMethod
 */
private static function jobify(
	EnqueueableDataUpdate $update,
	LBFactory $lbFactory,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	$httpMethod
) {
	$type = get_class( $update );
	$stats->increment( "deferred_updates.$httpMethod." . $type );

	$failure = null;
	try {
		$spec = $update->getAsJobSpecification();
		// Prefer the 'domain' entry of the specification and fall back to 'wiki'
		$group = JobQueueGroup::singleton( $spec['domain'] ?? $spec['wiki'] );
		$group->push( $spec['job'] );
	} catch ( Exception $failure ) {
		// handled after the try block
	} catch ( Throwable $failure ) {
		// handled after the try block
	}

	if ( $failure === null ) {
		return;
	}

	$logger->error(
		"Job insertion of deferred update {type} failed: {message}",
		[
			'type' => $type,
			'message' => $failure->getMessage(),
			'trace' => $failure->getTraceAsString()
		]
	);
	$lbFactory->rollbackMasterChanges( __METHOD__ );
}
343
/**
 * Run an update inside the kind of transaction round that it expects
 *
 * An update that implements TransactionRoundAwareUpdate and reports
 * TRX_ROUND_ABSENT is run without an explicit transaction round; every other
 * update gets an explicit round owned by "<class>::doUpdate". In both cases,
 * master changes are committed once doUpdate() has returned.
 *
 * DeferredUpdate classes that wrap the execution of bundles of other DeferredUpdate
 * instances can use this method to run the updates. Any such wrapper class should
 * always use TRX_ROUND_ABSENT itself.
 *
 * @param DeferrableUpdate $update
 * @param ILBFactory $lbFactory
 * @throws DBTransactionError If no empty transaction ticket could be obtained or
 *  a transaction round is already active
 * @since 1.34
 */
public static function attemptUpdate( DeferrableUpdate $update, ILBFactory $lbFactory ) {
	$ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
	if ( !$ticket || $lbFactory->hasTransactionRound() ) {
		throw new DBTransactionError( null, "A database transaction round is pending." );
	}

	if ( $update instanceof DataUpdate ) {
		$update->setTransactionTicket( $ticket );
	}

	$owner = get_class( $update ) . '::doUpdate';
	$wantsNoRound = $update instanceof TransactionRoundAwareUpdate
		&& $update->getTransactionRoundRequirement() == $update::TRX_ROUND_ABSENT;

	// Flush any pending changes left over from an implicit transaction round
	if ( $wantsNoRound ) {
		$lbFactory->commitMasterChanges( $owner ); // new implicit round
	} else {
		$lbFactory->beginMasterChanges( $owner ); // new explicit round
	}

	// Any stale master view snapshots were flushed above, so the update can run now
	$update->doUpdate();

	// Commit any pending changes from the explicit or implicit transaction round
	$lbFactory->commitMasterChanges( $owner );
}
381
/**
 * Run all deferred updates immediately, provided that no DB writes are active
 *
 * Nothing happens if an update is already being executed. If DB transactions are
 * active and at least BIG_QUEUE_SIZE updates are pending, the EnqueueableDataUpdate
 * items of both queues are pushed to the job queue instead of being run.
 *
 * @param string $mode Use "enqueue" to use the job queue when possible
 * @return bool Whether updates were allowed to run
 * @since 1.28
 */
public static function tryOpportunisticExecute( $mode = 'run' ) {
	// An update is currently running, so the queues are being worked on already
	if ( self::$executeContext ) {
		return false;
	}

	if ( self::areDatabaseTransactionsActive() ) {
		if ( self::pendingUpdatesCount() >= self::BIG_QUEUE_SIZE ) {
			// The updates cannot be run within the outer transaction context, so at
			// least move those that support it over to the job queue
			self::$preSendUpdates = self::enqueueUpdates( self::$preSendUpdates );
			self::$postSendUpdates = self::enqueueUpdates( self::$postSendUpdates );
		}

		return self::pendingUpdatesCount() === 0;
	}

	// No active transaction round or busy master connection was found; run everything
	self::doUpdates( $mode );

	return true;
}
414
/**
 * Enqueue a job for each EnqueueableDataUpdate item and return the other items
 *
 * String keys of the remaining items are preserved. push() stores each
 * MergeableUpdate under its class name and looks it up by that key, so dropping
 * the key would stop later updates of the same class from being merged into the
 * instance that is already queued. Numerically keyed items are re-appended in order.
 *
 * @param DeferrableUpdate[] $updates A list of deferred update instances
 * @return DeferrableUpdate[] Remaining updates that do not support being queued
 */
private static function enqueueUpdates( array $updates ) {
	$remaining = [];

	foreach ( $updates as $key => $update ) {
		if ( $update instanceof EnqueueableDataUpdate ) {
			$spec = $update->getAsJobSpecification();
			// Prefer the 'domain' entry of the specification and fall back to 'wiki'
			$domain = $spec['domain'] ?? $spec['wiki'];
			JobQueueGroup::singleton( $domain )->push( $spec['job'] );
			continue;
		}

		if ( is_string( $key ) ) {
			// Class-name slot created by push() for a MergeableUpdate; keep it intact
			$remaining[$key] = $update;
		} else {
			$remaining[] = $update;
		}
	}

	return $remaining;
}
436
/**
 * Count the updates waiting in the pre-send and post-send top-queues
 *
 * @return int Number of enqueued updates
 * @since 1.28
 */
public static function pendingUpdatesCount() {
	$total = 0;
	foreach ( [ self::$preSendUpdates, self::$postSendUpdates ] as $pending ) {
		$total += count( $pending );
	}

	return $total;
}
444
/**
 * Get the updates that are waiting in the top-queue(s) of the given stage
 *
 * The result is a plain list (pre-send entries first). The queues are re-indexed
 * before being combined because push() keys each MergeableUpdate by its class
 * name; array_merge() would otherwise let a post-send entry silently replace a
 * pre-send entry of the same class.
 *
 * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL)
 * @return DeferrableUpdate[]
 * @since 1.29
 */
public static function getPendingUpdates( $stage = self::ALL ) {
	$updates = [];
	if ( $stage === self::ALL || $stage === self::PRESEND ) {
		$updates = array_merge( $updates, array_values( self::$preSendUpdates ) );
	}
	if ( $stage === self::ALL || $stage === self::POSTSEND ) {
		$updates = array_merge( $updates, array_values( self::$postSendUpdates ) );
	}

	return $updates;
}
460
/**
 * Discard every pending update of both top-queues without running any of them
 *
 * Generally, you don't want or need to call this. Unit tests need it though.
 */
public static function clearPendingUpdates() {
	self::$preSendUpdates = self::$postSendUpdates = [];
}
469
/**
 * Check whether running updates right now would collide with ongoing DB activity
 *
 * @return bool If a transaction round is active or connection is not ready for commit()
 */
private static function areDatabaseTransactionsActive() {
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	if ( $lbFactory->hasTransactionRound() ) {
		return true;
	}
	if ( !$lbFactory->isReadyForRoundOperations() ) {
		return true;
	}

	// Inspect every open master connection of every load balancer and remember
	// whether any of them has pending writes/callbacks or an explicit transaction
	$busy = false;
	$inspectConn = function ( IDatabase $conn ) use ( &$busy ) {
		if ( $conn->writesOrCallbacksPending() || $conn->explicitTrxActive() ) {
			$busy = true;
		}
	};
	$lbFactory->forEachLB( function ( LoadBalancer $lb ) use ( $inspectConn ) {
		$lb->forEachOpenMasterConnection( $inspectConn );
	} );

	return $busy;
}
490 }