Merge "Title: Title::getSubpage should not lose the interwiki prefix"
[lhc/web/wiklou.git] / includes / deferred / DeferredUpdates.php
1 <?php
2 /**
3 * Interface and manager for deferred updates.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
24 use MediaWiki\Logger\LoggerFactory;
25 use Psr\Log\LoggerInterface;
26 use Wikimedia\Rdbms\IDatabase;
27 use MediaWiki\MediaWikiServices;
28 use Wikimedia\Rdbms\LBFactory;
29 use Wikimedia\Rdbms\ILBFactory;
30 use Wikimedia\Rdbms\LoadBalancer;
31
/**
 * Class for managing the deferred updates
 *
 * In web request mode, deferred updates can be run at the end of the request, either before or
 * after the HTTP response has been sent. In either case, they run after the DB commit step. If
 * an update runs after the response is sent, it will not block clients. If sent before, it will
 * run synchronously. These two modes are defined via PRESEND and POSTSEND constants, the latter
 * being the default for addUpdate() and addCallableUpdate().
 *
 * Updates that work through this system will be more likely to complete by the time the client
 * makes their next request after this one than with the JobQueue system.
 *
 * In CLI mode, deferred updates will run:
 *   - a) During DeferredUpdates::addUpdate if no LBFactory DB handles have writes pending
 *   - b) On commit of an LBFactory DB handle if no other such handles have writes pending
 *   - c) During an LBFactory::waitForReplication call if no LBFactory DBs have writes pending
 *   - d) When the queue is large and an LBFactory DB handle commits (EnqueueableDataUpdate only)
 *   - e) At the completion of Maintenance::execute()
 *
 * @see Maintenance::setLBFactoryTriggers
 *
 * When updates are deferred, they go into one of two FIFO "top-queues" (one for pre-send and one
 * for post-send). Updates enqueued *during* doUpdate() of a "top" update go into the "sub-queue"
 * for that update. After that method finishes, the sub-queue is run until drained. This continues
 * for each top-queue job until the entire top queue is drained. This happens for the pre-send
 * top-queue, and later on, the post-send top-queue, in execute().
 *
 * @since 1.19
 */
class DeferredUpdates {
	/**
	 * @var DeferrableUpdate[] Updates to be deferred until before request end.
	 *  MergeableUpdate entries are keyed by class name (see push()); others use integer keys.
	 */
	private static $preSendUpdates = [];
	/**
	 * @var DeferrableUpdate[] Updates to be deferred until after request end.
	 *  MergeableUpdate entries are keyed by class name (see push()); others use integer keys.
	 */
	private static $postSendUpdates = [];

	const ALL = 0; // all updates; in web requests, use only after flushing the output buffer
	const PRESEND = 1; // for updates that should run before flushing output buffer
	const POSTSEND = 2; // for updates that should run after flushing output buffer

	/** Pending-update count at which tryOpportunisticExecute() starts pushing updates as jobs */
	const BIG_QUEUE_SIZE = 100;

	/**
	 * @var array|null Information about the current execute() call or null if not running.
	 *  When set, it has the keys 'stage' (int) and 'subqueue' (DeferrableUpdate[]).
	 */
	private static $executeContext;

	/**
	 * Add an update to the deferred list to be run later by execute()
	 *
	 * In CLI mode, callback magic will also be used to run updates when safe
	 *
	 * @param DeferrableUpdate $update Some object that implements doUpdate()
	 * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27)
	 */
	public static function addUpdate( DeferrableUpdate $update, $stage = self::POSTSEND ) {
		global $wgCommandLineMode;

		if (
			self::$executeContext &&
			self::$executeContext['stage'] >= $stage &&
			!( $update instanceof MergeableUpdate )
		) {
			// This is a sub-DeferredUpdate; run it right after its parent update.
			// Also, while post-send updates are running, push any "pre-send" jobs to the
			// active post-send queue to make sure they get run this round (or at all).
			self::$executeContext['subqueue'][] = $update;

			return;
		}

		if ( $stage === self::PRESEND ) {
			self::push( self::$preSendUpdates, $update );
		} else {
			self::push( self::$postSendUpdates, $update );
		}

		// Try to run the updates now if in CLI mode and no transaction is active.
		// This covers scripts that don't/barely use the DB but make updates to other stores.
		if ( $wgCommandLineMode ) {
			self::tryOpportunisticExecute( 'run' );
		}
	}

	/**
	 * Add a callable update. In a lot of cases, we just need a callback/closure,
	 * defining a new DeferrableUpdate object is not necessary
	 *
	 * @see MWCallableUpdate::__construct()
	 *
	 * @param callable $callable
	 * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27)
	 * @param IDatabase|IDatabase[]|null $dbw Abort if this DB is rolled back [optional] (since 1.28)
	 */
	public static function addCallableUpdate(
		$callable, $stage = self::POSTSEND, $dbw = null
	) {
		self::addUpdate( new MWCallableUpdate( $callable, wfGetCaller(), $dbw ), $stage );
	}

	/**
	 * Do any deferred updates and clear the list
	 *
	 * If $stage is self::ALL then the queue of PRESEND updates will be resolved,
	 * followed by the queue of POSTSEND updates
	 *
	 * @param string $mode Use "enqueue" to use the job queue when possible [Default: "run"]
	 * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL) (since 1.27)
	 */
	public static function doUpdates( $mode = 'run', $stage = self::ALL ) {
		// ALL is executed with POSTSEND semantics; see the sub-queue check in addUpdate()
		$stageEffective = ( $stage === self::ALL ) ? self::POSTSEND : $stage;
		// For ALL mode, make sure that any PRESEND updates added along the way get run.
		// Normally, these use the subqueue, but that isn't true for MergeableUpdate items.
		do {
			if ( $stage === self::ALL || $stage === self::PRESEND ) {
				self::handleUpdateQueue( self::$preSendUpdates, $mode, $stageEffective );
			}

			// NOTE(review): the loose "==" is kept as in the original so that callers
			// passing a numeric-string stage keep working; tighten once callers are audited.
			if ( $stage === self::ALL || $stage == self::POSTSEND ) {
				self::handleUpdateQueue( self::$postSendUpdates, $mode, $stageEffective );
			}
		} while ( $stage === self::ALL && self::$preSendUpdates );
	}

	/**
	 * Append an update to a queue, merging it into an already queued update of the
	 * same class when it is a MergeableUpdate
	 *
	 * @param DeferrableUpdate[] &$queue Queue to modify in place
	 * @param DeferrableUpdate $update
	 */
	private static function push( array &$queue, DeferrableUpdate $update ) {
		if ( $update instanceof MergeableUpdate ) {
			$class = get_class( $update ); // fully-qualified class
			if ( isset( $queue[$class] ) ) {
				/** @var MergeableUpdate $existingUpdate */
				$existingUpdate = $queue[$class];
				$existingUpdate->merge( $update );
				// Move the update to the end to handle things like mergeable purge
				// updates that might depend on the prior updates in the queue running
				unset( $queue[$class] );
				$queue[$class] = $existingUpdate;
			} else {
				$queue[$class] = $update;
			}
		} else {
			$queue[] = $update;
		}
	}

	/**
	 * Immediately run or enqueue a list of updates
	 *
	 * The queue is emptied and processed in rounds until no new entries show up in it.
	 * Within a round, all DataUpdate instances run before the remaining updates.
	 *
	 * @param DeferrableUpdate[] &$queue List of DeferrableUpdate objects
	 * @param string $mode Either "run" or "enqueue" (to use the job queue when possible)
	 * @param int $stage Class constant (PRESEND, POSTSEND) (since 1.28)
	 * @throws ErrorPageError Happens on top-level calls
	 * @throws Exception Happens on second-level calls
	 */
	protected static function handleUpdateQueue( array &$queue, $mode, $stage ) {
		$services = MediaWikiServices::getInstance();
		$stats = $services->getStatsdDataFactory();
		$lbf = $services->getDBLoadBalancerFactory();
		$logger = LoggerFactory::getInstance( 'DeferredUpdates' );
		// Only used as a component of the stats key
		$httpMethod = $services->getMainConfig()->get( 'CommandLineMode' )
			? 'cli'
			: strtolower( RequestContext::getMain()->getRequest()->getMethod() );

		/** @var ErrorPageError $guiEx */
		$guiEx = null;
		/** @var DeferrableUpdate[] $updates Snapshot of queue */
		$updates = $queue;

		// Keep doing rounds of updates until none get enqueued...
		while ( $updates ) {
			$queue = []; // clear the queue

			// Segregate the queue into one for DataUpdate and one for everything else
			$dataUpdateQueue = [];
			$genericUpdateQueue = [];
			foreach ( $updates as $update ) {
				if ( $update instanceof DataUpdate ) {
					$dataUpdateQueue[] = $update;
				} else {
					$genericUpdateQueue[] = $update;
				}
			}
			// Execute all DataUpdate queue followed by the DeferrableUpdate queue...
			foreach ( [ $dataUpdateQueue, $genericUpdateQueue ] as $updateQueue ) {
				foreach ( $updateQueue as $du ) {
					// Enqueue the task into the job queue system instead if applicable
					if ( $mode === 'enqueue' && $du instanceof EnqueueableDataUpdate ) {
						self::jobify( $du, $lbf, $logger, $stats, $httpMethod );
						continue;
					}
					// Otherwise, execute the task and any subtasks that it spawns
					self::$executeContext = [ 'stage' => $stage, 'subqueue' => [] ];
					try {
						$e = self::run( $du, $lbf, $logger, $stats, $httpMethod );
						// Only the first ErrorPageError is remembered
						$guiEx = $guiEx ?: ( $e instanceof ErrorPageError ? $e : null );
						// Do the subqueue updates for $du until there are none
						while ( self::$executeContext['subqueue'] ) {
							$duChild = reset( self::$executeContext['subqueue'] );
							$firstKey = key( self::$executeContext['subqueue'] );
							unset( self::$executeContext['subqueue'][$firstKey] );

							$e = self::run( $duChild, $lbf, $logger, $stats, $httpMethod );
							$guiEx = $guiEx ?: ( $e instanceof ErrorPageError ? $e : null );
						}
					} finally {
						// Make sure we always clean up the context.
						// Losing updates while rewinding the stack is acceptable,
						// losing updates that are added later is not.
						self::$executeContext = null;
					}
				}
			}

			$updates = $queue; // new snapshot of queue (check for new entries)
		}

		// Throw the first of any GUI errors as long as the context is HTTP pre-send. However,
		// callers should check permissions *before* enqueueing updates. If the main transaction
		// round actions succeed but some deferred updates fail due to permissions errors then
		// there is a risk that some secondary data was not properly updated.
		if ( $guiEx && $stage === self::PRESEND && !headers_sent() ) {
			throw $guiEx;
		}
	}

	/**
	 * Run a task and catch/log any exceptions
	 *
	 * On failure the error is logged and the master changes are rolled back. The error
	 * is returned rather than thrown, except when MW_PHPUNIT_TEST is defined.
	 *
	 * @param DeferrableUpdate $update
	 * @param LBFactory $lbFactory
	 * @param LoggerInterface $logger
	 * @param StatsdDataFactoryInterface $stats
	 * @param string $httpMethod
	 * @return Throwable|null The error raised by the update, if any
	 */
	private static function run(
		DeferrableUpdate $update,
		LBFactory $lbFactory,
		LoggerInterface $logger,
		StatsdDataFactoryInterface $stats,
		$httpMethod
	) {
		$name = get_class( $update );
		$suffix = ( $update instanceof DeferrableCallback ) ? "_{$update->getOrigin()}" : '';
		$stats->increment( "deferred_updates.$httpMethod.{$name}{$suffix}" );

		$e = null;
		try {
			self::attemptUpdate( $update, $lbFactory );
		} catch ( Throwable $e ) {
			// Covers both Exception and Error; handled below
		}

		if ( $e ) {
			$logger->error(
				"Deferred update {type} failed: {message}",
				[
					'type' => $name . $suffix,
					'message' => $e->getMessage(),
					'trace' => $e->getTraceAsString()
				]
			);
			$lbFactory->rollbackMasterChanges( __METHOD__ );
			// VW-style hack to work around T190178, so we can make sure
			// PageMetaDataUpdater doesn't throw exceptions.
			if ( defined( 'MW_PHPUNIT_TEST' ) ) {
				throw $e;
			}
		}

		return $e;
	}

	/**
	 * Push a task into the job queue system and catch/log any exceptions
	 *
	 * On failure the error is logged and the master changes are rolled back; nothing is thrown.
	 *
	 * @param EnqueueableDataUpdate $update
	 * @param LBFactory $lbFactory
	 * @param LoggerInterface $logger
	 * @param StatsdDataFactoryInterface $stats
	 * @param string $httpMethod
	 */
	private static function jobify(
		EnqueueableDataUpdate $update,
		LBFactory $lbFactory,
		LoggerInterface $logger,
		StatsdDataFactoryInterface $stats,
		$httpMethod
	) {
		$stats->increment( "deferred_updates.$httpMethod." . get_class( $update ) );

		$e = null;
		try {
			$spec = $update->getAsJobSpecification();
			JobQueueGroup::singleton( $spec['domain'] ?? $spec['wiki'] )->push( $spec['job'] );
		} catch ( Throwable $e ) {
			// Covers both Exception and Error; handled below
		}

		if ( $e ) {
			$logger->error(
				"Job insertion of deferred update {type} failed: {message}",
				[
					'type' => get_class( $update ),
					'message' => $e->getMessage(),
					'trace' => $e->getTraceAsString()
				]
			);
			$lbFactory->rollbackMasterChanges( __METHOD__ );
		}
	}

	/**
	 * Attempt to run an update with the appropriate transaction round state it expects
	 *
	 * DeferredUpdate classes that wrap the execution of bundles of other DeferredUpdate
	 * instances can use this method to run the updates. Any such wrapper class should
	 * always use TRX_ROUND_ABSENT itself.
	 *
	 * Nothing is caught here; if doUpdate() throws, the round that was begun is not committed.
	 *
	 * @param DeferrableUpdate $update
	 * @param ILBFactory $lbFactory
	 * @since 1.34
	 */
	public static function attemptUpdate( DeferrableUpdate $update, ILBFactory $lbFactory ) {
		if ( $update instanceof DataUpdate ) {
			$update->setTransactionTicket( $lbFactory->getEmptyTransactionTicket( __METHOD__ ) );
		}

		if (
			$update instanceof TransactionRoundAwareUpdate &&
			$update->getTransactionRoundRequirement() == $update::TRX_ROUND_ABSENT
		) {
			// The update asked to run without a transaction round
			$fnameTrxOwner = null;
		} else {
			$fnameTrxOwner = get_class( $update ) . '::doUpdate';
		}

		if ( $fnameTrxOwner !== null ) {
			$lbFactory->beginMasterChanges( $fnameTrxOwner );
		}

		$update->doUpdate();

		if ( $fnameTrxOwner !== null ) {
			$lbFactory->commitMasterChanges( $fnameTrxOwner );
		}
	}

	/**
	 * Run all deferred updates immediately if there are no DB writes active
	 *
	 * If there are many deferred updates pending, $mode is 'run', and there
	 * are still busy LBFactory database handles, then any EnqueueableDataUpdate
	 * tasks might be enqueued as jobs to be executed later.
	 *
	 * @param string $mode Use "enqueue" to use the job queue when possible
	 * @return bool Whether updates were allowed to run
	 * @since 1.28
	 */
	public static function tryOpportunisticExecute( $mode = 'run' ) {
		// execute() loop is already running
		if ( self::$executeContext ) {
			return false;
		}

		// Avoiding running updates without them having outer scope
		if ( !self::areDatabaseTransactionsActive() ) {
			self::doUpdates( $mode );
			return true;
		}

		if ( self::pendingUpdatesCount() >= self::BIG_QUEUE_SIZE ) {
			// If we cannot run the updates with outer transaction context, try to
			// at least enqueue all the updates that support queueing to job queue
			self::$preSendUpdates = self::enqueueUpdates( self::$preSendUpdates );
			self::$postSendUpdates = self::enqueueUpdates( self::$postSendUpdates );
		}

		return !self::pendingUpdatesCount();
	}

	/**
	 * Enqueue a job for each EnqueueableDataUpdate item and return the other items
	 *
	 * The class-name keys that push() assigns to MergeableUpdate items are kept on the
	 * returned array, so that later updates of the same class can still be merged into
	 * them once the result is stored back into a queue.
	 *
	 * @param DeferrableUpdate[] $updates A list of deferred update instances
	 * @return DeferrableUpdate[] Remaining updates that do not support being queued
	 */
	private static function enqueueUpdates( array $updates ) {
		$remaining = [];

		foreach ( $updates as $key => $update ) {
			if ( $update instanceof EnqueueableDataUpdate ) {
				$spec = $update->getAsJobSpecification();
				$domain = $spec['domain'] ?? $spec['wiki'];
				JobQueueGroup::singleton( $domain )->push( $spec['job'] );
			} elseif ( is_string( $key ) ) {
				// Keyed by class name in push(); keep the key so merging keeps working
				$remaining[$key] = $update;
			} else {
				$remaining[] = $update;
			}
		}

		return $remaining;
	}

	/**
	 * @return int Number of enqueued updates
	 * @since 1.28
	 */
	public static function pendingUpdatesCount() {
		return count( self::$preSendUpdates ) + count( self::$postSendUpdates );
	}

	/**
	 * Get the pending updates of the given stage(s) as a plain list
	 *
	 * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL)
	 * @return DeferrableUpdate[] Pre-send updates first, then post-send updates
	 * @since 1.29
	 */
	public static function getPendingUpdates( $stage = self::ALL ) {
		$updates = [];
		// The queues use class names as keys for MergeableUpdate items. array_merge() lets
		// a later string key overwrite an earlier one, which would drop the pre-send update
		// whenever both queues hold an update of the same class, so strip the keys first.
		if ( $stage === self::ALL || $stage === self::PRESEND ) {
			$updates = array_merge( $updates, array_values( self::$preSendUpdates ) );
		}
		if ( $stage === self::ALL || $stage === self::POSTSEND ) {
			$updates = array_merge( $updates, array_values( self::$postSendUpdates ) );
		}
		return $updates;
	}

	/**
	 * Clear all pending updates without performing them. Generally, you don't
	 * want or need to call this. Unit tests need it though.
	 */
	public static function clearPendingUpdates() {
		self::$preSendUpdates = [];
		self::$postSendUpdates = [];
	}

	/**
	 * @return bool If a transaction round is active or connection is not ready for commit()
	 */
	private static function areDatabaseTransactionsActive() {
		$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
		if ( $lbFactory->hasTransactionRound() || !$lbFactory->isReadyForRoundOperations() ) {
			return true;
		}

		// Otherwise, look for any open master connection with pending work
		$connsBusy = false;
		$lbFactory->forEachLB( function ( LoadBalancer $lb ) use ( &$connsBusy ) {
			$lb->forEachOpenMasterConnection( function ( IDatabase $conn ) use ( &$connsBusy ) {
				if ( $conn->writesOrCallbacksPending() || $conn->explicitTrxActive() ) {
					$connsBusy = true;
				}
			} );
		} );

		return $connsBusy;
	}
}