CentralIdLookup: Add @since to factoryNonLocal()
[mediawiki.git] / includes / deferred / DeferredUpdates.php
blob7f506bfeafde4eb45b1a45fb1eb5f228c3ce1d80
1 <?php
2 /**
3 * Interface and manager for deferred updates.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
23 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
24 use MediaWiki\Logger\LoggerFactory;
25 use MediaWiki\MediaWikiServices;
26 use Psr\Log\LoggerInterface;
27 use Wikimedia\Rdbms\DBTransactionError;
28 use Wikimedia\Rdbms\IDatabase;
29 use Wikimedia\Rdbms\ILBFactory;
30 use Wikimedia\Rdbms\LBFactory;
31 use Wikimedia\Rdbms\LoadBalancer;
33 /**
34 * Class for managing the deferred updates
36 * In web request mode, deferred updates can be run at the end of the request, either before or
37 * after the HTTP response has been sent. In either case, they run after the DB commit step. If
38 * an update runs after the response is sent, it will not block clients. If sent before, it will
39 * run synchronously. These two modes are defined via PRESEND and POSTSEND constants, the latter
40 * being the default for addUpdate() and addCallableUpdate().
42 * Updates that work through this system will be more likely to complete by the time the client
43 * makes their next request after this one than with the JobQueue system.
45 * In CLI mode, deferred updates will run:
46 * - a) During DeferredUpdates::addUpdate if no LBFactory DB handles have writes pending
47 * - b) On commit of an LBFactory DB handle if no other such handles have writes pending
48 * - c) During an LBFactory::waitForReplication call if no LBFactory DBs have writes pending
49 * - d) When the queue is large and an LBFactory DB handle commits (EnqueueableDataUpdate only)
50 * - e) At the completion of Maintenance::execute()
52 * @see Maintenance::setLBFactoryTriggers
54 * When updates are deferred, they go into one of two FIFO "top-queues" (one for pre-send and one
55 * for post-send). Updates enqueued *during* doUpdate() of a "top" update go into the "sub-queue"
56 * for that update. After that method finishes, the sub-queue is run until drained. This continues
57 * for each top-queue job until the entire top queue is drained. This happens for the pre-send
58 * top-queue, and later on, the post-send top-queue, in doUpdates().
60 * @since 1.19
62 class DeferredUpdates {
/**
 * @var DeferrableUpdate[] Updates held back until just before the HTTP response is emitted.
 *  Integer-keyed entries form the FIFO list of plain updates, while string-keyed entries
 *  form a (class => MergeableUpdate) map for updates that absorb the work of any already
 *  pending update of the same class.
 */
private static $preSendUpdates = [];

/**
 * @var DeferrableUpdate[] Updates held back until just after the HTTP response is emitted.
 *  Integer-keyed entries form the FIFO list of plain updates, while string-keyed entries
 *  form a (class => MergeableUpdate) map for updates that absorb the work of any already
 *  pending update of the same class.
 */
private static $postSendUpdates = [];

/**
 * @var array[] Execution stack of the updates that are currently running (innermost last)
 * @phan-var array<array{stage:int,update:DeferrableUpdate,subqueue:DeferrableUpdate[]}>
 */
private static $executionStack = [];

/** @var int All updates; in web requests, use only after flushing the output buffer */
public const ALL = 0;
/** @var int For updates that should run before flushing the output buffer */
public const PRESEND = 1;
/** @var int For updates that should run after flushing the output buffer */
public const POSTSEND = 2;

/** @var int Pending update count at which tryOpportunisticExecute() starts enqueueing jobs */
private const BIG_QUEUE_SIZE = 100;
/**
 * Queue an update so that it is executed at the appropriate deferred stage
 *
 * In CLI mode, an opportunistic attempt to run the queued updates is made right away.
 *
 * When this is called while another update is already running:
 *   - MergeableUpdate instances always go onto the top-queue of the requested stage, with
 *     any pending instance of the same class melding with the new one at the end of the queue.
 *   - Other updates whose requested stage is at/before the stage of the innermost running
 *     update go into that update's sub-queue and run right after it finishes.
 *   - Other updates whose requested stage is after the stage of the innermost running
 *     update go onto the regular top-queue of the requested stage.
 *
 * @param DeferrableUpdate $update Some object that implements doUpdate()
 * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27)
 */
public static function addUpdate( DeferrableUpdate $update, $stage = self::POSTSEND ) {
	global $wgCommandLineMode;

	// Non-mergeable updates spawned by a running update may belong in its sub-queue
	if ( !( $update instanceof MergeableUpdate ) && self::$executionStack ) {
		// Locate the innermost in-progress update (last stack entry)
		end( self::$executionStack );
		$innermostPos = key( self::$executionStack );
		if ( $stage <= self::$executionStack[$innermostPos]['stage'] ) {
			self::push( self::$executionStack[$innermostPos]['subqueue'], $update );

			return;
		}
	}

	// Everything else lands on one of the two top-queues
	if ( $stage !== self::PRESEND ) {
		self::push( self::$postSendUpdates, $update );
	} else {
		self::push( self::$preSendUpdates, $update );
	}

	// In CLI mode, run the updates immediately when no transaction is active; this covers
	// scripts that barely touch the DB but still make updates to other stores.
	if ( $wgCommandLineMode ) {
		self::tryOpportunisticExecute( 'run' );
	}
}
/**
 * Queue a plain callback as a deferred update
 *
 * This is a convenience wrapper for the common case where a closure is enough and
 * declaring a dedicated DeferrableUpdate class would be overkill. The callback is
 * wrapped in an MWCallableUpdate tagged with the caller of this method.
 *
 * @see MWCallableUpdate::__construct()
 *
 * @param callable $callable
 * @param int $stage DeferredUpdates constant (PRESEND or POSTSEND) (since 1.27)
 * @param IDatabase|IDatabase[]|null $dbw Abort if this DB is rolled back [optional] (since 1.28)
 */
public static function addCallableUpdate(
	$callable, $stage = self::POSTSEND, $dbw = null
) {
	// Must be resolved from this frame so that it names the caller of addCallableUpdate()
	$origin = wfGetCaller();
	$update = new MWCallableUpdate( $callable, $origin, $dbw );

	self::addUpdate( $update, $stage );
}
/**
 * Consume the pending deferred updates and execute them
 *
 * This should rarely be called outside of a few select entry points. For simplicity,
 * recursion is discouraged. Recursion cannot happen while an explicit transaction round
 * is active, which limits such usage to updates with TRX_ROUND_ABSENT that do not leave
 * a transaction round of their own open during the call to this method.
 *
 * In the less-common case of this being called from within an in-progress
 * DeferrableUpdate, the top-queues are not processed (their updates were already consumed
 * by an outer execution loop). Instead, the sub-queue of the innermost in-progress update
 * on the stack is processed.
 *
 * If $stage is self::ALL then the PRESEND queue is resolved first, followed by the
 * POSTSEND queue.
 *
 * @param string $mode Use "enqueue" to use the job queue when possible [Default: "run"]
 * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL) (since 1.27)
 */
public static function doUpdates( $mode = 'run', $stage = self::ALL ) {
	$runEverything = ( $stage === self::ALL );
	$stageEffective = $runEverything ? self::POSTSEND : $stage;

	// Called from within a running update: only drain that update's sub-queue
	if ( self::$executionStack ) {
		end( self::$executionStack );
		$innermostPos = key( self::$executionStack );
		self::handleUpdateQueue(
			self::$executionStack[$innermostPos]['subqueue'],
			$mode,
			$stageEffective
		);

		return;
	}

	$runPreSend = $runEverything || $stage === self::PRESEND;
	// The loose comparison is intentional here: it mirrors the long-standing behaviour
	$runPostSend = $runEverything || $stage == self::POSTSEND;

	// In ALL mode, loop so that PRESEND updates added along the way still get run.
	// Those normally use a sub-queue, but MergeableUpdate items always hit the top-queue.
	do {
		if ( $runPreSend ) {
			self::handleUpdateQueue( self::$preSendUpdates, $mode, $stageEffective );
		}
		if ( $runPostSend ) {
			self::handleUpdateQueue( self::$postSendUpdates, $mode, $stageEffective );
		}
	} while ( $runEverything && self::$preSendUpdates );
}
/**
 * Append an update to a queue, merging it into a pending one of the same class if possible
 *
 * Plain updates are appended under the next integer key. MergeableUpdate instances are
 * stored under their class name; if one is already pending, the new update is merged
 * into it and the pending instance is moved to the end of the queue.
 *
 * @param DeferrableUpdate[] &$queue Combined FIFO update list and MergeableUpdate map
 * @param DeferrableUpdate $update
 */
private static function push( array &$queue, DeferrableUpdate $update ) {
	if ( !( $update instanceof MergeableUpdate ) ) {
		$queue[] = $update;

		return;
	}

	$class = get_class( $update ); // fully-qualified class
	if ( !isset( $queue[$class] ) ) {
		$queue[$class] = $update;

		return;
	}

	/** @var MergeableUpdate $existingUpdate */
	$existingUpdate = $queue[$class];
	'@phan-var MergeableUpdate $existingUpdate';
	$existingUpdate->merge( $update );
	// Re-insert at the end of the queue, since things like mergeable purge updates
	// might depend on the prior updates in the queue having run first
	unset( $queue[$class] );
	$queue[$class] = $existingUpdate;
}
/**
 * Immediately run or enqueue every update of a queue, until the queue stays empty
 *
 * The queue is processed in rounds: each round takes a snapshot, empties the queue and
 * handles all DataUpdate instances of the snapshot before the remaining updates. Updates
 * that a running update adds to its own sub-queue are run right after that update.
 *
 * Updates that implement EnqueueableDataUpdate and fail to run will be enqueued
 *
 * @param DeferrableUpdate[] &$queue List of DeferrableUpdate objects
 * @param string $mode Either "run" or "enqueue" (to use the job queue when possible)
 * @param int $stage Class constant (PRESEND, POSTSEND) (since 1.28)
 * @throws ErrorPageError Happens on top-level calls
 * @throws Exception Happens on second-level calls
 */
protected static function handleUpdateQueue( array &$queue, $mode, $stage ) {
	$services = MediaWikiServices::getInstance();
	$stats = $services->getStatsdDataFactory();
	$lbf = $services->getDBLoadBalancerFactory();
	$logger = LoggerFactory::getInstance( 'DeferredUpdates' );
	if ( $services->getMainConfig()->get( 'CommandLineMode' ) ) {
		$httpMethod = 'cli';
	} else {
		$httpMethod = strtolower( RequestContext::getMain()->getRequest()->getMethod() );
	}

	/** @var ErrorPageError|null $guiEx First GUI error reported by any update */
	$guiEx = null;
	/** @var Throwable|null $exception First error of any kind reported by any update */
	$exception = null;

	// Runs a single update and remembers the first error(s) that run() hands back
	$runAndRecord = static function ( DeferrableUpdate $du ) use (
		$lbf, $logger, $stats, $httpMethod, &$guiEx, &$exception
	) {
		$e = self::run( $du, $lbf, $logger, $stats, $httpMethod );
		if ( !$guiEx && $e instanceof ErrorPageError ) {
			$guiEx = $e;
		}
		if ( !$exception ) {
			$exception = $e;
		}
	};

	/** @var DeferrableUpdate[] $batch Snapshot of queue */
	$batch = $queue;

	// Keep doing rounds of updates until none get enqueued...
	while ( $batch ) {
		$queue = []; // clear the queue

		// Partition the snapshot: DataUpdate instances versus everything else
		$dataUpdates = [];
		$otherUpdates = [];
		foreach ( $batch as $pending ) {
			if ( $pending instanceof DataUpdate ) {
				$dataUpdates[] = $pending;
			} else {
				$otherUpdates[] = $pending;
			}
		}

		// Handle all of the DataUpdate instances first, then the other updates...
		foreach ( array_merge( $dataUpdates, $otherUpdates ) as $curUpdate ) {
			// Hand the update to the job queue system instead if applicable
			if ( $mode === 'enqueue' && $curUpdate instanceof EnqueueableDataUpdate ) {
				self::jobify( $curUpdate, $lbf, $logger, $stats, $httpMethod );
				continue;
			}

			// Otherwise, execute the update, followed by any sub-updates that it spawns.
			// The stack holds a reference so that addUpdate() can fill 'subqueue'.
			$stackEntry = [ 'stage' => $stage, 'update' => $curUpdate, 'subqueue' => [] ];
			$stackKey = count( self::$executionStack );
			self::$executionStack[$stackKey] =& $stackEntry;
			try {
				$runAndRecord( $curUpdate );
				// Drain the sub-queue of $curUpdate, including entries added meanwhile
				// @phan-suppress-next-line PhanImpossibleConditionInLoop
				while ( $stackEntry['subqueue'] ) {
					$childUpdate = reset( $stackEntry['subqueue'] );
					$childKey = key( $stackEntry['subqueue'] );
					unset( $stackEntry['subqueue'][$childKey] );

					$runAndRecord( $childUpdate );
				}
			} finally {
				// Make sure we always clean up the context.
				// Losing updates while rewinding the stack is acceptable,
				// losing updates that are added later is not.
				unset( self::$executionStack[$stackKey] );
			}
		}

		$batch = $queue; // new snapshot of queue (check for new entries)
	}

	// VW-style hack to work around T190178, so we can make sure
	// PageMetaDataUpdater doesn't throw exceptions.
	if ( $exception && defined( 'MW_PHPUNIT_TEST' ) ) {
		throw $exception;
	}

	// Throw the first of any GUI errors as long as the context is HTTP pre-send. However,
	// callers should check permissions *before* enqueueing updates. If the main transaction
	// round actions succeed but some deferred updates fail due to permissions errors then
	// there is a risk that some secondary data was not properly updated.
	if ( $guiEx && $stage === self::PRESEND && !headers_sent() ) {
		throw $guiEx;
	}
}
/**
 * Run an update, and, if an error was thrown, catch/log it and fall back to the job queue
 *
 * On failure, the error is logged, master changes are rolled back and, for
 * EnqueueableDataUpdate instances, the update is pushed as a job so it can run later.
 * A failure to push that job is logged and rolled back as well; it is never thrown.
 *
 * @param DeferrableUpdate $update
 * @param LBFactory $lbFactory
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param string $httpMethod
 * @return Throwable|null The error thrown while running the update, or null on success
 */
private static function run(
	DeferrableUpdate $update,
	LBFactory $lbFactory,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	$httpMethod
) : ?Throwable {
	$suffix = ( $update instanceof DeferrableCallback ) ? "_{$update->getOrigin()}" : '';
	$type = get_class( $update ) . $suffix;
	$stats->increment( "deferred_updates.$httpMethod.$type" );

	$updateId = spl_object_id( $update );
	$logger->debug( __METHOD__ . ": started $type #$updateId" );
	$e = null;
	try {
		self::attemptUpdate( $update, $lbFactory );
	} catch ( Throwable $e ) {
		// Handled below, so that the "ended" entry is logged before the error entries
	} finally {
		$logger->debug( __METHOD__ . ": ended $type #$updateId" );
	}

	if ( $e === null ) {
		return null;
	}

	MWExceptionHandler::logException( $e );
	$logger->error(
		"Deferred update '{deferred_type}' failed to run.",
		[
			'deferred_type' => $type,
			'exception' => $e,
		]
	);

	$lbFactory->rollbackMasterChanges( __METHOD__ );

	// Try to push the update as a job so it can run later if possible
	if ( $update instanceof EnqueueableDataUpdate ) {
		try {
			$spec = $update->getAsJobSpecification();
			// Accept the legacy 'wiki' key too, like enqueueUpdates() does
			$domain = $spec['domain'] ?? $spec['wiki'];
			JobQueueGroup::singleton( $domain )->push( $spec['job'] );
		} catch ( Throwable $jobEx ) {
			MWExceptionHandler::logException( $jobEx );
			$logger->error(
				"Deferred update '{deferred_type}' failed to enqueue as a job.",
				[
					'deferred_type' => $type,
					'exception' => $jobEx,
				]
			);

			$lbFactory->rollbackMasterChanges( __METHOD__ );
		}
	}

	return $e;
}
/**
 * Push an update into the job queue system and catch/log any exceptions
 *
 * A failure to build or push the job is logged and followed by a rollback of master
 * changes; it is never thrown to the caller.
 *
 * @param EnqueueableDataUpdate $update
 * @param LBFactory $lbFactory
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param string $httpMethod
 */
private static function jobify(
	EnqueueableDataUpdate $update,
	LBFactory $lbFactory,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	$httpMethod
) {
	$type = get_class( $update );
	$stats->increment( "deferred_updates.$httpMethod.$type" );

	try {
		$spec = $update->getAsJobSpecification();
		// Accept the legacy 'wiki' key too, like enqueueUpdates() does
		$domain = $spec['domain'] ?? $spec['wiki'];
		JobQueueGroup::singleton( $domain )->push( $spec['job'] );
	} catch ( Throwable $jobEx ) {
		MWExceptionHandler::logException( $jobEx );
		// Use a placeholder (as run() does) so that the message stays constant per
		// failure kind; the type is provided through the context array
		$logger->error(
			"Deferred update '{deferred_type}' failed to enqueue as a job.",
			[
				'deferred_type' => $type,
				'exception' => $jobEx,
			]
		);

		$lbFactory->rollbackMasterChanges( __METHOD__ );
	}
}
/**
 * Attempt to run an update with the appropriate transaction round state it expects
 *
 * DeferredUpdate classes that wrap the execution of bundles of other DeferredUpdate
 * instances can use this method to run the updates. Any such wrapper class should
 * always use TRX_ROUND_ABSENT itself.
 *
 * @param DeferrableUpdate $update
 * @param ILBFactory $lbFactory
 * @throws DBTransactionError If no empty transaction ticket is available or a
 *  transaction round is already active
 * @since 1.34
 */
public static function attemptUpdate( DeferrableUpdate $update, ILBFactory $lbFactory ) {
	$ticket = $lbFactory->getEmptyTransactionTicket( __METHOD__ );
	$roundPending = ( !$ticket || $lbFactory->hasTransactionRound() );
	if ( $roundPending ) {
		throw new DBTransactionError( null, "A database transaction round is pending." );
	}

	if ( $update instanceof DataUpdate ) {
		$update->setTransactionTicket( $ticket );
	}

	// Designate $update::doUpdate() as the write round owner
	if ( $update instanceof DeferrableCallback ) {
		$fnameTrxOwner = $update->getOrigin();
	} else {
		$fnameTrxOwner = get_class( $update ) . '::doUpdate';
	}

	// The round is implicit only if the update explicitly asks for no round at all
	$wantsImplicitRound = (
		$update instanceof TransactionRoundAwareUpdate &&
		$update->getTransactionRoundRequirement() == $update::TRX_ROUND_ABSENT
	);

	// Flush any pending changes left over from an implicit transaction round
	if ( $wantsImplicitRound ) {
		$lbFactory->commitMasterChanges( $fnameTrxOwner ); // new implicit round
	} else {
		$lbFactory->beginMasterChanges( $fnameTrxOwner ); // new explicit round
	}

	// Run the update after any stale master view snapshots have been flushed
	$update->doUpdate();

	// Commit any pending changes from the explicit or implicit transaction round
	$lbFactory->commitMasterChanges( $fnameTrxOwner );
}
/**
 * Run all deferred updates immediately if there are no DB writes active
 *
 * Nothing happens if an update is already being executed. If DB transactions are
 * active and many deferred updates are pending, then the EnqueueableDataUpdate
 * instances among them are enqueued as jobs to be executed later.
 *
 * @param string $mode Use "enqueue" to use the job queue when possible
 * @return bool Whether updates were allowed to run
 * @since 1.28
 */
public static function tryOpportunisticExecute( $mode = 'run' ) {
	if ( self::$executionStack ) {
		// An update is already in progress
		return false;
	}

	if ( self::areDatabaseTransactionsActive() ) {
		// The updates cannot run without having an outer scope of their own, so at
		// least move the ones that support it over to the job queue when the backlog
		// has grown large
		if ( self::pendingUpdatesCount() >= self::BIG_QUEUE_SIZE ) {
			self::$preSendUpdates = self::enqueueUpdates( self::$preSendUpdates );
			self::$postSendUpdates = self::enqueueUpdates( self::$postSendUpdates );
		}

		return self::pendingUpdatesCount() === 0;
	}

	self::doUpdates( $mode );

	return true;
}
/**
 * Enqueue a job for each EnqueueableDataUpdate item and return the other items
 *
 * The input uses the top-queue layout: integer keys for the FIFO list and string
 * (class name) keys for pending MergeableUpdate instances. String keys are kept in the
 * result so that push() can still find and merge into a pending MergeableUpdate;
 * integer-keyed items are re-appended in their original order.
 *
 * @param DeferrableUpdate[] $updates A list of deferred update instances
 * @return DeferrableUpdate[] Remaining updates that do not support being queued
 */
private static function enqueueUpdates( array $updates ) {
	$remaining = [];

	foreach ( $updates as $key => $update ) {
		if ( $update instanceof EnqueueableDataUpdate ) {
			$spec = $update->getAsJobSpecification();
			$domain = $spec['domain'] ?? $spec['wiki'];
			JobQueueGroup::singleton( $domain )->push( $spec['job'] );
		} elseif ( is_string( $key ) ) {
			// Keep the (class => MergeableUpdate) mapping intact
			$remaining[$key] = $update;
		} else {
			$remaining[] = $update;
		}
	}

	return $remaining;
}
/**
 * Get the number of currently enqueued updates in the top-queues
 *
 * This must not be called while an update is in progress.
 *
 * @return int Combined size of the pre-send and post-send top-queues
 * @throws LogicException If called during the execution of a DeferrableUpdate
 * @since 1.28
 */
public static function pendingUpdatesCount() {
	if ( self::$executionStack ) {
		throw new LogicException( "Called during execution of a DeferrableUpdate" );
	}

	$preSendCount = count( self::$preSendUpdates );
	$postSendCount = count( self::$postSendUpdates );

	return $preSendCount + $postSendCount;
}
/**
 * Get the list of pending updates in the top-queues
 *
 * Calling this while an update is in-progress produces undefined results
 *
 * This method should only be used for unit tests
 *
 * @param int $stage DeferredUpdates constant (PRESEND, POSTSEND, or ALL)
 * @return DeferrableUpdate[] Pre-send updates (if requested) followed by post-send ones
 * @since 1.29
 */
public static function getPendingUpdates( $stage = self::ALL ) {
	$wantAll = ( $stage === self::ALL );

	// Collect the requested top-queues in execution order
	$selectedQueues = [];
	if ( $wantAll || $stage === self::PRESEND ) {
		$selectedQueues[] = self::$preSendUpdates;
	}
	if ( $wantAll || $stage === self::POSTSEND ) {
		$selectedQueues[] = self::$postSendUpdates;
	}

	// The leading empty array keeps the call valid when no queue was selected
	return array_merge( [], ...$selectedQueues );
}
/**
 * Discard every pending update of both top-queues without running any of them
 *
 * Calling this while an update is in-progress produces undefined results
 *
 * This method should only be used for unit tests
 */
public static function clearPendingUpdates() {
	self::$preSendUpdates = self::$postSendUpdates = [];
}
/**
 * Check whether running updates now would interfere with ongoing DB transaction state
 *
 * @return bool If a transaction round is active or connection is not ready for commit()
 */
private static function areDatabaseTransactionsActive() {
	$lbFactory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
	if ( $lbFactory->hasTransactionRound() ) {
		return true;
	}
	if ( !$lbFactory->isReadyForRoundOperations() ) {
		return true;
	}

	// Flag set as soon as any open master connection has pending work
	$anyConnBusy = false;
	$inspectConn = static function ( IDatabase $conn ) use ( &$anyConnBusy ) {
		if ( $conn->writesOrCallbacksPending() || $conn->explicitTrxActive() ) {
			$anyConnBusy = true;
		}
	};
	$lbFactory->forEachLB( static function ( LoadBalancer $lb ) use ( $inspectConn ) {
		$lb->forEachOpenMasterConnection( $inspectConn );
	} );

	return $anyConnBusy;
}