Merge "Special:BlockList: Update remove/change block links"
[mediawiki.git] / includes / parser / ParserCache.php
blob181dd9eb2a9c467cfcbafff3161b929aa0d74a83
1 <?php
/**
 * Cache for outputs of the PHP parser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Cache Parser
 */
24 namespace MediaWiki\Parser;
26 use Exception;
27 use InvalidArgumentException;
28 use JsonException;
29 use MediaWiki\HookContainer\HookContainer;
30 use MediaWiki\HookContainer\HookRunner;
31 use MediaWiki\Json\JsonCodec;
32 use MediaWiki\Page\PageRecord;
33 use MediaWiki\Page\WikiPageFactory;
34 use MediaWiki\Title\TitleFactory;
35 use Psr\Log\LoggerInterface;
36 use Wikimedia\ObjectCache\BagOStuff;
37 use Wikimedia\ObjectCache\EmptyBagOStuff;
38 use Wikimedia\ObjectCache\HashBagOStuff;
39 use Wikimedia\Stats\StatsFactory;
40 use Wikimedia\UUID\GlobalIdGenerator;
/**
 * Cache for ParserOutput objects corresponding to the latest page revisions.
 *
 * The ParserCache is a two-tiered cache backed by BagOStuff which supports
 * varying the stored content on the values of ParserOptions used during
 * a page parse.
 *
 * First tier is keyed by the page ID and stores ParserCacheMetadata, which
 * contains information about cache expiration and the list of ParserOptions
 * used during the parse of the page. For example, if only 'dateformat' and
 * 'userlang' options were accessed by the parser when producing output for the
 * page, array [ 'dateformat', 'userlang' ] will be stored in the metadata cache.
 * This means none of the other existing options had any effect on the output.
 *
 * The second tier of the cache contains ParserOutput objects. The key for the
 * second tier is constructed from the page ID and values of those ParserOptions
 * used during a page parse which affected the output. Upon cache lookup, the list
 * of used option names is retrieved from tier 1 cache, and only the values of
 * those options are hashed together with the page ID to produce a key, while
 * the rest of the options are ignored. Following the example above where
 * only [ 'dateformat', 'userlang' ] options changed the parser output for a
 * page, the key will look like 'page_id!dateformat=default:userlang=ru'.
 * Thus any cache lookup with dateformat=default and userlang=ru will hit the
 * same cache entry regardless of the values of the rest of the options, since they
 * were not accessed during a parse and thus did not change the output.
 *
 * @see ParserOutput::recordOption()
 * @see ParserOutput::getUsedOptions()
 * @see ParserOptions::allCacheVaryingOptions()
 * @ingroup Cache Parser
 */
73 class ParserCache {
/**
 * Staleness constraints, passed as the $staleConstraint argument of
 * self::getMetadata() and compared against in self::checkExpired()
 * and self::checkOutdated().
 * @since 1.30
 * @since 1.36 the constants were made public
 */

/** Use only current data */
public const USE_CURRENT_ONLY = 0;

/** Use expired data if current data is unavailable */
public const USE_EXPIRED = 1;

/** Use expired data or data from different revisions if current data is unavailable */
public const USE_OUTDATED = 2;

/**
 * Use expired data and data from different revisions, and if all else
 * fails vary on all variable options
 */
private const USE_ANYTHING = 3;

/** The name of this ParserCache. Used as a root of the cache key. */
private string $name;

/** Backend store holding both the metadata entries and the ParserOutput entries. */
private BagOStuff $cache;

/**
 * Anything cached prior to this is invalidated
 */
private string $cacheEpoch;

private HookRunner $hookRunner;

private JsonCodec $jsonCodec;

private StatsFactory $stats;

private LoggerInterface $logger;

private TitleFactory $titleFactory;

private WikiPageFactory $wikiPageFactory;

/** Optional filter consulted by save(); null until setFilter() is called. */
private ?ParserCacheFilter $filter = null;

private GlobalIdGenerator $globalIdGenerator;

/**
 * Small in-process cache to store metadata.
 * It's needed multiple times during the request, for example
 * to build a PoolWorkArticleView key, and then to fetch the
 * actual ParserCache entry.
 */
private BagOStuff $metadataProcCache;
/**
 * Set up a cache pathway on top of a given back-end storage mechanism.
 *
 * This class uses an invalidation strategy that is compatible with
 * MultiWriteBagOStuff in async replication mode.
 *
 * @param string $name Name of this cache; used as the root of every cache key
 * @param BagOStuff $cache Back-end storage
 * @param string $cacheEpoch Anything before this timestamp is invalidated
 * @param HookContainer $hookContainer Wrapped in a HookRunner for hook dispatch
 * @param JsonCodec $jsonCodec Used to (de)serialize cached values
 * @param StatsFactory $stats
 * @param LoggerInterface $logger
 * @param TitleFactory $titleFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param GlobalIdGenerator $globalIdGenerator Source of render IDs for outputs lacking one
 */
public function __construct(
	string $name,
	BagOStuff $cache,
	string $cacheEpoch,
	HookContainer $hookContainer,
	JsonCodec $jsonCodec,
	StatsFactory $stats,
	LoggerInterface $logger,
	TitleFactory $titleFactory,
	WikiPageFactory $wikiPageFactory,
	GlobalIdGenerator $globalIdGenerator
) {
	// Identity and storage
	$this->name = $name;
	$this->cache = $cache;
	$this->cacheEpoch = $cacheEpoch;

	// Collaborators that are stored as given
	$this->jsonCodec = $jsonCodec;
	$this->stats = $stats;
	$this->logger = $logger;
	$this->titleFactory = $titleFactory;
	$this->wikiPageFactory = $wikiPageFactory;
	$this->globalIdGenerator = $globalIdGenerator;

	// Collaborators that are built here
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->metadataProcCache = new HashBagOStuff( [ 'maxKeys' => 2 ] );
}
/**
 * Install the filter that save() consults (via shouldCache()) before
 * writing an entry.
 *
 * @since 1.41
 * @param ParserCacheFilter $filter
 */
public function setFilter( ParserCacheFilter $filter ): void {
	$this->filter = $filter;
}
/**
 * Delete the metadata entry of the given page from both the in-process
 * cache and the back-end storage.
 *
 * @param PageRecord $page
 * @since 1.28
 */
public function deleteOptionsKey( PageRecord $page ) {
	$page->assertWiki( PageRecord::LOCAL );

	$metadataKey = $this->makeMetadataKey( $page );

	// In-process copy first, then the shared back-end.
	$this->metadataProcCache->delete( $metadataKey );
	$this->cache->delete( $metadataKey );
}
/**
 * Retrieve the ParserOutput from ParserCache, even if it's outdated.
 *
 * Delegates to get() with $useOutdated enabled.
 *
 * @param PageRecord $page
 * @param ParserOptions $popts
 * @return ParserOutput|false
 */
public function getDirty( PageRecord $page, $popts ) {
	$page->assertWiki( PageRecord::LOCAL );

	$output = $this->get( $page, $popts, true );
	if ( is_object( $output ) ) {
		return $output;
	}

	return false;
}
/**
 * Get the content model of the page with every '.' replaced by '_',
 * for use as a stats label / metric key component.
 *
 * @param PageRecord $page
 * @return string
 */
private function getContentModelFromPage( PageRecord $page ) {
	$model = $this->wikiPageFactory->newFromTitle( $page )->getContentModel();

	return str_replace( '.', '_', $model );
}
/**
 * Increment the 'ParserCache_operation_total' counter for the given page,
 * labelled with the cache name, content model, status and reason.
 *
 * @param PageRecord $page
 * @param string $status
 * @param string|null $reason Reported as 'n/a' in the label when empty
 */
private function incrementStats( PageRecord $page, $status, $reason = null ) {
	$contentModel = $this->getContentModelFromPage( $page );

	if ( $reason ) {
		$metricSuffix = "{$status}_{$reason}";
		$reasonLabel = $reason;
	} else {
		$metricSuffix = $status;
		$reasonLabel = 'n/a';
	}

	$this->stats->getCounter( 'ParserCache_operation_total' )
		->setLabel( 'name', $this->name )
		->setLabel( 'contentModel', $contentModel )
		->setLabel( 'status', $status )
		->setLabel( 'reason', $reasonLabel )
		->copyToStatsdAt( "{$this->name}.{$contentModel}.{$metricSuffix}" )
		->increment();
}
/**
 * Increment the 'ParserCache_render_total' counter for the given page,
 * labelled with the cache name, content model and render reason.
 *
 * @param PageRecord $page
 * @param string $renderReason Runs of non-word characters are collapsed to '_'
 */
private function incrementRenderReasonStats( PageRecord $page, $renderReason ) {
	$contentModel = $this->getContentModelFromPage( $page );
	$reasonLabel = preg_replace( '/\W+/', '_', $renderReason );

	$this->stats->getCounter( 'ParserCache_render_total' )
		->setLabel( 'name', $this->name )
		->setLabel( 'contentModel', $contentModel )
		->setLabel( 'reason', $reasonLabel )
		->copyToStatsdAt( "{$this->name}.{$contentModel}.reason.{$reasonLabel}" )
		->increment();
}
/**
 * Returns the ParserCache metadata about the given page
 * considering the given options.
 *
 * The in-process cache is consulted first, then the back-end. Null is
 * returned when no entry exists, when the entry cannot be restored to a
 * CacheTime object, or when it is rejected as expired or outdated under
 * the given $staleConstraint.
 *
 * @note Which parser options influence the cache key
 * is controlled via ParserOutput::recordOption() or
 * ParserOptions::addExtraKey().
 *
 * @param PageRecord $page
 * @param int $staleConstraint one of the self::USE_ constants
 * @return ParserCacheMetadata|null
 * @since 1.36
 */
public function getMetadata(
	PageRecord $page,
	int $staleConstraint = self::USE_ANYTHING
): ?ParserCacheMetadata {
	$page->assertWiki( PageRecord::LOCAL );

	$metadataKey = $this->makeMetadataKey( $page );

	$entry = $this->metadataProcCache->get( $metadataKey );
	if ( !$entry ) {
		// Nothing usable in-process; ask the back-end.
		$entry = $this->cache->get( $metadataKey, BagOStuff::READ_VERIFIED );
	}

	if ( $entry === false ) {
		$this->incrementStats( $page, 'miss', 'absent_metadata' );
		$this->logger->debug( 'ParserOutput metadata cache miss', [ 'name' => $this->name ] );
		return null;
	}

	// Ensure this cache hit is present in the in-process cache (T277829)
	$this->metadataProcCache->set( $metadataKey, $entry );

	// NOTE: If the value wasn't serialized to JSON when being stored,
	// we may already have an object here instead of a string. This used
	// to be the default behavior before 1.36. We need to retain
	// support so we can handle cached objects after an update
	// from an earlier revision.
	// NOTE: Support for reading string values from the cache must be
	// deployed a while before starting to write JSON to the cache,
	// in case we have to revert either change.
	if ( is_string( $entry ) ) {
		$entry = $this->restoreFromJson( $entry, $metadataKey, CacheTime::class );
	}

	if ( !( $entry instanceof CacheTime ) ) {
		$this->incrementStats( $page, 'miss', 'unserialize' );
		return null;
	}

	// Expiry is checked before the revision check; each records its own miss.
	if (
		$this->checkExpired( $entry, $page, $staleConstraint, 'metadata' ) ||
		$this->checkOutdated( $entry, $page, $staleConstraint, 'metadata' )
	) {
		return null;
	}

	$this->logger->debug( 'Parser cache options found', [ 'name' => $this->name ] );
	return $entry;
}
/**
 * Build the cache key under which the metadata entry of the page is stored.
 *
 * @param PageRecord $page
 * @return string
 */
private function makeMetadataKey( PageRecord $page ): string {
	$pageId = $page->getId( PageRecord::LOCAL );

	return $this->cache->makeKey( $this->name, $pageId, '|#|', 'idoptions' );
}
/**
 * Get a key that will be used by the ParserCache to store the content
 * for a given page considering the given options and the array of
 * used options.
 *
 * @warning The exact format of the key is considered internal and is subject
 * to change, thus should not be used as storage or long-term caching key.
 * This is intended to be used for logging or keying something transient.
 *
 * @param PageRecord $page
 * @param ParserOptions $options
 * @param array|null $usedOptions Defaults to all cache varying options.
 * @return string
 * @internal
 * @since 1.36
 */
public function makeParserOutputKey(
	PageRecord $page,
	ParserOptions $options,
	?array $usedOptions = null
): string {
	if ( $usedOptions === null ) {
		$usedOptions = ParserOptions::allCacheVaryingOptions();
	}

	$title = $this->titleFactory->newFromPageIdentity( $page );
	$optionsHash = $options->optionsHash( $usedOptions, $title );

	// idhash seem to mean 'page id' + 'rendering hash' (r3710)
	return $this->cache->makeKey(
		$this->name,
		$page->getId( PageRecord::LOCAL ),
		'|#|',
		'idhash',
		$optionsHash
	);
}
/**
 * Retrieve the ParserOutput from ParserCache.
 * false if not found or outdated.
 *
 * A miss is also reported for pages that do not exist or are redirects,
 * for options that are not safe to cache, for values that cannot be
 * restored to a ParserOutput, and for values vetoed by the
 * RejectParserCacheValue hook.
 *
 * @param PageRecord $page
 * @param ParserOptions $popts
 * @param bool $useOutdated (default false)
 *
 * @return ParserOutput|false
 */
public function get( PageRecord $page, $popts, $useOutdated = false ) {
	$page->assertWiki( PageRecord::LOCAL );

	if ( !$page->exists() ) {
		$this->incrementStats( $page, 'miss', 'nonexistent' );
		return false;
	}

	if ( $page->isRedirect() ) {
		// It's a redirect now
		$this->incrementStats( $page, 'miss', 'redirect' );
		return false;
	}

	if ( $useOutdated ) {
		$staleConstraint = self::USE_OUTDATED;
	} else {
		$staleConstraint = self::USE_CURRENT_ONLY;
	}

	$parserOutputMetadata = $this->getMetadata( $page, $staleConstraint );
	if ( !$parserOutputMetadata ) {
		// getMetadata() has already recorded the reason for the miss.
		return false;
	}

	if ( !$popts->isSafeToCache( $parserOutputMetadata->getUsedOptions() ) ) {
		$this->incrementStats( $page, 'miss', 'unsafe' );
		return false;
	}

	$parserOutputKey = $this->makeParserOutputKey(
		$page,
		$popts,
		$parserOutputMetadata->getUsedOptions()
	);

	$cached = $this->cache->get( $parserOutputKey, BagOStuff::READ_VERIFIED );
	if ( $cached === false ) {
		$this->incrementStats( $page, 'miss', 'absent' );
		$this->logger->debug( 'ParserOutput cache miss', [ 'name' => $this->name ] );
		return false;
	}

	// NOTE: If the value wasn't serialized to JSON when being stored,
	// we may already have a ParserOutput object here. This used
	// to be the default behavior before 1.36. We need to retain
	// support so we can handle cached objects after an update
	// from an earlier revision.
	// NOTE: Support for reading string values from the cache must be
	// deployed a while before starting to write JSON to the cache,
	// in case we have to revert either change.
	if ( is_string( $cached ) ) {
		$cached = $this->restoreFromJson( $cached, $parserOutputKey, ParserOutput::class );
	}

	if ( !( $cached instanceof ParserOutput ) ) {
		$this->incrementStats( $page, 'miss', 'unserialize' );
		return false;
	}

	// Expiry is checked before the revision check; each records its own miss.
	if (
		$this->checkExpired( $cached, $page, $staleConstraint, 'output' ) ||
		$this->checkOutdated( $cached, $page, $staleConstraint, 'output' )
	) {
		return false;
	}

	$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
	$verdict = $this->hookRunner->onRejectParserCacheValue( $cached, $wikiPage, $popts );
	if ( $verdict === false ) {
		$this->incrementStats( $page, 'miss', 'rejected' );
		$this->logger->debug(
			'key valid, but rejected by RejectParserCacheValue hook handler',
			[ 'name' => $this->name ]
		);
		return false;
	}

	$this->logger->debug( 'ParserOutput cache found', [ 'name' => $this->name ] );
	$this->incrementStats( $page, 'hit' );
	return $cached;
}
/**
 * Store a ParserOutput and its metadata entry for the given page.
 *
 * Nothing is written when the options are not safe to cache, the output's
 * cache expiry is not positive, the configured filter declines the output,
 * the back-end is an EmptyBagOStuff, no revision ID can be determined, or
 * serialization fails.
 *
 * @param ParserOutput $parserOutput
 * @param PageRecord $page
 * @param ParserOptions $popts
 * @param string|null $cacheTime TS_MW timestamp when the cache was generated
 * @param int|null $revId Revision ID that was parsed
 * @throws InvalidArgumentException If $parserOutput has no text set
 */
public function save(
	ParserOutput $parserOutput,
	PageRecord $page,
	$popts,
	$cacheTime = null,
	$revId = null
) {
	$page->assertWiki( PageRecord::LOCAL );

	// T350538: Eventually we'll warn if the $cacheTime and $revId
	// parameters are non-null here, since we *should* be getting
	// them from the ParserOutput.
	if ( $revId !== null && $revId !== $parserOutput->getCacheRevisionId() ) {
		$this->logger->warning(
			'Inconsistent revision ID',
			[
				'name' => $this->name,
				'reason' => $popts->getRenderReason(),
				'revid1' => $revId,
				'revid2' => $parserOutput->getCacheRevisionId(),
			]
		);
	}

	if ( !$parserOutput->hasText() ) {
		throw new InvalidArgumentException( 'Attempt to cache a ParserOutput with no text set!' );
	}

	$expire = $parserOutput->getCacheExpiry();
	$logContext = [ 'name' => $this->name ];

	if ( !$popts->isSafeToCache( $parserOutput->getUsedOptions() ) ) {
		$this->logger->debug(
			'Parser options are not safe to cache and has not been saved',
			$logContext
		);
		$this->incrementStats( $page, 'save', 'unsafe' );
		return;
	}

	if ( $expire <= 0 ) {
		$this->logger->debug(
			'Parser output was marked as uncacheable and has not been saved',
			$logContext
		);
		$this->incrementStats( $page, 'save', 'uncacheable' );
		return;
	}

	if ( $this->filter && !$this->filter->shouldCache( $parserOutput, $page, $popts ) ) {
		$this->logger->debug(
			'Parser output was filtered and has not been saved',
			$logContext
		);
		$this->incrementStats( $page, 'save', 'filtered' );

		// TODO: In this case, we still want to cache in RevisionOutputCache (T350669).
		return;
	}

	if ( $this->cache instanceof EmptyBagOStuff ) {
		return;
	}

	// Ensure cache properties are set in the ParserOutput
	// T350538: These should be turned into assertions that the
	// properties are already present.
	if ( !$cacheTime ) {
		if ( !$parserOutput->hasCacheTime() ) {
			$this->logger->warning(
				'No cache time set',
				[
					'name' => $this->name,
					'reason' => $popts->getRenderReason(),
				]
			);
		}
		$cacheTime = $parserOutput->getCacheTime();
	} else {
		$parserOutput->setCacheTime( $cacheTime );
	}

	if ( $revId ) {
		$parserOutput->setCacheRevisionId( $revId );
	} elseif ( $parserOutput->getCacheRevisionId() ) {
		$revId = $parserOutput->getCacheRevisionId();
	} else {
		// Fall back to the page's latest revision.
		$revId = $page->getLatest( PageRecord::LOCAL );
		$parserOutput->setCacheRevisionId( $revId );
	}

	if ( !$revId ) {
		$this->logger->warning(
			'Parser output cannot be saved if the revision ID is not known',
			$logContext
		);
		$this->incrementStats( $page, 'save', 'norevid' );
		return;
	}

	if ( !$parserOutput->getRenderId() ) {
		$this->logger->warning(
			'Parser output missing render ID',
			[
				'name' => $this->name,
				'reason' => $popts->getRenderReason(),
			]
		);
		$parserOutput->setRenderId( $this->globalIdGenerator->newUUIDv1() );
	}

	// Transfer cache properties to the cache metadata
	$metadata = new CacheTime;
	$metadata->recordOptions( $parserOutput->getUsedOptions() );
	$metadata->updateCacheExpiry( $expire );
	$metadata->setCacheTime( $cacheTime );
	$metadata->setCacheRevisionId( $revId );

	$parserOutputKey = $this->makeParserOutputKey(
		$page,
		$popts,
		$metadata->getUsedOptions()
	);

	$reason = $popts->getRenderReason();
	$msg = "Saved in parser cache with key {$parserOutputKey}"
		. " and timestamp {$cacheTime}"
		. " and revision id {$revId}."
		. " Rendering was triggered because: {$reason}";

	// The message must be attached before the output is serialized below.
	$parserOutput->addCacheMessage( $msg );

	$pageKey = $this->makeMetadataKey( $page );

	$parserOutputData = $this->convertForCache( $parserOutput, $parserOutputKey );
	$metadataData = $this->convertForCache( $metadata, $pageKey );

	if ( !$parserOutputData || !$metadataData ) {
		$this->logger->warning(
			'Parser output failed to serialize and was not saved',
			$logContext
		);
		$this->incrementStats( $page, 'save', 'nonserializable' );
		return;
	}

	// Save the parser output
	$this->cache->set(
		$parserOutputKey,
		$parserOutputData,
		$expire,
		BagOStuff::WRITE_ALLOW_SEGMENTS
	);

	// ...and its pointer to the local cache.
	$this->metadataProcCache->set( $pageKey, $metadataData, $expire );
	// ...and to the global cache.
	$this->cache->set( $pageKey, $metadataData, $expire );

	$title = $this->titleFactory->newFromPageIdentity( $page );
	$this->hookRunner->onParserCacheSaveComplete( $this, $parserOutput, $title, $popts, $revId );

	$this->logger->debug( 'Saved in parser cache', [
		'name' => $this->name,
		'key' => $parserOutputKey,
		'cache_time' => $cacheTime,
		'rev_id' => $revId
	] );
	$this->incrementStats( $page, 'save', 'success' );
	$this->incrementRenderReasonStats( $page, $popts->getRenderReason() );
}
/**
 * Get the backend BagOStuff instance that
 * powers the parser cache
 *
 * @since 1.30
 * @internal
 * @return BagOStuff
 */
public function getCacheStorage() {
	return $this->cache;
}
/**
 * Check if $entry expired for $page given the $staleConstraint
 * when fetching from $cacheTier.
 *
 * An expired entry is only reported when $staleConstraint is below
 * self::USE_EXPIRED; in that case a miss is recorded and logged.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Used as the prefix of the debug log message
 * @return bool True if the entry must be treated as expired
 */
private function checkExpired(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	if ( $staleConstraint >= self::USE_EXPIRED ) {
		// Caller accepts expired data; no need to look at the entry.
		return false;
	}

	if ( !$entry->expired( $page->getTouched() ) ) {
		return false;
	}

	$this->incrementStats( $page, 'miss', 'expired' );
	$this->logger->debug( "{$cacheTier} key expired", [
		'name' => $this->name,
		'touched' => $page->getTouched(),
		'epoch' => $this->cacheEpoch,
		'cache_time' => $entry->getCacheTime()
	] );

	return true;
}
/**
 * Check if $entry belongs to the latest revision of $page
 * given $staleConstraint when fetched from $cacheTier.
 *
 * A revision mismatch is only reported when $staleConstraint is below
 * self::USE_OUTDATED; in that case a miss is recorded and logged.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Used as the prefix of the debug log message
 * @return bool True if the entry must be treated as outdated
 */
private function checkOutdated(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	$latestRevId = $page->getLatest( PageRecord::LOCAL );

	if ( $staleConstraint >= self::USE_OUTDATED ) {
		// Caller accepts data from other revisions.
		return false;
	}

	if ( !$entry->isDifferentRevision( $latestRevId ) ) {
		return false;
	}

	$this->incrementStats( $page, 'miss', 'revid' );
	$this->logger->debug( "{$cacheTier} key is for an old revision", [
		'name' => $this->name,
		'rev_id' => $latestRevId,
		'cached_rev_id' => $entry->getCacheRevisionId()
	] );

	return true;
}
/**
 * Deserialize a JSON string read from the cache into an object of the
 * expected class. Failures are logged and reported as null.
 *
 * @param string $jsonData
 * @param string $key Cache key, used for logging only
 * @param string $expectedClass
 * @return CacheTime|ParserOutput|null
 */
private function restoreFromJson( string $jsonData, string $key, string $expectedClass ) {
	try {
		return $this->jsonCodec->deserialize( $jsonData, $expectedClass );
	} catch ( JsonException $jsonError ) {
		$this->logger->error( "Unable to deserialize JSON", [
			'name' => $this->name,
			'cache_key' => $key,
			'ex_message' => $jsonError->getMessage()
		] );
	} catch ( Exception $otherError ) {
		$this->logger->error( "Unexpected failure during cache load", [
			'name' => $this->name,
			'cache_key' => $key,
			'ex_message' => $otherError->getMessage()
		] );
	}

	// Only reached after one of the failures above has been logged.
	return null;
}
/**
 * Serialize an object to JSON for storage in the cache. A serialization
 * failure is logged with diagnostic details and reported as null.
 *
 * @param CacheTime $obj
 * @param string $key Cache key, used for logging only
 * @return string|null
 */
protected function convertForCache( CacheTime $obj, string $key ) {
	try {
		return $this->jsonCodec->serialize( $obj );
	} catch ( JsonException $jsonError ) {
		// Try to collect some additional debugging information, but
		// wrap this in a try block to ensure we don't make the problem
		// worse.
		try {
			$diagnostics = $this->jsonCodec->detectNonSerializableData( $obj, true );
		} catch ( \Throwable $probeError ) {
			$diagnostics = $probeError->getMessage();
		}

		$this->logger->error( "Unable to serialize JSON", [
			'name' => $this->name,
			'cache_key' => $key,
			'ex_message' => $jsonError->getMessage(),
			'details' => $diagnostics,
			'trace' => $jsonError->getTraceAsString(),
		] );

		return null;
	}
}
/**
 * Make the namespaced class also available under the global name 'ParserCache'.
 * @deprecated class alias since 1.43
 */
class_alias( ParserCache::class, 'ParserCache' );