Localisation updates from https://translatewiki.net.
[mediawiki.git] / includes / parser / ParserCache.php
blob923f0500045fd24c8dd1bc082d3a1f70603be69a
1 <?php
2 /**
3 * Cache for outputs of the PHP parser
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
21 * @ingroup Cache Parser
24 namespace MediaWiki\Parser;
26 use Exception;
27 use InvalidArgumentException;
28 use JsonException;
29 use MediaWiki\HookContainer\HookContainer;
30 use MediaWiki\HookContainer\HookRunner;
31 use MediaWiki\Json\JsonCodec;
32 use MediaWiki\Page\PageRecord;
33 use MediaWiki\Page\WikiPageFactory;
34 use MediaWiki\Title\TitleFactory;
35 use Psr\Log\LoggerInterface;
36 use Wikimedia\ObjectCache\BagOStuff;
37 use Wikimedia\ObjectCache\EmptyBagOStuff;
38 use Wikimedia\ObjectCache\HashBagOStuff;
39 use Wikimedia\Stats\StatsFactory;
40 use Wikimedia\UUID\GlobalIdGenerator;
42 /**
43 * Cache for ParserOutput objects corresponding to the latest page revisions.
45 * The ParserCache is a two-tiered cache backed by BagOStuff which supports
46 * varying the stored content on the values of ParserOptions used during
47 * a page parse.
49 * First tier is keyed by the page ID and stores ParserCacheMetadata, which
50 * contains information about cache expiration and the list of ParserOptions
51 * used during the parse of the page. For example, if only 'dateformat' and
52 * 'userlang' options were accessed by the parser when producing output for the
53 * page, array [ 'dateformat', 'userlang' ] will be stored in the metadata cache.
54 * This means none of the other existing options had any effect on the output.
56 * The second tier of the cache contains ParserOutput objects. The key for the
57 * second tier is constructed from the page ID and values of those ParserOptions
58 * used during a page parse which affected the output. Upon cache lookup, the list
59 * of used option names is retrieved from tier 1 cache, and only the values of
60 * those options are hashed together with the page ID to produce a key, while
61 * the rest of the options are ignored. Following the example above where
62 * only [ 'dateformat', 'userlang' ] options changed the parser output for a
63 * page, the key will look like 'page_id!dateformat=default:userlang=ru'.
64 * Thus any cache lookup with dateformat=default and userlang=ru will hit the
65 * same cache entry regardless of the values of the rest of the options, since they
66 * were not accessed during a parse and thus did not change the output.
68 * @see ParserOutput::recordOption()
69 * @see ParserOutput::getUsedOptions()
70 * @see ParserOptions::allCacheVaryingOptions()
71 * @ingroup Cache Parser
73 class ParserCache {
/**
 * Staleness constraints accepted by getMetadata() and evaluated by
 * checkExpired() and checkOutdated(). A higher value is more permissive.
 *
 * @since 1.30
 * @since 1.36 the constants were made public
 */

/**
 * Use only current data
 */
public const USE_CURRENT_ONLY = 0;

/**
 * Use expired data if current data is unavailable
 */
public const USE_EXPIRED = 1;

/**
 * Use expired data or data from different revisions if current data is unavailable
 */
public const USE_OUTDATED = 2;

/**
 * Use expired data and data from different revisions, and if all else
 * fails vary on all variable options
 */
private const USE_ANYTHING = 3;

/**
 * The name of this ParserCache. Used as a root of the cache key.
 *
 * @var string
 */
private $name;

/**
 * Backend storage holding both the metadata and the ParserOutput entries.
 *
 * @var BagOStuff
 */
private $cache;

/**
 * Anything cached prior to this is invalidated
 *
 * @var string
 */
private $cacheEpoch;

/**
 * @var HookRunner
 */
private $hookRunner;

/**
 * Serializes and deserializes cache values to and from JSON.
 *
 * @var JsonCodec
 */
private $jsonCodec;

/**
 * @var StatsFactory
 */
private $stats;

/**
 * @var LoggerInterface
 */
private $logger;

/**
 * @var TitleFactory
 */
private $titleFactory;

/**
 * @var WikiPageFactory
 */
private $wikiPageFactory;

/** Optional filter consulted by save(); null until setFilter() is called. */
private ?ParserCacheFilter $filter = null;

/** Source of render IDs for outputs that arrive at save() without one. */
private GlobalIdGenerator $globalIdGenerator;

/**
 * Small in-process cache to store metadata.
 *
 * It's needed multiple times during the request, for example
 * to build a PoolWorkArticleView key, and then to fetch the
 * actual ParserCache entry.
 *
 * @var BagOStuff
 */
private $metadataProcCache;
/**
 * Set up a cache pathway on top of the given back-end storage.
 *
 * This class uses an invalidation strategy that is compatible with
 * MultiWriteBagOStuff in async replication mode.
 *
 * @param string $name Name of this cache, used as the root of its keys
 * @param BagOStuff $cache Backend storage
 * @param string $cacheEpoch Anything before this timestamp is invalidated
 * @param HookContainer $hookContainer Wrapped into a HookRunner
 * @param JsonCodec $jsonCodec
 * @param StatsFactory $stats
 * @param LoggerInterface $logger
 * @param TitleFactory $titleFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param GlobalIdGenerator $globalIdGenerator
 */
public function __construct(
	string $name,
	BagOStuff $cache,
	string $cacheEpoch,
	HookContainer $hookContainer,
	JsonCodec $jsonCodec,
	StatsFactory $stats,
	LoggerInterface $logger,
	TitleFactory $titleFactory,
	WikiPageFactory $wikiPageFactory,
	GlobalIdGenerator $globalIdGenerator
) {
	// Identity and storage.
	$this->name = $name;
	$this->cacheEpoch = $cacheEpoch;
	$this->cache = $cache;
	// Per-request metadata cache, capped at two keys.
	$this->metadataProcCache = new HashBagOStuff( [ 'maxKeys' => 2 ] );

	// Collaborating services.
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->jsonCodec = $jsonCodec;
	$this->globalIdGenerator = $globalIdGenerator;
	$this->titleFactory = $titleFactory;
	$this->wikiPageFactory = $wikiPageFactory;

	// Observability.
	$this->logger = $logger;
	$this->stats = $stats;
}
/**
 * Install the filter that save() asks before storing a ParserOutput.
 *
 * @since 1.41
 * @param ParserCacheFilter $filter
 */
public function setFilter( ParserCacheFilter $filter ): void {
	$this->filter = $filter;
}
/**
 * Remove the metadata entry of a page from both the in-process cache
 * and the backend storage.
 *
 * @param PageRecord $page
 * @since 1.28
 */
public function deleteOptionsKey( PageRecord $page ) {
	$page->assertWiki( PageRecord::LOCAL );

	$metadataKey = $this->makeMetadataKey( $page );
	// In-process copy first, then the backend.
	foreach ( [ $this->metadataProcCache, $this->cache ] as $store ) {
		$store->delete( $metadataKey );
	}
}
/**
 * Fetch the ParserOutput for a page, accepting outdated entries.
 *
 * Delegates to get() with $useOutdated enabled.
 *
 * @param PageRecord $page
 * @param ParserOptions $popts
 * @return ParserOutput|false
 */
public function getDirty( PageRecord $page, $popts ) {
	$page->assertWiki( PageRecord::LOCAL );

	$output = $this->get( $page, $popts, true );
	if ( !is_object( $output ) ) {
		return false;
	}

	return $output;
}
/**
 * Get the page's content model with every '.' replaced by '_',
 * as used in the metric labels and statsd keys built by this class.
 *
 * @param PageRecord $page
 * @return string
 */
private function getContentModelFromPage( PageRecord $page ) {
	$model = $this->wikiPageFactory->newFromTitle( $page )->getContentModel();

	return str_replace( '.', '_', $model );
}
/**
 * Count one cache operation in the 'ParserCache_operation_total' counter.
 *
 * @param PageRecord $page Page whose content model labels the metric
 * @param string $status Operation outcome, e.g. 'hit', 'miss' or 'save'
 * @param string|null $reason Optional detail; labelled 'n/a' when empty
 */
private function incrementStats( PageRecord $page, $status, $reason = null ) {
	$model = $this->getContentModelFromPage( $page );

	if ( $reason ) {
		$statsdSuffix = "{$status}_{$reason}";
		$reasonLabel = $reason;
	} else {
		$statsdSuffix = $status;
		$reasonLabel = 'n/a';
	}
	$statsdKey = "{$this->name}.{$model}.{$statsdSuffix}";

	$this->stats->getCounter( 'ParserCache_operation_total' )
		->setLabel( 'name', $this->name )
		->setLabel( 'contentModel', $model )
		->setLabel( 'status', $status )
		->setLabel( 'reason', $reasonLabel )
		->copyToStatsdAt( $statsdKey )
		->increment();
}
/**
 * Count one render in the 'ParserCache_render_total' counter, labelled
 * with the reason the render was triggered.
 *
 * @param PageRecord $page Page whose content model labels the metric
 * @param string $renderReason Runs of non-word characters are collapsed to '_'
 */
private function incrementRenderReasonStats( PageRecord $page, $renderReason ) {
	$model = $this->getContentModelFromPage( $page );
	$reasonLabel = preg_replace( '/\W+/', '_', $renderReason );
	$statsdKey = "{$this->name}.{$model}.reason.{$reasonLabel}";

	$this->stats->getCounter( 'ParserCache_render_total' )
		->setLabel( 'name', $this->name )
		->setLabel( 'contentModel', $model )
		->setLabel( 'reason', $reasonLabel )
		->copyToStatsdAt( $statsdKey )
		->increment();
}
/**
 * Look up the ParserCache metadata stored for the given page.
 *
 * The in-process cache is consulted first, then the backend storage.
 *
 * @note Which parser options influence the cache key
 * is controlled via ParserOutput::recordOption() or
 * ParserOptions::addExtraKey().
 *
 * @param PageRecord $page
 * @param int $staleConstraint one of the self::USE_ constants
 * @return ParserCacheMetadata|null Null when the entry is absent, cannot be
 *  restored, or is rejected as expired or outdated under $staleConstraint
 * @since 1.36
 */
public function getMetadata(
	PageRecord $page,
	int $staleConstraint = self::USE_ANYTHING
): ?ParserCacheMetadata {
	$page->assertWiki( PageRecord::LOCAL );

	$metadataKey = $this->makeMetadataKey( $page );

	$entry = $this->metadataProcCache->get( $metadataKey );
	if ( !$entry ) {
		$entry = $this->cache->get( $metadataKey, BagOStuff::READ_VERIFIED );
	}

	if ( $entry === false ) {
		$this->incrementStats( $page, 'miss', 'absent_metadata' );
		$this->logger->debug( 'ParserOutput metadata cache miss', [ 'name' => $this->name ] );
		return null;
	}

	// Ensure this cache hit is present in the in-process cache (T277829).
	// The value is stored as fetched, i.e. before any JSON decoding below.
	$this->metadataProcCache->set( $metadataKey, $entry );

	// NOTE: Values written without JSON serialization (the default before
	//       1.36) arrive here as objects already, and must remain readable
	//       after an update from an earlier revision.
	// NOTE: Support for reading string values from the cache must be
	//       deployed a while before starting to write JSON to the cache,
	//       in case we have to revert either change.
	if ( is_string( $entry ) ) {
		$entry = $this->restoreFromJson( $entry, $metadataKey, CacheTime::class );
	}

	if ( !( $entry instanceof CacheTime ) ) {
		$this->incrementStats( $page, 'miss', 'unserialize' );
		return null;
	}

	// Short-circuit: the outdated check only runs if the entry is not expired.
	if (
		$this->checkExpired( $entry, $page, $staleConstraint, 'metadata' ) ||
		$this->checkOutdated( $entry, $page, $staleConstraint, 'metadata' )
	) {
		return null;
	}

	$this->logger->debug( 'Parser cache options found', [ 'name' => $this->name ] );
	return $entry;
}
/**
 * Build the backend key under which the metadata of a page is stored.
 *
 * @param PageRecord $page
 * @return string
 */
private function makeMetadataKey( PageRecord $page ): string {
	$pageId = $page->getId( PageRecord::LOCAL );

	return $this->cache->makeKey( $this->name, $pageId, '|#|', 'idoptions' );
}
/**
 * Build the key under which the ParserCache stores the content of a page,
 * given the parser options and the list of options that were used.
 *
 * @warning The exact format of the key is considered internal and is subject
 * to change, thus should not be used as storage or long-term caching key.
 * This is intended to be used for logging or keying something transient.
 *
 * @param PageRecord $page
 * @param ParserOptions $options
 * @param array|null $usedOptions Defaults to all cache varying options.
 * @return string
 * @internal
 * @since 1.36
 */
public function makeParserOutputKey(
	PageRecord $page,
	ParserOptions $options,
	?array $usedOptions = null
): string {
	if ( $usedOptions === null ) {
		$usedOptions = ParserOptions::allCacheVaryingOptions();
	}

	$optionsHash = $options->optionsHash(
		$usedOptions,
		$this->titleFactory->newFromPageIdentity( $page )
	);
	$pageId = $page->getId( PageRecord::LOCAL );

	// 'idhash' appears to stand for 'page id' + 'rendering hash' (r3710)
	return $this->cache->makeKey( $this->name, $pageId, '|#|', 'idhash', $optionsHash );
}
/**
 * Retrieve the ParserOutput for a page from the ParserCache.
 *
 * Every rejection other than a failed metadata lookup is counted here as a
 * 'miss' with a reason; getMetadata() counts its own misses.
 *
 * @param PageRecord $page
 * @param ParserOptions $popts
 * @param bool $useOutdated (default false) Accept entries that are expired
 *  or that belong to a different revision
 *
 * @return ParserOutput|false False if not found or outdated
 */
public function get( PageRecord $page, $popts, $useOutdated = false ) {
	$page->assertWiki( PageRecord::LOCAL );

	// Missing pages and redirects are reported as misses without any lookup.
	if ( !$page->exists() ) {
		$this->incrementStats( $page, 'miss', 'nonexistent' );
		return false;
	}
	if ( $page->isRedirect() ) {
		$this->incrementStats( $page, 'miss', 'redirect' );
		return false;
	}

	if ( $useOutdated ) {
		$staleConstraint = self::USE_OUTDATED;
	} else {
		$staleConstraint = self::USE_CURRENT_ONLY;
	}

	// Tier 1: metadata listing the options that were used for the parse.
	$metadata = $this->getMetadata( $page, $staleConstraint );
	if ( !$metadata ) {
		return false;
	}

	if ( !$popts->isSafeToCache( $metadata->getUsedOptions() ) ) {
		$this->incrementStats( $page, 'miss', 'unsafe' );
		return false;
	}

	// Tier 2: the output itself, keyed on the values of the used options.
	$outputKey = $this->makeParserOutputKey( $page, $popts, $metadata->getUsedOptions() );
	$output = $this->cache->get( $outputKey, BagOStuff::READ_VERIFIED );
	if ( $output === false ) {
		$this->incrementStats( $page, 'miss', 'absent' );
		$this->logger->debug( 'ParserOutput cache miss', [ 'name' => $this->name ] );
		return false;
	}

	// NOTE: Values written without JSON serialization (the default before
	//       1.36) arrive here as objects already, and must remain readable
	//       after an update from an earlier revision.
	// NOTE: Support for reading string values from the cache must be
	//       deployed a while before starting to write JSON to the cache,
	//       in case we have to revert either change.
	if ( is_string( $output ) ) {
		$output = $this->restoreFromJson( $output, $outputKey, ParserOutput::class );
	}

	if ( !( $output instanceof ParserOutput ) ) {
		$this->incrementStats( $page, 'miss', 'unserialize' );
		return false;
	}

	// Short-circuit: the outdated check only runs if the entry is not expired.
	if (
		$this->checkExpired( $output, $page, $staleConstraint, 'output' ) ||
		$this->checkOutdated( $output, $page, $staleConstraint, 'output' )
	) {
		return false;
	}

	// Hook handlers get the final say on an otherwise valid entry.
	$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
	$verdict = $this->hookRunner->onRejectParserCacheValue( $output, $wikiPage, $popts );
	if ( $verdict === false ) {
		$this->incrementStats( $page, 'miss', 'rejected' );
		$this->logger->debug(
			'key valid, but rejected by RejectParserCacheValue hook handler',
			[ 'name' => $this->name ]
		);
		return false;
	}

	$this->logger->debug( 'ParserOutput cache found', [ 'name' => $this->name ] );
	$this->incrementStats( $page, 'hit' );
	return $output;
}
/**
 * Store a ParserOutput, and the metadata pointing at it, in the cache.
 *
 * Nothing is written when the options are unsafe to cache, the output is
 * marked uncacheable, the filter declines it, the backend is an
 * EmptyBagOStuff, no revision ID can be determined, or serialization fails.
 *
 * @param ParserOutput $parserOutput
 * @param PageRecord $page
 * @param ParserOptions $popts
 * @param string|null $cacheTime TS_MW timestamp when the cache was generated
 * @param int|null $revId Revision ID that was parsed
 * @throws InvalidArgumentException If $parserOutput has no text set
 */
public function save(
	ParserOutput $parserOutput,
	PageRecord $page,
	$popts,
	$cacheTime = null,
	$revId = null
) {
	$page->assertWiki( PageRecord::LOCAL );

	// T350538: Eventually we'll warn if the $cacheTime and $revId
	// parameters are non-null here, since we *should* be getting
	// them from the ParserOutput.
	if ( $revId !== null && $revId !== $parserOutput->getCacheRevisionId() ) {
		$this->logger->warning( 'Inconsistent revision ID', [
			'name' => $this->name,
			'reason' => $popts->getRenderReason(),
			'revid1' => $revId,
			'revid2' => $parserOutput->getCacheRevisionId(),
		] );
	}

	if ( !$parserOutput->hasText() ) {
		throw new InvalidArgumentException( 'Attempt to cache a ParserOutput with no text set!' );
	}

	$expire = $parserOutput->getCacheExpiry();

	// --- Reasons not to store anything -------------------------------

	if ( !$popts->isSafeToCache( $parserOutput->getUsedOptions() ) ) {
		$this->logger->debug(
			'Parser options are not safe to cache and has not been saved',
			[ 'name' => $this->name ]
		);
		$this->incrementStats( $page, 'save', 'unsafe' );
		return;
	}

	if ( $expire <= 0 ) {
		$this->logger->debug(
			'Parser output was marked as uncacheable and has not been saved',
			[ 'name' => $this->name ]
		);
		$this->incrementStats( $page, 'save', 'uncacheable' );
		return;
	}

	if ( $this->filter && !$this->filter->shouldCache( $parserOutput, $page, $popts ) ) {
		$this->logger->debug(
			'Parser output was filtered and has not been saved',
			[ 'name' => $this->name ]
		);
		$this->incrementStats( $page, 'save', 'filtered' );
		// TODO: In this case, we still want to cache in RevisionOutputCache (T350669).
		return;
	}

	if ( $this->cache instanceof EmptyBagOStuff ) {
		return;
	}

	// --- Ensure cache properties are set in the ParserOutput ---------
	// T350538: These should be turned into assertions that the
	// properties are already present.

	if ( !$cacheTime ) {
		if ( !$parserOutput->hasCacheTime() ) {
			$this->logger->warning( 'No cache time set', [
				'name' => $this->name,
				'reason' => $popts->getRenderReason(),
			] );
		}
		$cacheTime = $parserOutput->getCacheTime();
	} else {
		$parserOutput->setCacheTime( $cacheTime );
	}

	if ( !$revId ) {
		if ( $parserOutput->getCacheRevisionId() ) {
			// Trust the revision ID already recorded on the output.
			$revId = $parserOutput->getCacheRevisionId();
		} else {
			// Fall back to the page's latest revision.
			$revId = $page->getLatest( PageRecord::LOCAL );
			$parserOutput->setCacheRevisionId( $revId );
		}
	} else {
		$parserOutput->setCacheRevisionId( $revId );
	}

	if ( !$revId ) {
		$this->logger->warning(
			'Parser output cannot be saved if the revision ID is not known',
			[ 'name' => $this->name ]
		);
		$this->incrementStats( $page, 'save', 'norevid' );
		return;
	}

	if ( !$parserOutput->getRenderId() ) {
		$this->logger->warning( 'Parser output missing render ID', [
			'name' => $this->name,
			'reason' => $popts->getRenderReason(),
		] );
		$parserOutput->setRenderId( $this->globalIdGenerator->newUUIDv1() );
	}

	// --- Transfer cache properties to the cache metadata -------------

	$metadata = new CacheTime;
	$metadata->recordOptions( $parserOutput->getUsedOptions() );
	$metadata->updateCacheExpiry( $expire );
	$metadata->setCacheTime( $cacheTime );
	$metadata->setCacheRevisionId( $revId );

	$outputKey = $this->makeParserOutputKey( $page, $popts, $metadata->getUsedOptions() );

	// The message must be attached before the output is serialized below.
	$renderReason = $popts->getRenderReason();
	$parserOutput->addCacheMessage(
		"Saved in parser cache with key {$outputKey} and timestamp {$cacheTime}" .
		" and revision id {$revId}. Rendering was triggered because: {$renderReason}"
	);

	$metadataKey = $this->makeMetadataKey( $page );

	$serializedOutput = $this->convertForCache( $parserOutput, $outputKey );
	$serializedMetadata = $this->convertForCache( $metadata, $metadataKey );

	if ( !$serializedOutput || !$serializedMetadata ) {
		$this->logger->warning(
			'Parser output failed to serialize and was not saved',
			[ 'name' => $this->name ]
		);
		$this->incrementStats( $page, 'save', 'nonserializable' );
		return;
	}

	// --- Write: the output first, then the metadata pointing at it ---

	$this->cache->set( $outputKey, $serializedOutput, $expire, BagOStuff::WRITE_ALLOW_SEGMENTS );
	// Metadata goes to the in-process cache...
	$this->metadataProcCache->set( $metadataKey, $serializedMetadata, $expire );
	// ...and to the backend.
	$this->cache->set( $metadataKey, $serializedMetadata, $expire );

	$this->hookRunner->onParserCacheSaveComplete(
		$this,
		$parserOutput,
		$this->titleFactory->newFromPageIdentity( $page ),
		$popts,
		$revId
	);

	$this->logger->debug( 'Saved in parser cache', [
		'name' => $this->name,
		'key' => $outputKey,
		'cache_time' => $cacheTime,
		'rev_id' => $revId
	] );
	$this->incrementStats( $page, 'save', 'success' );
	$this->incrementRenderReasonStats( $page, $popts->getRenderReason() );
}
/**
 * Expose the BagOStuff backend this parser cache reads from and writes to.
 *
 * @since 1.30
 * @internal
 * @return BagOStuff
 */
public function getCacheStorage() {
	return $this->cache;
}
/**
 * Decide whether $entry must be rejected as expired for $page under
 * $staleConstraint. A rejection is counted as a miss and logged.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Tier name used as a prefix of the log message
 * @return bool True if the entry is expired and expired data is not acceptable
 */
private function checkExpired(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	// Expired data is acceptable from USE_EXPIRED upwards.
	if ( $staleConstraint >= self::USE_EXPIRED ) {
		return false;
	}

	if ( !$entry->expired( $page->getTouched() ) ) {
		return false;
	}

	$this->incrementStats( $page, 'miss', 'expired' );
	$this->logger->debug( "{$cacheTier} key expired", [
		'name' => $this->name,
		'touched' => $page->getTouched(),
		'epoch' => $this->cacheEpoch,
		'cache_time' => $entry->getCacheTime()
	] );

	return true;
}
/**
 * Decide whether $entry must be rejected because it does not belong to the
 * latest revision of $page under $staleConstraint. A rejection is counted
 * as a miss and logged.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Tier name used as a prefix of the log message
 * @return bool True if the entry is for another revision and that is not acceptable
 */
private function checkOutdated(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	$currentRevId = $page->getLatest( PageRecord::LOCAL );

	// Data from other revisions is acceptable from USE_OUTDATED upwards.
	if ( $staleConstraint >= self::USE_OUTDATED ) {
		return false;
	}

	if ( !$entry->isDifferentRevision( $currentRevId ) ) {
		return false;
	}

	$this->incrementStats( $page, 'miss', 'revid' );
	$this->logger->debug( "{$cacheTier} key is for an old revision", [
		'name' => $this->name,
		'rev_id' => $currentRevId,
		'cached_rev_id' => $entry->getCacheRevisionId()
	] );

	return true;
}
/**
 * Decode a JSON cache value into an object of the expected class.
 *
 * Any Exception raised while decoding is logged as an error and turned
 * into a null result.
 *
 * @param string $jsonData
 * @param string $key Cache key, used for logging only
 * @param string $expectedClass
 * @return CacheTime|ParserOutput|null
 */
private function restoreFromJson( string $jsonData, string $key, string $expectedClass ) {
	try {
		/** @var CacheTime $restored */
		$restored = $this->jsonCodec->deserialize( $jsonData, $expectedClass );
	} catch ( Exception $e ) {
		// JSON-specific failures get their own message; everything else is "unexpected".
		if ( $e instanceof JsonException ) {
			$summary = "Unable to deserialize JSON";
		} else {
			$summary = "Unexpected failure during cache load";
		}

		$this->logger->error( $summary, [
			'name' => $this->name,
			'cache_key' => $key,
			'ex_message' => $e->getMessage()
		] );

		return null;
	}

	return $restored;
}
/**
 * Serialize a cache entry to JSON for storage.
 *
 * A JsonException is logged as an error, together with whatever diagnosis
 * of the non-serializable data can be collected, and yields null.
 *
 * @param CacheTime $obj
 * @param string $key Cache key, used for logging only
 * @return string|null
 */
protected function convertForCache( CacheTime $obj, string $key ) {
	try {
		$encoded = $this->jsonCodec->serialize( $obj );
	} catch ( JsonException $failure ) {
		// Gather extra debugging information, guarding the attempt so
		// that a second failure cannot make the problem worse.
		try {
			$diagnosis = $this->jsonCodec->detectNonSerializableData( $obj, true );
		} catch ( \Throwable $secondary ) {
			$diagnosis = $secondary->getMessage();
		}

		$context = [
			'name' => $this->name,
			'cache_key' => $key,
			'ex_message' => $failure->getMessage(),
			'details' => $diagnosis,
			'trace' => $failure->getTraceAsString(),
		];
		$this->logger->error( "Unable to serialize JSON", $context );

		return null;
	}

	return $encoded;
}
/**
 * Keep the un-namespaced class name working for existing callers.
 *
 * @deprecated class alias since 1.43
 */
class_alias( ParserCache::class, 'ParserCache', true );