3 * Cache for outputs of the PHP parser
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @ingroup Cache Parser
24 namespace MediaWiki\Parser
;
27 use InvalidArgumentException
;
29 use MediaWiki\HookContainer\HookContainer
;
30 use MediaWiki\HookContainer\HookRunner
;
31 use MediaWiki\Json\JsonCodec
;
32 use MediaWiki\Page\PageRecord
;
33 use MediaWiki\Page\WikiPageFactory
;
34 use MediaWiki\Title\TitleFactory
;
35 use Psr\Log\LoggerInterface
;
36 use Wikimedia\ObjectCache\BagOStuff
;
37 use Wikimedia\ObjectCache\EmptyBagOStuff
;
38 use Wikimedia\ObjectCache\HashBagOStuff
;
39 use Wikimedia\Stats\StatsFactory
;
40 use Wikimedia\UUID\GlobalIdGenerator
;
43 * Cache for ParserOutput objects corresponding to the latest page revisions.
45 * The ParserCache is a two-tiered cache backed by BagOStuff which supports
46 * varying the stored content on the values of ParserOptions used during
49 * First tier is keyed by the page ID and stores ParserCacheMetadata, which
50 * contains information about cache expiration and the list of ParserOptions
51 * used during the parse of the page. For example, if only 'dateformat' and
52 * 'userlang' options were accessed by the parser when producing output for the
53 * page, array [ 'dateformat', 'userlang' ] will be stored in the metadata cache.
54 * This means none of the other existing options had any effect on the output.
56 * The second tier of the cache contains ParserOutput objects. The key for the
57 * second tier is constructed from the page ID and values of those ParserOptions
58 * used during a page parse which affected the output. Upon cache lookup, the list
59 * of used option names is retrieved from tier 1 cache, and only the values of
60 * those options are hashed together with the page ID to produce a key, while
61 * the rest of the options are ignored. Following the example above where
62 * only [ 'dateformat', 'userlang' ] options changed the parser output for a
63 * page, the key will look like 'page_id!dateformat=default:userlang=ru'.
64 * Thus any cache lookup with dateformat=default and userlang=ru will hit the
65 * same cache entry regardless of the values of the rest of the options, since they
66 * were not accessed during a parse and thus did not change the output.
68 * @see ParserOutput::recordOption()
69 * @see ParserOutput::getUsedOptions()
70 * @see ParserOptions::allCacheVaryingOptions()
71 * @ingroup Cache Parser
75 * Constants for self::getKey()
77 * @since 1.36 the constants were made public
80 /** Use only current data */
81 public const USE_CURRENT_ONLY
= 0;
83 /** Use expired data if current data is unavailable */
84 public const USE_EXPIRED
= 1;
86 /** Use expired data or data from different revisions if current data is unavailable */
87 public const USE_OUTDATED
= 2;
90 * Use expired data and data from different revisions, and if all else
91 * fails vary on all variable options
93 private const USE_ANYTHING
= 3;
95 /** @var string The name of this ParserCache. Used as a root of the cache key. */
102 * Anything cached prior to this is invalidated
108 /** @var HookRunner */
111 /** @var JsonCodec */
114 /** @var StatsFactory */
117 /** @var LoggerInterface */
120 /** @var TitleFactory */
121 private $titleFactory;
123 /** @var WikiPageFactory */
124 private $wikiPageFactory;
126 private ?ParserCacheFilter
$filter = null;
128 private GlobalIdGenerator
$globalIdGenerator;
131 * @var BagOStuff small in-process cache to store metadata.
132 * It's needed multiple times during the request, for example
133 * to build a PoolWorkArticleView key, and then to fetch the
134 * actual ParserCache entry.
136 private $metadataProcCache;
139 * Setup a cache pathway with a given back-end storage mechanism.
141 * This class uses an invalidation strategy that is compatible with
142 * MultiWriteBagOStuff in async replication mode.
144 * @param string $name
145 * @param BagOStuff $cache
146 * @param string $cacheEpoch Anything before this timestamp is invalidated
147 * @param HookContainer $hookContainer
148 * @param JsonCodec $jsonCodec
149 * @param StatsFactory $stats
150 * @param LoggerInterface $logger
151 * @param TitleFactory $titleFactory
152 * @param WikiPageFactory $wikiPageFactory
153 * @param GlobalIdGenerator $globalIdGenerator
155 public function __construct(
159 HookContainer
$hookContainer,
160 JsonCodec
$jsonCodec,
162 LoggerInterface
$logger,
163 TitleFactory
$titleFactory,
164 WikiPageFactory
$wikiPageFactory,
165 GlobalIdGenerator
$globalIdGenerator
168 $this->cache
= $cache;
169 $this->cacheEpoch
= $cacheEpoch;
170 $this->hookRunner
= new HookRunner( $hookContainer );
171 $this->jsonCodec
= $jsonCodec;
172 $this->stats
= $stats;
173 $this->logger
= $logger;
174 $this->titleFactory
= $titleFactory;
175 $this->wikiPageFactory
= $wikiPageFactory;
176 $this->globalIdGenerator
= $globalIdGenerator;
177 $this->metadataProcCache
= new HashBagOStuff( [ 'maxKeys' => 2 ] );
/**
 * Install the filter that save() consults (via shouldCache()) before
 * writing a ParserOutput to the cache.
 *
 * @param ParserCacheFilter $filter
 */
public function setFilter( ParserCacheFilter $filter ): void {
	$this->filter = $filter;
}
/**
 * Drop the metadata entry of the given page from both the in-process
 * metadata cache and the backing cache.
 *
 * @param PageRecord $page Must pass assertWiki( PageRecord::LOCAL )
 */
public function deleteOptionsKey( PageRecord $page ) {
	$page->assertWiki( PageRecord::LOCAL );

	$metadataKey = $this->makeMetadataKey( $page );

	// In-process copy first, then the shared backend.
	$this->metadataProcCache->delete( $metadataKey );
	$this->cache->delete( $metadataKey );
}
/**
 * Fetch the cached ParserOutput for a page, accepting outdated entries.
 *
 * Delegates to get() with $useOutdated set to true and normalises any
 * non-object result to false.
 *
 * @param PageRecord $page
 * @param ParserOptions $popts
 * @return ParserOutput|false
 */
public function getDirty( PageRecord $page, $popts ) {
	$page->assertWiki( PageRecord::LOCAL );

	$cached = $this->get( $page, $popts, true );
	if ( !is_object( $cached ) ) {
		return false;
	}

	return $cached;
}
/**
 * Look up the content model of a page and return it with every '.'
 * replaced by '_'.
 *
 * @param PageRecord $page
 * @return string
 */
private function getContentModelFromPage( PageRecord $page ) {
	$contentModel = $this->wikiPageFactory
		->newFromTitle( $page )
		->getContentModel();

	return str_replace( '.', '_', $contentModel );
}
221 * @param PageRecord $page
222 * @param string $status
223 * @param string|null $reason
225 private function incrementStats( PageRecord
$page, $status, $reason = null ) {
226 $contentModel = $this->getContentModelFromPage( $page );
227 $metricSuffix = $reason ?
"{$status}_{$reason}" : $status;
229 $this->stats
->getCounter( 'ParserCache_operation_total' )
230 ->setLabel( 'name', $this->name
)
231 ->setLabel( 'contentModel', $contentModel )
232 ->setLabel( 'status', $status )
233 ->setLabel( 'reason', $reason ?
: 'n/a' )
234 ->copyToStatsdAt( "{$this->name}.{$contentModel}.{$metricSuffix}" )
239 * @param PageRecord $page
240 * @param string $renderReason
242 private function incrementRenderReasonStats( PageRecord
$page, $renderReason ) {
243 $contentModel = $this->getContentModelFromPage( $page );
244 $renderReason = preg_replace( '/\W+/', '_', $renderReason );
246 $this->stats
->getCounter( 'ParserCache_render_total' )
247 ->setLabel( 'name', $this->name
)
248 ->setLabel( 'contentModel', $contentModel )
249 ->setLabel( 'reason', $renderReason )
250 ->copyToStatsdAt( "{$this->name}.{$contentModel}.reason.{$renderReason}" )
255 * Returns the ParserCache metadata about the given page
256 * considering the given options.
258 * @note Which parser options influence the cache key
259 * is controlled via ParserOutput::recordOption() or
260 * ParserOptions::addExtraKey().
262 * @param PageRecord $page
263 * @param int $staleConstraint one of the self::USE_ constants
264 * @return ParserCacheMetadata|null
267 public function getMetadata(
269 int $staleConstraint = self
::USE_ANYTHING
270 ): ?ParserCacheMetadata
{
271 $page->assertWiki( PageRecord
::LOCAL
);
273 $pageKey = $this->makeMetadataKey( $page );
274 $metadata = $this->metadataProcCache
->get( $pageKey );
276 $metadata = $this->cache
->get(
278 BagOStuff
::READ_VERIFIED
282 if ( $metadata === false ) {
283 $this->incrementStats( $page, 'miss', 'absent_metadata' );
284 $this->logger
->debug( 'ParserOutput metadata cache miss', [ 'name' => $this->name
] );
287 // Ensure this cache hit is present in the in-process cache (T277829)
288 $this->metadataProcCache
->set( $pageKey, $metadata );
291 // NOTE: If the value wasn't serialized to JSON when being stored,
292 // we may already have a ParserOutput object here. This used
293 // to be the default behavior before 1.36. We need to retain
294 // support so we can handle cached objects after an update
295 // from an earlier revision.
296 // NOTE: Support for reading string values from the cache must be
297 // deployed a while before starting to write JSON to the cache,
298 // in case we have to revert either change.
299 if ( is_string( $metadata ) ) {
300 $metadata = $this->restoreFromJson( $metadata, $pageKey, CacheTime
::class );
303 if ( !$metadata instanceof CacheTime
) {
304 $this->incrementStats( $page, 'miss', 'unserialize' );
308 if ( $this->checkExpired( $metadata, $page, $staleConstraint, 'metadata' ) ) {
312 if ( $this->checkOutdated( $metadata, $page, $staleConstraint, 'metadata' ) ) {
316 $this->logger
->debug( 'Parser cache options found', [ 'name' => $this->name
] );
/**
 * Build the cache key under which the metadata entry of a page is kept.
 *
 * The key is composed from this cache's name, the local page ID and the
 * fixed components '|#|' and 'idoptions'.
 *
 * @param PageRecord $page
 * @return string
 */
private function makeMetadataKey( PageRecord $page ): string {
	$pageId = $page->getId( PageRecord::LOCAL );

	return $this->cache->makeKey( $this->name, $pageId, '|#|', 'idoptions' );
}
329 * Get a key that will be used by the ParserCache to store the content
330 * for a given page considering the given options and the array of
333 * @warning The exact format of the key is considered internal and is subject
334 * to change, thus should not be used as storage or long-term caching key.
335 * This is intended to be used for logging or keying something transient.
337 * @param PageRecord $page
338 * @param ParserOptions $options
339 * @param array|null $usedOptions Defaults to all cache varying options.
344 public function makeParserOutputKey(
346 ParserOptions
$options,
347 ?
array $usedOptions = null
349 $usedOptions ??
= ParserOptions
::allCacheVaryingOptions();
350 $title = $this->titleFactory
->newFromPageIdentity( $page );
351 $hash = $options->optionsHash( $usedOptions, $title );
352 // idhash seems to mean 'page id' + 'rendering hash' (r3710)
353 return $this->cache
->makeKey( $this->name
, $page->getId( PageRecord
::LOCAL
), '|#|', 'idhash', $hash );
357 * Retrieve the ParserOutput from ParserCache.
358 * Returns false if not found or outdated.
360 * @param PageRecord $page
361 * @param ParserOptions $popts
362 * @param bool $useOutdated (default false)
364 * @return ParserOutput|false
366 public function get( PageRecord
$page, $popts, $useOutdated = false ) {
367 $page->assertWiki( PageRecord
::LOCAL
);
369 if ( !$page->exists() ) {
370 $this->incrementStats( $page, 'miss', 'nonexistent' );
374 if ( $page->isRedirect() ) {
375 // It's a redirect now
376 $this->incrementStats( $page, 'miss', 'redirect' );
380 $staleConstraint = $useOutdated ? self
::USE_OUTDATED
: self
::USE_CURRENT_ONLY
;
381 $parserOutputMetadata = $this->getMetadata( $page, $staleConstraint );
382 if ( !$parserOutputMetadata ) {
386 if ( !$popts->isSafeToCache( $parserOutputMetadata->getUsedOptions() ) ) {
387 $this->incrementStats( $page, 'miss', 'unsafe' );
391 $parserOutputKey = $this->makeParserOutputKey(
394 $parserOutputMetadata->getUsedOptions()
397 $value = $this->cache
->get( $parserOutputKey, BagOStuff
::READ_VERIFIED
);
398 if ( $value === false ) {
399 $this->incrementStats( $page, 'miss', 'absent' );
400 $this->logger
->debug( 'ParserOutput cache miss', [ 'name' => $this->name
] );
404 // NOTE: If the value wasn't serialized to JSON when being stored,
405 // we may already have a ParserOutput object here. This used
406 // to be the default behavior before 1.36. We need to retain
407 // support so we can handle cached objects after an update
408 // from an earlier revision.
409 // NOTE: Support for reading string values from the cache must be
410 // deployed a while before starting to write JSON to the cache,
411 // in case we have to revert either change.
412 if ( is_string( $value ) ) {
413 $value = $this->restoreFromJson( $value, $parserOutputKey, ParserOutput
::class );
416 if ( !$value instanceof ParserOutput
) {
417 $this->incrementStats( $page, 'miss', 'unserialize' );
421 if ( $this->checkExpired( $value, $page, $staleConstraint, 'output' ) ) {
425 if ( $this->checkOutdated( $value, $page, $staleConstraint, 'output' ) ) {
429 $wikiPage = $this->wikiPageFactory
->newFromTitle( $page );
430 if ( $this->hookRunner
->onRejectParserCacheValue( $value, $wikiPage, $popts ) === false ) {
431 $this->incrementStats( $page, 'miss', 'rejected' );
432 $this->logger
->debug( 'key valid, but rejected by RejectParserCacheValue hook handler',
433 [ 'name' => $this->name
] );
437 $this->logger
->debug( 'ParserOutput cache found', [ 'name' => $this->name
] );
438 $this->incrementStats( $page, 'hit' );
443 * @param ParserOutput $parserOutput
444 * @param PageRecord $page
445 * @param ParserOptions $popts
446 * @param string|null $cacheTime TS_MW timestamp when the cache was generated
447 * @param int|null $revId Revision ID that was parsed
449 public function save(
450 ParserOutput
$parserOutput,
456 $page->assertWiki( PageRecord
::LOCAL
);
457 // T350538: Eventually we'll warn if the $cacheTime and $revId
458 // parameters are non-null here, since we *should* be getting
459 // them from the ParserOutput.
460 if ( $revId !== null && $revId !== $parserOutput->getCacheRevisionId() ) {
461 $this->logger
->warning(
462 'Inconsistent revision ID',
464 'name' => $this->name
,
465 'reason' => $popts->getRenderReason(),
467 'revid2' => $parserOutput->getCacheRevisionId(),
472 if ( !$parserOutput->hasText() ) {
473 throw new InvalidArgumentException( 'Attempt to cache a ParserOutput with no text set!' );
476 $expire = $parserOutput->getCacheExpiry();
478 if ( !$popts->isSafeToCache( $parserOutput->getUsedOptions() ) ) {
479 $this->logger
->debug(
480 'Parser options are not safe to cache and has not been saved',
481 [ 'name' => $this->name
]
483 $this->incrementStats( $page, 'save', 'unsafe' );
487 if ( $expire <= 0 ) {
488 $this->logger
->debug(
489 'Parser output was marked as uncacheable and has not been saved',
490 [ 'name' => $this->name
]
492 $this->incrementStats( $page, 'save', 'uncacheable' );
496 if ( $this->filter
&& !$this->filter
->shouldCache( $parserOutput, $page, $popts ) ) {
497 $this->logger
->debug(
498 'Parser output was filtered and has not been saved',
499 [ 'name' => $this->name
]
501 $this->incrementStats( $page, 'save', 'filtered' );
503 // TODO: In this case, we still want to cache in RevisionOutputCache (T350669).
507 if ( $this->cache
instanceof EmptyBagOStuff
) {
511 // Ensure cache properties are set in the ParserOutput
512 // T350538: These should be turned into assertions that the
513 // properties are already present.
515 $parserOutput->setCacheTime( $cacheTime );
517 if ( !$parserOutput->hasCacheTime() ) {
518 $this->logger
->warning(
521 'name' => $this->name
,
522 'reason' => $popts->getRenderReason(),
526 $cacheTime = $parserOutput->getCacheTime();
530 $parserOutput->setCacheRevisionId( $revId );
531 } elseif ( $parserOutput->getCacheRevisionId() ) {
532 $revId = $parserOutput->getCacheRevisionId();
534 $revId = $page->getLatest( PageRecord
::LOCAL
);
535 $parserOutput->setCacheRevisionId( $revId );
538 $this->logger
->warning(
539 'Parser output cannot be saved if the revision ID is not known',
540 [ 'name' => $this->name
]
542 $this->incrementStats( $page, 'save', 'norevid' );
546 if ( !$parserOutput->getRenderId() ) {
547 $this->logger
->warning(
548 'Parser output missing render ID',
550 'name' => $this->name
,
551 'reason' => $popts->getRenderReason(),
554 $parserOutput->setRenderId( $this->globalIdGenerator
->newUUIDv1() );
557 // Transfer cache properties to the cache metadata
558 $metadata = new CacheTime
;
559 $metadata->recordOptions( $parserOutput->getUsedOptions() );
560 $metadata->updateCacheExpiry( $expire );
561 $metadata->setCacheTime( $cacheTime );
562 $metadata->setCacheRevisionId( $revId );
564 $parserOutputKey = $this->makeParserOutputKey(
567 $metadata->getUsedOptions()
570 $msg = "Saved in parser cache with key $parserOutputKey" .
571 " and timestamp $cacheTime" .
572 " and revision id $revId.";
574 $reason = $popts->getRenderReason();
575 $msg .= " Rendering was triggered because: $reason";
577 $parserOutput->addCacheMessage( $msg );
579 $pageKey = $this->makeMetadataKey( $page );
581 $parserOutputData = $this->convertForCache( $parserOutput, $parserOutputKey );
582 $metadataData = $this->convertForCache( $metadata, $pageKey );
584 if ( !$parserOutputData ||
!$metadataData ) {
585 $this->logger
->warning(
586 'Parser output failed to serialize and was not saved',
587 [ 'name' => $this->name
]
589 $this->incrementStats( $page, 'save', 'nonserializable' );
593 // Save the parser output
598 BagOStuff
::WRITE_ALLOW_SEGMENTS
601 // ...and its pointer to the local cache.
602 $this->metadataProcCache
->set( $pageKey, $metadataData, $expire );
603 // ...and to the global cache.
604 $this->cache
->set( $pageKey, $metadataData, $expire );
606 $title = $this->titleFactory
->newFromPageIdentity( $page );
607 $this->hookRunner
->onParserCacheSaveComplete( $this, $parserOutput, $title, $popts, $revId );
609 $this->logger
->debug( 'Saved in parser cache', [
610 'name' => $this->name
,
611 'key' => $parserOutputKey,
612 'cache_time' => $cacheTime,
615 $this->incrementStats( $page, 'save', 'success' );
616 $this->incrementRenderReasonStats( $page, $popts->getRenderReason() );
620 * Get the backend BagOStuff instance that
621 * powers the parser cache
627 public function getCacheStorage() {
632 * Check if $entry expired for $page given the $staleConstraint
633 * when fetching from $cacheTier.
634 * @param CacheTime $entry
635 * @param PageRecord $page
636 * @param int $staleConstraint One of USE_* constants.
637 * @param string $cacheTier
640 private function checkExpired(
643 int $staleConstraint,
646 if ( $staleConstraint < self
::USE_EXPIRED
&& $entry->expired( $page->getTouched() ) ) {
647 $this->incrementStats( $page, 'miss', 'expired' );
648 $this->logger
->debug( "{$cacheTier} key expired", [
649 'name' => $this->name
,
650 'touched' => $page->getTouched(),
651 'epoch' => $this->cacheEpoch
,
652 'cache_time' => $entry->getCacheTime()
660 * Check if $entry belongs to the latest revision of $page
661 * given $staleConstraint when fetched from $cacheTier.
662 * @param CacheTime $entry
663 * @param PageRecord $page
664 * @param int $staleConstraint One of USE_* constants.
665 * @param string $cacheTier
668 private function checkOutdated(
671 int $staleConstraint,
674 $latestRevId = $page->getLatest( PageRecord
::LOCAL
);
675 if ( $staleConstraint < self
::USE_OUTDATED
&& $entry->isDifferentRevision( $latestRevId ) ) {
676 $this->incrementStats( $page, 'miss', 'revid' );
677 $this->logger
->debug( "{$cacheTier} key is for an old revision", [
678 'name' => $this->name
,
679 'rev_id' => $latestRevId,
680 'cached_rev_id' => $entry->getCacheRevisionId()
688 * @param string $jsonData
690 * @param string $expectedClass
691 * @return CacheTime|ParserOutput|null
693 private function restoreFromJson( string $jsonData, string $key, string $expectedClass ) {
695 /** @var CacheTime $obj */
696 $obj = $this->jsonCodec
->deserialize( $jsonData, $expectedClass );
698 } catch ( JsonException
$e ) {
699 $this->logger
->error( "Unable to deserialize JSON", [
700 'name' => $this->name
,
702 'ex_message' => $e->getMessage()
705 } catch ( Exception
$e ) {
706 $this->logger
->error( "Unexpected failure during cache load", [
707 'name' => $this->name
,
709 'ex_message' => $e->getMessage()
716 * @param CacheTime $obj
718 * @return string|null
720 protected function convertForCache( CacheTime
$obj, string $key ) {
722 return $this->jsonCodec
->serialize( $obj );
723 } catch ( JsonException
$e ) {
724 // Try to collect some additional debugging information, but
725 // wrap this in a try block to ensure we don't make the problem
728 $details = $this->jsonCodec
->detectNonSerializableData( $obj, true );
729 } catch ( \Throwable
$t ) {
730 $details = $t->getMessage();
732 $this->logger
->error( "Unable to serialize JSON", [
733 'name' => $this->name
,
735 'ex_message' => $e->getMessage(),
736 'details' => $details,
737 'trace' => $e->getTraceAsString(),
/**
 * Keep the un-namespaced name 'ParserCache' resolvable as an alias of
 * this namespaced class.
 *
 * @deprecated class alias since 1.43
 */
class_alias( ParserCache::class, 'ParserCache' );