3 * Cache for outputs of the PHP parser
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @ingroup Cache Parser
24 namespace MediaWiki\Parser
;
27 use InvalidArgumentException
;
29 use MediaWiki\HookContainer\HookContainer
;
30 use MediaWiki\HookContainer\HookRunner
;
31 use MediaWiki\Json\JsonCodec
;
32 use MediaWiki\Page\PageRecord
;
33 use MediaWiki\Page\WikiPageFactory
;
34 use MediaWiki\Title\TitleFactory
;
35 use Psr\Log\LoggerInterface
;
36 use Wikimedia\ObjectCache\BagOStuff
;
37 use Wikimedia\ObjectCache\EmptyBagOStuff
;
38 use Wikimedia\ObjectCache\HashBagOStuff
;
39 use Wikimedia\Stats\StatsFactory
;
40 use Wikimedia\UUID\GlobalIdGenerator
;
43 * Cache for ParserOutput objects corresponding to the latest page revisions.
45 * The ParserCache is a two-tiered cache backed by BagOStuff which supports
46 * varying the stored content on the values of ParserOptions used during a page parse.
49 * First tier is keyed by the page ID and stores ParserCacheMetadata, which
50 * contains information about cache expiration and the list of ParserOptions
51 * used during the parse of the page. For example, if only 'dateformat' and
52 * 'userlang' options were accessed by the parser when producing output for the
53 * page, array [ 'dateformat', 'userlang' ] will be stored in the metadata cache.
54 * This means none of the other existing options had any effect on the output.
56 * The second tier of the cache contains ParserOutput objects. The key for the
57 * second tier is constructed from the page ID and values of those ParserOptions
58 * used during a page parse which affected the output. Upon cache lookup, the list
59 * of used option names is retrieved from tier 1 cache, and only the values of
60 * those options are hashed together with the page ID to produce a key, while
61 * the rest of the options are ignored. Following the example above where
62 * only [ 'dateformat', 'userlang' ] options changed the parser output for a
63 * page, the key will look like 'page_id!dateformat=default:userlang=ru'.
64 * Thus any cache lookup with dateformat=default and userlang=ru will hit the
65 * same cache entry regardless of the values of the rest of the options, since they
66 * were not accessed during a parse and thus did not change the output.
68 * @see ParserOutput::recordOption()
69 * @see ParserOutput::getUsedOptions()
70 * @see ParserOptions::allCacheVaryingOptions()
71 * @ingroup Cache Parser
/**
 * Staleness constraints, accepted as $staleConstraint by self::getMetadata().
 * Higher values tolerate progressively staler cache entries.
 *
 * @since 1.36 the constants were made public
 */

/** Use only current data */
public const USE_CURRENT_ONLY = 0;

/** Use expired data if current data is unavailable */
public const USE_EXPIRED = 1;

/** Use expired data or data from different revisions if current data is unavailable */
public const USE_OUTDATED = 2;

/**
 * Use expired data and data from different revisions, and if all else
 * fails vary on all variable options
 */
private const USE_ANYTHING = 3;
95 /** @var string The name of this ParserCache. Used as a root of the cache key. */
102 * Anything cached prior to this is invalidated
108 /** @var HookRunner */
111 /** @var JsonCodec */
114 /** @var StatsFactory */
117 /** @var LoggerInterface */
120 /** @var TitleFactory */
121 private $titleFactory;
123 /** @var WikiPageFactory */
124 private $wikiPageFactory;
126 private ?ParserCacheFilter
$filter = null;
128 private GlobalIdGenerator
$globalIdGenerator;
131 * @var BagOStuff small in-process cache to store metadata.
132 * It's needed multiple times during the request, for example
133 * to build a PoolWorkArticleView key, and then to fetch the
134 * actual ParserCache entry.
136 private $metadataProcCache;
/**
 * Setup a cache pathway with a given back-end storage mechanism.
 *
 * This class uses an invalidation strategy that is compatible with
 * MultiWriteBagOStuff in async replication mode.
 *
 * @param string $name The name of this ParserCache, used as the root of the cache keys
 * @param BagOStuff $cache Backend store for both metadata and ParserOutput entries
 * @param string $cacheEpoch Anything before this timestamp is invalidated
 * @param HookContainer $hookContainer
 * @param JsonCodec $jsonCodec
 * @param StatsFactory $stats
 * @param LoggerInterface $logger
 * @param TitleFactory $titleFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param GlobalIdGenerator $globalIdGenerator
 */
public function __construct(
	string $name,
	BagOStuff $cache,
	string $cacheEpoch,
	HookContainer $hookContainer,
	JsonCodec $jsonCodec,
	StatsFactory $stats,
	LoggerInterface $logger,
	TitleFactory $titleFactory,
	WikiPageFactory $wikiPageFactory,
	GlobalIdGenerator $globalIdGenerator
) {
	$this->name = $name;
	$this->cache = $cache;
	$this->cacheEpoch = $cacheEpoch;
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->jsonCodec = $jsonCodec;
	$this->stats = $stats;
	$this->logger = $logger;
	$this->titleFactory = $titleFactory;
	$this->wikiPageFactory = $wikiPageFactory;
	$this->globalIdGenerator = $globalIdGenerator;
	// Small in-process cache for metadata; it holds at most two keys.
	$this->metadataProcCache = new HashBagOStuff( [ 'maxKeys' => 2 ] );
}
/**
 * Install the filter that self::save() consults (via shouldCache())
 * before writing an output to the cache.
 *
 * @param ParserCacheFilter $filter
 */
public function setFilter( ParserCacheFilter $filter ): void {
	$this->filter = $filter;
}
/**
 * Remove the metadata entry of a page from both the in-process cache
 * and the backend store.
 *
 * @param PageRecord $page Must belong to the local wiki
 */
public function deleteOptionsKey( PageRecord $page ) {
	$page->assertWiki( PageRecord::LOCAL );

	$metadataKey = $this->makeMetadataKey( $page );
	$this->metadataProcCache->delete( $metadataKey );
	$this->cache->delete( $metadataKey );
}
/**
 * Fetch the ParserOutput for a page from the cache, accepting an
 * outdated entry if that is all there is.
 *
 * @param PageRecord $page Must belong to the local wiki
 * @param ParserOptions $popts
 * @return ParserOutput|false The cached object, or false when self::get()
 *   did not yield an object
 */
public function getDirty( PageRecord $page, $popts ) {
	$page->assertWiki( PageRecord::LOCAL );

	// Third argument asks get() to tolerate outdated entries.
	$cached = $this->get( $page, $popts, true );
	if ( is_object( $cached ) ) {
		return $cached;
	}

	return false;
}
/**
 * Look up the content model of a page in a form usable as a metric label.
 *
 * @param PageRecord $page
 * @return string Content model with every '.' replaced by '_'
 */
private function getContentModelFromPage( PageRecord $page ) {
	$contentModel = $this->wikiPageFactory->newFromTitle( $page )->getContentModel();

	// The model is later embedded as one segment of a dot-separated statsd name.
	return str_replace( '.', '_', $contentModel );
}
/**
 * Count one cache operation in the 'ParserCache_operation_total' counter.
 *
 * @param PageRecord $page Page whose content model is used as a label
 * @param string $status Operation outcome, e.g. 'hit', 'miss' or 'save'
 * @param string|null $reason Optional detail for the status; reported
 *   as 'n/a' in the label when empty
 */
private function incrementStats( PageRecord $page, $status, $reason = null ) {
	$contentModel = $this->getContentModelFromPage( $page );
	// The legacy statsd name carries the reason as a suffix of the status.
	$metricSuffix = $reason ? "{$status}_{$reason}" : $status;

	$this->stats->getCounter( 'ParserCache_operation_total' )
		->setLabel( 'name', $this->name )
		->setLabel( 'contentModel', $contentModel )
		->setLabel( 'status', $status )
		->setLabel( 'reason', $reason ?: 'n/a' )
		->copyToStatsdAt( "{$this->name}.{$contentModel}.{$metricSuffix}" )
		// Configuring the counter records nothing by itself; it must be incremented.
		->increment();
}
/**
 * Count one render in the 'ParserCache_render_total' counter, labelled
 * with the reason the render was triggered.
 *
 * @param PageRecord $page Page whose content model is used as a label
 * @param string $renderReason Free-form reason; every run of non-word
 *   characters is collapsed to a single '_' before use
 */
private function incrementRenderReasonStats( PageRecord $page, $renderReason ) {
	$contentModel = $this->getContentModelFromPage( $page );
	$renderReason = preg_replace( '/\W+/', '_', $renderReason );

	$this->stats->getCounter( 'ParserCache_render_total' )
		->setLabel( 'name', $this->name )
		->setLabel( 'contentModel', $contentModel )
		->setLabel( 'reason', $renderReason )
		->copyToStatsdAt( "{$this->name}.{$contentModel}.reason.{$renderReason}" )
		// Configuring the counter records nothing by itself; it must be incremented.
		->increment();
}
/**
 * Returns the ParserCache metadata about the given page
 * considering the given options.
 *
 * The in-process cache is consulted first and the backend store only
 * when that yields nothing; a backend hit is copied into the in-process
 * cache.
 *
 * @note Which parser options influence the cache key
 * is controlled via ParserOutput::recordOption() or
 * ParserOptions::addExtraKey().
 *
 * @param PageRecord $page
 * @param int $staleConstraint one of the self::USE_ constants
 * @return ParserCacheMetadata|null Null when no entry exists, when it
 *   cannot be restored, or when it is rejected as expired or outdated
 *   under $staleConstraint
 */
public function getMetadata(
	PageRecord $page,
	int $staleConstraint = self::USE_ANYTHING
): ?ParserCacheMetadata {
	$page->assertWiki( PageRecord::LOCAL );

	$pageKey = $this->makeMetadataKey( $page );
	$metadata = $this->metadataProcCache->get( $pageKey );
	if ( !$metadata ) {
		// Nothing in the in-process cache; fall back to the backend store.
		$metadata = $this->cache->get(
			$pageKey,
			BagOStuff::READ_VERIFIED
		);
	}

	if ( $metadata === false ) {
		$this->incrementStats( $page, 'miss', 'absent_metadata' );
		$this->logger->debug( 'ParserOutput metadata cache miss', [ 'name' => $this->name ] );
		return null;
	}

	// Ensure this cache hit is present in the in-process cache (T277829)
	$this->metadataProcCache->set( $pageKey, $metadata );

	// NOTE: If the value wasn't serialized to JSON when being stored,
	//       we may already have a ParserOutput object here. This used
	//       to be the default behavior before 1.36. We need to retain
	//       support so we can handle cached objects after an update
	//       from an earlier revision.
	// NOTE: Support for reading string values from the cache must be
	//       deployed a while before starting to write JSON to the cache,
	//       in case we have to revert either change.
	if ( is_string( $metadata ) ) {
		$metadata = $this->restoreFromJson( $metadata, $pageKey, CacheTime::class );
	}

	if ( !$metadata instanceof CacheTime ) {
		$this->incrementStats( $page, 'miss', 'unserialize' );
		return null;
	}

	// Both checks record their own 'miss' statistics when they reject the entry.
	if ( $this->checkExpired( $metadata, $page, $staleConstraint, 'metadata' ) ) {
		return null;
	}

	if ( $this->checkOutdated( $metadata, $page, $staleConstraint, 'metadata' ) ) {
		return null;
	}

	$this->logger->debug( 'Parser cache options found', [ 'name' => $this->name ] );
	return $metadata;
}
/**
 * Build the cache key under which the metadata of a page is stored.
 *
 * @param PageRecord $page
 * @return string
 */
private function makeMetadataKey( PageRecord $page ): string {
	$pageId = $page->getId( PageRecord::LOCAL );

	return $this->cache->makeKey( $this->name, $pageId, '|#|', 'idoptions' );
}
/**
 * Get a key that will be used by the ParserCache to store the content
 * for a given page considering the given options and the array of
 * used options.
 *
 * @warning The exact format of the key is considered internal and is subject
 * to change, thus should not be used as storage or long-term caching key.
 * This is intended to be used for logging or keying something transient.
 *
 * @param PageRecord $page
 * @param ParserOptions $options
 * @param array|null $usedOptions Defaults to all cache varying options.
 * @return string
 */
public function makeParserOutputKey(
	PageRecord $page,
	ParserOptions $options,
	?array $usedOptions = null
): string {
	$usedOptions ??= ParserOptions::allCacheVaryingOptions();
	$title = $this->titleFactory->newFromPageIdentity( $page );
	$hash = $options->optionsHash( $usedOptions, $title );
	// idhash seems to mean 'page id' + 'rendering hash' (r3710)
	return $this->cache->makeKey( $this->name, $page->getId( PageRecord::LOCAL ), '|#|', 'idhash', $hash );
}
/**
 * Retrieve the ParserOutput from ParserCache.
 * false if not found or outdated.
 *
 * Every rejected lookup is counted as a 'miss' with a reason, a
 * successful one as a 'hit'.
 *
 * @param PageRecord $page
 * @param ParserOptions $popts
 * @param bool $useOutdated (default false) Accept expired entries and
 *   entries that belong to a different revision
 *
 * @return ParserOutput|false
 */
public function get( PageRecord $page, $popts, $useOutdated = false ) {
	$page->assertWiki( PageRecord::LOCAL );

	if ( !$page->exists() ) {
		$this->incrementStats( $page, 'miss', 'nonexistent' );
		return false;
	}

	if ( $page->isRedirect() ) {
		// It's a redirect now
		$this->incrementStats( $page, 'miss', 'redirect' );
		return false;
	}

	$staleConstraint = $useOutdated ? self::USE_OUTDATED : self::USE_CURRENT_ONLY;
	$parserOutputMetadata = $this->getMetadata( $page, $staleConstraint );
	if ( !$parserOutputMetadata ) {
		// getMetadata() has already recorded the reason for the miss.
		return false;
	}

	if ( !$popts->isSafeToCache( $parserOutputMetadata->getUsedOptions() ) ) {
		$this->incrementStats( $page, 'miss', 'unsafe' );
		return false;
	}

	$parserOutputKey = $this->makeParserOutputKey(
		$page,
		$popts,
		$parserOutputMetadata->getUsedOptions()
	);

	$value = $this->cache->get( $parserOutputKey, BagOStuff::READ_VERIFIED );
	if ( $value === false ) {
		$this->incrementStats( $page, 'miss', 'absent' );
		$this->logger->debug( 'ParserOutput cache miss', [ 'name' => $this->name ] );
		return false;
	}

	// NOTE: If the value wasn't serialized to JSON when being stored,
	//       we may already have a ParserOutput object here. This used
	//       to be the default behavior before 1.36. We need to retain
	//       support so we can handle cached objects after an update
	//       from an earlier revision.
	// NOTE: Support for reading string values from the cache must be
	//       deployed a while before starting to write JSON to the cache,
	//       in case we have to revert either change.
	if ( is_string( $value ) ) {
		$value = $this->restoreFromJson( $value, $parserOutputKey, ParserOutput::class );
	}

	if ( !$value instanceof ParserOutput ) {
		$this->incrementStats( $page, 'miss', 'unserialize' );
		return false;
	}

	// Both checks record their own 'miss' statistics when they reject the entry.
	if ( $this->checkExpired( $value, $page, $staleConstraint, 'output' ) ) {
		return false;
	}

	if ( $this->checkOutdated( $value, $page, $staleConstraint, 'output' ) ) {
		return false;
	}

	$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
	if ( $this->hookRunner->onRejectParserCacheValue( $value, $wikiPage, $popts ) === false ) {
		$this->incrementStats( $page, 'miss', 'rejected' );
		$this->logger->debug( 'key valid, but rejected by RejectParserCacheValue hook handler',
			[ 'name' => $this->name ] );
		return false;
	}

	$this->logger->debug( 'ParserOutput cache found', [ 'name' => $this->name ] );
	$this->incrementStats( $page, 'hit' );
	return $value;
}
439 * @param ParserOutput $parserOutput
440 * @param PageRecord $page
441 * @param ParserOptions $popts
442 * @param string|null $cacheTime TS_MW timestamp when the cache was generated
443 * @param int|null $revId Revision ID that was parsed
/**
 * Store a ParserOutput, together with a metadata entry describing it,
 * in the cache.
 *
 * Visible behaviour: an InvalidArgumentException is thrown when the output
 * has no text. The method logs and records a 'save' statistic when the
 * used options are unsafe to cache, the expiry is not positive, the filter
 * declines, the revision ID is unknown, or serialization fails. On success
 * the output and its metadata are written to the backend, the metadata is
 * also put into the in-process cache, and the ParserCacheSaveComplete hook
 * is run.
 *
 * NOTE(review): several lines of this method (the rest of the signature,
 * early returns, some conditions and closing brackets) are not present
 * in this listing; the notes below only describe what is visible.
 */
445 public function save(
446 ParserOutput
$parserOutput,
452 $page->assertWiki( PageRecord
::LOCAL
);
453 // T350538: Eventually we'll warn if the $cacheTime and $revId
454 // parameters are non-null here, since we *should* be getting
455 // them from the ParserOutput.
456 if ( $revId !== null && $revId !== $parserOutput->getCacheRevisionId() ) {
457 $this->logger
->warning(
458 'Inconsistent revision ID',
460 'name' => $this->name
,
461 'reason' => $popts->getRenderReason(),
463 'revid2' => $parserOutput->getCacheRevisionId(),
468 if ( !$parserOutput->hasText() ) {
469 throw new InvalidArgumentException( 'Attempt to cache a ParserOutput with no text set!' );
472 $expire = $parserOutput->getCacheExpiry();
// Same safety check that get() applies to the used options before a lookup.
474 if ( !$popts->isSafeToCache( $parserOutput->getUsedOptions() ) ) {
475 $this->logger
->debug(
476 'Parser options are not safe to cache and has not been saved',
477 [ 'name' => $this->name
]
479 $this->incrementStats( $page, 'save', 'unsafe' );
// A non-positive expiry marks the output as uncacheable.
483 if ( $expire <= 0 ) {
484 $this->logger
->debug(
485 'Parser output was marked as uncacheable and has not been saved',
486 [ 'name' => $this->name
]
488 $this->incrementStats( $page, 'save', 'uncacheable' );
// The filter is optional; it is only consulted when one has been set.
492 if ( $this->filter
&& !$this->filter
->shouldCache( $parserOutput, $page, $popts ) ) {
493 $this->logger
->debug(
494 'Parser output was filtered and has not been saved',
495 [ 'name' => $this->name
]
497 $this->incrementStats( $page, 'save', 'filtered' );
499 // TODO: In this case, we still want to cache in RevisionOutputCache (T350669).
// NOTE(review): presumably bails out early when the backend is a no-op store; the branch body is not visible here.
503 if ( $this->cache
instanceof EmptyBagOStuff
) {
507 // Ensure cache properties are set in the ParserOutput
508 // T350538: These should be turned into assertions that the
509 // properties are already present.
511 $parserOutput->setCacheTime( $cacheTime );
513 if ( !$parserOutput->hasCacheTime() ) {
514 $this->logger
->warning(
517 'name' => $this->name
,
518 'reason' => $popts->getRenderReason(),
522 $cacheTime = $parserOutput->getCacheTime();
// NOTE(review): the conditions guarding the revision ID branches below are only partly visible.
526 $parserOutput->setCacheRevisionId( $revId );
527 } elseif ( $parserOutput->getCacheRevisionId() ) {
528 $revId = $parserOutput->getCacheRevisionId();
530 $revId = $page->getLatest( PageRecord
::LOCAL
);
531 $parserOutput->setCacheRevisionId( $revId );
534 $this->logger
->warning(
535 'Parser output cannot be saved if the revision ID is not known',
536 [ 'name' => $this->name
]
538 $this->incrementStats( $page, 'save', 'norevid' );
// An output without a render ID is logged and given a freshly generated UUIDv1.
542 if ( !$parserOutput->getRenderId() ) {
543 $this->logger
->warning(
544 'Parser output missing render ID',
546 'name' => $this->name
,
547 'reason' => $popts->getRenderReason(),
550 $parserOutput->setRenderId( $this->globalIdGenerator
->newUUIDv1() );
553 // Transfer cache properties to the cache metadata
554 $metadata = new CacheTime
;
555 $metadata->recordOptions( $parserOutput->getUsedOptions() );
556 $metadata->updateCacheExpiry( $expire );
557 $metadata->setCacheTime( $cacheTime );
558 $metadata->setCacheRevisionId( $revId );
// The output key is built from the used options recorded in the metadata.
560 $parserOutputKey = $this->makeParserOutputKey(
563 $metadata->getUsedOptions()
566 $msg = "Saved in parser cache with key $parserOutputKey" .
567 " and timestamp $cacheTime" .
568 " and revision id $revId.";
570 $reason = $popts->getRenderReason();
571 $msg .= " Rendering was triggered because: $reason";
573 $parserOutput->addCacheMessage( $msg );
575 $pageKey = $this->makeMetadataKey( $page );
// Both objects are serialized before anything is written.
577 $parserOutputData = $this->convertForCache( $parserOutput, $parserOutputKey );
578 $metadataData = $this->convertForCache( $metadata, $pageKey );
580 if ( !$parserOutputData ||
!$metadataData ) {
581 $this->logger
->warning(
582 'Parser output failed to serialize and was not saved',
583 [ 'name' => $this->name
]
585 $this->incrementStats( $page, 'save', 'nonserializable' );
589 // Save the parser output
594 BagOStuff
::WRITE_ALLOW_SEGMENTS
597 // ...and its pointer to the local cache.
598 $this->metadataProcCache
->set( $pageKey, $metadataData, $expire );
599 // ...and to the global cache.
600 $this->cache
->set( $pageKey, $metadataData, $expire );
602 $title = $this->titleFactory
->newFromPageIdentity( $page );
603 $this->hookRunner
->onParserCacheSaveComplete( $this, $parserOutput, $title, $popts, $revId );
605 $this->logger
->debug( 'Saved in parser cache', [
606 'name' => $this->name
,
607 'key' => $parserOutputKey,
608 'cache_time' => $cacheTime,
// A successful save is counted twice: once as an operation, once by render reason.
611 $this->incrementStats( $page, 'save', 'success' );
612 $this->incrementRenderReasonStats( $page, $popts->getRenderReason() );
/**
 * Get the backend BagOStuff instance that
 * powers the parser cache
 *
 * @return BagOStuff The store passed to the constructor as $cache
 */
public function getCacheStorage() {
	return $this->cache;
}
/**
 * Check if $entry expired for $page given the $staleConstraint
 * when fetching from $cacheTier.
 *
 * A rejected entry is counted as a 'miss' with reason 'expired' and logged.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Label used in the log message, e.g. 'metadata' or 'output'
 * @return bool True if the entry is expired and must not be used
 */
private function checkExpired(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	// Constraints from USE_EXPIRED upwards tolerate expired entries.
	if ( $staleConstraint < self::USE_EXPIRED && $entry->expired( $page->getTouched() ) ) {
		$this->incrementStats( $page, 'miss', 'expired' );
		$this->logger->debug( "{$cacheTier} key expired", [
			'name' => $this->name,
			'touched' => $page->getTouched(),
			'epoch' => $this->cacheEpoch,
			'cache_time' => $entry->getCacheTime()
		] );
		return true;
	}

	return false;
}
/**
 * Check if $entry belongs to the latest revision of $page
 * given $staleConstraint when fetched from $cacheTier.
 *
 * A rejected entry is counted as a 'miss' with reason 'revid' and logged.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Label used in the log message, e.g. 'metadata' or 'output'
 * @return bool True if the entry is for another revision and must not be used
 */
private function checkOutdated(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	$latestRevId = $page->getLatest( PageRecord::LOCAL );
	// Constraints from USE_OUTDATED upwards tolerate entries of other revisions.
	if ( $staleConstraint < self::USE_OUTDATED && $entry->isDifferentRevision( $latestRevId ) ) {
		$this->incrementStats( $page, 'miss', 'revid' );
		$this->logger->debug( "{$cacheTier} key is for an old revision", [
			'name' => $this->name,
			'rev_id' => $latestRevId,
			'cached_rev_id' => $entry->getCacheRevisionId()
		] );
		return true;
	}

	return false;
}
684 * @param string $jsonData
686 * @param string $expectedClass
687 * @return CacheTime|ParserOutput|null
/**
 * Turn a JSON string read from the cache back into an object by handing
 * it to the JSON codec together with the expected class.
 *
 * Failures are logged at error level; a JsonException and any other
 * Exception are logged with different messages.
 *
 * NOTE(review): the opening of the try block, the return statements and
 * part of the log context are not present in this listing.
 * NOTE(review): JsonException and Exception are written unqualified and
 * neither appears in the visible use list of this namespaced file;
 * confirm both are imported, otherwise the catch clauses never match.
 */
689 private function restoreFromJson( string $jsonData, string $key, string $expectedClass ) {
691 /** @var CacheTime $obj */
692 $obj = $this->jsonCodec
->deserialize( $jsonData, $expectedClass );
// Malformed or undecodable JSON.
694 } catch ( JsonException
$e ) {
695 $this->logger
->error( "Unable to deserialize JSON", [
696 'name' => $this->name
,
698 'ex_message' => $e->getMessage()
// Anything else that goes wrong while rebuilding the object.
701 } catch ( Exception
$e ) {
702 $this->logger
->error( "Unexpected failure during cache load", [
703 'name' => $this->name
,
705 'ex_message' => $e->getMessage()
712 * @param CacheTime $obj
714 * @return string|null
/**
 * Serialize a cache entry through the JSON codec so it can be stored.
 *
 * When serialization throws a JsonException, the codec is asked which
 * part of the object is not serializable and the failure is logged at
 * error level with that detail and the exception trace.
 *
 * NOTE(review): the opening of the try blocks, the closing brackets and
 * the failure return are not present in this listing; save() treats a
 * falsy result as "could not serialize".
 */
716 protected function convertForCache( CacheTime
$obj, string $key ) {
718 return $this->jsonCodec
->serialize( $obj );
719 } catch ( JsonException
$e ) {
720 // Try to collect some additional debugging information, but
721 // wrap this in a try block to ensure we don't make the problem
724 $details = $this->jsonCodec
->detectNonSerializableData( $obj, true );
// If the diagnosis itself fails, its error message is logged as the detail.
725 } catch ( \Throwable
$t ) {
726 $details = $t->getMessage();
728 $this->logger
->error( "Unable to serialize JSON", [
729 'name' => $this->name
,
731 'ex_message' => $e->getMessage(),
732 'details' => $details,
733 'trace' => $e->getTraceAsString(),
/** @deprecated class alias since 1.43 */
class_alias( ParserCache::class, 'ParserCache' );