3 * Cache for outputs of the PHP parser
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @ingroup Cache Parser
24 use MediaWiki\HookContainer\HookContainer
;
25 use MediaWiki\HookContainer\HookRunner
;
26 use MediaWiki\Json\JsonCodec
;
27 use MediaWiki\Page\PageRecord
;
28 use MediaWiki\Page\WikiPageFactory
;
29 use MediaWiki\Parser\ParserCacheMetadata
;
30 use MediaWiki\Title\TitleFactory
;
31 use Psr\Log\LoggerInterface
;
34 * Cache for ParserOutput objects corresponding to the latest page revisions.
36 * The ParserCache is a two-tiered cache backed by BagOStuff which supports
37 * varying the stored content on the values of ParserOptions used during
40 * First tier is keyed by the page ID and stores ParserCacheMetadata, which
41 * contains information about cache expiration and the list of ParserOptions
42 * used during the parse of the page. For example, if only 'dateformat' and
43 * 'userlang' options were accessed by the parser when producing output for the
44 * page, array [ 'dateformat', 'userlang' ] will be stored in the metadata cache.
45 * This means none of the other existing options had any effect on the output.
47 * The second tier of the cache contains ParserOutput objects. The key for the
48 * second tier is constructed from the page ID and values of those ParserOptions
49 * used during a page parse which affected the output. Upon cache lookup, the list
50 * of used option names is retrieved from tier 1 cache, and only the values of
51 * those options are hashed together with the page ID to produce a key, while
52 * the rest of the options are ignored. Following the example above where
53 * only [ 'dateformat', 'userlang' ] options changed the parser output for a
54 * page, the key will look like 'page_id!dateformat=default:userlang=ru'.
55 * Thus any cache lookup with dateformat=default and userlang=ru will hit the
56 * same cache entry regardless of the values of the rest of the options, since they
57 * were not accessed during a parse and thus did not change the output.
59 * @see ParserOutput::recordOption()
60 * @see ParserOutput::getUsedOptions()
61 * @see ParserOptions::allCacheVaryingOptions()
62 * @ingroup Cache Parser
66 * Constants for self::getKey()
68 * @since 1.36 the constants were made public
71 /** Use only current data */
72 public const USE_CURRENT_ONLY
= 0;
74 /** Use expired data if current data is unavailable */
75 public const USE_EXPIRED
= 1;
77 /** Use expired data or data from different revisions if current data is unavailable */
78 public const USE_OUTDATED
= 2;
81 * Use expired data and data from different revisions, and if all else
82 * fails vary on all variable options
84 private const USE_ANYTHING
= 3;
86 /** @var string The name of this ParserCache. Used as a root of the cache key. */
93 * Anything cached prior to this is invalidated
99 /** @var HookRunner */
102 /** @var JsonCodec */
105 /** @var IBufferingStatsdDataFactory */
108 /** @var LoggerInterface */
111 /** @var TitleFactory */
112 private $titleFactory;
114 /** @var WikiPageFactory */
115 private $wikiPageFactory;
118 * @var BagOStuff small in-process cache to store metadata.
119 * It's needed multiple times during the request, for example
120 * to build a PoolWorkArticleView key, and then to fetch the
121 * actual ParserCache entry.
123 private $metadataProcCache;
126 * Set up a cache pathway with a given back-end storage mechanism.
128 * This class uses an invalidation strategy that is compatible with
129 * MultiWriteBagOStuff in async replication mode.
131 * @param string $name
132 * @param BagOStuff $cache
133 * @param string $cacheEpoch Anything before this timestamp is invalidated
134 * @param HookContainer $hookContainer
135 * @param JsonCodec $jsonCodec
136 * @param IBufferingStatsdDataFactory $stats
137 * @param LoggerInterface $logger
138 * @param TitleFactory $titleFactory
139 * @param WikiPageFactory $wikiPageFactory
141 public function __construct(
145 HookContainer
$hookContainer,
146 JsonCodec
$jsonCodec,
147 IBufferingStatsdDataFactory
$stats,
148 LoggerInterface
$logger,
149 TitleFactory
$titleFactory,
150 WikiPageFactory
$wikiPageFactory
153 $this->cache
= $cache;
154 $this->cacheEpoch
= $cacheEpoch;
155 $this->hookRunner
= new HookRunner( $hookContainer );
156 $this->jsonCodec
= $jsonCodec;
157 $this->stats
= $stats;
158 $this->logger
= $logger;
159 $this->titleFactory
= $titleFactory;
160 $this->wikiPageFactory
= $wikiPageFactory;
161 $this->metadataProcCache
= new HashBagOStuff( [ 'maxKeys' => 2 ] );
165 * @param PageRecord $page
168 public function deleteOptionsKey( PageRecord
$page ) {
169 $page->assertWiki( PageRecord
::LOCAL
);
170 $key = $this->makeMetadataKey( $page );
171 $this->metadataProcCache
->delete( $key );
172 $this->cache
->delete( $key );
176 * Retrieve the ParserOutput from ParserCache, even if it's outdated.
177 * @param PageRecord $page
178 * @param ParserOptions $popts
179 * @return ParserOutput|false
181 public function getDirty( PageRecord
$page, $popts ) {
182 $page->assertWiki( PageRecord
::LOCAL
);
183 $value = $this->get( $page, $popts, true );
184 return is_object( $value ) ?
$value : false;
188 * @param PageRecord $page
189 * @param string $metricSuffix
191 private function incrementStats( PageRecord
$page, $metricSuffix ) {
192 $wikiPage = $this->wikiPageFactory
->newFromTitle( $page );
193 $contentModel = str_replace( '.', '_', $wikiPage->getContentModel() );
194 $this->stats
->increment( "{$this->name}.{$contentModel}.{$metricSuffix}" );
198 * Returns the ParserCache metadata about the given page
199 * considering the given options.
201 * @note Which parser options influence the cache key
202 * is controlled via ParserOutput::recordOption() or
203 * ParserOptions::addExtraKey().
205 * @param PageRecord $page
206 * @param int $staleConstraint one of the self::USE_ constants
207 * @return ParserCacheMetadata|null
210 public function getMetadata(
212 int $staleConstraint = self
::USE_ANYTHING
213 ): ?ParserCacheMetadata
{
214 $page->assertWiki( PageRecord
::LOCAL
);
216 $pageKey = $this->makeMetadataKey( $page );
217 $metadata = $this->metadataProcCache
->get( $pageKey );
219 $metadata = $this->cache
->get(
221 BagOStuff
::READ_VERIFIED
225 if ( $metadata === false ) {
226 $this->incrementStats( $page, "miss_absent_metadata" );
227 $this->logger
->debug( 'ParserOutput metadata cache miss', [ 'name' => $this->name
] );
231 // NOTE: If the value wasn't serialized to JSON when being stored,
232 // we may already have a ParserOutput object here. This used
233 // to be the default behavior before 1.36. We need to retain
234 // support so we can handle cached objects after an update
235 // from an earlier revision.
236 // NOTE: Support for reading string values from the cache must be
237 // deployed a while before starting to write JSON to the cache,
238 // in case we have to revert either change.
239 if ( is_string( $metadata ) ) {
240 $metadata = $this->restoreFromJson( $metadata, $pageKey, CacheTime
::class );
243 if ( !$metadata instanceof CacheTime
) {
244 $this->incrementStats( $page, 'miss_unserialize' );
248 if ( $this->checkExpired( $metadata, $page, $staleConstraint, 'metadata' ) ) {
252 if ( $this->checkOutdated( $metadata, $page, $staleConstraint, 'metadata' ) ) {
256 $this->logger
->debug( 'Parser cache options found', [ 'name' => $this->name
] );
261 * @param PageRecord $page
264 private function makeMetadataKey( PageRecord
$page ): string {
265 return $this->cache
->makeKey( $this->name
, 'idoptions', $page->getId( PageRecord
::LOCAL
) );
269 * Get a key that will be used by the ParserCache to store the content
270 * for a given page considering the given options and the array of
273 * @warning The exact format of the key is considered internal and is subject
274 * to change, thus should not be used as storage or long-term caching key.
275 * This is intended to be used for logging or keying something transient.
277 * @param PageRecord $page
278 * @param ParserOptions $options
279 * @param array|null $usedOptions Defaults to all cache varying options.
284 public function makeParserOutputKey(
286 ParserOptions
$options,
287 array $usedOptions = null
289 $usedOptions ??
= ParserOptions
::allCacheVaryingOptions();
290 // idhash seems to mean 'page id' + 'rendering hash' (r3710)
291 $pageid = $page->getId( PageRecord
::LOCAL
);
292 $title = $this->titleFactory
->newFromPageIdentity( $page );
293 $hash = $options->optionsHash( $usedOptions, $title );
294 // Before T263581 ParserCache was split between normal page views
295 // and action=parse. -0 is left in the key to avoid invalidating the entire
296 // cache when removing the cache split.
297 return $this->cache
->makeKey( $this->name
, 'idhash', "{$pageid}-0!{$hash}" );
301 * Retrieve the ParserOutput from ParserCache.
302 * false if not found or outdated.
304 * @param PageRecord $page
305 * @param ParserOptions $popts
306 * @param bool $useOutdated (default false)
308 * @return ParserOutput|false
310 public function get( PageRecord
$page, $popts, $useOutdated = false ) {
311 $page->assertWiki( PageRecord
::LOCAL
);
313 if ( !$page->exists() ) {
314 $this->incrementStats( $page, 'miss_nonexistent' );
318 if ( $page->isRedirect() ) {
319 // It's a redirect now
320 $this->incrementStats( $page, 'miss_redirect' );
324 $staleConstraint = $useOutdated ? self
::USE_OUTDATED
: self
::USE_CURRENT_ONLY
;
325 $parserOutputMetadata = $this->getMetadata( $page, $staleConstraint );
326 if ( !$parserOutputMetadata ) {
330 if ( !$popts->isSafeToCache( $parserOutputMetadata->getUsedOptions() ) ) {
331 $this->incrementStats( $page, 'miss_unsafe' );
335 $parserOutputKey = $this->makeParserOutputKey(
338 $parserOutputMetadata->getUsedOptions()
341 $value = $this->cache
->get( $parserOutputKey, BagOStuff
::READ_VERIFIED
);
342 if ( $value === false ) {
343 $this->incrementStats( $page, "miss_absent" );
344 $this->logger
->debug( 'ParserOutput cache miss', [ 'name' => $this->name
] );
348 // NOTE: If the value wasn't serialized to JSON when being stored,
349 // we may already have a ParserOutput object here. This used
350 // to be the default behavior before 1.36. We need to retain
351 // support so we can handle cached objects after an update
352 // from an earlier revision.
353 // NOTE: Support for reading string values from the cache must be
354 // deployed a while before starting to write JSON to the cache,
355 // in case we have to revert either change.
356 if ( is_string( $value ) ) {
357 $value = $this->restoreFromJson( $value, $parserOutputKey, ParserOutput
::class );
360 if ( !$value instanceof ParserOutput
) {
361 $this->incrementStats( $page, 'miss_unserialize' );
365 if ( $this->checkExpired( $value, $page, $staleConstraint, 'output' ) ) {
369 if ( $this->checkOutdated( $value, $page, $staleConstraint, 'output' ) ) {
373 $wikiPage = $this->wikiPageFactory
->newFromTitle( $page );
374 if ( $this->hookRunner
->onRejectParserCacheValue( $value, $wikiPage, $popts ) === false ) {
375 $this->incrementStats( $page, 'miss_rejected' );
376 $this->logger
->debug( 'key valid, but rejected by RejectParserCacheValue hook handler',
377 [ 'name' => $this->name
] );
381 $this->logger
->debug( 'ParserOutput cache found', [ 'name' => $this->name
] );
382 $this->incrementStats( $page, 'hit' );
387 * @param ParserOutput $parserOutput
388 * @param PageRecord $page
389 * @param ParserOptions $popts
390 * @param string|null $cacheTime TS_MW timestamp when the cache was generated
391 * @param int|null $revId Revision ID that was parsed
393 public function save(
394 ParserOutput
$parserOutput,
400 $page->assertWiki( PageRecord
::LOCAL
);
402 if ( !$parserOutput->hasText() ) {
403 throw new InvalidArgumentException( 'Attempt to cache a ParserOutput with no text set!' );
406 $expire = $parserOutput->getCacheExpiry();
408 if ( !$popts->isSafeToCache( $parserOutput->getUsedOptions() ) ) {
409 $this->logger
->debug(
410 'Parser options are not safe to cache and has not been saved',
411 [ 'name' => $this->name
]
413 $this->incrementStats( $page, 'save_unsafe' );
417 if ( $expire <= 0 ) {
418 $this->logger
->debug(
419 'Parser output was marked as uncacheable and has not been saved',
420 [ 'name' => $this->name
]
422 $this->incrementStats( $page, 'save_uncacheable' );
426 if ( $this->cache
instanceof EmptyBagOStuff
) {
430 $cacheTime = $cacheTime ?
: wfTimestampNow();
431 $revId = $revId ?
: $page->getLatest( PageRecord
::LOCAL
);
434 $this->logger
->debug(
435 'Parser output cannot be saved if the revision ID is not known',
436 [ 'name' => $this->name
]
438 $this->incrementStats( $page, 'save_norevid' );
442 $metadata = new CacheTime
;
443 $metadata->recordOptions( $parserOutput->getUsedOptions() );
444 $metadata->updateCacheExpiry( $expire );
446 $metadata->setCacheTime( $cacheTime );
447 $parserOutput->setCacheTime( $cacheTime );
448 $metadata->setCacheRevisionId( $revId );
449 $parserOutput->setCacheRevisionId( $revId );
451 $parserOutputKey = $this->makeParserOutputKey(
454 $metadata->getUsedOptions()
457 $msg = "Saved in parser cache with key $parserOutputKey" .
458 " and timestamp $cacheTime" .
459 " and revision id $revId.";
461 $reason = $popts->getRenderReason();
462 $msg .= " Rendering was triggered because: $reason";
464 $parserOutput->addCacheMessage( $msg );
466 $pageKey = $this->makeMetadataKey( $page );
468 $parserOutputData = $this->convertForCache( $parserOutput, $parserOutputKey );
469 $metadataData = $this->convertForCache( $metadata, $pageKey );
471 if ( !$parserOutputData ||
!$metadataData ) {
472 $this->logger
->warning(
473 'Parser output failed to serialize and was not saved',
474 [ 'name' => $this->name
]
476 $this->incrementStats( $page, 'save_nonserializable' );
480 // Save the parser output
485 BagOStuff
::WRITE_ALLOW_SEGMENTS
488 // ...and its pointer to the local cache.
489 $this->metadataProcCache
->set( $pageKey, $metadataData, $expire );
490 // ...and to the global cache.
491 $this->cache
->set( $pageKey, $metadataData, $expire );
493 $title = $this->titleFactory
->newFromPageIdentity( $page );
494 $this->hookRunner
->onParserCacheSaveComplete( $this, $parserOutput, $title, $popts, $revId );
496 $this->logger
->debug( 'Saved in parser cache', [
497 'name' => $this->name
,
498 'key' => $parserOutputKey,
499 'cache_time' => $cacheTime,
502 $this->incrementStats( $page, 'save_success' );
504 $reasonKey = preg_replace( '/\W+/', '_', $popts->getRenderReason() );
505 $this->incrementStats( $page, "reason.$reasonKey" );
509 * Get the backend BagOStuff instance that
510 * powers the parser cache
516 public function getCacheStorage() {
521 * Check if $entry expired for $page given the $staleConstraint
522 * when fetching from $cacheTier.
523 * @param CacheTime $entry
524 * @param PageRecord $page
525 * @param int $staleConstraint One of USE_* constants.
526 * @param string $cacheTier
529 private function checkExpired(
532 int $staleConstraint,
535 if ( $staleConstraint < self
::USE_EXPIRED
&& $entry->expired( $page->getTouched() ) ) {
536 $this->incrementStats( $page, 'miss_expired' );
537 $this->logger
->debug( "{$cacheTier} key expired", [
538 'name' => $this->name
,
539 'touched' => $page->getTouched(),
540 'epoch' => $this->cacheEpoch
,
541 'cache_time' => $entry->getCacheTime()
549 * Check if $entry belongs to the latest revision of $page
550 * given $staleConstraint when fetched from $cacheTier.
551 * @param CacheTime $entry
552 * @param PageRecord $page
553 * @param int $staleConstraint One of USE_* constants.
554 * @param string $cacheTier
557 private function checkOutdated(
560 int $staleConstraint,
563 $latestRevId = $page->getLatest( PageRecord
::LOCAL
);
564 if ( $staleConstraint < self
::USE_OUTDATED
&& $entry->isDifferentRevision( $latestRevId ) ) {
565 $this->incrementStats( $page, "miss_revid" );
566 $this->logger
->debug( "{$cacheTier} key is for an old revision", [
567 'name' => $this->name
,
568 'rev_id' => $latestRevId,
569 'cached_rev_id' => $entry->getCacheRevisionId()
577 * @param string $jsonData
579 * @param string $expectedClass
580 * @return CacheTime|ParserOutput|null
582 private function restoreFromJson( string $jsonData, string $key, string $expectedClass ) {
584 /** @var CacheTime $obj */
585 $obj = $this->jsonCodec
->unserialize( $jsonData, $expectedClass );
587 } catch ( InvalidArgumentException
$e ) {
588 $this->logger
->error( "Unable to unserialize JSON", [
589 'name' => $this->name
,
591 'message' => $e->getMessage()
598 * @param CacheTime $obj
600 * @return string|null
602 protected function convertForCache( CacheTime
$obj, string $key ) {
604 return $this->jsonCodec
->serialize( $obj );
605 } catch ( InvalidArgumentException
$e ) {
606 $this->logger
->error( "Unable to serialize JSON", [
607 'name' => $this->name
,
609 'message' => $e->getMessage(),