Update git submodules
[mediawiki.git] / includes / parser / ParserCache.php
blob178d4461b15bbf6bc7557cbbc9c81ad100cafa98
1 <?php
2 /**
3 * Cache for outputs of the PHP parser
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
21 * @ingroup Cache Parser
24 use MediaWiki\HookContainer\HookContainer;
25 use MediaWiki\HookContainer\HookRunner;
26 use MediaWiki\Json\JsonCodec;
27 use MediaWiki\Page\PageRecord;
28 use MediaWiki\Page\WikiPageFactory;
29 use MediaWiki\Parser\ParserCacheMetadata;
30 use MediaWiki\Title\TitleFactory;
31 use Psr\Log\LoggerInterface;
33 /**
34 * Cache for ParserOutput objects corresponding to the latest page revisions.
36 * The ParserCache is a two-tiered cache backed by BagOStuff which supports
37 * varying the stored content on the values of ParserOptions used during
38 * a page parse.
40 * First tier is keyed by the page ID and stores ParserCacheMetadata, which
41 * contains information about cache expiration and the list of ParserOptions
42 * used during the parse of the page. For example, if only 'dateformat' and
43 * 'userlang' options were accessed by the parser when producing output for the
44 * page, array [ 'dateformat', 'userlang' ] will be stored in the metadata cache.
45 * This means none of the other existing options had any effect on the output.
47 * The second tier of the cache contains ParserOutput objects. The key for the
48 * second tier is constructed from the page ID and values of those ParserOptions
49 * used during a page parse which affected the output. Upon cache lookup, the list
50 * of used option names is retrieved from tier 1 cache, and only the values of
51 * those options are hashed together with the page ID to produce a key, while
52 * the rest of the options are ignored. Following the example above where
53 * only [ 'dateformat', 'userlang' ] options changed the parser output for a
54 * page, the key will look like 'page_id!dateformat=default:userlang=ru'.
55 * Thus any cache lookup with dateformat=default and userlang=ru will hit the
56 * same cache entry regardless of the values of the rest of the options, since they
57 * were not accessed during a parse and thus did not change the output.
59 * @see ParserOutput::recordOption()
60 * @see ParserOutput::getUsedOptions()
61 * @see ParserOptions::allCacheVaryingOptions()
62 * @ingroup Cache Parser
class ParserCache {
	/*
	 * Staleness constraints accepted by self::getMetadata() and used
	 * internally by self::get().
	 * @since 1.30
	 * @since 1.36 the constants were made public
	 */

	/** Use only current data */
	public const USE_CURRENT_ONLY = 0;

	/** Use expired data if current data is unavailable */
	public const USE_EXPIRED = 1;

	/** Use expired data or data from different revisions if current data is unavailable */
	public const USE_OUTDATED = 2;

	/**
	 * Use expired data and data from different revisions, and if all else
	 * fails vary on all variable options
	 */
	private const USE_ANYTHING = 3;

	/** @var string The name of this ParserCache. Used as a root of the cache key. */
	private string $name;

	/** @var BagOStuff Backend store holding both the metadata and the ParserOutput entries */
	private BagOStuff $cache;

	/**
	 * Anything cached prior to this is invalidated
	 *
	 * @var string
	 */
	private string $cacheEpoch;

	/** @var HookRunner */
	private HookRunner $hookRunner;

	/** @var JsonCodec */
	private JsonCodec $jsonCodec;

	/** @var IBufferingStatsdDataFactory */
	private IBufferingStatsdDataFactory $stats;

	/** @var LoggerInterface */
	private LoggerInterface $logger;

	/** @var TitleFactory */
	private TitleFactory $titleFactory;

	/** @var WikiPageFactory */
	private WikiPageFactory $wikiPageFactory;

	/**
	 * @var BagOStuff small in-process cache to store metadata.
	 * It's needed multiple times during the request, for example
	 * to build a PoolWorkArticleView key, and then to fetch the
	 * actual ParserCache entry.
	 */
	private BagOStuff $metadataProcCache;
/**
 * Set up a cache pathway on top of the given back-end storage.
 *
 * This class uses an invalidation strategy that is compatible with
 * MultiWriteBagOStuff in async replication mode.
 *
 * @param string $name Root component of the cache keys built by this instance
 * @param BagOStuff $cache Back-end storage
 * @param string $cacheEpoch Anything before this timestamp is invalidated
 * @param HookContainer $hookContainer Wrapped in a HookRunner for hook dispatch
 * @param JsonCodec $jsonCodec Used to (un)serialize cached values
 * @param IBufferingStatsdDataFactory $stats
 * @param LoggerInterface $logger
 * @param TitleFactory $titleFactory
 * @param WikiPageFactory $wikiPageFactory
 */
public function __construct(
	string $name,
	BagOStuff $cache,
	string $cacheEpoch,
	HookContainer $hookContainer,
	JsonCodec $jsonCodec,
	IBufferingStatsdDataFactory $stats,
	LoggerInterface $logger,
	TitleFactory $titleFactory,
	WikiPageFactory $wikiPageFactory
) {
	// Identity and storage
	$this->name = $name;
	$this->cacheEpoch = $cacheEpoch;
	$this->cache = $cache;
	// Only two metadata entries are kept in the in-process cache.
	$this->metadataProcCache = new HashBagOStuff( [ 'maxKeys' => 2 ] );

	// Collaborating services
	$this->jsonCodec = $jsonCodec;
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->titleFactory = $titleFactory;
	$this->wikiPageFactory = $wikiPageFactory;

	// Observability
	$this->logger = $logger;
	$this->stats = $stats;
}
/**
 * Drop the metadata entry of a page, first from the in-process cache
 * and then from the back-end cache.
 *
 * @param PageRecord $page Must belong to the local wiki
 * @since 1.28
 */
public function deleteOptionsKey( PageRecord $page ) {
	$page->assertWiki( PageRecord::LOCAL );

	$metadataKey = $this->makeMetadataKey( $page );
	foreach ( [ $this->metadataProcCache, $this->cache ] as $store ) {
		$store->delete( $metadataKey );
	}
}
/**
 * Retrieve the ParserOutput from ParserCache, even if it's outdated.
 *
 * Delegates to self::get() with $useOutdated enabled and normalizes any
 * non-object result to false.
 *
 * @param PageRecord $page Must belong to the local wiki
 * @param ParserOptions $popts
 * @return ParserOutput|false
 */
public function getDirty( PageRecord $page, $popts ) {
	$page->assertWiki( PageRecord::LOCAL );

	$output = $this->get( $page, $popts, true );
	if ( !is_object( $output ) ) {
		return false;
	}

	return $output;
}
/**
 * Increment the counter "<cache name>.<content model>.<suffix>".
 *
 * Dots in the content model are replaced by underscores so that they
 * do not add extra segments to the metric name.
 *
 * @param PageRecord $page Page whose content model is used in the metric name
 * @param string $metricSuffix
 */
private function incrementStats( PageRecord $page, $metricSuffix ) {
	$model = $this->wikiPageFactory->newFromTitle( $page )->getContentModel();
	$modelSegment = str_replace( '.', '_', $model );

	$this->stats->increment( "{$this->name}.{$modelSegment}.{$metricSuffix}" );
}
/**
 * Returns the ParserCache metadata about the given page
 * considering the given options.
 *
 * The in-process cache is consulted first; the back-end cache is only
 * read when the in-process cache yields nothing usable.
 *
 * @note Which parser options influence the cache key
 * is controlled via ParserOutput::recordOption() or
 * ParserOptions::addExtraKey().
 *
 * @param PageRecord $page Must belong to the local wiki
 * @param int $staleConstraint one of the self::USE_ constants
 * @return ParserCacheMetadata|null null when the entry is absent, cannot be
 *  unserialized, or is rejected as expired/outdated under $staleConstraint
 * @since 1.36
 */
public function getMetadata(
	PageRecord $page,
	int $staleConstraint = self::USE_ANYTHING
): ?ParserCacheMetadata {
	$page->assertWiki( PageRecord::LOCAL );

	$key = $this->makeMetadataKey( $page );
	$entry = $this->metadataProcCache->get( $key )
		?: $this->cache->get( $key, BagOStuff::READ_VERIFIED );

	if ( $entry === false ) {
		$this->incrementStats( $page, "miss_absent_metadata" );
		$this->logger->debug( 'ParserOutput metadata cache miss', [ 'name' => $this->name ] );
		return null;
	}

	// NOTE: Values stored before 1.36 were not serialized to JSON, so a
	//   ready-made object may come back instead of a string. This must stay
	//   supported so that entries cached by an earlier release remain
	//   readable after an upgrade.
	// NOTE: Support for reading string values from the cache must be
	//   deployed a while before starting to write JSON to the cache,
	//   in case we have to revert either change.
	if ( is_string( $entry ) ) {
		$entry = $this->restoreFromJson( $entry, $key, CacheTime::class );
	}

	if ( !( $entry instanceof CacheTime ) ) {
		$this->incrementStats( $page, 'miss_unserialize' );
		return null;
	}

	// Short-circuit evaluation keeps the expiry check ahead of the revision check.
	$isStale = $this->checkExpired( $entry, $page, $staleConstraint, 'metadata' )
		|| $this->checkOutdated( $entry, $page, $staleConstraint, 'metadata' );
	if ( $isStale ) {
		return null;
	}

	$this->logger->debug( 'Parser cache options found', [ 'name' => $this->name ] );
	return $entry;
}
/**
 * Build the cache key under which the metadata entry of a page is stored.
 *
 * @param PageRecord $page
 * @return string Key made of the cache name, 'idoptions' and the local page ID
 */
private function makeMetadataKey( PageRecord $page ): string {
	$pageId = $page->getId( PageRecord::LOCAL );

	return $this->cache->makeKey( $this->name, 'idoptions', $pageId );
}
/**
 * Get a key that will be used by the ParserCache to store the content
 * for a given page considering the given options and the array of
 * used options.
 *
 * @warning The exact format of the key is considered internal and is subject
 * to change, thus should not be used as storage or long-term caching key.
 * This is intended to be used for logging or keying something transient.
 *
 * @param PageRecord $page
 * @param ParserOptions $options
 * @param array|null $usedOptions Defaults to all cache varying options.
 * @return string
 * @internal
 * @since 1.36
 */
public function makeParserOutputKey(
	PageRecord $page,
	ParserOptions $options,
	?array $usedOptions = null
): string {
	$usedOptions ??= ParserOptions::allCacheVaryingOptions();
	// idhash seems to mean 'page id' + 'rendering hash' (r3710)
	$pageid = $page->getId( PageRecord::LOCAL );
	$title = $this->titleFactory->newFromPageIdentity( $page );
	$hash = $options->optionsHash( $usedOptions, $title );
	// Before T263581 ParserCache was split between normal page views
	// and action=parse. -0 is left in the key to avoid invalidating the entire
	// cache when removing the cache split.
	return $this->cache->makeKey( $this->name, 'idhash', "{$pageid}-0!{$hash}" );
}
/**
 * Retrieve the ParserOutput from ParserCache.
 * false if not found or outdated.
 *
 * Every rejection path below, except an unusable metadata entry (which
 * self::getMetadata() accounts for itself), increments its own "miss_*"
 * counter before returning.
 *
 * @param PageRecord $page Must belong to the local wiki
 * @param ParserOptions $popts
 * @param bool $useOutdated (default false) Accept expired entries and
 *  entries that belong to a different revision
 *
 * @return ParserOutput|false
 */
public function get( PageRecord $page, $popts, $useOutdated = false ) {
	$page->assertWiki( PageRecord::LOCAL );

	if ( !$page->exists() ) {
		$this->incrementStats( $page, 'miss_nonexistent' );
		return false;
	}

	if ( $page->isRedirect() ) {
		// It's a redirect now
		$this->incrementStats( $page, 'miss_redirect' );
		return false;
	}

	$staleConstraint = self::USE_CURRENT_ONLY;
	if ( $useOutdated ) {
		$staleConstraint = self::USE_OUTDATED;
	}

	$metadata = $this->getMetadata( $page, $staleConstraint );
	if ( !$metadata ) {
		return false;
	}

	if ( !$popts->isSafeToCache( $metadata->getUsedOptions() ) ) {
		$this->incrementStats( $page, 'miss_unsafe' );
		return false;
	}

	// The output key only varies on the options recorded in the metadata.
	$outputKey = $this->makeParserOutputKey( $page, $popts, $metadata->getUsedOptions() );

	$cached = $this->cache->get( $outputKey, BagOStuff::READ_VERIFIED );
	if ( $cached === false ) {
		$this->incrementStats( $page, "miss_absent" );
		$this->logger->debug( 'ParserOutput cache miss', [ 'name' => $this->name ] );
		return false;
	}

	// NOTE: Values stored before 1.36 were not serialized to JSON, so a
	//   ready-made ParserOutput may come back instead of a string. This must
	//   stay supported so that entries cached by an earlier release remain
	//   readable after an upgrade.
	// NOTE: Support for reading string values from the cache must be
	//   deployed a while before starting to write JSON to the cache,
	//   in case we have to revert either change.
	if ( is_string( $cached ) ) {
		$cached = $this->restoreFromJson( $cached, $outputKey, ParserOutput::class );
	}

	if ( !( $cached instanceof ParserOutput ) ) {
		$this->incrementStats( $page, 'miss_unserialize' );
		return false;
	}

	// Short-circuit evaluation keeps the expiry check ahead of the revision check.
	$isStale = $this->checkExpired( $cached, $page, $staleConstraint, 'output' )
		|| $this->checkOutdated( $cached, $page, $staleConstraint, 'output' );
	if ( $isStale ) {
		return false;
	}

	// Give hook handlers a last chance to veto an otherwise valid entry.
	$wikiPage = $this->wikiPageFactory->newFromTitle( $page );
	$verdict = $this->hookRunner->onRejectParserCacheValue( $cached, $wikiPage, $popts );
	if ( $verdict === false ) {
		$this->incrementStats( $page, 'miss_rejected' );
		$this->logger->debug(
			'key valid, but rejected by RejectParserCacheValue hook handler',
			[ 'name' => $this->name ]
		);
		return false;
	}

	$this->logger->debug( 'ParserOutput cache found', [ 'name' => $this->name ] );
	$this->incrementStats( $page, 'hit' );
	return $cached;
}
/**
 * Store a ParserOutput and its metadata entry in the cache.
 *
 * Nothing is written when the used options are not safe to cache, when
 * the output's cache expiry is not positive, when the back-end is an
 * EmptyBagOStuff, when no revision ID can be determined, or when
 * serialization fails.
 *
 * @param ParserOutput $parserOutput Gets its cache time, cache revision ID
 *  and a cache message set as a side effect
 * @param PageRecord $page Must belong to the local wiki
 * @param ParserOptions $popts
 * @param string|null $cacheTime TS_MW timestamp when the cache was generated
 * @param int|null $revId Revision ID that was parsed
 * @throws InvalidArgumentException If $parserOutput has no text set
 */
public function save(
	ParserOutput $parserOutput,
	PageRecord $page,
	$popts,
	$cacheTime = null,
	$revId = null
) {
	$page->assertWiki( PageRecord::LOCAL );

	if ( !$parserOutput->hasText() ) {
		throw new InvalidArgumentException( 'Attempt to cache a ParserOutput with no text set!' );
	}

	$expire = $parserOutput->getCacheExpiry();
	$logContext = [ 'name' => $this->name ];

	if ( !$popts->isSafeToCache( $parserOutput->getUsedOptions() ) ) {
		$this->logger->debug(
			'Parser options are not safe to cache and has not been saved',
			$logContext
		);
		$this->incrementStats( $page, 'save_unsafe' );
		return;
	}

	if ( $expire <= 0 ) {
		$this->logger->debug(
			'Parser output was marked as uncacheable and has not been saved',
			$logContext
		);
		$this->incrementStats( $page, 'save_uncacheable' );
		return;
	}

	// Nothing would be stored by this back-end, so skip the remaining work.
	if ( $this->cache instanceof EmptyBagOStuff ) {
		return;
	}

	if ( !$cacheTime ) {
		$cacheTime = wfTimestampNow();
	}
	if ( !$revId ) {
		$revId = $page->getLatest( PageRecord::LOCAL );
	}

	if ( !$revId ) {
		$this->logger->debug(
			'Parser output cannot be saved if the revision ID is not known',
			$logContext
		);
		$this->incrementStats( $page, 'save_norevid' );
		return;
	}

	// The metadata entry records the used options, expiry, timestamp and revision.
	$metadata = new CacheTime;
	$metadata->recordOptions( $parserOutput->getUsedOptions() );
	$metadata->updateCacheExpiry( $expire );

	$metadata->setCacheTime( $cacheTime );
	$parserOutput->setCacheTime( $cacheTime );
	$metadata->setCacheRevisionId( $revId );
	$parserOutput->setCacheRevisionId( $revId );

	$outputKey = $this->makeParserOutputKey( $page, $popts, $metadata->getUsedOptions() );

	$reason = $popts->getRenderReason();
	$parserOutput->addCacheMessage(
		"Saved in parser cache with key $outputKey and timestamp $cacheTime" .
		" and revision id $revId. Rendering was triggered because: $reason"
	);

	$metadataKey = $this->makeMetadataKey( $page );

	$outputBlob = $this->convertForCache( $parserOutput, $outputKey );
	$metadataBlob = $this->convertForCache( $metadata, $metadataKey );

	if ( !( $outputBlob && $metadataBlob ) ) {
		$this->logger->warning(
			'Parser output failed to serialize and was not saved',
			$logContext
		);
		$this->incrementStats( $page, 'save_nonserializable' );
		return;
	}

	// Save the parser output
	$this->cache->set( $outputKey, $outputBlob, $expire, BagOStuff::WRITE_ALLOW_SEGMENTS );

	// ...and its pointer to the local cache.
	$this->metadataProcCache->set( $metadataKey, $metadataBlob, $expire );
	// ...and to the global cache.
	$this->cache->set( $metadataKey, $metadataBlob, $expire );

	$title = $this->titleFactory->newFromPageIdentity( $page );
	$this->hookRunner->onParserCacheSaveComplete( $this, $parserOutput, $title, $popts, $revId );

	$this->logger->debug( 'Saved in parser cache', [
		'name' => $this->name,
		'key' => $outputKey,
		'cache_time' => $cacheTime,
		'rev_id' => $revId
	] );
	$this->incrementStats( $page, 'save_success' );

	// Collapse runs of non-word characters so the reason fits into a metric name.
	$reasonKey = preg_replace( '/\W+/', '_', $popts->getRenderReason() );
	$this->incrementStats( $page, "reason.$reasonKey" );
}
/**
 * Expose the BagOStuff back-end that this parser cache reads from
 * and writes to.
 *
 * @since 1.30
 * @internal
 * @return BagOStuff
 */
public function getCacheStorage() {
	return $this->cache;
}
/**
 * Decide whether $entry must be rejected as expired for $page under
 * $staleConstraint. A rejection is counted as 'miss_expired' and logged
 * with $cacheTier as the message prefix.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Label of the tier the entry came from, used for logging
 * @return bool True if the entry is expired and the constraint forbids using it
 */
private function checkExpired(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	// Expired data is acceptable from USE_EXPIRED upwards; skip the check entirely.
	if ( $staleConstraint >= self::USE_EXPIRED ) {
		return false;
	}

	if ( !$entry->expired( $page->getTouched() ) ) {
		return false;
	}

	$this->incrementStats( $page, 'miss_expired' );
	$this->logger->debug( "{$cacheTier} key expired", [
		'name' => $this->name,
		'touched' => $page->getTouched(),
		'epoch' => $this->cacheEpoch,
		'cache_time' => $entry->getCacheTime()
	] );

	return true;
}
/**
 * Decide whether $entry must be rejected because it was generated for a
 * revision other than the latest one of $page, under $staleConstraint.
 * A rejection is counted as 'miss_revid' and logged with $cacheTier as
 * the message prefix.
 *
 * @param CacheTime $entry
 * @param PageRecord $page
 * @param int $staleConstraint One of USE_* constants.
 * @param string $cacheTier Label of the tier the entry came from, used for logging
 * @return bool True if the entry is for another revision and the constraint forbids using it
 */
private function checkOutdated(
	CacheTime $entry,
	PageRecord $page,
	int $staleConstraint,
	string $cacheTier
): bool {
	$latestRevId = $page->getLatest( PageRecord::LOCAL );

	// Data from other revisions is acceptable from USE_OUTDATED upwards.
	if ( $staleConstraint >= self::USE_OUTDATED ) {
		return false;
	}

	if ( !$entry->isDifferentRevision( $latestRevId ) ) {
		return false;
	}

	$this->incrementStats( $page, "miss_revid" );
	$this->logger->debug( "{$cacheTier} key is for an old revision", [
		'name' => $this->name,
		'rev_id' => $latestRevId,
		'cached_rev_id' => $entry->getCacheRevisionId()
	] );

	return true;
}
/**
 * Turn a JSON string read from the cache back into an object.
 *
 * An InvalidArgumentException raised while unserializing is logged as an
 * error together with the cache key, and null is returned instead.
 *
 * @param string $jsonData
 * @param string $key Cache key the data was read from, used for logging only
 * @param string $expectedClass Class name passed on to the JSON codec
 * @return CacheTime|ParserOutput|null
 */
private function restoreFromJson( string $jsonData, string $key, string $expectedClass ) {
	try {
		/** @var CacheTime $restored */
		$restored = $this->jsonCodec->unserialize( $jsonData, $expectedClass );
	} catch ( InvalidArgumentException $e ) {
		$this->logger->error( "Unable to unserialize JSON", [
			'name' => $this->name,
			'cache_key' => $key,
			'message' => $e->getMessage()
		] );
		return null;
	}

	return $restored;
}
/**
 * Serialize an object to JSON for storage in the cache.
 *
 * An InvalidArgumentException raised while serializing is logged as an
 * error together with the cache key, and null is returned instead.
 *
 * @param CacheTime $obj
 * @param string $key Cache key the data is destined for, used for logging only
 * @return string|null
 */
protected function convertForCache( CacheTime $obj, string $key ) {
	try {
		$serialized = $this->jsonCodec->serialize( $obj );
	} catch ( InvalidArgumentException $e ) {
		$this->logger->error( "Unable to serialize JSON", [
			'name' => $this->name,
			'cache_key' => $key,
			'message' => $e->getMessage(),
		] );
		return null;
	}

	return $serialized;
}