ParsoidParser: Record ParserOptions watcher on ParserOutput object
[mediawiki.git] / includes / Storage / PageEditStash.php
blobae89bec2363a7bc48642cf3fba0b13b7c4b553d3
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
18 * @file
21 namespace MediaWiki\Storage;
23 use BagOStuff;
24 use Content;
25 use Liuggio\StatsdClient\Factory\StatsdDataFactoryInterface;
26 use MediaWiki\HookContainer\HookContainer;
27 use MediaWiki\HookContainer\HookRunner;
28 use MediaWiki\Page\PageIdentity;
29 use MediaWiki\Page\WikiPageFactory;
30 use MediaWiki\Parser\ParserOutputFlags;
31 use MediaWiki\Revision\SlotRecord;
32 use MediaWiki\Storage\Hook\ParserOutputStashForEditHook;
33 use MediaWiki\User\UserEditTracker;
34 use MediaWiki\User\UserFactory;
35 use MediaWiki\User\UserIdentity;
36 use ParserOutput;
37 use Psr\Log\LoggerInterface;
38 use stdClass;
39 use Wikimedia\Rdbms\ILoadBalancer;
40 use Wikimedia\ScopedCallback;
41 use WikiPage;
43 /**
44 * Manage the pre-emptive page parsing for edits to wiki pages.
46 * This is written to by ApiStashEdit, and consumed by ApiEditPage
47 * and EditPage (via PageUpdaterFactory and DerivedPageDataUpdater).
49 * See also mediawiki.action.edit/stash.js.
51 * @since 1.34
52 * @ingroup Page
54 class PageEditStash {
55 /** @var BagOStuff Cache holding stashed edit info, stashed input text and per-user lists of recent stash keys */
56 private $cache;
57 /** @var ILoadBalancer Provides the primary connection used for named locks and the replica used for the recent changes lookup */
58 private $lb;
59 /** @var LoggerInterface Destination for stash hit, miss and failure messages */
60 private $logger;
61 /** @var StatsdDataFactoryInterface Destination for the editstash counters and the lock wait timing */
62 private $stats;
63 /** @var ParserOutputStashForEditHook Set to a HookRunner built from the injected HookContainer */
64 private $hookRunner;
65 /** @var UserEditTracker Supplies the edit counts stored with, and later compared against, stash entries */
66 private $userEditTracker;
67 /** @var UserFactory Turns UserIdentity objects into the legacy user objects used by this class */
68 private $userFactory;
69 /** @var WikiPageFactory Builds the legacy page object passed to the ParserOutputStashForEdit hook */
70 private $wikiPageFactory;
71 /** @var int One of the INITIATOR_* constants */
72 private $initiator;
// Result codes returned by parseAndCache():
// ERROR_NONE - the output was stored, or a fresh entry was already cached
74 public const ERROR_NONE = 'stashed';
// ERROR_PARSE - no parser output was available to stash
75 public const ERROR_PARSE = 'error_parse';
// ERROR_CACHE - writing the stash entry failed
76 public const ERROR_CACHE = 'error_cache';
// ERROR_UNCACHEABLE - the computed TTL for the output was zero or negative
77 public const ERROR_UNCACHEABLE = 'uncacheable';
// ERROR_BUSY - another request already holds the lock for the same stash key
78 public const ERROR_BUSY = 'busy';
// Entries at most this many seconds old are treated as fresh without further checks
80 public const PRESUME_FRESH_TTL_SEC = 30;
// Upper bound, in seconds, on the TTL of stashed entries and stashed input text
81 public const MAX_CACHE_TTL = 300; // 5 minutes
// Upper bound, in seconds, on the TTL of output flagged as containing a user signature
82 public const MAX_SIGNATURE_TTL = 60;
// Number of stash keys remembered per user before the oldest entry is deleted
84 private const MAX_CACHE_RECENT = 2;
// Values for the constructor's $initiator argument; checkCache() only consults
// the stash when the initiator is INITIATOR_USER
86 public const INITIATOR_USER = 1;
87 public const INITIATOR_JOB_OR_CLI = 2;
/**
 * Wire up the services this stash manager depends on.
 *
 * @param BagOStuff $cache
 * @param ILoadBalancer $lb
 * @param LoggerInterface $logger
 * @param StatsdDataFactoryInterface $stats
 * @param UserEditTracker $userEditTracker
 * @param UserFactory $userFactory
 * @param WikiPageFactory $wikiPageFactory
 * @param HookContainer $hookContainer Wrapped in a HookRunner for hook dispatch
 * @param int $initiator Class INITIATOR_* constant
 */
public function __construct(
	BagOStuff $cache,
	ILoadBalancer $lb,
	LoggerInterface $logger,
	StatsdDataFactoryInterface $stats,
	UserEditTracker $userEditTracker,
	UserFactory $userFactory,
	WikiPageFactory $wikiPageFactory,
	HookContainer $hookContainer,
	$initiator
) {
	// Request context
	$this->initiator = $initiator;

	// Storage and database access
	$this->cache = $cache;
	$this->lb = $lb;

	// Diagnostics
	$this->logger = $logger;
	$this->stats = $stats;

	// User and page services
	$this->userEditTracker = $userEditTracker;
	$this->userFactory = $userFactory;
	$this->wikiPageFactory = $wikiPageFactory;

	// Hook dispatch
	$this->hookRunner = new HookRunner( $hookContainer );
}
/**
 * Prepare the given content as an edit would, then stash the resulting
 * pre-save-transformed content and parser output under a key derived from
 * the page, the content and the user.
 *
 * @param PageUpdater $pageUpdater (a WikiPage instance is also supported but deprecated)
 * @param Content $content Edit content
 * @param UserIdentity $user
 * @param string $summary Edit summary
 * @return string Class ERROR_* constant
 */
public function parseAndCache( $pageUpdater, Content $content, UserIdentity $user, string $summary ) {
	if ( $pageUpdater instanceof WikiPage ) {
		// TODO: Trigger deprecation warning once extensions have been fixed.
		// Or better, create PageUpdater::prepareAndStash and deprecate this method.
		$pageUpdater = $pageUpdater->newPageUpdater( $user );
	}

	$page = $pageUpdater->getPage();
	$stashKey = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$caller = __METHOD__;

	// Use the primary DB to allow for fast blocking locks on the "save path" where this
	// value might actually be used to complete a page edit. If the edit submission request
	// happens before this edit stash requests finishes, then the submission will block until
	// the stash request finishes parsing. For the lock acquisition below, there is not much
	// need to duplicate parsing of the same content/user/summary bundle, so try to avoid
	// blocking at all here.
	$dbw = $this->lb->getConnectionRef( DB_PRIMARY );
	if ( !$dbw->lock( $stashKey, $caller, 0 ) ) {
		// De-duplicate requests on the same key
		return self::ERROR_BUSY;
	}
	// Releases the lock when this variable goes out of scope
	/** @noinspection PhpUnusedLocalVariableInspection */
	$unlocker = new ScopedCallback( static function () use ( $dbw, $stashKey, $caller ) {
		$dbw->unlock( $stashKey, $caller );
	} );

	$freshSince = time() - self::PRESUME_FRESH_TTL_SEC;

	// Reuse any freshly built matching edit stash entry
	$editInfo = $this->getStashValue( $stashKey );
	$alreadyCached = $editInfo && (int)wfTimestamp( TS_UNIX, $editInfo->timestamp ) >= $freshSince;
	if ( !$alreadyCached ) {
		$pageUpdater->setContent( SlotRecord::MAIN, $content );

		$update = $pageUpdater->prepareUpdate( EDIT_INTERNAL ); // applies pre-safe transform
		$output = $update->getCanonicalParserOutput(); // causes content to be parsed
		$output->setCacheTime( $update->getRevision()->getTimestamp() );

		// emulate a cache value that kind of looks like a PreparedEdit, for use below
		$editInfo = (object)[
			'pstContent' => $update->getRawContent( SlotRecord::MAIN ),
			'output' => $output,
			'timestamp' => $output->getCacheTime()
		];
	}

	$logContext = [ 'cachekey' => $stashKey, 'title' => (string)$page ];

	if ( !$editInfo->output ) {
		return self::ERROR_PARSE;
	}

	// Let extensions add ParserOutput metadata or warm other caches
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	$legacyPage = $this->wikiPageFactory->newFromTitle( $page );
	$this->hookRunner->onParserOutputStashForEdit(
		$legacyPage, $content, $editInfo->output, $summary, $legacyUser );

	if ( $alreadyCached ) {
		$this->logger->debug( "Parser output for key '{cachekey}' already cached.", $logContext );

		return self::ERROR_NONE;
	}

	$code = $this->storeStashValue(
		$stashKey,
		$editInfo->pstContent,
		$editInfo->output,
		$editInfo->timestamp,
		$user
	);

	if ( $code === true ) {
		$this->logger->debug( "Cached parser output for key '{cachekey}'.", $logContext );

		return self::ERROR_NONE;
	}

	if ( $code === 'uncacheable' ) {
		$this->logger->info(
			"Uncacheable parser output for key '{cachekey}' [{code}].",
			$logContext + [ 'code' => $code ]
		);

		return self::ERROR_UNCACHEABLE;
	}

	$this->logger->error(
		"Failed to cache parser output for key '{cachekey}'.",
		$logContext + [ 'code' => $code ]
	);

	return self::ERROR_CACHE;
}
/**
 * Check that a prepared edit is in cache and still up-to-date
 *
 * This method blocks if the prepared edit is already being rendered,
 * waiting until rendering finishes before doing final validity checks.
 *
 * The cache is rejected if template or file changes are detected.
 * Note that foreign template or file transclusions are not checked.
 *
 * This returns an object with the following fields:
 *   - pstContent: the Content after pre-save-transform
 *   - output: the ParserOutput instance
 *   - timestamp: the timestamp of the parse
 *   - edits: author edit count if they are logged in or NULL otherwise
 *
 * The stash is only consulted for HTTP POST requests made by a non-bot user
 * when this instance was created with INITIATOR_USER; otherwise false is
 * returned immediately.
 *
 * @param PageIdentity $page
 * @param Content $content
 * @param UserIdentity $user to get parser options from
 * @return stdClass|false Returns edit stash object or false on cache miss
 */
public function checkCache( PageIdentity $page, Content $content, UserIdentity $user ) {
	$legacyUser = $this->userFactory->newFromUserIdentity( $user );
	if (
		// The context is not an HTTP POST request
		!$legacyUser->getRequest()->wasPosted() ||
		// The context is a CLI script or a job runner HTTP POST request
		$this->initiator !== self::INITIATOR_USER ||
		// The editor account is a known bot
		$legacyUser->isBot()
	) {
		// Avoid wasted queries and statsd pollution
		return false;
	}

	$logger = $this->logger;

	$key = $this->getStashKey( $page, $this->getContentHash( $content ), $user );
	$logContext = [
		'key' => $key,
		'title' => (string)$page,
		'user' => $user->getName()
	];

	$editInfo = $this->getAndWaitForStashValue( $key );
	if ( !is_object( $editInfo ) || !$editInfo->output ) {
		$this->incrStatsByContent( 'cache_misses.no_stash', $content );
		if ( $this->recentStashEntryCount( $user ) > 0 ) {
			// The user has stashed something recently, just not under this key
			$logger->info( "Empty cache for key '{key}' but not for user.", $logContext );
		} else {
			$logger->debug( "Empty cache for key '{key}'.", $logContext );
		}

		return false;
	}

	$age = time() - (int)wfTimestamp( TS_UNIX, $editInfo->output->getCacheTime() );
	$logContext['age'] = $age;

	$isCacheUsable = true;
	if ( $age <= self::PRESUME_FRESH_TTL_SEC ) {
		// Assume nothing changed in this time
		$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
		$logger->debug( "Timestamp-based cache hit for key '{key}'.", $logContext );
	} elseif ( !$user->isRegistered() ) {
		$lastEdit = $this->lastEditTime( $user );
		$cacheTime = $editInfo->output->getCacheTime();
		if ( $lastEdit < $cacheTime ) {
			// Logged-out user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit check based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	} else {
		if ( $editInfo->edits === $this->userEditTracker->getUserEditCount( $user ) ) {
			// Logged-in user made no local upload/template edits in the meantime
			$this->incrStatsByContent( 'cache_hits.presumed_fresh', $content );
			$logger->debug( "Edit count based cache hit for key '{key}'.", $logContext );
		} else {
			$isCacheUsable = false;
			$this->incrStatsByContent( 'cache_misses.proven_stale', $content );
			// Same wording as the logged-out branch above
			$logger->info( "Stale cache for key '{key}' due to outside edits.", $logContext );
		}
	}

	if ( !$isCacheUsable ) {
		return false;
	}

	if ( $editInfo->output->getOutputFlag( ParserOutputFlags::VARY_REVISION ) ) {
		// This can be used for the initial parse, e.g. for filters or doUserEditContent(),
		// but a second parse will be triggered in doEditUpdates() no matter what
		$logger->info(
			"Cache for key '{key}' has vary-revision; post-insertion parse inevitable.",
			$logContext
		);
	} else {
		static $flagsMaybeReparse = [
			// Similar to the above if we didn't guess the ID correctly
			ParserOutputFlags::VARY_REVISION_ID,
			// Similar to the above if we didn't guess the timestamp correctly
			ParserOutputFlags::VARY_REVISION_TIMESTAMP,
			// Similar to the above if we didn't guess the content correctly
			ParserOutputFlags::VARY_REVISION_SHA1,
			// Similar to the above if we didn't guess page ID correctly
			ParserOutputFlags::VARY_PAGE_ID,
		];
		foreach ( $flagsMaybeReparse as $flag ) {
			if ( $editInfo->output->getOutputFlag( $flag ) ) {
				$logger->debug(
					"Cache for key '{key}' has $flag; post-insertion parse possible.",
					$logContext
				);
			}
		}
	}

	return $editInfo;
}
/**
 * Bump a stash counter twice: once overall and once per content model.
 *
 * @param string $subkey
 * @param Content $content
 */
private function incrStatsByContent( $subkey, Content $content ) {
	// overall for b/c
	$this->stats->increment( "editstash.{$subkey}" );

	$perModelKey = implode( '.', [ 'editstash_by_model', $content->getModel(), $subkey ] );
	$this->stats->increment( $perModelKey );
}
/**
 * Fetch the stash entry for a key, waiting for a concurrent stash request
 * on the same key to finish if no entry is available yet.
 *
 * When the first lookup misses, this takes the named lock for the key on the
 * primary DB (waiting up to 30 seconds), looks the entry up again, and
 * records the time spent as the 'editstash.lock_wait_time' timing.
 *
 * @param string $key
 * @return bool|stdClass
 */
private function getAndWaitForStashValue( $key ) {
	$editInfo = $this->getStashValue( $key );
	if ( $editInfo ) {
		return $editInfo;
	}

	$start = microtime( true );
	// We ignore user aborts and keep parsing. Block on any prior parsing
	// so as to use its results and make use of the time spent parsing.
	$dbw = $this->lb->getConnection( DB_PRIMARY );
	if ( $dbw->lock( $key, __METHOD__, 30 ) ) {
		try {
			$editInfo = $this->getStashValue( $key );
		} finally {
			// Release the lock even if reading the entry throws
			$dbw->unlock( $key, __METHOD__ );
		}
	}

	$timeMs = 1000 * max( 0, microtime( true ) - $start );
	$this->stats->timing( 'editstash.lock_wait_time', $timeMs );

	return $editInfo;
}
/**
 * Look up input text previously saved with stashInputText().
 *
 * @param string $textHash
 * @return string|bool Text or false if missing
 */
public function fetchInputText( $textHash ) {
	return $this->cache->get(
		$this->cache->makeKey( 'stashedit', 'text', $textHash )
	);
}
/**
 * Save input text in the cache under its hash for up to MAX_CACHE_TTL seconds.
 *
 * @param string $text
 * @param string $textHash
 * @return bool Success
 */
public function stashInputText( $text, $textHash ) {
	$ttl = self::MAX_CACHE_TTL;
	$flags = BagOStuff::WRITE_ALLOW_SEGMENTS;
	$textKey = $this->cache->makeKey( 'stashedit', 'text', $textHash );

	return $this->cache->set( $textKey, $text, $ttl, $flags );
}
/**
 * Find the newest recentchanges timestamp attributed to the user's name,
 * read from a replica DB.
 *
 * @param UserIdentity $user
 * @return string|null TS_MW timestamp or null
 */
private function lastEditTime( UserIdentity $user ) {
	$dbr = $this->lb->getConnectionRef( DB_REPLICA );

	$builder = $dbr->newSelectQueryBuilder();
	$builder = $builder->select( 'MAX(rc_timestamp)' );
	$builder = $builder->from( 'recentchanges' );
	$builder = $builder->join( 'actor', null, 'actor_id=rc_actor' );
	$builder = $builder->where( [ 'actor_name' => $user->getName() ] );
	$builder = $builder->caller( __METHOD__ );

	$newest = $builder->fetchField();

	return wfTimestampOrNull( TS_MW, $newest );
}
/**
 * Get hash of the content, factoring in model/format
 *
 * The hash covers the model, the default format and the content serialized
 * in that format, joined by newlines.
 *
 * @param Content $content
 * @return string
 */
private function getContentHash( Content $content ) {
	$model = $content->getModel();
	$format = $content->getDefaultFormat();
	$serialized = $content->serialize( $content->getDefaultFormat() );

	return sha1( $model . "\n" . $format . "\n" . $serialized );
}
/**
 * Get the temporary prepared edit stash key for a user
 *
 * This key can be used for caching prepared edits provided:
 *   - a) The $user was used for PST options
 *   - b) The parser output was made from the PST using canonical matching options
 *
 * @param PageIdentity $page
 * @param string $contentHash Result of getContentHash()
 * @param UserIdentity $user User to get parser options from
 * @return string
 */
private function getStashKey( PageIdentity $page, $contentHash, UserIdentity $user ) {
	// Account for the target page
	$pageHash = md5( $page->getNamespace() . "\n" . $page->getDBkey() );
	// Account for user name related variables like signatures
	$userHash = md5( $user->getId() . "\n" . $user->getName() );

	// $contentHash accounts for the edit model/text
	return $this->cache->makeKey( 'stashedit-info-v2', $pageHash, $contentHash, $userHash );
}
/**
 * Read the cache entry for a stash key and decode it.
 *
 * @param string $key
 * @return stdClass|bool Object map (pstContent,output,timestamp,edits) or false
 */
private function getStashValue( $key ) {
	return $this->unserializeStashInfo( $this->cache->get( $key ) );
}
/**
 * Build a value to store in memcached based on the PST content and parser output
 *
 * This makes a simple version of WikiPage::prepareContentForEdit() as stash info
 *
 * @param string $key
 * @param Content $pstContent Pre-Save transformed content
 * @param ParserOutput $parserOutput
 * @param string $timestamp TS_MW
 * @param UserIdentity $user
 * @return string|bool True or an error code
 */
private function storeStashValue(
	$key,
	Content $pstContent,
	ParserOutput $parserOutput,
	$timestamp,
	UserIdentity $user
) {
	// If an item is renewed, mind the cache TTL determined by config and parser functions.
	// Put an upper limit on the TTL to avoid extreme template/file staleness.
	$outputAge = time() - (int)wfTimestamp( TS_UNIX, $parserOutput->getCacheTime() );
	$ttl = min( $parserOutput->getCacheExpiry() - $outputAge, self::MAX_CACHE_TTL );
	// Avoid extremely stale user signature timestamps (T84843)
	if ( $parserOutput->getOutputFlag( ParserOutputFlags::USER_SIGNATURE ) ) {
		$ttl = min( $ttl, self::MAX_SIGNATURE_TTL );
	}

	if ( $ttl <= 0 ) {
		return 'uncacheable'; // low TTL due to a tag, magic word, or signature?
	}

	// Store only what is actually needed (T204742)
	$stashInfo = new stdClass();
	$stashInfo->pstContent = $pstContent;
	$stashInfo->output = $parserOutput;
	$stashInfo->timestamp = $timestamp;
	if ( $user->isRegistered() ) {
		$stashInfo->edits = $this->userEditTracker->getUserEditCount( $user );
	} else {
		$stashInfo->edits = null;
	}

	$blob = $this->serializeStashInfo( $stashInfo );
	if ( $blob === false ) {
		return 'store_error';
	}

	if ( !$this->cache->set( $key, $blob, $ttl, BagOStuff::WRITE_ALLOW_SEGMENTS ) ) {
		return 'store_error';
	}

	// These blobs can waste slots in low cardinality memcached slabs
	$this->pruneExcessStashedEntries( $user, $key );

	return true;
}
/**
 * Record a new stash key in the user's list of recent keys, deleting the
 * oldest stashed entry first when the list already holds MAX_CACHE_RECENT keys.
 *
 * @param UserIdentity $user
 * @param string $newKey
 */
private function pruneExcessStashedEntries( UserIdentity $user, $newKey ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );

	$recentKeys = $this->cache->get( $listKey );
	if ( !$recentKeys ) {
		$recentKeys = [];
	}

	if ( count( $recentKeys ) >= self::MAX_CACHE_RECENT ) {
		$this->cache->delete( array_shift( $recentKeys ), BagOStuff::WRITE_PRUNE_SEGMENTS );
	}

	$recentKeys[] = $newKey;
	$this->cache->set( $listKey, $recentKeys, 2 * self::MAX_CACHE_TTL );
}
/**
 * Count the stash keys currently listed as recent for the user.
 *
 * @param UserIdentity $user
 * @return int
 */
private function recentStashEntryCount( UserIdentity $user ) {
	$listKey = $this->cache->makeKey( 'stash-edit-recent', sha1( $user->getName() ) );
	$recentKeys = $this->cache->get( $listKey );

	return $recentKeys ? count( $recentKeys ) : 0;
}
/**
 * Encode a stash info object for storage using PHP serialization.
 *
 * @param stdClass $stashInfo
 * @return string
 */
private function serializeStashInfo( stdClass $stashInfo ) {
	// @todo: use JSON with ParserOutput and Content
	$blob = serialize( $stashInfo );

	return $blob;
}
/**
 * Decode a value read from the stash cache.
 *
 * Only a string that unserializes to an object whose 'output' property is
 * a ParserOutput is accepted; anything else is treated as a cache miss.
 *
 * @param mixed $serial Value read from the cache
 * @return stdClass|bool The stash info object, or false if the value is unusable
 */
private function unserializeStashInfo( $serial ) {
	if ( !is_string( $serial ) ) {
		return false;
	}

	// @todo: use JSON with ParserOutput and Content
	$stashInfo = unserialize( $serial );
	if (
		is_object( $stashInfo ) &&
		// Avoid an undefined property warning on objects without 'output'
		isset( $stashInfo->output ) &&
		$stashInfo->output instanceof ParserOutput
	) {
		return $stashInfo;
	}

	return false;
}