From cd7ab52a4a799d4c072fedf717e3f6e2e08b42a9 Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Tue, 29 Nov 2016 03:38:57 +0000 Subject: [PATCH] MessageCache invalidation improvements Re-submission of 9339a08b7. * Increase time range for getValidationHash() using "latest" values. The lower value ran the risk of regenerating from slaves and ending up with *older* data than what was there. * Avoid cache set() calls in replace() unless the lock was acquired. Use delete() instead in that case, which invalidates the cache. * Remember if the cache is volatile in process memory instead of doing check key lookups for each "big" message to determine this. Use the message hash in the big message keys so purges to the former chain down to the latter. An "EXCESSIVE" key/revision map is now used in the main cache for big messages. This means that editing an existing big message will result in a different hash value. This is needed so purges propage correctly. * Add logging when replace() fails to acquire the lock. * Factored message cache update code duplication into a new method. * Use makeKey() in more places, replacing deprecated wfMemcKey(). Change-Id: I82c5baa8137d1ffaaec6adace82ccb0181441342 --- includes/cache/MessageCache.php | 190 +++++++++++++++++++++++++++------------- includes/page/WikiPage.php | 40 ++------- 2 files changed, 132 insertions(+), 98 deletions(-) diff --git a/includes/cache/MessageCache.php b/includes/cache/MessageCache.php index 0c2f9de9b5f..3f78d9a4c8d 100644 --- a/includes/cache/MessageCache.php +++ b/includes/cache/MessageCache.php @@ -22,6 +22,7 @@ */ use MediaWiki\MediaWikiServices; use Wikimedia\ScopedCallback; +use MediaWiki\Logger\LoggerFactory; /** * MediaWiki message cache structure version. @@ -53,6 +54,11 @@ class MessageCache { protected $mCache; /** + * @var bool[] Map of (language code => boolean) + */ + protected $mCacheVolatile = []; + + /** * Should mean that database cannot be used, but check * @var bool $mDisable */ @@ -65,10 +71,12 @@ class MessageCache { protected $mExpiry; /** - * Message cache has its own parser which it uses to transform - * messages. + * Message cache has its own parser which it uses to transform messages + * @var ParserOptions */ - protected $mParserOptions, $mParser; + protected $mParserOptions; + /** @var Parser */ + protected $mParser; /** * Variable for tracking which variables are already loaded @@ -129,6 +137,7 @@ class MessageCache { */ public static function normalizeKey( $key ) { global $wgContLang; + $lckey = strtr( $key, ' ', '_' ); if ( ord( $lckey ) < 128 ) { $lckey[0] = strtolower( $lckey[0] ); @@ -258,6 +267,7 @@ class MessageCache { # Hash of the contents is stored in memcache, to detect if data-center cache # or local cache goes out of date (e.g. due to replace() on some other server) list( $hash, $hashVolatile ) = $this->getValidationHash( $code ); + $this->mCacheVolatile[$code] = $hashVolatile; # Try the local cache and check against the cluster hash key... $cache = $this->getLocalCache( $code ); @@ -473,9 +483,16 @@ class MessageCache { $bigConds[] = 'page_len > ' . intval( $wgMaxMsgCacheEntrySize ); # Load titles for all oversized pages in the MediaWiki namespace - $res = $dbr->select( 'page', 'page_title', $bigConds, __METHOD__ . "($code)-big" ); + $res = $dbr->select( + 'page', + [ 'page_title', 'page_latest' ], + $bigConds, + __METHOD__ . "($code)-big" + ); foreach ( $res as $row ) { $cache[$row->page_title] = '!TOO BIG'; + // At least include revision ID so page changes are reflected in the hash + $cache['EXCESSIVE'][$row->page_title] = $row->page_latest; } # Conditions to load the remaining pages with their contents @@ -520,7 +537,7 @@ class MessageCache { * Updates cache as necessary when message page is changed * * @param string|bool $title Name of the page changed (false if deleted) - * @param mixed $text New contents of the page. + * @param string|bool $text New contents of the page (false if deleted) */ public function replace( $title, $text ) { global $wgMaxMsgCacheEntrySize, $wgContLang, $wgLanguageCode; @@ -540,31 +557,32 @@ class MessageCache { // a self-deadlock. This is safe as no reads happen *directly* in this // method between getReentrantScopedLock() and load() below. There is // no risk of data "changing under our feet" for replace(). - $cacheKey = wfMemcKey( 'messages', $code ); - $scopedLock = $this->getReentrantScopedLock( $cacheKey ); + $scopedLock = $this->getReentrantScopedLock( wfMemcKey( 'messages', $code ) ); + // Load the messages from the master DB to avoid race conditions $this->load( $code, self::FOR_UPDATE ); - $titleKey = wfMemcKey( 'messages', 'individual', $title ); + // Load the new value into the process cache... if ( $text === false ) { - // Article was deleted $this->mCache[$code][$title] = '!NONEXISTENT'; - $this->wanCache->delete( $titleKey ); } elseif ( strlen( $text ) > $wgMaxMsgCacheEntrySize ) { - // Check for size $this->mCache[$code][$title] = '!TOO BIG'; - $this->wanCache->set( $titleKey, ' ' . $text, $this->mExpiry ); + // Pre-fill the individual key cache with the known latest message text + $key = $this->wanCache->makeKey( 'messages-big', $this->mCache[$code]['HASH'], $title ); + $this->wanCache->set( $key, " $text", $this->mExpiry ); } else { $this->mCache[$code][$title] = ' ' . $text; - $this->wanCache->delete( $titleKey ); } - - // Mark this cache as definitely "latest" (non-volatile) so - // load() calls do try to refresh the cache with replica DB data + // Mark this cache as definitely being "latest" (non-volatile) so + // load() calls do not try to refresh the cache with replica DB data $this->mCache[$code]['LATEST'] = time(); // Update caches if the lock was acquired if ( $scopedLock ) { $this->saveToCaches( $this->mCache[$code], 'all', $code ); + } else { + LoggerFactory::getInstance( 'MessageCache' )->error( + __METHOD__ . ': could not acquire lock to update {title} ({code})', + [ 'title' => $title, 'code' => $code ] ); } ScopedCallback::consume( $scopedLock ); @@ -644,24 +662,26 @@ class MessageCache { protected function getValidationHash( $code ) { $curTTL = null; $value = $this->wanCache->get( - wfMemcKey( 'messages', $code, 'hash', 'v1' ), + $this->wanCache->makeKey( 'messages', $code, 'hash', 'v1' ), $curTTL, [ wfMemcKey( 'messages', $code ) ] ); - if ( !$value ) { - // No hash found at all; cache must regenerate to be safe - $hash = false; - $expired = true; - } else { + if ( $value ) { $hash = $value['hash']; - if ( ( time() - $value['latest'] ) < WANObjectCache::HOLDOFF_TTL ) { - // Cache was recently updated via replace() and should be up-to-date + if ( ( time() - $value['latest'] ) < WANObjectCache::TTL_MINUTE ) { + // Cache was recently updated via replace() and should be up-to-date. + // That method is only called in the primary datacenter and uses FOR_UPDATE. + // Also, it is unlikely that the current datacenter is *now* secondary one. $expired = false; } else { // See if the "check" key was bumped after the hash was generated $expired = ( $curTTL < 0 ); } + } else { + // No hash found at all; cache must regenerate to be safe + $hash = false; + $expired = true; } return [ $hash, $expired ]; @@ -671,14 +691,15 @@ class MessageCache { * Set the md5 used to validate the local disk cache * * If $cache has a 'LATEST' UNIX timestamp key, then the hash will not - * be treated as "volatile" by getValidationHash() for the next few seconds + * be treated as "volatile" by getValidationHash() for the next few seconds. + * This is triggered when $cache is generated using FOR_UPDATE mode. * * @param string $code * @param array $cache Cached messages with a version */ protected function setValidationHash( $code, array $cache ) { $this->wanCache->set( - wfMemcKey( 'messages', $code, 'hash', 'v1' ), + $this->wanCache->makeKey( 'messages', $code, 'hash', 'v1' ), [ 'hash' => $cache['HASH'], 'latest' => isset( $cache['LATEST'] ) ? $cache['LATEST'] : 0 @@ -841,6 +862,7 @@ class MessageCache { */ private function getMessageForLang( $lang, $lckey, $useDB, &$alreadyTried ) { global $wgContLang; + $langcode = $lang->getCode(); // Try checking the database for the requested language @@ -899,6 +921,7 @@ class MessageCache { */ private function getMessagePageName( $langcode, $uckey ) { global $wgLanguageCode; + if ( $langcode === $wgLanguageCode ) { // Messages created in the content language will not have the /lang extension return $uckey; @@ -924,8 +947,7 @@ class MessageCache { if ( isset( $this->mCache[$code][$title] ) ) { $entry = $this->mCache[$code][$title]; if ( substr( $entry, 0, 1 ) === ' ' ) { - // The message exists, so make sure a string - // is returned. + // The message exists, so make sure a string is returned. return (string)substr( $entry, 1 ); } elseif ( $entry === '!NONEXISTENT' ) { return false; @@ -944,17 +966,19 @@ class MessageCache { } // Try the individual message cache - $titleKey = wfMemcKey( 'messages', 'individual', $title ); - - $curTTL = null; - $entry = $this->wanCache->get( - $titleKey, - $curTTL, - [ wfMemcKey( 'messages', $code ) ] - ); - $entry = ( $curTTL >= 0 ) ? $entry : false; + $titleKey = $this->wanCache->makeKey( 'messages-big', $this->mCache[$code]['HASH'], $title ); + + if ( $this->mCacheVolatile[$code] ) { + $entry = false; + // Make sure that individual keys respect the WAN cache holdoff period too + LoggerFactory::getInstance( 'MessageCache' )->debug( + __METHOD__ . ': loading volatile key \'{titleKey}\'', + [ 'titleKey' => $titleKey, 'code' => $code ] ); + } else { + $entry = $this->wanCache->get( $titleKey ); + } - if ( $entry ) { + if ( $entry !== false ) { if ( substr( $entry, 0, 1 ) === ' ' ) { $this->mCache[$code][$title] = $entry; // The message exists, so make sure a string is returned @@ -969,7 +993,7 @@ class MessageCache { } } - // Try loading it from the database + // Try loading the message from the database $dbr = wfGetDB( DB_REPLICA ); $cacheOpts = Database::getCacheSetOptions( $dbr ); // Use newKnownCurrent() to avoid querying revision/user tables @@ -986,32 +1010,18 @@ class MessageCache { if ( $revision ) { $content = $revision->getContent(); - if ( !$content ) { - // A possibly temporary loading failure. - wfDebugLog( - 'MessageCache', - __METHOD__ . ": failed to load message page text for {$title} ($code)" - ); - $message = null; // no negative caching - } else { - // XXX: Is this the right way to turn a Content object into a message? - // NOTE: $content is typically either WikitextContent, JavaScriptContent or - // CssContent. MessageContent is *not* used for storing messages, it's - // only used for wrapping them when needed. - $message = $content->getWikitextForTransclusion(); - - if ( $message === false || $message === null ) { - wfDebugLog( - 'MessageCache', - __METHOD__ . ": message content doesn't provide wikitext " - . "(content model: " . $content->getModel() . ")" - ); - - $message = false; // negative caching - } else { + if ( $content ) { + $message = $this->getMessageTextFromContent( $content ); + if ( is_string( $message ) ) { $this->mCache[$code][$title] = ' ' . $message; $this->wanCache->set( $titleKey, ' ' . $message, $this->mExpiry, $cacheOpts ); } + } else { + // A possibly temporary loading failure + LoggerFactory::getInstance( 'MessageCache' )->warning( + __METHOD__ . ': failed to load message page text for \'{titleKey}\'', + [ 'titleKey' => $titleKey, 'code' => $code ] ); + $message = null; // no negative caching } } else { $message = false; // negative caching @@ -1063,6 +1073,7 @@ class MessageCache { */ function getParser() { global $wgParser, $wgParserConf; + if ( !$this->mParser && isset( $wgParser ) ) { # Do some initialisation so that we don't have to do it twice $wgParser->firstCallInit(); @@ -1090,6 +1101,8 @@ class MessageCache { public function parse( $text, $title = null, $linestart = true, $interface = false, $language = null ) { + global $wgTitle; + if ( $this->mInParser ) { return htmlspecialchars( $text ); } @@ -1104,7 +1117,6 @@ class MessageCache { $popts->setTargetLanguage( $language ); if ( !$title || !$title instanceof Title ) { - global $wgTitle; wfDebugLog( 'GlobalTitleFail', __METHOD__ . ' called by ' . wfGetAllCallers( 6 ) . ' with no title set.' ); $title = $wgTitle; @@ -1192,6 +1204,7 @@ class MessageCache { */ public function getAllMessageKeys( $code ) { global $wgContLang; + $this->load( $code ); if ( !isset( $this->mCache[$code] ) ) { // Apparently load() failed @@ -1201,10 +1214,61 @@ class MessageCache { $cache = $this->mCache[$code]; unset( $cache['VERSION'] ); unset( $cache['EXPIRY'] ); + unset( $cache['EXCESSIVE'] ); // Remove any !NONEXISTENT keys $cache = array_diff( $cache, [ '!NONEXISTENT' ] ); // Keys may appear with a capital first letter. lcfirst them. return array_map( [ $wgContLang, 'lcfirst' ], array_keys( $cache ) ); } + + /** + * Purge message caches when a MediaWiki: page is created, updated, or deleted + * + * @param Title $title Message page title + * @param Content|null $content New content for edit/create, null on deletion + * @since 1.29 + */ + public function updateMessageOverride( Title $title, Content $content = null ) { + global $wgContLang; + + $msgText = $this->getMessageTextFromContent( $content ); + if ( $msgText === null ) { + $msgText = false; // treat as not existing + } + + $this->replace( $title->getDBkey(), $msgText ); + + if ( $wgContLang->hasVariants() ) { + $wgContLang->updateConversionTable( $title ); + } + } + + /** + * @param Content|null $content Content or null if the message page does not exist + * @return string|bool|null Returns false if $content is null and null on error + */ + private function getMessageTextFromContent( Content $content = null ) { + // @TODO: could skip pseudo-messages like js/css here, based on content model + if ( $content ) { + // Message page exists... + // XXX: Is this the right way to turn a Content object into a message? + // NOTE: $content is typically either WikitextContent, JavaScriptContent or + // CssContent. MessageContent is *not* used for storing messages, it's + // only used for wrapping them when needed. + $msgText = $content->getWikitextForTransclusion(); + if ( $msgText === false || $msgText === null ) { + // This might be due to some kind of misconfiguration... + $msgText = null; + LoggerFactory::getInstance( 'MessageCache' )->warning( + __METHOD__ . ": message content doesn't provide wikitext " + . "(content model: " . $content->getModel() . ")" ); + } + } else { + // Message page does not exist... + $msgText = false; + } + + return $msgText; + } } diff --git a/includes/page/WikiPage.php b/includes/page/WikiPage.php index f0164944325..dc6554942fd 100644 --- a/includes/page/WikiPage.php +++ b/includes/page/WikiPage.php @@ -1173,22 +1173,8 @@ class WikiPage implements Page, IDBAccessObject { } if ( $this->mTitle->getNamespace() == NS_MEDIAWIKI ) { - // @todo move this logic to MessageCache - if ( $this->exists() ) { - // NOTE: use transclusion text for messages. - // This is consistent with MessageCache::getMsgFromNamespace() - - $content = $this->getContent(); - $text = $content === null ? null : $content->getWikitextForTransclusion(); - - if ( $text === null ) { - $text = false; - } - } else { - $text = false; - } - - MessageCache::singleton()->replace( $this->mTitle->getDBkey(), $text ); + $messageCache = MessageCache::singleton(); + $messageCache->updateMessageOverride( $this->mTitle, $this->getContent() ); } return true; @@ -2250,7 +2236,7 @@ class WikiPage implements Page, IDBAccessObject { * - 'no-change': don't update the article count, ever */ public function doEditUpdates( Revision $revision, User $user, array $options = [] ) { - global $wgRCWatchCategoryMembership, $wgContLang; + global $wgRCWatchCategoryMembership; $options += [ 'changed' => true, @@ -2388,17 +2374,7 @@ class WikiPage implements Page, IDBAccessObject { } if ( $this->mTitle->getNamespace() == NS_MEDIAWIKI ) { - // XXX: could skip pseudo-messages like js/css here, based on content model. - $msgtext = $content ? $content->getWikitextForTransclusion() : null; - if ( $msgtext === false || $msgtext === null ) { - $msgtext = ''; - } - - MessageCache::singleton()->replace( $shortTitle, $msgtext ); - - if ( $wgContLang->hasVariants() ) { - $wgContLang->updateConversionTable( $this->mTitle ); - } + MessageCache::singleton()->updateMessageOverride( $this->mTitle, $content ); } if ( $options['created'] ) { @@ -3396,8 +3372,6 @@ class WikiPage implements Page, IDBAccessObject { * @param Title $title */ public static function onArticleDelete( Title $title ) { - global $wgContLang; - // Update existence markers on article/talk tabs... $other = $title->getOtherPage(); @@ -3414,11 +3388,7 @@ class WikiPage implements Page, IDBAccessObject { // Messages if ( $title->getNamespace() == NS_MEDIAWIKI ) { - MessageCache::singleton()->replace( $title->getDBkey(), false ); - - if ( $wgContLang->hasVariants() ) { - $wgContLang->updateConversionTable( $title ); - } + MessageCache::singleton()->updateMessageOverride( $title, null ); } // Images -- 2.11.4.GIT