/**
 * Page existence cache.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 */
24 namespace MediaWiki\Cache
;
use InvalidArgumentException;
use MapCacheLRU;
use MediaWiki\Linker\LinkTarget;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Page\PageIdentity;
use MediaWiki\Page\PageReference;
use MediaWiki\Page\PageStoreRecord;
use MediaWiki\Title\NamespaceInfo;
use MediaWiki\Title\TitleFormatter;
use MediaWiki\Title\TitleValue;
use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;
use stdClass;
use Wikimedia\ObjectCache\WANObjectCache;
use Wikimedia\Rdbms\Database;
use Wikimedia\Rdbms\IDBAccessObject;
use Wikimedia\Rdbms\ILoadBalancer;
use Wikimedia\Rdbms\IReadableDatabase;
/**
 * Cache for article titles (prefixed DB keys) and ids linked from one source
 *
 * Entries are kept in an in-process LRU map. Rows for the namespaces selected by
 * usePersistentCache() are additionally cached in the WAN object cache.
 */
class LinkCache implements LoggerAwareInterface {
	/** @var MapCacheLRU Process cache, keyed by the value returned from getCacheKey() */
	private $entries;
	/** @var WANObjectCache */
	private $wanCache;
	/** @var TitleFormatter */
	private $titleFormatter;
	/** @var NamespaceInfo */
	private $nsInfo;
	/** @var ILoadBalancer|null */
	private $loadBalancer;
	/** @var LoggerInterface */
	private $logger;

	/** How many Titles to store */
	private const MAX_SIZE = 10000;

	/** Key to page row object or null */
	private const ROW = 0;
	/** Key to query READ_* flags */
	private const FLAGS = 1;

	/**
	 * @param TitleFormatter $titleFormatter
	 * @param WANObjectCache $cache
	 * @param NamespaceInfo $nsInfo
	 * @param ILoadBalancer|null $loadBalancer Use null when no database is set up, for example on installation
	 */
	public function __construct(
		TitleFormatter $titleFormatter,
		WANObjectCache $cache,
		NamespaceInfo $nsInfo,
		?ILoadBalancer $loadBalancer = null
	) {
		$this->entries = new MapCacheLRU( self::MAX_SIZE );
		$this->wanCache = $cache;
		$this->titleFormatter = $titleFormatter;
		$this->nsInfo = $nsInfo;
		$this->loadBalancer = $loadBalancer;
		$this->logger = new NullLogger();
	}

	/**
	 * Replace the logger used for reporting pages that cannot be cached.
	 *
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Derive the process cache key for a page.
	 *
	 * @param LinkTarget|PageReference|array|string $page
	 * @param bool $passThrough Return $page if $page is a string
	 * @return ?string the cache key, or null if the page cannot be cached
	 * @throws InvalidArgumentException if $page is a string and $passThrough is false
	 */
	private function getCacheKey( $page, $passThrough = false ): ?string {
		if ( is_string( $page ) ) {
			if ( $passThrough ) {
				return $page;
			}

			throw new InvalidArgumentException( 'The key may not be given as a string here' );
		}

		if ( is_array( $page ) ) {
			$namespace = $page['page_namespace'];
			$dbkey = $page['page_title'];
			// formatTitle() output may contain spaces; keys use underscores
			return strtr( $this->titleFormatter->formatTitle( $namespace, $dbkey ), ' ', '_' );
		}

		if ( $page instanceof PageReference && $page->getWikiId() !== PageReference::LOCAL ) {
			// No cross-wiki support yet. Perhaps LinkCache can become wiki-aware in the future.
			// NOTE(review): log level restored from upstream MediaWiki; confirm it is info().
			$this->logger->info(
				'cross-wiki page reference',
				[
					'page-wiki' => $page->getWikiId(),
					'page-reference' => $this->titleFormatter->getFullText( $page )
				]
			);
			return null;
		}

		if ( $page instanceof PageIdentity && !$page->canExist() ) {
			// Non-proper page, perhaps a special page or interwiki link or relative section link.
			$this->logger->warning(
				'non-proper page reference: {page-reference}',
				[ 'page-reference' => $this->titleFormatter->getFullText( $page ) ]
			);
			return null;
		}

		if ( $page instanceof LinkTarget
			&& ( $page->isExternal() || $page->getText() === '' || $page->getNamespace() < 0 )
		) {
			// Interwiki link or relative section link. These do not have a page ID, so they
			// can neither be "good" nor "bad" in the sense of this class.
			$this->logger->warning(
				'link to non-proper page: {page-link}',
				[ 'page-link' => $this->titleFormatter->getFullText( $page ) ]
			);
			return null;
		}

		return $this->titleFormatter->getPrefixedDBkey( $page );
	}

	/**
	 * Get the ID of a page known to the process cache
	 *
	 * @param LinkTarget|PageReference|array|string $page The page to get the ID for,
	 *        as an object, an array containing the page_namespace and page_title fields,
	 *        or a prefixed DB key. In MediaWiki 1.36 and earlier, only a string was accepted.
	 * @return int Page ID, or zero if the page was not cached or does not exist or is not a
	 *         proper page (e.g. a special page or an interwiki link).
	 */
	public function getGoodLinkID( $page ) {
		$key = $this->getCacheKey( $page, true );
		if ( $key === null ) {
			return 0;
		}

		$entry = $this->entries->get( $key );
		if ( !$entry ) {
			return 0;
		}

		$row = $entry[self::ROW];

		return $row ? (int)$row->page_id : 0;
	}

	/**
	 * Get the field of a page known to the process cache
	 *
	 * If this link is not a cached good title, it will return NULL.
	 * @param LinkTarget|PageReference|array $page The page to get cached info for.
	 *        Can be given as an object or an associative array containing the
	 *        page_namespace and page_title fields.
	 *        In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 * @param string $field ( 'id', 'length', 'redirect', 'revision', 'model', 'lang' )
	 * @return string|int|null The field value, or null if the page was not cached or does not exist
	 *         or is not a proper page (e.g. a special page or interwiki link).
	 * @throws InvalidArgumentException if $field is not one of the names listed above
	 */
	public function getGoodLinkFieldObj( $page, string $field ) {
		$key = $this->getCacheKey( $page );
		if ( $key === null ) {
			return null;
		}

		$entry = $this->entries->get( $key );
		if ( !$entry ) {
			return null;
		}

		$row = $entry[self::ROW];
		if ( !$row ) {
			return null;
		}

		switch ( $field ) {
			case 'id':
				return (int)$row->page_id;
			case 'length':
				return (int)$row->page_len;
			case 'redirect':
				return (int)$row->page_is_redirect;
			case 'revision':
				return (int)$row->page_latest;
			case 'model':
				return !empty( $row->page_content_model )
					? (string)$row->page_content_model
					: null;
			case 'lang':
				return !empty( $row->page_lang )
					? (string)$row->page_lang
					: null;
			default:
				throw new InvalidArgumentException( "Unknown field: $field" );
		}
	}

	/**
	 * Check if a page is known to be missing based on the process cache
	 *
	 * @param LinkTarget|PageReference|array|string $page The page to get cached info for,
	 *        as an object, an array containing the page_namespace and page_title fields,
	 *        or a prefixed DB key. In MediaWiki 1.36 and earlier, only a string was accepted.
	 * @return bool Whether the page is known to be missing based on the process cache
	 */
	public function isBadLink( $page ) {
		$key = $this->getCacheKey( $page, true );
		if ( $key === null ) {
			return false;
		}

		$entry = $this->entries->get( $key );

		return ( $entry && !$entry[self::ROW] );
	}

	/**
	 * Add information about an existing page to the process cache
	 *
	 * Callers must set the READ_LATEST flag if the row came from a DB_PRIMARY source.
	 * However, the use of such data is highly discouraged; most callers rely on seeing
	 * consistent DB_REPLICA data (e.g. REPEATABLE-READ point-in-time snapshots) and the
	 * accidental use of DB_PRIMARY data via LinkCache is prone to causing anomalies.
	 *
	 * @param LinkTarget|PageReference|array $page The page to set cached info for.
	 *        Can be given as an object or an associative array containing the
	 *        page_namespace and page_title fields.
	 *        In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 * @param stdClass $row Object which has all fields returned by getSelectFields().
	 * @param int $queryFlags The query flags used to retrieve the row, IDBAccessObject::READ_*
	 * @throws InvalidArgumentException if $row lacks a field listed by getSelectFields()
	 */
	public function addGoodLinkObjFromRow(
		$page,
		stdClass $row,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	) {
		$key = $this->getCacheKey( $page );
		if ( $key === null ) {
			return;
		}

		foreach ( self::getSelectFields() as $field ) {
			if ( !property_exists( $row, $field ) ) {
				throw new InvalidArgumentException( "Missing field: $field" );
			}
		}

		$this->entries->set( $key, [ self::ROW => $row, self::FLAGS => $queryFlags ] );
	}

	/**
	 * Add information about a missing page to the process cache
	 *
	 * Callers must set the READ_LATEST flag if the row came from a DB_PRIMARY source.
	 * However, the use of such data is highly discouraged; most callers rely on seeing
	 * consistent DB_REPLICA data (e.g. REPEATABLE-READ point-in-time snapshots) and the
	 * accidental use of DB_PRIMARY data via LinkCache is prone to causing anomalies.
	 *
	 * @param LinkTarget|PageReference|array $page The page to set cached info for.
	 *        Can be given as an object or an associative array containing the
	 *        page_namespace and page_title fields.
	 *        In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 * @param int $queryFlags The query flags used to retrieve the row, IDBAccessObject::READ_*
	 */
	public function addBadLinkObj( $page, int $queryFlags = IDBAccessObject::READ_NORMAL ) {
		$key = $this->getCacheKey( $page );
		if ( $key === null ) {
			return;
		}

		// A null ROW is what marks the entry as "known missing"
		$this->entries->set( $key, [ self::ROW => null, self::FLAGS => $queryFlags ] );
	}

	/**
	 * Clear information about a page being missing from the process cache
	 *
	 * @param LinkTarget|PageReference|array|string $page The page to clear cached info for,
	 *        as an object, an array containing the page_namespace and page_title fields,
	 *        or a prefixed DB key. In MediaWiki 1.36 and earlier, only a string was accepted.
	 */
	public function clearBadLink( $page ) {
		$key = $this->getCacheKey( $page, true );
		if ( $key === null ) {
			return;
		}

		// Only drop the entry when it records a missing page; good entries are kept
		$entry = $this->entries->get( $key );
		if ( $entry && !$entry[self::ROW] ) {
			$this->entries->clear( $key );
		}
	}

	/**
	 * Clear information about a page from the process cache
	 *
	 * @param LinkTarget|PageReference|array $page The page to clear cached info for.
	 *        Can be given as an object or an associative array containing the
	 *        page_namespace and page_title fields.
	 *        In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 */
	public function clearLink( $page ) {
		$key = $this->getCacheKey( $page );
		if ( $key !== null ) {
			$this->entries->clear( $key );
		}
	}

	/**
	 * Fields that LinkCache needs to select
	 *
	 * @return string[]
	 */
	public static function getSelectFields() {
		$pageLanguageUseDB = MediaWikiServices::getInstance()->getMainConfig()
			->get( MainConfigNames::PageLanguageUseDB );

		// NOTE(review): 'page_len' is restored from upstream MediaWiki; it is read by
		// getGoodLinkFieldObj( ..., 'length' ). Confirm against the canonical file.
		$fields = array_merge(
			PageStoreRecord::REQUIRED_FIELDS,
			[
				'page_len',
				'page_content_model',
			]
		);

		if ( $pageLanguageUseDB ) {
			$fields[] = 'page_lang';
		}

		return $fields;
	}

	/**
	 * Add a title to the link cache, return the page_id or zero if non-existent.
	 * This causes the link to be looked up in the database if it is not yet cached.
	 *
	 * @deprecated since 1.37, use PageStore::getPageForLink() instead.
	 *
	 * @param LinkTarget|PageReference|array $page The page to load.
	 *        Can be given as an object or an associative array containing the
	 *        page_namespace and page_title fields.
	 *        In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 * @param int $queryFlags IDBAccessObject::READ_XXX
	 *
	 * @return int Page ID or zero
	 */
	public function addLinkObj( $page, int $queryFlags = IDBAccessObject::READ_NORMAL ) {
		// The array form is part of the documented contract, so it must not be
		// treated as an object.
		if ( is_array( $page ) ) {
			$ns = (int)$page['page_namespace'];
			$dbkey = (string)$page['page_title'];
		} else {
			$ns = $page->getNamespace();
			$dbkey = $page->getDBkey();
		}

		$row = $this->getGoodLinkRow(
			$ns,
			$dbkey,
			[ $this, 'fetchPageRow' ],
			$queryFlags
		);

		return $row ? (int)$row->page_id : 0;
	}

	/**
	 * Look a page row up in the process cache, then the WAN cache or the database.
	 *
	 * @param TitleValue $link
	 * @param callable|null $fetchCallback
	 * @param int $queryFlags
	 * @return array [ $shouldAddGoodLink, $row ], $shouldAddGoodLink is a bool indicating
	 *         whether addGoodLinkObjFromRow should be called, and $row is the row the caller
	 *         was looking for (or null, when it was not found).
	 */
	private function getGoodLinkRowInternal(
		TitleValue $link,
		?callable $fetchCallback = null,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): array {
		$callerShouldAddGoodLink = false;

		$key = $this->getCacheKey( $link );
		if ( $key === null ) {
			return [ $callerShouldAddGoodLink, null ];
		}

		$ns = $link->getNamespace();
		$dbkey = $link->getDBkey();

		// A process cache entry is only reused if it was read with at least the requested flags
		$entry = $this->entries->get( $key );
		if ( $entry && $entry[self::FLAGS] >= $queryFlags ) {
			return [ $callerShouldAddGoodLink, $entry[self::ROW] ?: null ];
		}

		if ( !$fetchCallback ) {
			return [ $callerShouldAddGoodLink, null ];
		}

		$callerShouldAddGoodLink = true;

		$wanCacheKey = $this->getPersistentCacheKey( $link );
		if ( $wanCacheKey !== null && !( $queryFlags & IDBAccessObject::READ_LATEST ) ) {
			// Some pages are often transcluded heavily, so use persistent caching
			$row = $this->wanCache->getWithSetCallback(
				$wanCacheKey,
				WANObjectCache::TTL_DAY,
				function ( $curValue, &$ttl, array &$setOpts ) use ( $fetchCallback, $ns, $dbkey ) {
					$dbr = $this->loadBalancer->getConnection( ILoadBalancer::DB_REPLICA );
					$setOpts += Database::getCacheSetOptions( $dbr );

					$row = $fetchCallback( $dbr, $ns, $dbkey, [] );
					$mtime = $row ? (int)wfTimestamp( TS_UNIX, $row->page_touched ) : false;
					$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );

					return $row;
				}
			);
		} else {
			// No persistent caching needed, but we can still use the callback.
			if ( ( $queryFlags & IDBAccessObject::READ_LATEST ) == IDBAccessObject::READ_LATEST ) {
				$dbr = $this->loadBalancer->getConnection( DB_PRIMARY );
			} else {
				$dbr = $this->loadBalancer->getConnection( DB_REPLICA );
			}
			// Must start empty: the branches below only append when a locking read is requested
			$options = [];
			if ( ( $queryFlags & IDBAccessObject::READ_EXCLUSIVE ) == IDBAccessObject::READ_EXCLUSIVE ) {
				$options[] = 'FOR UPDATE';
			} elseif ( ( $queryFlags & IDBAccessObject::READ_LOCKING ) == IDBAccessObject::READ_LOCKING ) {
				$options[] = 'LOCK IN SHARE MODE';
			}
			$row = $fetchCallback( $dbr, $ns, $dbkey, $options );
		}

		return [ $callerShouldAddGoodLink, $row ?: null ];
	}

	/**
	 * Returns the row for the page if the page exists (subject to race conditions).
	 * The row will be returned from local cache or WAN cache if possible, or it
	 * will be looked up using the callback provided.
	 *
	 * @param int $ns
	 * @param string $dbkey
	 * @param callable|null $fetchCallback A callback that will retrieve the link row with the
	 *        signature ( IReadableDatabase $db, int $ns, string $dbkey, array $queryOptions ): ?stdClass.
	 * @param int $queryFlags IDBAccessObject::READ_XXX
	 *
	 * @return stdClass|null
	 * @internal for use by PageStore. Other code should use a PageLookup instead.
	 */
	public function getGoodLinkRow(
		int $ns,
		string $dbkey,
		?callable $fetchCallback = null,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?stdClass {
		$link = TitleValue::tryNew( $ns, $dbkey );
		if ( $link === null ) {
			return null;
		}

		[ $shouldAddGoodLink, $row ] = $this->getGoodLinkRowInternal(
			$link,
			$fetchCallback,
			$queryFlags
		);

		if ( $row && $shouldAddGoodLink ) {
			try {
				$this->addGoodLinkObjFromRow( $link, $row, $queryFlags );
			} catch ( InvalidArgumentException $e ) {
				// a field is missing from $row; maybe we used a cache?; invalidate it and try again
				$this->invalidateTitle( $link );
				[ , $row ] = $this->getGoodLinkRowInternal(
					$link,
					$fetchCallback,
					$queryFlags
				);
				// The page may have vanished between the two lookups; only record a
				// good link when the retry actually produced a row.
				if ( $row ) {
					$this->addGoodLinkObjFromRow( $link, $row, $queryFlags );
				}
			}
		}

		if ( !$row ) {
			$this->addBadLinkObj( $link );
		}

		return $row ?: null;
	}

	/**
	 * Build the WAN cache key for a page, if the page is eligible for persistent caching.
	 *
	 * @param LinkTarget|PageReference|TitleValue $page
	 * @return string|null
	 */
	private function getPersistentCacheKey( $page ): ?string {
		// if no key can be derived, the page isn't cacheable
		if ( $this->getCacheKey( $page ) === null || !$this->usePersistentCache( $page ) ) {
			return null;
		}

		// NOTE(review): the 'page' key component is restored from upstream MediaWiki;
		// confirm against the canonical file, since it determines the shared cache key.
		return $this->wanCache->makeKey(
			'page',
			$page->getNamespace(),
			sha1( $page->getDBkey() )
		);
	}

	/**
	 * Decide whether rows for a page's namespace should be kept in the WAN cache.
	 *
	 * @param LinkTarget|PageReference|int $pageOrNamespace
	 * @return bool
	 */
	private function usePersistentCache( $pageOrNamespace ): bool {
		$ns = is_int( $pageOrNamespace ) ? $pageOrNamespace : $pageOrNamespace->getNamespace();
		if ( in_array( $ns, [ NS_TEMPLATE, NS_FILE, NS_CATEGORY, NS_MEDIAWIKI ], true ) ) {
			return true;
		}
		// Focus on transcluded pages more than the main content
		if ( $this->nsInfo->isContent( $ns ) ) {
			return false;
		}
		// Non-talk extension namespaces (e.g. NS_MODULE)
		return ( $ns >= 100 && $this->nsInfo->isSubject( $ns ) );
	}

	/**
	 * Select the fields listed by getSelectFields() for a single page.
	 *
	 * @param IReadableDatabase $db
	 * @param int $ns
	 * @param string $dbkey
	 * @param array $options Query options, see IDatabase::select() for details.
	 * @return stdClass|false
	 */
	private function fetchPageRow( IReadableDatabase $db, int $ns, string $dbkey, $options = [] ) {
		$queryBuilder = $db->newSelectQueryBuilder()
			->select( self::getSelectFields() )
			->from( 'page' )
			->where( [ 'page_namespace' => $ns, 'page_title' => $dbkey ] )
			->options( $options );

		return $queryBuilder->caller( __METHOD__ )->fetchRow();
	}

	/**
	 * Purge the persistent link cache for a title
	 *
	 * The process cache entry for the page is cleared as well.
	 *
	 * @param LinkTarget|PageReference $page
	 *        In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 */
	public function invalidateTitle( $page ) {
		$wanCacheKey = $this->getPersistentCacheKey( $page );
		if ( $wanCacheKey !== null ) {
			$this->wanCache->delete( $wanCacheKey );
		}

		$this->clearLink( $page );
	}

	/**
	 * Remove every entry from the process cache
	 */
	public function clear() {
		$this->entries->clear();
	}
}
/**
 * Register the un-namespaced name 'LinkCache' as an alias of the namespaced class.
 *
 * @deprecated class alias since 1.42
 */
class_alias( LinkCache::class, 'LinkCache' );