Merge "Set namespaces for dtp"
[mediawiki.git] / includes / cache / LinkCache.php
blob83abee0b2cd513aa8b8899dbe97580ea1160b0d4
1 <?php
/**
 * Page existence cache.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Cache
 */
24 namespace MediaWiki\Cache;
26 use InvalidArgumentException;
27 use MapCacheLRU;
28 use MediaWiki\Linker\LinkTarget;
29 use MediaWiki\MainConfigNames;
30 use MediaWiki\MediaWikiServices;
31 use MediaWiki\Page\PageIdentity;
32 use MediaWiki\Page\PageReference;
33 use MediaWiki\Page\PageStoreRecord;
34 use MediaWiki\Title\NamespaceInfo;
35 use MediaWiki\Title\TitleFormatter;
36 use MediaWiki\Title\TitleValue;
37 use Psr\Log\LoggerAwareInterface;
38 use Psr\Log\LoggerInterface;
39 use Psr\Log\NullLogger;
40 use stdClass;
41 use Wikimedia\ObjectCache\WANObjectCache;
42 use Wikimedia\Rdbms\Database;
43 use Wikimedia\Rdbms\IDBAccessObject;
44 use Wikimedia\Rdbms\ILoadBalancer;
45 use Wikimedia\Rdbms\IReadableDatabase;
/**
 * Cache for article titles (prefixed DB keys) and ids linked from one source
 *
 * Entries live in a bounded in-process LRU map keyed by prefixed DB key. Each
 * entry is a two-element array: the page row (or null when the page is known
 * to be missing) and the READ_* query flags the row was obtained with.
 *
 * @ingroup Cache
 */
class LinkCache implements LoggerAwareInterface {
	/** @var MapCacheLRU Process cache of page rows, keyed by prefixed DB key */
	private $entries;
	/** @var WANObjectCache Persistent cache used for selected namespaces */
	private $wanCache;
	/** @var TitleFormatter */
	private $titleFormatter;
	/** @var NamespaceInfo */
	private $nsInfo;
	/** @var ILoadBalancer|null Null when no database is set up */
	private $loadBalancer;
	/** @var LoggerInterface */
	private $logger;

	/** How many Titles to store */
	private const MAX_SIZE = 10000;

	/** Key to page row object or null */
	private const ROW = 0;
	/** Key to query READ_* flags */
	private const FLAGS = 1;

	/**
	 * @param TitleFormatter $titleFormatter
	 * @param WANObjectCache $cache
	 * @param NamespaceInfo $nsInfo
	 * @param ILoadBalancer|null $loadBalancer Use null when no database is set up, for example on installation
	 */
	public function __construct(
		TitleFormatter $titleFormatter,
		WANObjectCache $cache,
		NamespaceInfo $nsInfo,
		?ILoadBalancer $loadBalancer = null
	) {
		$this->entries = new MapCacheLRU( self::MAX_SIZE );
		$this->wanCache = $cache;
		$this->titleFormatter = $titleFormatter;
		$this->nsInfo = $nsInfo;
		$this->loadBalancer = $loadBalancer;
		// Logging is a no-op until setLogger() is called.
		$this->logger = new NullLogger();
	}

	/**
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Derive the process cache key for a page.
	 *
	 * @param LinkTarget|PageReference|array|string $page
	 * @param bool $passThrough Return $page if $page is a string
	 * @return ?string the cache key, or null if the page cannot be cached (cross-wiki
	 *  reference, page that cannot exist, external link, empty text or negative namespace)
	 * @throws InvalidArgumentException If $page is a string and $passThrough is false
	 */
	private function getCacheKey( $page, $passThrough = false ) {
		if ( is_string( $page ) ) {
			if ( $passThrough ) {
				return $page;
			} else {
				throw new InvalidArgumentException( 'The key may not be given as a string here' );
			}
		}

		if ( is_array( $page ) ) {
			$namespace = $page['page_namespace'];
			$dbkey = $page['page_title'];
			// Spaces are replaced with underscores so the result has DB key form.
			return strtr( $this->titleFormatter->formatTitle( $namespace, $dbkey ), ' ', '_' );
		}

		if ( $page instanceof PageReference && $page->getWikiId() !== PageReference::LOCAL ) {
			// No cross-wiki support yet. Perhaps LinkCache can become wiki-aware in the future.
			$this->logger->info(
				'cross-wiki page reference',
				[
					'page-wiki' => $page->getWikiId(),
					'page-reference' => $this->titleFormatter->getFullText( $page )
				]
			);
			return null;
		}

		if ( $page instanceof PageIdentity && !$page->canExist() ) {
			// Non-proper page, perhaps a special page or interwiki link or relative section link.
			$this->logger->warning(
				'non-proper page reference: {page-reference}',
				[ 'page-reference' => $this->titleFormatter->getFullText( $page ) ]
			);
			return null;
		}

		if ( $page instanceof LinkTarget
			&& ( $page->isExternal() || $page->getText() === '' || $page->getNamespace() < 0 )
		) {
			// Interwiki link or relative section link. These do not have a page ID, so they
			// can neither be "good" nor "bad" in the sense of this class.
			$this->logger->warning(
				'link to non-proper page: {page-link}',
				[ 'page-link' => $this->titleFormatter->getFullText( $page ) ]
			);
			return null;
		}

		return $this->titleFormatter->getPrefixedDBkey( $page );
	}

	/**
	 * Get the ID of a page known to the process cache
	 *
	 * @param LinkTarget|PageReference|array|string $page The page to get the ID for,
	 *  as an object, an array containing the page_namespace and page_title fields,
	 *  or a prefixed DB key. In MediaWiki 1.36 and earlier, only a string was accepted.
	 * @return int Page ID, or zero if the page was not cached or does not exist or is not a
	 *  proper page (e.g. a special page or an interwiki link).
	 */
	public function getGoodLinkID( $page ) {
		$key = $this->getCacheKey( $page, true );
		if ( $key === null ) {
			return 0;
		}

		$entry = $this->entries->get( $key );
		if ( !$entry ) {
			return 0;
		}

		$row = $entry[self::ROW];

		return $row ? (int)$row->page_id : 0;
	}

	/**
	 * Get the field of a page known to the process cache
	 *
	 * If this link is not a cached good title, it will return NULL.
	 *
	 * @param LinkTarget|PageReference|array $page The page to get cached info for.
	 *  Can be given as an object or an associative array containing the
	 *  page_namespace and page_title fields.
	 *  In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 * @param string $field ( 'id', 'length', 'redirect', 'revision', 'model', 'lang' )
	 * @return string|int|null The field value, or null if the page was not cached or does not exist
	 *  or is not a proper page (e.g. a special page or interwiki link).
	 * @throws InvalidArgumentException If $field is not one of the supported names
	 *  (only checked when a good row is cached), or if $page is a string
	 */
	public function getGoodLinkFieldObj( $page, string $field ) {
		$key = $this->getCacheKey( $page );
		if ( $key === null ) {
			return null;
		}

		$entry = $this->entries->get( $key );
		if ( !$entry ) {
			return null;
		}

		$row = $entry[self::ROW];
		if ( !$row ) {
			return null;
		}

		switch ( $field ) {
			case 'id':
				return (int)$row->page_id;
			case 'length':
				return (int)$row->page_len;
			case 'redirect':
				return (int)$row->page_is_redirect;
			case 'revision':
				return (int)$row->page_latest;
			case 'model':
				// An unset or empty content model is reported as null.
				return !empty( $row->page_content_model )
					? (string)$row->page_content_model
					: null;
			case 'lang':
				// page_lang is only selected when PageLanguageUseDB is enabled.
				return !empty( $row->page_lang )
					? (string)$row->page_lang
					: null;
			default:
				throw new InvalidArgumentException( "Unknown field: $field" );
		}
	}

	/**
	 * Check if a page is known to be missing based on the process cache
	 *
	 * @param LinkTarget|PageReference|array|string $page The page to get cached info for,
	 *  as an object, an array containing the page_namespace and page_title fields,
	 *  or a prefixed DB key. In MediaWiki 1.36 and earlier, only a string was accepted.
	 * @return bool Whether the page is known to be missing based on the process cache
	 */
	public function isBadLink( $page ) {
		$key = $this->getCacheKey( $page, true );
		if ( $key === null ) {
			return false;
		}

		$entry = $this->entries->get( $key );

		// A "bad" entry is one that exists but carries no row.
		return ( $entry && !$entry[self::ROW] );
	}

	/**
	 * Add information about an existing page to the process cache
	 *
	 * Callers must set the READ_LATEST flag if the row came from a DB_PRIMARY source.
	 * However, the use of such data is highly discouraged; most callers rely on seeing
	 * consistent DB_REPLICA data (e.g. REPEATABLE-READ point-in-time snapshots) and the
	 * accidental use of DB_PRIMARY data via LinkCache is prone to causing anomalies.
	 *
	 * @param LinkTarget|PageReference|array $page The page to set cached info for.
	 *  Can be given as an object or an associative array containing the
	 *  page_namespace and page_title fields.
	 *  In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 * @param stdClass $row Object which has all fields returned by getSelectFields().
	 * @param int $queryFlags The query flags used to retrieve the row, IDBAccessObject::READ_*
	 * @throws InvalidArgumentException If $row lacks one of the fields from getSelectFields()
	 * @since 1.19
	 */
	public function addGoodLinkObjFromRow(
		$page,
		stdClass $row,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	) {
		$key = $this->getCacheKey( $page );
		if ( $key === null ) {
			return;
		}

		// property_exists() rather than isset(): a field that is present but NULL is acceptable.
		foreach ( self::getSelectFields() as $field ) {
			if ( !property_exists( $row, $field ) ) {
				throw new InvalidArgumentException( "Missing field: $field" );
			}
		}

		$this->entries->set( $key, [ self::ROW => $row, self::FLAGS => $queryFlags ] );
	}

	/**
	 * Add information about a missing page to the process cache
	 *
	 * Callers must set the READ_LATEST flag if the row came from a DB_PRIMARY source.
	 * However, the use of such data is highly discouraged; most callers rely on seeing
	 * consistent DB_REPLICA data (e.g. REPEATABLE-READ point-in-time snapshots) and the
	 * accidental use of DB_PRIMARY data via LinkCache is prone to causing anomalies.
	 *
	 * @param LinkTarget|PageReference|array $page The page to set cached info for.
	 *  Can be given as an object or an associative array containing the
	 *  page_namespace and page_title fields.
	 *  In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 * @param int $queryFlags The query flags used to retrieve the row, IDBAccessObject::READ_*
	 */
	public function addBadLinkObj( $page, int $queryFlags = IDBAccessObject::READ_NORMAL ) {
		$key = $this->getCacheKey( $page );
		if ( $key === null ) {
			return;
		}

		$this->entries->set( $key, [ self::ROW => null, self::FLAGS => $queryFlags ] );
	}

	/**
	 * Clear information about a page being missing from the process cache
	 *
	 * Entries for existing pages are left alone.
	 *
	 * @param LinkTarget|PageReference|array|string $page The page to clear cached info for,
	 *  as an object, an array containing the page_namespace and page_title fields,
	 *  or a prefixed DB key. In MediaWiki 1.36 and earlier, only a string was accepted.
	 */
	public function clearBadLink( $page ) {
		$key = $this->getCacheKey( $page, true );
		if ( $key === null ) {
			return;
		}

		$entry = $this->entries->get( $key );
		if ( $entry && !$entry[self::ROW] ) {
			$this->entries->clear( $key );
		}
	}

	/**
	 * Clear information about a page from the process cache
	 *
	 * @param LinkTarget|PageReference|array $page The page to clear cached info for.
	 *  Can be given as an object or an associative array containing the
	 *  page_namespace and page_title fields.
	 *  In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 */
	public function clearLink( $page ) {
		$key = $this->getCacheKey( $page );
		if ( $key !== null ) {
			$this->entries->clear( $key );
		}
	}

	/**
	 * Fields that LinkCache needs to select
	 *
	 * @since 1.28
	 * @return string[] PageStoreRecord::REQUIRED_FIELDS plus page_len and page_content_model,
	 *  and page_lang when the PageLanguageUseDB setting is enabled
	 */
	public static function getSelectFields() {
		$pageLanguageUseDB = MediaWikiServices::getInstance()->getMainConfig()
			->get( MainConfigNames::PageLanguageUseDB );

		$fields = array_merge(
			PageStoreRecord::REQUIRED_FIELDS,
			[
				'page_len',
				'page_content_model',
			]
		);

		if ( $pageLanguageUseDB ) {
			$fields[] = 'page_lang';
		}

		return $fields;
	}

	/**
	 * Add a title to the link cache, return the page_id or zero if non-existent.
	 * This causes the link to be looked up in the database if it is not yet cached.
	 *
	 * @deprecated since 1.37, use PageStore::getPageForLink() instead.
	 *
	 * @param LinkTarget|PageReference|array $page The page to load.
	 *  Can be given as an object or an associative array containing the
	 *  page_namespace and page_title fields.
	 *  In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 * @param int $queryFlags IDBAccessObject::READ_XXX
	 *
	 * @return int Page ID or zero
	 */
	public function addLinkObj( $page, int $queryFlags = IDBAccessObject::READ_NORMAL ) {
		if ( is_array( $page ) ) {
			// The array form is documented above and supported by getCacheKey(), which
			// likewise replaces spaces with underscores for array input.
			$ns = (int)$page['page_namespace'];
			$dbkey = strtr( (string)$page['page_title'], ' ', '_' );
		} else {
			$ns = $page->getNamespace();
			$dbkey = $page->getDBkey();
		}

		$row = $this->getGoodLinkRow(
			$ns,
			$dbkey,
			[ $this, 'fetchPageRow' ],
			$queryFlags
		);

		return $row ? (int)$row->page_id : 0;
	}

	/**
	 * Look a page row up in the process cache, then (if a callback is given) in the
	 * persistent cache or the database.
	 *
	 * @param TitleValue $link
	 * @param callable|null $fetchCallback
	 * @param int $queryFlags
	 * @return array [ $shouldAddGoodLink, $row ], $shouldAddGoodLink is a bool indicating
	 *  whether addGoodLinkObjFromRow should be called, and $row is the row the caller was looking
	 *  for (or null, when it was not found).
	 */
	private function getGoodLinkRowInternal(
		TitleValue $link,
		?callable $fetchCallback = null,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): array {
		$callerShouldAddGoodLink = false;

		$key = $this->getCacheKey( $link );
		if ( $key === null ) {
			return [ $callerShouldAddGoodLink, null ];
		}

		$ns = $link->getNamespace();
		$dbkey = $link->getDBkey();

		// A cached entry is only usable if it was read with at least the requested flags.
		$entry = $this->entries->get( $key );
		if ( $entry && $entry[self::FLAGS] >= $queryFlags ) {
			return [ $callerShouldAddGoodLink, $entry[self::ROW] ?: null ];
		}

		if ( !$fetchCallback || $this->loadBalancer === null ) {
			// No way to look the page up: either no callback was given, or no database
			// is set up (the load balancer is null, e.g. during installation).
			return [ $callerShouldAddGoodLink, null ];
		}

		// From here on the row comes from outside the process cache, so the caller
		// is expected to store it.
		$callerShouldAddGoodLink = true;

		$wanCacheKey = $this->getPersistentCacheKey( $link );
		if ( $wanCacheKey !== null && !( $queryFlags & IDBAccessObject::READ_LATEST ) ) {
			// Some pages are often transcluded heavily, so use persistent caching
			$row = $this->wanCache->getWithSetCallback(
				$wanCacheKey,
				WANObjectCache::TTL_DAY,
				function ( $curValue, &$ttl, array &$setOpts ) use ( $fetchCallback, $ns, $dbkey ) {
					$dbr = $this->loadBalancer->getConnection( ILoadBalancer::DB_REPLICA );
					$setOpts += Database::getCacheSetOptions( $dbr );

					$row = $fetchCallback( $dbr, $ns, $dbkey, [] );
					// Let the cache pick a TTL based on when the page was last touched.
					$mtime = $row ? (int)wfTimestamp( TS_UNIX, $row->page_touched ) : false;
					$ttl = $this->wanCache->adaptiveTTL( $mtime, $ttl );

					return $row;
				}
			);
		} else {
			// No persistent caching needed, but we can still use the callback.
			if ( ( $queryFlags & IDBAccessObject::READ_LATEST ) === IDBAccessObject::READ_LATEST ) {
				$dbr = $this->loadBalancer->getConnection( DB_PRIMARY );
			} else {
				$dbr = $this->loadBalancer->getConnection( DB_REPLICA );
			}
			$options = [];
			if ( ( $queryFlags & IDBAccessObject::READ_EXCLUSIVE ) === IDBAccessObject::READ_EXCLUSIVE ) {
				$options[] = 'FOR UPDATE';
			} elseif ( ( $queryFlags & IDBAccessObject::READ_LOCKING ) === IDBAccessObject::READ_LOCKING ) {
				$options[] = 'LOCK IN SHARE MODE';
			}
			$row = $fetchCallback( $dbr, $ns, $dbkey, $options );
		}

		return [ $callerShouldAddGoodLink, $row ?: null ];
	}

	/**
	 * Returns the row for the page if the page exists (subject to race conditions).
	 * The row will be returned from local cache or WAN cache if possible, or it
	 * will be looked up using the callback provided.
	 *
	 * When no row is found, the page is recorded as a bad link in the process cache.
	 *
	 * @param int $ns
	 * @param string $dbkey
	 * @param callable|null $fetchCallback A callback that will retrieve the link row with the
	 *  signature ( IReadableDatabase $db, int $ns, string $dbkey, array $queryOptions ): stdClass|false|null.
	 * @param int $queryFlags IDBAccessObject::READ_XXX
	 *
	 * @return stdClass|null
	 * @internal for use by PageStore. Other code should use a PageLookup instead.
	 */
	public function getGoodLinkRow(
		int $ns,
		string $dbkey,
		?callable $fetchCallback = null,
		int $queryFlags = IDBAccessObject::READ_NORMAL
	): ?stdClass {
		$link = TitleValue::tryNew( $ns, $dbkey );
		if ( $link === null ) {
			return null;
		}

		[ $shouldAddGoodLink, $row ] = $this->getGoodLinkRowInternal(
			$link,
			$fetchCallback,
			$queryFlags
		);

		if ( $row ) {
			if ( $shouldAddGoodLink ) {
				try {
					$this->addGoodLinkObjFromRow( $link, $row, $queryFlags );
				} catch ( InvalidArgumentException $e ) {
					// a field is missing from $row; maybe we used a cache?; invalidate it and try again
					$this->invalidateTitle( $link );
					[ , $row ] = $this->getGoodLinkRowInternal(
						$link,
						$fetchCallback,
						$queryFlags
					);
					if ( $row ) {
						$this->addGoodLinkObjFromRow( $link, $row, $queryFlags );
					} else {
						// The second lookup found nothing. Passing null on to
						// addGoodLinkObjFromRow() would be a TypeError, so record a miss.
						$this->addBadLinkObj( $link );
					}
				}
			}
		} else {
			$this->addBadLinkObj( $link );
		}

		return $row ?: null;
	}

	/**
	 * @param LinkTarget|PageReference|TitleValue $page
	 * @return string|null The WAN cache key, or null if the page is not persistently cached
	 */
	private function getPersistentCacheKey( $page ) {
		// if no key can be derived, the page isn't cacheable
		if ( $this->getCacheKey( $page ) === null || !$this->usePersistentCache( $page ) ) {
			return null;
		}

		return $this->wanCache->makeKey(
			'page',
			$page->getNamespace(),
			sha1( $page->getDBkey() )
		);
	}

	/**
	 * @param LinkTarget|PageReference|int $pageOrNamespace
	 * @return bool
	 */
	private function usePersistentCache( $pageOrNamespace ) {
		$ns = is_int( $pageOrNamespace ) ? $pageOrNamespace : $pageOrNamespace->getNamespace();
		if ( in_array( $ns, [ NS_TEMPLATE, NS_FILE, NS_CATEGORY, NS_MEDIAWIKI ], true ) ) {
			return true;
		}
		// Focus on transcluded pages more than the main content
		if ( $this->nsInfo->isContent( $ns ) ) {
			return false;
		}
		// Non-talk extension namespaces (e.g. NS_MODULE)
		return ( $ns >= 100 && $this->nsInfo->isSubject( $ns ) );
	}

	/**
	 * Select the page row for the given namespace and DB key.
	 *
	 * @param IReadableDatabase $db
	 * @param int $ns
	 * @param string $dbkey
	 * @param array $options Query options, see IDatabase::select() for details.
	 * @return stdClass|false
	 */
	private function fetchPageRow( IReadableDatabase $db, int $ns, string $dbkey, $options = [] ) {
		$queryBuilder = $db->newSelectQueryBuilder()
			->select( self::getSelectFields() )
			->from( 'page' )
			->where( [ 'page_namespace' => $ns, 'page_title' => $dbkey ] )
			->options( $options );

		return $queryBuilder->caller( __METHOD__ )->fetchRow();
	}

	/**
	 * Purge the persistent link cache for a title
	 *
	 * The process cache entry for the page is cleared as well.
	 *
	 * @param LinkTarget|PageReference $page
	 *  In MediaWiki 1.36 and earlier, only LinkTarget was accepted.
	 * @since 1.28
	 */
	public function invalidateTitle( $page ) {
		$wanCacheKey = $this->getPersistentCacheKey( $page );
		if ( $wanCacheKey !== null ) {
			$this->wanCache->delete( $wanCacheKey );
		}

		$this->clearLink( $page );
	}

	/**
	 * Clears cache
	 */
	public function clear() {
		$this->entries->clear();
	}
}
/** @deprecated class alias since 1.42 */
class_alias( LinkCache::class, 'LinkCache' );