Merge ".mailmap: Correct two contributor names"
[mediawiki.git] / includes / page / PageStore.php
blob 452664e79a2e6a7f8efe5e064117f504fd416141
1 <?php
3 namespace MediaWiki\Page;
5 use EmptyIterator;
6 use InvalidArgumentException;
7 use Iterator;
8 use MediaWiki\Cache\LinkCache;
9 use MediaWiki\Config\ServiceOptions;
10 use MediaWiki\DAO\WikiAwareEntity;
11 use MediaWiki\MainConfigNames;
12 use MediaWiki\Title\MalformedTitleException;
13 use MediaWiki\Title\NamespaceInfo;
14 use MediaWiki\Title\TitleParser;
15 use stdClass;
16 use Wikimedia\Assert\Assert;
17 use Wikimedia\Parsoid\Core\LinkTarget as ParsoidLinkTarget;
18 use Wikimedia\Rdbms\IDBAccessObject;
19 use Wikimedia\Rdbms\ILoadBalancer;
20 use Wikimedia\Rdbms\IReadableDatabase;
21 use Wikimedia\Stats\StatsFactory;
23 /**
24 * @since 1.36
25 * @unstable
27 class PageStore implements PageLookup {
29 private ServiceOptions $options;
30 private ILoadBalancer $dbLoadBalancer;
31 private NamespaceInfo $namespaceInfo;
32 private TitleParser $titleParser;
33 private ?LinkCache $linkCache;
34 private StatsFactory $stats;
35 /** @var string|false */
36 private $wikiId;
38 /**
39 * @internal for use by service wiring
41 public const CONSTRUCTOR_OPTIONS = [
42 MainConfigNames::PageLanguageUseDB,
/**
 * Wire up the store with its collaborators.
 *
 * @param ServiceOptions $options Checked against self::CONSTRUCTOR_OPTIONS
 * @param ILoadBalancer $dbLoadBalancer
 * @param NamespaceInfo $namespaceInfo
 * @param TitleParser $titleParser
 * @param ?LinkCache $linkCache May only be given when $wikiId is the local wiki
 * @param StatsFactory $stats
 * @param false|string $wikiId
 *
 * @throws InvalidArgumentException if a LinkCache is combined with a non-local wiki ID
 */
public function __construct(
	ServiceOptions $options,
	ILoadBalancer $dbLoadBalancer,
	NamespaceInfo $namespaceInfo,
	TitleParser $titleParser,
	?LinkCache $linkCache,
	StatsFactory $stats,
	$wikiId = WikiAwareEntity::LOCAL
) {
	$options->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

	// LinkCache currently doesn't support cross-wiki PageReferences.
	// Once it does, this guard can be dropped; at that point LinkCache
	// should probably stop being optional as well.
	if ( $linkCache && $wikiId !== WikiAwareEntity::LOCAL ) {
		throw new InvalidArgumentException( "Can't use LinkCache with pages from $wikiId" );
	}

	$this->wikiId = $wikiId;
	$this->options = $options;
	$this->stats = $stats;
	$this->linkCache = $linkCache;
	$this->titleParser = $titleParser;
	$this->namespaceInfo = $namespaceInfo;
	$this->dbLoadBalancer = $dbLoadBalancer;
}
/**
 * Count one LinkCache access in the `pagestore_linkcache_accesses_total` counter.
 *
 * The access is labelled with a status and a reason. Known reasons are:
 *  - `good`: The page was found in LinkCache and was complete.
 *  - `bad_early`: The page was known by LinkCache to not exist.
 *  - `bad_late`: The page was not found in LinkCache and did not exist.
 *  - `incomplete_loaded`: The page was found in LinkCache but was incomplete.
 *  - `incomplete_missing`: Incomplete page data was found in LinkCache, and the page did not exist.
 *
 * The counter is also copied to the statsd key "LinkCache.<status>.<reason>",
 * where underscores in the reason are replaced by dots.
 *
 * @param string $hitOrMiss 'hit' or 'miss'
 * @param string $reason Well-known reason string
 * @return void
 */
private function incrementLinkCacheHitOrMiss( $hitOrMiss, $reason ) {
	// The statsd key uses dots where the label value uses underscores.
	$statsdKey = 'LinkCache.' . $hitOrMiss . '.' . str_replace( '_', '.', $reason );

	$this->stats
		->getCounter( 'pagestore_linkcache_accesses_total' )
		->setLabel( 'reason', $reason )
		->setLabel( 'status', $hitOrMiss )
		->copyToStatsdAt( $statsdKey )
		->increment();
}
/**
 * Resolve a link target to a page identity, whether or not the page exists.
 *
 * Links in NS_MEDIA are looked up in NS_FILE.
 *
 * @param ParsoidLinkTarget $link Must not be external, must have a non-empty
 *   DB key, and must not be in a virtual namespace (after the NS_MEDIA mapping).
 * @param int $queryFlags
 *
 * @return ProperPageIdentity The result of getPageByName() if there is one,
 *   otherwise a PageIdentityValue with page ID 0.
 */
public function getPageForLink(
	ParsoidLinkTarget $link,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): ProperPageIdentity {
	Assert::parameter( !$link->isExternal(), '$link', 'must not be external' );

	$dbKey = $link->getDBkey();
	Assert::parameter( $dbKey !== '', '$link', 'must not be relative' );

	// Map Media links to File namespace
	$linkNs = $link->getNamespace();
	$ns = $linkNs === NS_MEDIA ? NS_FILE : $linkNs;

	Assert::parameter( $ns >= 0, '$link', 'namespace must not be virtual' );

	return $this->getPageByName( $ns, $dbKey, $queryFlags )
		?? new PageIdentityValue( 0, $ns, $dbKey, $this->wikiId );
}
/**
 * Look up an existing page by namespace and DB key.
 *
 * The lookup goes through LinkCache when one was supplied to the
 * constructor, and straight to the database otherwise.
 *
 * @param int $namespace Must not be negative (virtual)
 * @param string $dbKey Must not be empty and must not contain spaces
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null
 */
public function getPageByName(
	int $namespace,
	string $dbKey,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): ?ExistingPageRecord {
	Assert::parameter( $dbKey !== '', '$dbKey', 'must not be empty' );
	// strpos() returns 0 for a leading space, which is falsy; compare against
	// false explicitly so that a space at offset 0 is rejected as well.
	Assert::parameter( strpos( $dbKey, ' ' ) === false, '$dbKey', 'must not contain spaces' );
	Assert::parameter( $namespace >= 0, '$namespace', 'must not be virtual' );

	if ( $this->linkCache ) {
		return $this->getPageByNameViaLinkCache( $namespace, $dbKey, $queryFlags );
	}

	// The conditions are only needed for the direct database lookup.
	$conds = [
		'page_namespace' => $namespace,
		'page_title' => $dbKey,
	];

	return $this->loadPageFromConditions( $conds, $queryFlags );
}
/**
 * Look up a page by namespace and DB key, using LinkCache as the first stop.
 *
 * Every outcome is reported through incrementLinkCacheHitOrMiss().
 *
 * @param int $namespace
 * @param string $dbKey
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null
 */
private function getPageByNameViaLinkCache(
	int $namespace,
	string $dbKey,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): ?ExistingPageRecord {
	$conds = [
		'page_namespace' => $namespace,
		'page_title' => $dbKey,
	];

	// The bad-link shortcut is only taken for READ_NORMAL lookups.
	if ( $queryFlags === IDBAccessObject::READ_NORMAL && $this->linkCache->isBadLink( $conds ) ) {
		$this->incrementLinkCacheHitOrMiss( 'hit', 'bad_early' );
		return null;
	}

	$caller = __METHOD__;
	// Stays 'hit' unless LinkCache has to invoke the loader below.
	$status = 'hit';

	// Loader used by LinkCache when it has no cached row for this page.
	$loadRow = function ( IReadableDatabase $dbr, $ns, $dbkey, array $options )
		use ( $conds, $caller, &$status )
	{
		$status = 'miss';

		return $this->newSelectQueryBuilder( $dbr )
			->fields( $this->getSelectFields() )
			->conds( $conds )
			->options( $options )
			->caller( $caller )
			->fetchRow();
	};

	// Try to get the row from LinkCache, falling back to the loader if it's not cached.
	// When getGoodLinkRow() returns, LinkCache should have an entry for the row, good or bad.
	$row = $this->linkCache->getGoodLinkRow( $namespace, $dbKey, $loadRow, $queryFlags );

	if ( !$row ) {
		$this->incrementLinkCacheHitOrMiss( $status, 'bad_late' );
		return null;
	}

	try {
		// NOTE: LinkCache may not include namespace and title in the cached row,
		//       since it's already used as the cache key!
		$row->page_namespace = $namespace;
		$row->page_title = $dbKey;
		$page = $this->newPageRecordFromRow( $row );

		// The row we got from LinkCache was usable.
		$this->incrementLinkCacheHitOrMiss( $status, 'good' );
	} catch ( InvalidArgumentException $e ) {
		// The cached row was incomplete or corrupt,
		// so load the page from the database instead.
		$page = $this->loadPageFromConditions( $conds, $queryFlags );

		if ( !$page ) {
			// An incomplete row was cached, but the full row could not be
			// loaded from the database. This should only happen if the page
			// was deleted under our feet, which should be very rare.
			// Update the LinkCache to reflect the new situation.
			$this->linkCache->addBadLinkObj( $conds );
		}

		// On success, PageSelectQueryBuilder should have added the full row to the LinkCache by now.
		$this->incrementLinkCacheHitOrMiss(
			$status,
			$page ? 'incomplete_loaded' : 'incomplete_missing'
		);
	}

	return $page;
}
/**
 * Parse a title string and resolve it to a page identity.
 *
 * @since 1.37
 *
 * @param string $text
 * @param int $defaultNamespace Namespace to assume by default (usually NS_MAIN)
 * @param int $queryFlags
 *
 * @return ProperPageIdentity|null Null if the text cannot be parsed as a title,
 *   or if the parsed title is rejected by getPageForLink().
 */
public function getPageByText(
	string $text,
	int $defaultNamespace = NS_MAIN,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): ?ProperPageIdentity {
	try {
		return $this->getPageForLink(
			$this->titleParser->parseTitle( $text, $defaultNamespace ),
			$queryFlags
		);
	} catch ( MalformedTitleException | InvalidArgumentException $e ) {
		// Note that even some well-formed links are still invalid parameters
		// for getPageForLink(), e.g. interwiki links or special pages.
		return null;
	}
}
/**
 * Parse a title string and return the record of the page it names, if any.
 *
 * @since 1.37
 *
 * @param string $text
 * @param int $defaultNamespace Namespace to assume by default (usually NS_MAIN)
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null Null if getPageByText() yields nothing,
 *   or if getPageByReference() finds no record for the resulting identity.
 */
public function getExistingPageByText(
	string $text,
	int $defaultNamespace = NS_MAIN,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): ?ExistingPageRecord {
	$identity = $this->getPageByText( $text, $defaultNamespace, $queryFlags );

	return $identity
		? $this->getPageByReference( $identity, $queryFlags )
		: null;
}
/**
 * Look up an existing page by its page ID.
 *
 * @param int $pageId Must be greater than zero
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null
 */
public function getPageById(
	int $pageId,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): ?ExistingPageRecord {
	Assert::parameter( $pageId > 0, '$pageId', 'must be greater than zero' );

	// XXX: no caching needed?
	return $this->loadPageFromConditions( [ 'page_id' => $pageId ], $queryFlags );
}
/**
 * Turn a page reference into the record of the page it refers to.
 *
 * An ExistingPageRecord passed in with READ_NORMAL is returned unchanged;
 * anything else is looked up by namespace and DB key.
 *
 * @param PageReference $page Must belong to this store's wiki and must not
 *   be in a virtual namespace. A PageIdentity must additionally be able to exist.
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null The page's PageRecord, or null if the page was not found.
 */
public function getPageByReference(
	PageReference $page,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): ?ExistingPageRecord {
	$page->assertWiki( $this->wikiId );

	$ns = $page->getNamespace();
	Assert::parameter( $ns >= 0, '$page', 'namespace must not be virtual' );

	$isPlainRead = $queryFlags === IDBAccessObject::READ_NORMAL;
	if ( $isPlainRead && $page instanceof ExistingPageRecord ) {
		return $page;
	}

	if ( $page instanceof PageIdentity ) {
		Assert::parameter( $page->canExist(), '$page', 'Must be a proper page' );
	}

	return $this->getPageByName( $ns, $page->getDBkey(), $queryFlags );
}
/**
 * Fetch a single page record matching the given query conditions.
 *
 * @param array $conds Conditions handed to the select query builder
 * @param int $queryFlags
 *
 * @return ExistingPageRecord|null
 */
private function loadPageFromConditions(
	array $conds,
	int $queryFlags = IDBAccessObject::READ_NORMAL
): ?ExistingPageRecord {
	$select = $this->newSelectQueryBuilder( $queryFlags );
	$select->conds( $conds )->caller( __METHOD__ );

	// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
	return $select->fetchPageRecord();
}
/**
 * Wrap a database row in a page record bound to this store's wiki.
 *
 * @internal
 *
 * @param stdClass $row
 *
 * @return ExistingPageRecord
 */
public function newPageRecordFromRow( stdClass $row ): ExistingPageRecord {
	return new PageStoreRecord( $row, $this->wikiId );
}
/**
 * List the page table fields to select when building page records.
 *
 * `page_lang` is included only when the PageLanguageUseDB option is set.
 *
 * @internal
 *
 * @return string[]
 */
public function getSelectFields(): array {
	$pageFields = [
		'page_id',
		'page_namespace',
		'page_title',
		'page_is_redirect',
		'page_is_new',
		'page_touched',
		'page_links_updated',
		'page_latest',
		'page_len',
		'page_content_model'
	];

	if ( $this->options->get( MainConfigNames::PageLanguageUseDB ) ) {
		$pageFields[] = 'page_lang';
	}

	// Rows selected with these fields are put into LinkCache, so every
	// field LinkCache needs has to be present too (without duplicates).
	$withLinkCacheFields = array_merge( $pageFields, LinkCache::getSelectFields() );

	return array_unique( $withLinkCacheFields );
}
/**
 * Create a query builder for selecting pages.
 *
 * @param IReadableDatabase|int $dbOrFlags The database connection to use, or a READ_XXX constant
 *   indicating what kind of database connection to use. With READ_LATEST set, a DB_PRIMARY
 *   connection is requested, otherwise a DB_REPLICA one. A connection passed in directly
 *   is used with READ_NORMAL recency.
 *
 * @return PageSelectQueryBuilder
 */
public function newSelectQueryBuilder( $dbOrFlags = IDBAccessObject::READ_NORMAL ): PageSelectQueryBuilder {
	if ( $dbOrFlags instanceof IReadableDatabase ) {
		$flags = IDBAccessObject::READ_NORMAL;
		$db = $dbOrFlags;
	} else {
		$flags = $dbOrFlags;
		$wantsLatest = ( $flags & IDBAccessObject::READ_LATEST ) == IDBAccessObject::READ_LATEST;
		$db = $this->dbLoadBalancer->getConnection(
			$wantsLatest ? DB_PRIMARY : DB_REPLICA,
			[],
			$this->wikiId
		);
	}

	$builder = new PageSelectQueryBuilder( $db, $this, $this->linkCache );
	$builder->recency( $flags );

	return $builder;
}
/**
 * Get the subpages of the given page.
 *
 * Subpages are the pages in the same namespace whose title starts with the
 * page's DB key followed by a slash. The result is empty if the namespace
 * doesn't support subpages.
 *
 * @param PageIdentity $page
 * @param int $limit Maximum number of subpages to fetch
 *
 * @return Iterator<ExistingPageRecord>
 */
public function getSubpages( PageIdentity $page, int $limit ): Iterator {
	$ns = $page->getNamespace();

	if ( !$this->namespaceInfo->hasSubpages( $ns ) ) {
		return new EmptyIterator();
	}

	$subpagePrefix = $page->getDBkey() . '/';

	return $this->newSelectQueryBuilder()
		->whereTitlePrefix( $ns, $subpagePrefix )
		->orderByTitle()
		->limit( $limit )
		->caller( __METHOD__ )
		->fetchPageRecords();
}