Merge "Use adaptive CDN TTLs for page views"
[mediawiki.git] / includes / api / ApiPageSet.php
blob46c57b86b4d244ad38fb4100c1ca5976e8836755
1 <?php
2 /**
5 * Created on Sep 24, 2006
7 * Copyright © 2006, 2013 Yuri Astrakhan "<Firstname><Lastname>@gmail.com"
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License along
20 * with this program; if not, write to the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 * http://www.gnu.org/copyleft/gpl.html
24 * @file
27 /**
28 * This class contains a list of pages that the client has requested.
29 * Initially, when the client passes in titles=, pageids=, or revisions=
30 * parameter, an instance of the ApiPageSet class will normalize titles,
31 * determine if the pages/revisions exist, and prefetch any additional page
32 * data requested.
34 * When a generator is used, the result of the generator will become the input
35 * for the second instance of this class, and all subsequent actions will use
36 * the second instance for all their work.
38 * @ingroup API
39 * @since 1.21 derives from ApiBase instead of ApiQueryBase
41 class ApiPageSet extends ApiBase {
/**
 * Constructor flag: The new instance of ApiPageSet will ignore the 'generator=' parameter
 * @since 1.21
 */
const DISABLE_GENERATORS = 1;

/** Module handed to the constructor */
private $mDbSource;
/** Request parameters as returned by extractRequestParams() */
private $mParams;
/** Value of the 'redirects' request parameter */
private $mResolveRedirects;
/** Value of the 'converttitles' request parameter */
private $mConvertTitles;
/** False when the DISABLE_GENERATORS flag was given to the constructor */
private $mAllowGenerator;

/** @var array [ns][dbkey] => page_id or negative when missing */
private $mAllPages = [];
private $mTitles = [];
/** @var array [ns][dbkey] => page_id or negative when missing */
private $mGoodAndMissingPages = [];
/** @var array [ns][dbkey] => page_id */
private $mGoodPages = [];
private $mGoodTitles = [];
/** @var array [ns][dbkey] => fake page_id */
private $mMissingPages = [];
private $mMissingTitles = [];
/** @var array [fake_page_id] => [ 'title' => $title, 'invalidreason' => $reason ] */
private $mInvalidTitles = [];
private $mMissingPageIDs = [];
private $mRedirectTitles = [];
private $mSpecialTitles = [];
private $mNormalizedTitles = [];
private $mInterwikiTitles = [];
/** @var Title[] */
private $mPendingRedirectIDs = [];
private $mResolvedRedirectTitles = [];
private $mConvertedTitles = [];
private $mGoodRevIDs = [];
private $mLiveRevIDs = [];
private $mDeletedRevIDs = [];
private $mMissingRevIDs = [];
/** @var array [ns][dbkey] => data array */
private $mGeneratorData = [];
/** Next fake (negative) page ID to hand out; decremented after each use */
private $mFakePageId = -1;
private $mCacheMode = 'public';
private $mRequestedPageFields = [];
/** @var int */
private $mDefaultNamespace = NS_MAIN;
/** @var callable|null */
private $mRedirectMergePolicy;
/**
 * Append one entry per element of $values to $result.
 *
 * Title objects are expanded through ApiQueryBase::addTitleInfo(); any other
 * value is wrapped as [ $name => value ] when $name is given and is used
 * as-is otherwise. Every flag is then set to true on the entry.
 *
 * @param array $result Output
 * @param array $values Values to add
 * @param string[] $flags The names of boolean flags to mark this element
 * @param string $name If given, name of the value
 */
private static function addValues( array &$result, $values, $flags = [], $name = null ) {
	foreach ( $values as $value ) {
		if ( $value instanceof Title ) {
			$entry = [];
			ApiQueryBase::addTitleInfo( $entry, $value );
		} else {
			$entry = ( $name !== null ) ? [ $name => $value ] : $value;
		}

		foreach ( $flags as $flagName ) {
			$entry[$flagName] = true;
		}

		$result[] = $entry;
	}
}
/**
 * @param ApiBase $dbSource Module implementing getDB().
 *   Allows PageSet to reuse existing db connection from the shared state like ApiQuery.
 * @param int $flags Zero or more flags like DISABLE_GENERATORS
 * @param int $defaultNamespace The namespace to use if none is specified by a prefix.
 * @since 1.21 accepts $flags instead of two boolean values
 */
public function __construct( ApiBase $dbSource, $flags = 0, $defaultNamespace = NS_MAIN ) {
	parent::__construct( $dbSource->getMain(), $dbSource->getModuleName() );

	$this->mDbSource = $dbSource;
	// Generators stay enabled unless the DISABLE_GENERATORS bit is set
	$generatorsDisabled = ( $flags & self::DISABLE_GENERATORS ) != 0;
	$this->mAllowGenerator = !$generatorsDisabled;
	$this->mDefaultNamespace = $defaultNamespace;

	// Cache the request parameters and the two switches read from them
	$params = $this->extractRequestParams();
	$this->mParams = $params;
	$this->mResolveRedirects = $params['redirects'];
	$this->mConvertTitles = $params['converttitles'];
}
/**
 * Dry-run counterpart of execute(): call this when execute() is not going
 * to be called, so that all relevant parameters are marked as used and are
 * not reported as unused-parameter warnings.
 */
public function executeDryRun() {
	$isDryRun = true;
	$this->executeInternal( $isDryRun );
}
/**
 * Populate the PageSet from the request parameters (non-dry-run mode of
 * executeInternal()).
 */
public function execute() {
	$isDryRun = false;
	$this->executeInternal( $isDryRun );
}
/**
 * Populate the PageSet from the request parameters.
 *
 * When a generator is allowed and requested, the generator module is run and
 * its output becomes the content of this page set. Otherwise the set is
 * initialised from exactly one of the titles/pageids/revids parameters.
 *
 * @param bool $isDryRun If true, instantiates generator, but only to mark
 *    relevant parameters as used
 */
private function executeInternal( $isDryRun ) {
	$generatorName = $this->mAllowGenerator ? $this->mParams['generator'] : null;
	if ( isset( $generatorName ) ) {
		$dbSource = $this->mDbSource;
		if ( !$dbSource instanceof ApiQuery ) {
			// If the parent container of this pageset is not ApiQuery, we must create it to run generator
			$dbSource = $this->getMain()->getModuleManager()->getModule( 'query' );
		}
		$generator = $dbSource->getModuleManager()->getModule( $generatorName, null, true );
		if ( $generator === null ) {
			$this->dieUsage( 'Unknown generator=' . $generatorName, 'badgenerator' );
		}
		if ( !$generator instanceof ApiQueryGeneratorBase ) {
			$this->dieUsage( "Module $generatorName cannot be used as a generator", 'badgenerator' );
		}
		// Create a temporary pageset to store generator's output,
		// add any additional fields generator may need, and execute pageset to populate titles/pageids
		$tmpPageSet = new ApiPageSet( $dbSource, ApiPageSet::DISABLE_GENERATORS );
		$generator->setGeneratorMode( $tmpPageSet );
		$this->mCacheMode = $generator->getCacheMode( $generator->extractRequestParams() );

		if ( !$isDryRun ) {
			$generator->requestExtraData( $tmpPageSet );
		}
		$tmpPageSet->executeInternal( $isDryRun );

		// populate this pageset with the generator output
		if ( !$isDryRun ) {
			$generator->executeGenerator( $this );

			// $this must not be passed by reference directly (this breaks on
			// PHP 7.1), so hand the hook a local variable holding the same object.
			$apiModule = $this;
			Hooks::run( 'APIQueryGeneratorAfterExecute', [ &$generator, &$apiModule ] );
		} else {
			// Prevent warnings from being reported on these parameters
			$main = $this->getMain();
			foreach ( $generator->extractRequestParams() as $paramName => $param ) {
				$main->markParamsUsed( $generator->encodeParamName( $paramName ) );
			}
		}

		if ( !$isDryRun ) {
			$this->resolvePendingRedirects();
		}
	} else {
		// Only one of the titles/pageids/revids is allowed at the same time
		$dataSource = null;
		if ( isset( $this->mParams['titles'] ) ) {
			$dataSource = 'titles';
		}
		if ( isset( $this->mParams['pageids'] ) ) {
			if ( isset( $dataSource ) ) {
				$this->dieUsage( "Cannot use 'pageids' at the same time as '$dataSource'", 'multisource' );
			}
			$dataSource = 'pageids';
		}
		if ( isset( $this->mParams['revids'] ) ) {
			if ( isset( $dataSource ) ) {
				$this->dieUsage( "Cannot use 'revids' at the same time as '$dataSource'", 'multisource' );
			}
			$dataSource = 'revids';
		}

		if ( !$isDryRun ) {
			// Populate page information with the original user input
			switch ( $dataSource ) {
				case 'titles':
					$this->initFromTitles( $this->mParams['titles'] );
					break;
				case 'pageids':
					$this->initFromPageIds( $this->mParams['pageids'] );
					break;
				case 'revids':
					if ( $this->mResolveRedirects ) {
						$this->setWarning( 'Redirect resolution cannot be used ' .
							'together with the revids= parameter. Any redirects ' .
							'the revids= point to have not been resolved.' );
					}
					// Redirect resolution is switched off for revids= input
					$this->mResolveRedirects = false;
					$this->initFromRevIDs( $this->mParams['revids'] );
					break;
				default:
					// Do nothing - some queries do not need any of the data sources.
					break;
			}
		}
	}
}
/**
 * Tell whether redirect resolution is currently enabled for this PageSet.
 * @return bool
 */
public function isResolvingRedirects() {
	$resolving = $this->mResolveRedirects;

	return $resolving;
}
/**
 * Return the parameter name that is the source of data for this PageSet
 *
 * A requested generator (when generators are allowed) takes precedence;
 * otherwise the first parameter set among titles, pageids and revids is
 * named. If multiple source parameters are specified (e.g. titles and
 * pageids), only that first one is reported.
 *
 * @return string|null
 */
public function getDataSource() {
	if ( $this->mAllowGenerator && isset( $this->mParams['generator'] ) ) {
		return 'generator';
	}

	foreach ( [ 'titles', 'pageids', 'revids' ] as $source ) {
		if ( isset( $this->mParams[$source] ) ) {
			return $source;
		}
	}

	return null;
}
/**
 * Register an extra page table field to be fetched.
 * Must be called before execute(). The stored value is (re)set to null.
 * @param string $fieldName Field name
 */
public function requestField( $fieldName ) {
	$noValuesYet = null;
	$this->mRequestedPageFields[$fieldName] = $noValuesYet;
}
/**
 * Get whatever is stored for a custom field previously requested through
 * requestField(). The field must have been requested; there is no check
 * for unknown field names here.
 * @param string $fieldName Field name
 * @return mixed Field value
 */
public function getCustomField( $fieldName ) {
	$requested = $this->mRequestedPageFields;

	return $requested[$fieldName];
}
/**
 * Get the fields that have to be queried from the page table:
 * a few basic ones we always need, optional ones depending on redirect
 * resolution and configuration, the LinkCache select fields, and the ones
 * requested through requestField().
 * @return array Array of field names
 */
public function getPageTableFields() {
	// Field names are kept as array keys so that duplicates collapse.
	// Ensure we get minimum required fields
	// DON'T change this order
	$fieldSet = [
		'page_namespace' => null,
		'page_title' => null,
		'page_id' => null,
	];

	$optional = [];
	if ( $this->mResolveRedirects ) {
		$optional[] = 'page_is_redirect';
	}
	$config = $this->getConfig();
	if ( $config->get( 'ContentHandlerUseDB' ) ) {
		$optional[] = 'page_content_model';
	}
	if ( $config->get( 'PageLanguageUseDB' ) ) {
		$optional[] = 'page_lang';
	}
	foreach ( $optional as $name ) {
		$fieldSet[$name] = null;
	}

	foreach ( LinkCache::getSelectFields() as $name ) {
		$fieldSet[$name] = null;
	}

	$fieldSet = array_merge( $fieldSet, $this->mRequestedPageFields );

	return array_keys( $fieldSet );
}
/**
 * Returns an array [ns][dbkey] => page_id for all requested titles.
 * page_id is a unique negative number in case title was not found.
 * Invalid titles will also have negative page IDs and will be in namespace 0
 * @return array
 */
public function getAllTitlesByNamespace() {
	$allPages = $this->mAllPages;

	return $allPages;
}
/**
 * Every Title object stored in this set.
 * @return Title[]
 */
public function getTitles() {
	$titles = $this->mTitles;

	return $titles;
}
/**
 * Returns the number of unique pages (not revisions) in the set.
 * @return int
 */
public function getTitleCount() {
	$titles = $this->mTitles;

	return count( $titles );
}
/**
 * Returns an array [ns][dbkey] => page_id for all good titles.
 * @return array
 */
public function getGoodTitlesByNamespace() {
	$goodPages = $this->mGoodPages;

	return $goodPages;
}
/**
 * Title objects that were found in the database.
 * @return Title[] Array page_id (int) => Title (obj)
 */
public function getGoodTitles() {
	$goodTitles = $this->mGoodTitles;

	return $goodTitles;
}
/**
 * Returns the number of found unique pages (not revisions) in the set.
 * @return int
 */
public function getGoodTitleCount() {
	$goodTitles = $this->mGoodTitles;

	return count( $goodTitles );
}
/**
 * Returns an array [ns][dbkey] => fake_page_id for all missing titles.
 * fake_page_id is a unique negative number.
 * @return array
 */
public function getMissingTitlesByNamespace() {
	$missingPages = $this->mMissingPages;

	return $missingPages;
}
/**
 * Title objects that were NOT found in the database.
 * The array's index will be negative for each item
 * @return Title[]
 */
public function getMissingTitles() {
	$missingTitles = $this->mMissingTitles;

	return $missingTitles;
}
/**
 * Returns an array [ns][dbkey] => page_id for all good and missing titles.
 * @return array
 */
public function getGoodAndMissingTitlesByNamespace() {
	$pages = $this->mGoodAndMissingPages;

	return $pages;
}
/**
 * Title objects for good and missing titles, combined with the array
 * union operator so that the keys of both arrays are kept.
 * @return array
 */
public function getGoodAndMissingTitles() {
	$combined = $this->mGoodTitles;
	$combined += $this->mMissingTitles;

	return $combined;
}
/**
 * Titles that were deemed invalid by Title::newFromText()
 * The array's index will be unique and negative for each item
 * @deprecated since 1.26, use self::getInvalidTitlesAndReasons()
 * @return string[] Array of strings (not Title objects)
 */
public function getInvalidTitles() {
	wfDeprecated( __METHOD__, '1.26' );

	// Keep the keys, drop everything but the 'title' entry
	$titleStrings = [];
	foreach ( $this->mInvalidTitles as $fakeId => $info ) {
		$titleStrings[$fakeId] = $info['title'];
	}

	return $titleStrings;
}
/**
 * Titles that were deemed invalid by Title::newFromText()
 * The array's index will be unique and negative for each item
 * @return array[] Array of arrays with 'title' and 'invalidreason' properties
 */
public function getInvalidTitlesAndReasons() {
	$invalid = $this->mInvalidTitles;

	return $invalid;
}
/**
 * Page IDs that were not found in the database
 * @return array Array of page IDs
 */
public function getMissingPageIDs() {
	$missingIds = $this->mMissingPageIDs;

	return $missingIds;
}
/**
 * Get a list of redirect resolutions - maps the prefixed text of a
 * redirect's title to the Title object of its target.
 * @return Title[] Array of prefixed_title (string) => Title object
 */
public function getRedirectTitles() {
	$redirects = $this->mRedirectTitles;

	return $redirects;
}
/**
 * Get a list of redirect resolutions as an array of output-ready arrays.
 * Each entry has 'from' and 'to', plus 'tofragment' / 'tointerwiki' when
 * the target has a fragment / is external. Generator data stored for the
 * redirect source is merged in when available; the keys set here win.
 * @param ApiResult $result If given, the indexed tag name 'r' is set on a
 *   non-empty return value
 * @return array[] List of arrays as described above
 * @since 1.21
 */
public function getRedirectTitlesAsResult( $result = null ) {
	$rows = [];

	foreach ( $this->getRedirectTitles() as $fromText => $target ) {
		$row = [
			'from' => (string)$fromText,
			'to' => $target->getPrefixedText(),
		];
		if ( $target->hasFragment() ) {
			$row['tofragment'] = $target->getFragment();
		}
		if ( $target->isExternal() ) {
			$row['tointerwiki'] = $target->getInterwiki();
		}

		if ( isset( $this->mResolvedRedirectTitles[$fromText] ) ) {
			$source = $this->mResolvedRedirectTitles[$fromText];
			$sourceNs = $source->getNamespace();
			$sourceKey = $source->getDBkey();
			if ( isset( $this->mGeneratorData[$sourceNs][$sourceKey] ) ) {
				$row = array_merge( $this->mGeneratorData[$sourceNs][$sourceKey], $row );
			}
		}

		$rows[] = $row;
	}

	if ( $rows && $result ) {
		ApiResult::setIndexedTagName( $rows, 'r' );
	}

	return $rows;
}
/**
 * Get a list of title normalizations - maps a title to its normalized
 * version.
 * @return array Array of raw_prefixed_title (string) => prefixed_title (string)
 */
public function getNormalizedTitles() {
	$normalized = $this->mNormalizedTitles;

	return $normalized;
}
/**
 * Get the title normalizations in the form of a result array.
 * Each entry has 'fromencoded', 'from' and 'to'. When the content
 * language's normalize() would change the raw title, 'fromencoded' is
 * true and 'from' is the rawurlencode()d raw title.
 * @param ApiResult $result If given, the indexed tag name 'n' is set on a
 *   non-empty return value
 * @return array[] List of arrays as described above
 * @since 1.21
 */
public function getNormalizedTitlesAsResult( $result = null ) {
	global $wgContLang;

	$rows = [];
	foreach ( $this->getNormalizedTitles() as $rawTitleStr => $titleStr ) {
		$needsEncoding = $wgContLang->normalize( $rawTitleStr ) !== $rawTitleStr;
		if ( $needsEncoding ) {
			$from = rawurlencode( $rawTitleStr );
		} else {
			$from = $rawTitleStr;
		}

		$rows[] = [
			'fromencoded' => $needsEncoding,
			'from' => $from,
			'to' => $titleStr,
		];
	}

	if ( $rows && $result ) {
		ApiResult::setIndexedTagName( $rows, 'n' );
	}

	return $rows;
}
/**
 * Get a list of title conversions - maps a title to its converted
 * version.
 * @return array Array of raw_prefixed_title (string) => prefixed_title (string)
 */
public function getConvertedTitles() {
	$converted = $this->mConvertedTitles;

	return $converted;
}
/**
 * Get a list of title conversions - maps a title to its converted
 * version as a result array.
 * @param ApiResult $result If given, the indexed tag name 'c' is set on a
 *   non-empty return value
 * @return array Array of (from, to) strings
 * @since 1.21
 */
public function getConvertedTitlesAsResult( $result = null ) {
	$rows = [];
	foreach ( $this->getConvertedTitles() as $rawTitleStr => $titleStr ) {
		$row = [];
		$row['from'] = $rawTitleStr;
		$row['to'] = $titleStr;
		$rows[] = $row;
	}

	if ( $rows && $result ) {
		ApiResult::setIndexedTagName( $rows, 'c' );
	}

	return $rows;
}
/**
 * Get a list of interwiki titles - maps a title to its interwiki
 * prefix.
 * @return array Array of raw_prefixed_title (string) => interwiki_prefix (string)
 */
public function getInterwikiTitles() {
	$interwiki = $this->mInterwikiTitles;

	return $interwiki;
}
/**
 * Get the interwiki titles as a result array.
 * Each entry has 'title' and 'iw'; when $iwUrl is true and the title text
 * can be parsed into a Title, 'url' holds its full URL.
 * @param ApiResult $result If given, the indexed tag name 'i' is set on a
 *   non-empty return value
 * @param bool $iwUrl Whether to add the 'url' entry
 * @return array[] List of arrays as described above
 * @since 1.21
 */
public function getInterwikiTitlesAsResult( $result = null, $iwUrl = false ) {
	$values = [];
	foreach ( $this->getInterwikiTitles() as $rawTitleStr => $interwikiStr ) {
		$item = [
			'title' => $rawTitleStr,
			'iw' => $interwikiStr,
		];
		if ( $iwUrl ) {
			$title = Title::newFromText( $rawTitleStr );
			// Title::newFromText() returns null for text it cannot parse;
			// leave out 'url' in that case instead of calling a method on null.
			if ( $title ) {
				$item['url'] = $title->getFullURL( '', false, PROTO_CURRENT );
			}
		}
		$values[] = $item;
	}
	if ( !empty( $values ) && $result ) {
		ApiResult::setIndexedTagName( $values, 'i' );
	}

	return $values;
}
/**
 * Get an array of invalid/special/missing titles.
 *
 * Entries are appended in a fixed order: invalid titles, special titles
 * (unknown ones first), missing page IDs, missing revision IDs, missing
 * titles (unknown ones first), interwiki titles.
 *
 * @param array $invalidChecks List of types of invalid titles to include.
 *   Recognized values are:
 *   - invalidTitles: Titles and reasons from $this->getInvalidTitlesAndReasons()
 *   - special: Titles from $this->getSpecialTitles()
 *   - missingIds: ids from $this->getMissingPageIDs()
 *   - missingRevIds: ids from $this->getMissingRevisionIDs()
 *   - missingTitles: Titles from $this->getMissingTitles()
 *   - interwikiTitles: Titles from $this->getInterwikiTitlesAsResult()
 * @return array Array suitable for inclusion in the response
 * @since 1.23
 */
public function getInvalidTitlesAndRevisions( $invalidChecks = [ 'invalidTitles',
	'special', 'missingIds', 'missingRevIds', 'missingTitles', 'interwikiTitles' ]
) {
	$wants = function ( $type ) use ( $invalidChecks ) {
		return in_array( $type, $invalidChecks );
	};
	// Splits titles into [ unknown, known ] according to Title::isKnown()
	$splitByKnown = function ( $titles ) {
		$unknown = [];
		$known = [];
		foreach ( $titles as $title ) {
			if ( $title->isKnown() ) {
				$known[] = $title;
			} else {
				$unknown[] = $title;
			}
		}

		return [ $unknown, $known ];
	};

	$result = [];

	if ( $wants( 'invalidTitles' ) ) {
		self::addValues( $result, $this->getInvalidTitlesAndReasons(), [ 'invalid' ] );
	}

	if ( $wants( 'special' ) ) {
		list( $unknown, $known ) = $splitByKnown( $this->getSpecialTitles() );
		self::addValues( $result, $unknown, [ 'special', 'missing' ] );
		self::addValues( $result, $known, [ 'special' ] );
	}

	if ( $wants( 'missingIds' ) ) {
		self::addValues( $result, $this->getMissingPageIDs(), [ 'missing' ], 'pageid' );
	}

	if ( $wants( 'missingRevIds' ) ) {
		self::addValues( $result, $this->getMissingRevisionIDs(), [ 'missing' ], 'revid' );
	}

	if ( $wants( 'missingTitles' ) ) {
		list( $unknown, $known ) = $splitByKnown( $this->getMissingTitles() );
		self::addValues( $result, $unknown, [ 'missing' ] );
		self::addValues( $result, $known, [ 'missing', 'known' ] );
	}

	if ( $wants( 'interwikiTitles' ) ) {
		self::addValues( $result, $this->getInterwikiTitlesAsResult() );
	}

	return $result;
}
/**
 * Get the list of valid revision IDs (requested with the revids= parameter)
 * @return array Array of revID (int) => pageID (int)
 */
public function getRevisionIDs() {
	$goodRevIds = $this->mGoodRevIDs;

	return $goodRevIds;
}
/**
 * Get the list of non-deleted revision IDs (requested with the revids= parameter)
 * @return array Array of revID (int) => pageID (int)
 */
public function getLiveRevisionIDs() {
	$liveRevIds = $this->mLiveRevIDs;

	return $liveRevIds;
}
/**
 * Get the list of revision IDs that were associated with deleted titles.
 * @return array Array of revID (int) => pageID (int)
 */
public function getDeletedRevisionIDs() {
	$deletedRevIds = $this->mDeletedRevIDs;

	return $deletedRevIds;
}
/**
 * Revision IDs that were not found in the database
 * @return array Array of revision IDs
 */
public function getMissingRevisionIDs() {
	$missingRevIds = $this->mMissingRevIDs;

	return $missingRevIds;
}
/**
 * Revision IDs that were not found in the database as result array.
 * The returned array is keyed by revision ID; each value is
 * [ 'revid' => ID ].
 * @param ApiResult $result If given, the indexed tag name 'rev' is set on
 *   a non-empty return value
 * @return array Array of revision IDs
 * @since 1.21
 */
public function getMissingRevisionIDsAsResult( $result = null ) {
	$rows = [];
	foreach ( $this->getMissingRevisionIDs() as $missingId ) {
		$rows[$missingId] = [ 'revid' => $missingId ];
	}

	if ( $rows && $result ) {
		ApiResult::setIndexedTagName( $rows, 'rev' );
	}

	return $rows;
}
/**
 * Get the list of titles with negative namespace
 * @return Title[]
 */
public function getSpecialTitles() {
	$specialTitles = $this->mSpecialTitles;

	return $specialTitles;
}
/**
 * Returns the number of revisions (requested with revids= parameter),
 * i.e. the size of the array returned by getRevisionIDs().
 * @return int Number of revisions.
 */
public function getRevisionCount() {
	$revisionIds = $this->getRevisionIDs();

	return count( $revisionIds );
}
/**
 * Populate this PageSet from a list of Titles.
 * Public entry point that forwards to initFromTitles().
 * @param array $titles Array of Title objects
 */
public function populateFromTitles( $titles ) {
	$this->initFromTitles(
		$titles
	);
}
/**
 * Populate this PageSet from a list of page IDs.
 * Public entry point that forwards to initFromPageIds().
 * @param array $pageIDs Array of page IDs
 */
public function populateFromPageIDs( $pageIDs ) {
	$this->initFromPageIds(
		$pageIDs
	);
}
/**
 * Populate this PageSet from a rowset returned from the database
 *
 * Note that the query result must include the columns returned by
 * $this->getPageTableFields().
 *
 * @param IDatabase $db Not used by this method
 * @param ResultWrapper $queryResult Query result object
 */
public function populateFromQueryResult( $db, $queryResult ) {
	$this->initFromQueryResult(
		$queryResult
	);
}
/**
 * Populate this PageSet from a list of revision IDs.
 * Public entry point that forwards to initFromRevIDs().
 * @param array $revIDs Array of revision IDs
 */
public function populateFromRevisionIDs( $revIDs ) {
	$this->initFromRevIDs(
		$revIDs
	);
}
/**
 * Record one row of the page table in this set.
 *
 * The page is registered in the list of all pages and titles. It is then
 * either queued as a pending redirect (when redirects are being resolved
 * and the row is flagged as a redirect) or stored as a good page. Finally
 * the value of every requested custom field is stored per page ID.
 *
 * @param stdClass $row Result row
 */
public function processDbRow( $row ) {
	// Store Title object in various data structures
	$title = Title::newFromRow( $row );

	LinkCache::singleton()->addGoodLinkObjFromRow( $title, $row );

	$ns = $row->page_namespace;
	$dbkey = $row->page_title;
	$pageId = intval( $row->page_id );

	$this->mAllPages[$ns][$dbkey] = $pageId;
	$this->mTitles[] = $title;

	$isPendingRedirect = $this->mResolveRedirects && $row->page_is_redirect == '1';
	if ( $isPendingRedirect ) {
		$this->mPendingRedirectIDs[$pageId] = $title;
	} else {
		$this->mGoodPages[$ns][$dbkey] = $pageId;
		$this->mGoodAndMissingPages[$ns][$dbkey] = $pageId;
		$this->mGoodTitles[$pageId] = $title;
	}

	// Remember the value of each requested field for this page
	foreach ( array_keys( $this->mRequestedPageFields ) as $fieldName ) {
		$this->mRequestedPageFields[$fieldName][$pageId] = $row->$fieldName;
	}
}
/**
 * This method populates internal variables with page information
 * based on the given array of title strings.
 *
 * Steps:
 * #1 For each title, get data from `page` table
 * #2 If page was not found in the DB, store it as missing
 *
 * Additionally, when resolving redirects:
 * #3 If no more redirects left, stop.
 * #4 For each redirect, get its target from the `redirect` table.
 * #5 Substitute the original LinkBatch object with the new list
 * #6 Repeat from step #1
 *
 * @param array $titles Array of Title objects or strings
 */
private function initFromTitles( $titles ) {
	// Get validated and normalized title objects
	$batch = $this->processTitlesArray( $titles );
	if ( $batch->isEmpty() ) {
		// Nothing to look up
		return;
	}

	$dbr = $this->getDB();
	$conds = $batch->constructSet( 'page', $dbr );

	// Get pageIDs data from the `page` table
	$rows = $dbr->select( 'page', $this->getPageTableFields(), $conds, __METHOD__ );

	// Hack: get the ns:titles stored in [ ns => [ titles ] ] format
	// The batch data is passed by reference; found titles are removed from it.
	$this->initFromQueryResult( $rows, $batch->data, true );

	// Resolve any found redirects
	$this->resolvePendingRedirects();
}
/**
 * Does the same as initFromTitles(), but is based on page IDs instead.
 * All given IDs (after integer conversion) are tracked as "remaining";
 * only those kept by getPositiveIntegers() are looked up in the database.
 * @param array $pageids Array of page IDs
 */
private function initFromPageIds( $pageids ) {
	if ( !$pageids ) {
		return;
	}

	$allIds = array_map( 'intval', $pageids ); // paranoia
	$remaining = array_flip( $allIds );
	$queryIds = self::getPositiveIntegers( $allIds );

	// Get pageIDs data from the `page` table, unless there is nothing to query
	$rows = !empty( $queryIds )
		? $this->getDB()->select(
			'page',
			$this->getPageTableFields(),
			[ 'page_id' => $queryIds ],
			__METHOD__
		)
		: null;

	$this->initFromQueryResult( $rows, $remaining, false ); // process PageIDs

	// Resolve any found redirects
	$this->resolvePendingRedirects();
}
/**
 * Iterate through the result of the query on 'page' table,
 * and for each row create and store title object and save any extra fields requested.
 * Finally the gender cache is queried for the collected user names.
 * @param ResultWrapper $res DB Query result
 * @param array $remaining Array of either pageID or ns/title elements (optional).
 *        If given, found items are removed from it and any missing items
 *        will go to $mMissingPageIDs and $mMissingTitles
 * @param bool $processTitles Must be provided together with $remaining.
 *        If true, treat $remaining as an array of [ns][title]
 *        If false, treat it as an array of [pageIDs]
 */
private function initFromQueryResult( $res, &$remaining = null, $processTitles = null ) {
	if ( $remaining !== null && $processTitles === null ) {
		ApiBase::dieDebug( __METHOD__, 'Missing $processTitles parameter when $remaining is provided' );
	}

	$genderNames = [];

	if ( $res ) {
		foreach ( $res as $row ) {
			// Remove found page from the list of remaining items
			if ( $remaining !== null ) {
				if ( $processTitles ) {
					unset( $remaining[$row->page_namespace][$row->page_title] );
				} else {
					unset( $remaining[intval( $row->page_id )] );
				}
			}

			// Store any extra fields requested by modules
			$this->processDbRow( $row );

			// Need gender information
			if ( MWNamespace::hasGenderDistinction( $row->page_namespace ) ) {
				$genderNames[] = $row->page_title;
			}
		}
	}

	if ( $remaining !== null ) {
		// Any items left in the $remaining list are added as missing
		if ( $processTitles ) {
			// The remaining titles in $remaining are non-existent pages
			$linkCache = LinkCache::singleton();
			foreach ( $remaining as $ns => $dbkeys ) {
				foreach ( $dbkeys as $dbkey => $unused ) {
					$title = Title::makeTitle( $ns, $dbkey );
					$linkCache->addBadLinkObj( $title );

					// Each missing title gets its own negative fake page ID
					$fakeId = $this->mFakePageId;
					$this->mAllPages[$ns][$dbkey] = $fakeId;
					$this->mMissingPages[$ns][$dbkey] = $fakeId;
					$this->mGoodAndMissingPages[$ns][$dbkey] = $fakeId;
					$this->mMissingTitles[$fakeId] = $title;
					$this->mFakePageId--;
					$this->mTitles[] = $title;

					// need gender information
					if ( MWNamespace::hasGenderDistinction( $ns ) ) {
						$genderNames[] = $dbkey;
					}
				}
			}
		} else {
			// The remaining pageids do not exist
			$this->mMissingPageIDs = $this->mMissingPageIDs
				? array_merge( $this->mMissingPageIDs, array_keys( $remaining ) )
				: array_keys( $remaining );
		}
	}

	// Get gender information
	GenderCache::singleton()->doQuery( $genderNames, __METHOD__ );
}
/**
 * Does the same as initFromTitles(), but is based on revision IDs
 * instead.
 *
 * Revisions found in the revision table are recorded as good and live and
 * their pages are loaded. Revision IDs not found there are recorded as
 * missing; if the user has the 'deletedhistory' right, they are then looked
 * up in the archive table and, when their title is in this set, recorded
 * as good and deleted.
 *
 * @param array $revids Array of revision IDs
 */
private function initFromRevIDs( $revids ) {
	if ( !$revids ) {
		return;
	}

	$allRevIds = array_map( 'intval', $revids ); // paranoia
	$db = $this->getDB();
	$pageIdSet = [];
	$remaining = array_flip( $allRevIds );

	$queryRevIds = self::getPositiveIntegers( $allRevIds );

	if ( !empty( $queryRevIds ) ) {
		// Get the revisions and their page IDs from the database
		$res = $db->select(
			[ 'revision', 'page' ],
			[ 'rev_id', 'rev_page' ],
			[ 'rev_id' => $queryRevIds, 'rev_page = page_id' ],
			__METHOD__
		);
		foreach ( $res as $row ) {
			$revId = intval( $row->rev_id );
			$pageId = intval( $row->rev_page );
			$this->mGoodRevIDs[$revId] = $pageId;
			$this->mLiveRevIDs[$revId] = $pageId;
			$pageIdSet[$pageId] = '';
			unset( $remaining[$revId] );
		}
	}

	$this->mMissingRevIDs = array_keys( $remaining );

	// Populate all the page information
	$this->initFromPageIds( array_keys( $pageIdSet ) );

	// If the user can see deleted revisions, pull out the corresponding
	// titles from the archive table and include them too. We ignore
	// ar_page_id because deleted revisions are tied by title, not page_id.
	if ( empty( $this->mMissingRevIDs ) || !$this->getUser()->isAllowed( 'deletedhistory' ) ) {
		return;
	}

	$remaining = array_flip( $this->mMissingRevIDs );

	$res = $db->select(
		[ 'archive' ],
		[ 'ar_rev_id', 'ar_namespace', 'ar_title' ],
		[ 'ar_rev_id' => $this->mMissingRevIDs ],
		__METHOD__
	);
	$archivedTitles = [];
	foreach ( $res as $row ) {
		$revId = intval( $row->ar_rev_id );
		$archivedTitles[$revId] = Title::makeTitle( $row->ar_namespace, $row->ar_title );
		unset( $remaining[$revId] );
	}

	$this->initFromTitles( $archivedTitles );

	foreach ( $archivedTitles as $revId => $title ) {
		$ns = $title->getNamespace();
		$dbkey = $title->getDBkey();

		// Handle converted titles
		if ( !isset( $this->mAllPages[$ns][$dbkey] ) &&
			isset( $this->mConvertedTitles[$title->getPrefixedText()] )
		) {
			$title = Title::newFromText( $this->mConvertedTitles[$title->getPrefixedText()] );
			$ns = $title->getNamespace();
			$dbkey = $title->getDBkey();
		}

		if ( isset( $this->mAllPages[$ns][$dbkey] ) ) {
			$knownId = $this->mAllPages[$ns][$dbkey];
			$this->mGoodRevIDs[$revId] = $knownId;
			$this->mDeletedRevIDs[$revId] = $knownId;
		} else {
			// Title is not in this set: the revision stays missing
			$remaining[$revId] = true;
		}
	}

	$this->mMissingRevIDs = array_keys( $remaining );
}
/**
 * Resolve any redirects in the result if redirect resolution was
 * requested. Loops until no pending redirects are left, or until a pass
 * yields no new titles to look up.
 */
private function resolvePendingRedirects() {
	if ( !$this->mResolveRedirects ) {
		return;
	}

	$dbr = $this->getDB();
	$fields = $this->getPageTableFields();

	// Repeat until all redirects have been resolved
	// The infinite loop is prevented by keeping all known pages in $this->mAllPages
	while ( $this->mPendingRedirectIDs ) {
		// Fetch the targets of the pending redirects (getRedirectTargets()
		// reads the redirect table); they form the batch for the next pass
		$targets = $this->getRedirectTargets();
		if ( $targets->isEmpty() ) {
			break;
		}

		$conds = $targets->constructSet( 'page', $dbr );
		if ( $conds === false ) {
			break;
		}

		// Get pageIDs data from the `page` table
		$rows = $dbr->select( 'page', $fields, $conds, __METHOD__ );

		// Hack: get the ns:titles stored in [ns => array(titles)] format
		$this->initFromQueryResult( $rows, $targets->data, true );
	}
}
/**
 * Get the targets of the pending redirects from the database
 *
 * Also creates entries in the redirect table for redirects that don't
 * have one. Resolved redirects are removed from the pending list and
 * recorded in the redirect title maps; external targets are recorded as
 * interwiki titles instead of being added to the batch.
 * @return LinkBatch Targets that are not yet known to this set
 */
private function getRedirectTargets() {
	$batch = new LinkBatch();
	$dbr = $this->getDB();

	$rows = $dbr->select(
		'redirect',
		[
			'rd_from',
			'rd_namespace',
			'rd_fragment',
			'rd_interwiki',
			'rd_title'
		],
		[ 'rd_from' => array_keys( $this->mPendingRedirectIDs ) ],
		__METHOD__
	);

	foreach ( $rows as $row ) {
		$sourceId = intval( $row->rd_from );
		$source = $this->mPendingRedirectIDs[$sourceId];
		$fromText = $source->getPrefixedText();
		$target = Title::makeTitle(
			$row->rd_namespace,
			$row->rd_title,
			$row->rd_fragment,
			$row->rd_interwiki
		);

		$this->mResolvedRedirectTitles[$fromText] = $source;
		unset( $this->mPendingRedirectIDs[$sourceId] );

		if ( $target->isExternal() ) {
			$this->mInterwikiTitles[$target->getPrefixedText()] = $target->getInterwiki();
		} elseif ( !isset( $this->mAllPages[$row->rd_namespace][$row->rd_title] ) ) {
			// Only look up targets this set does not know yet
			$batch->add( $row->rd_namespace, $row->rd_title );
		}

		$this->mRedirectTitles[$fromText] = $target;
	}

	if ( $this->mPendingRedirectIDs ) {
		// We found pages that aren't in the redirect table
		// Add them
		foreach ( $this->mPendingRedirectIDs as $pendingId => $pendingTitle ) {
			$target = WikiPage::factory( $pendingTitle )->insertRedirect();
			if ( !$target ) {
				// No target could be obtained; leave this one pending and move on
				continue;
			}

			$batch->addObj( $target );
			$fromText = $pendingTitle->getPrefixedText();
			$this->mResolvedRedirectTitles[$fromText] = $pendingTitle;
			$this->mRedirectTitles[$fromText] = $target;
			unset( $this->mPendingRedirectIDs[$pendingId] );
		}
	}

	return $batch;
}
/**
 * Report the cache mode that applies to the data produced by this module.
 *
 * Anything using a PageSet should check whether the mode returned here is
 * more restrictive than its own. The possible values and further details
 * are described at ApiMain::setCacheMode().
 *
 * A request may only be cached publicly when *all* modules supplying data
 * for it return the public cache mode.
 *
 * @param array|null $params Not read by this implementation
 * @return string
 * @since 1.21
 */
public function getCacheMode( $params = null ) {
	return $this->mCacheMode;
}
/**
 * Turn a list of titles into a LinkBatch of regular pages.
 *
 * Entries may be title strings or Title objects. While walking the list
 * this method records invalid, interwiki and special titles as well as
 * title normalizations and variant conversions on the page set, and
 * finally preloads gender information for the namespaces that need it.
 *
 * @param array $titles Array of Title objects or strings
 * @return LinkBatch Batch holding the regular (non-special, local) pages
 */
private function processTitlesArray( $titles ) {
	global $wgContLang;

	$genderNames = [];
	$batch = new LinkBatch();

	foreach ( $titles as $title ) {
		$givenAsString = is_string( $title );

		if ( !$givenAsString ) {
			$titleObj = $title;
		} else {
			try {
				$titleObj = Title::newFromTextThrow( $title, $this->mDefaultNamespace );
			} catch ( MalformedTitleException $ex ) {
				// Invalid title: file it under a fake page ID and move on,
				// nothing more can be done with it
				$fakeId = $this->mFakePageId--;
				$this->mAllPages[0][$title] = $fakeId;
				$this->mInvalidTitles[$fakeId] = [
					'title' => $title,
					'invalidreason' => $ex->getMessage(),
				];
				continue;
			}
		}

		$originalText = $titleObj->getPrefixedText();
		$converted = false;

		if ( $titleObj->isExternal() ) {
			// Interwiki link
			$this->mInterwikiTitles[$originalText] = $titleObj->getInterwiki();
		} else {
			// Only look for a language variant when conversion was
			// requested, the content language has several variants and
			// the title as given does not exist
			$tryVariants = $this->mConvertTitles
				&& count( $wgContLang->getVariants() ) > 1
				&& !$titleObj->exists();
			if ( $tryVariants ) {
				// Language::findVariantLink will modify both of its
				// arguments into the canonical variant if possible
				$variantText = $givenAsString ? $title : $originalText;
				$wgContLang->findVariantLink( $variantText, $titleObj );
				$converted = $originalText !== $titleObj->getPrefixedText();
			}

			if ( $titleObj->getNamespace() < 0 ) {
				// Special and Media pages get a fake page ID
				$titleObj = $titleObj->fixSpecialName();
				$this->mSpecialTitles[$this->mFakePageId] = $titleObj;
				$this->mFakePageId--;
			} else {
				// Regular page
				$batch->addObj( $titleObj );
			}
		}

		// Remember the title as it was originally given, so that the
		// caller can correlate new titles with the requested ones when
		// e.g. the namespace is localized or the capitalization differs
		$finalText = $titleObj->getPrefixedText();
		if ( $converted ) {
			$this->mConvertedTitles[$originalText] = $finalText;
			// In this case the page can't be Special.
			if ( $givenAsString && $title !== $originalText ) {
				$this->mNormalizedTitles[$title] = $originalText;
			}
		} elseif ( $givenAsString && $title !== $finalText ) {
			$this->mNormalizedTitles[$title] = $finalText;
		}

		// Collect names whose namespace needs gender information
		if ( MWNamespace::hasGenderDistinction( $titleObj->getNamespace() ) ) {
			$genderNames[] = $titleObj->getText();
		}
	}

	// Fetch the gender information in one go
	GenderCache::singleton()->doQuery( $genderNames, __METHOD__ );

	return $batch;
}
/**
 * Attach generator data to a title.
 *
 * The data is stored by namespace and DB key, replacing whatever was
 * stored for that title before. It may later be extracted into an
 * ApiResult with self::populateGeneratorData(). This should generally be
 * limited to data that is likely to be particularly useful to end users,
 * rather than a dump of everything returned in non-generator mode.
 *
 * Redirects here will *not* be followed, even if 'redirects' was
 * specified, since with multiple redirects to one target there is no way
 * to know which source's data to use on the target.
 *
 * @param Title $title
 * @param array $data
 */
public function setGeneratorData( Title $title, array $data ) {
	$this->mGeneratorData[$title->getNamespace()][$title->getDBkey()] = $data;
}
/**
 * Set the policy that decides how generator data about a redirect source
 * is merged into the generator data of the redirect target. While no
 * policy is set, nothing is merged. If several titles redirect to the
 * same target, the order in which they are merged is undefined.
 *
 * Example that includes the data generated for the redirect in the
 * target, preferring the data generated for the destination when there is
 * a collision:
 * @code
 *   $pageSet->setRedirectMergePolicy( function( array $current, array $new ) {
 *     return $current + $new;
 *   } );
 * @endcode
 *
 * @param callable|null $callable Receives two array arguments: first the
 *  generator data for the redirect target, second the generator data for
 *  the redirect source. Returns the resulting generator data to use for
 *  the redirect target.
 */
public function setRedirectMergePolicy( $callable ) {
	$this->mRedirectMergePolicy = $callable;
}
/**
 * Populate the generator data for all titles in the result
 *
 * The page data may be inserted into an ApiResult object or into an
 * associative array. The $path parameter specifies the path within the
 * ApiResult or array to find the "pages" node.
 *
 * The "pages" node itself must be an associative array mapping the page ID
 * or fake page ID values returned by this pageset (see
 * self::getAllTitlesByNamespace() and self::getSpecialTitles()) to
 * associative arrays of page data. Each of those subarrays will have the
 * data from self::setGeneratorData() merged in.
 *
 * Data that was set by self::setGeneratorData() for pages not in the
 * "pages" node will be ignored.
 *
 * When a redirect merge policy is set, data generated for a redirect
 * source is additionally merged into the final destination of its
 * redirect chain. Sources whose chain is circular have no final
 * destination and are skipped.
 *
 * @param ApiResult|array &$result
 * @param array $path
 * @return bool Whether the data fit
 */
public function populateGeneratorData( &$result, array $path = [] ) {
	if ( $result instanceof ApiResult ) {
		$data = $result->getResultData( $path );
		if ( $data === null ) {
			return true;
		}
	} else {
		// Walk down to the "pages" node by reference so that the writes
		// below land in the caller's array
		$data = &$result;
		foreach ( $path as $key ) {
			if ( !isset( $data[$key] ) ) {
				// Path isn't in $result, so nothing to add, so everything
				// "fits"
				return true;
			}
			$data = &$data[$key];
		}
	}
	foreach ( $this->mGeneratorData as $ns => $dbkeys ) {
		if ( $ns === -1 ) {
			// Special pages are tracked by fake page ID; index them by DB key
			$pages = [];
			foreach ( $this->mSpecialTitles as $id => $title ) {
				$pages[$title->getDBkey()] = $id;
			}
		} else {
			if ( !isset( $this->mAllPages[$ns] ) ) {
				// No known titles in the whole namespace. Skip it.
				continue;
			}
			$pages = $this->mAllPages[$ns];
		}
		foreach ( $dbkeys as $dbkey => $genData ) {
			if ( !isset( $pages[$dbkey] ) ) {
				// Unknown title. Forget it.
				continue;
			}
			$pageId = $pages[$dbkey];
			if ( !isset( $data[$pageId] ) ) {
				// $pageId didn't make it into the result. Ignore it.
				continue;
			}

			if ( $result instanceof ApiResult ) {
				$path2 = array_merge( $path, [ $pageId ] );
				foreach ( $genData as $key => $value ) {
					if ( !$result->addValue( $path2, $key, $value ) ) {
						return false;
					}
				}
			} else {
				$data[$pageId] = array_merge( $data[$pageId], $genData );
			}
		}
	}

	// Merge data generated about redirect titles into the redirect destination
	if ( $this->mRedirectMergePolicy ) {
		foreach ( $this->mResolvedRedirectTitles as $titleFrom ) {
			// Follow the redirect chain to its end, remembering every title
			// visited so that a circular chain cannot loop forever
			$dest = $titleFrom;
			$seen = [ $titleFrom->getPrefixedText() => true ];
			while ( isset( $this->mRedirectTitles[$dest->getPrefixedText()] ) ) {
				$dest = $this->mRedirectTitles[$dest->getPrefixedText()];
				$destText = $dest->getPrefixedText();
				if ( isset( $seen[$destText] ) ) {
					// Circular redirect: there is no destination to merge into
					continue 2;
				}
				$seen[$destText] = true;
			}
			$fromNs = $titleFrom->getNamespace();
			$fromDBkey = $titleFrom->getDBkey();
			$toPageId = $dest->getArticleID();
			if ( isset( $data[$toPageId] ) &&
				isset( $this->mGeneratorData[$fromNs][$fromDBkey] )
			) {
				// It is necessary to set both $data and add to $result, if an ApiResult,
				// to ensure multiple redirects to the same destination are all merged.
				$data[$toPageId] = call_user_func(
					$this->mRedirectMergePolicy,
					$data[$toPageId],
					$this->mGeneratorData[$fromNs][$fromDBkey]
				);
				if ( $result instanceof ApiResult ) {
					if ( !$result->addValue( $path, $toPageId, $data[$toPageId], ApiResult::OVERRIDE ) ) {
						return false;
					}
				}
			}
		}
	}

	return true;
}
/**
 * Get the database connection (read-only), as provided by the module
 * this page set was created for.
 *
 * @return Database
 */
protected function getDB() {
	return $this->mDbSource->getDB();
}
/**
 * Returns the input array of integers with all values < 0 removed.
 * The remaining entries (zero included) keep their original keys.
 *
 * @param array $array
 * @return array
 */
private static function getPositiveIntegers( $array ) {
	// bug 25734 API: possible issue with revids validation
	// It seems with a load of revision rows, MySQL gets upset
	// Negative integers can never be valid, so drop them
	return array_filter( $array, function ( $int ) {
		return !( $int < 0 );
	} );
}
/**
 * Describe the parameters accepted by the page set.
 *
 * The 'generator' parameter is only offered when generators are allowed
 * for this instance. When the values are requested for help output, it is
 * additionally described as a submodule parameter with the map returned
 * by getGenerators().
 *
 * @param int $flags Zero or more ApiBase::GET_VALUES_FOR_* flags
 * @return array
 */
public function getAllowedParams( $flags = 0 ) {
	// The help text for 'redirects' depends on whether generators are allowed
	$redirectsHelp = 'api-pageset-param-redirects-nogenerator';
	if ( $this->mAllowGenerator ) {
		$redirectsHelp = 'api-pageset-param-redirects-generator';
	}

	// List of the languages with variants, only rendered once it is needed
	$variantLanguages = new DeferredStringifier(
		function ( IContextSource $context ) {
			$codes = LanguageConverter::$languagesWithVariants;
			return $context->getLanguage()->commaList( $codes );
		},
		$this
	);

	$params = [
		'titles' => [
			ApiBase::PARAM_ISMULTI => true,
			ApiBase::PARAM_HELP_MSG => 'api-pageset-param-titles',
		],
		'pageids' => [
			ApiBase::PARAM_TYPE => 'integer',
			ApiBase::PARAM_ISMULTI => true,
			ApiBase::PARAM_HELP_MSG => 'api-pageset-param-pageids',
		],
		'revids' => [
			ApiBase::PARAM_TYPE => 'integer',
			ApiBase::PARAM_ISMULTI => true,
			ApiBase::PARAM_HELP_MSG => 'api-pageset-param-revids',
		],
	];

	if ( $this->mAllowGenerator ) {
		$forHelp = $flags & ApiBase::GET_VALUES_FOR_HELP;
		$generator = [
			ApiBase::PARAM_TYPE => $forHelp ? 'submodule' : null,
			ApiBase::PARAM_HELP_MSG => 'api-pageset-param-generator',
			ApiBase::PARAM_SUBMODULE_PARAM_PREFIX => 'g',
		];
		if ( $forHelp ) {
			$generator[ApiBase::PARAM_SUBMODULE_MAP] = $this->getGenerators();
		}
		$params['generator'] = $generator;
	}

	$params['redirects'] = [
		ApiBase::PARAM_DFLT => false,
		ApiBase::PARAM_HELP_MSG => $redirectsHelp,
	];
	$params['converttitles'] = [
		ApiBase::PARAM_DFLT => false,
		ApiBase::PARAM_HELP_MSG => [
			'api-pageset-param-converttitles',
			$variantLanguages,
		],
	];

	return $params;
}
/**
 * Hook invoked when a parameter value differs from the raw value that
 * was submitted.
 *
 * After deferring to the parent, the 'titles' parameter gets extra
 * treatment: both values are split into individual titles and every raw
 * title that changed is recorded in $this->mNormalizedTitles.
 *
 * @param string $paramName
 * @param string $value Value after normalization
 * @param string $rawValue Value as submitted
 */
protected function handleParamNormalization( $paramName, $value, $rawValue ) {
	parent::handleParamNormalization( $paramName, $value, $rawValue );

	if ( $paramName !== 'titles' ) {
		return;
	}

	// Split the value like ApiBase would, then split the raw value into
	// the same number of pieces so the two lists line up index by index
	$normalized = $this->explodeMultiValue( $value, self::LIMIT_SML2 + 1 );
	$total = count( $normalized );
	$raw = $this->explodeMultiValue( $rawValue, $total );

	for ( $idx = 0; $idx < $total; $idx++ ) {
		if ( $normalized[$idx] !== $raw[$idx] ) {
			$this->mNormalizedTitles[$raw[$idx]] = $normalized[$idx];
		}
	}
}
/** @var array|null Generator name => module path; null until first computed */
private static $generators = null;

/**
 * Get an array of all available generators.
 *
 * The list is computed on first use and then kept in a static property.
 * It maps each generator module name to its module path and is sorted by
 * name.
 *
 * @return array
 */
private function getGenerators() {
	if ( self::$generators !== null ) {
		return self::$generators;
	}

	// If the parent container of this pageset is not ApiQuery, the query
	// module has to be fetched from the main module to reach its
	// module manager
	$query = ( $this->mDbSource instanceof ApiQuery )
		? $this->mDbSource
		: $this->getMain()->getModuleManager()->getModule( 'query' );

	$pathPrefix = $query->getModulePath() . '+';
	$found = [];
	foreach ( $query->getModuleManager()->getNamesWithClasses() as $name => $class ) {
		if ( is_subclass_of( $class, 'ApiQueryGeneratorBase' ) ) {
			$found[$name] = $pathPrefix . $name;
		}
	}
	ksort( $found );
	self::$generators = $found;

	return self::$generators;
}