includes/parser/Parser.php

   1 <?php
   2 /**
   3  * PHP parser that converts wiki markup to HTML.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @ingroup Parser
  22  */
  23
  24 namespace MediaWiki\Parser;
  25
  26 use BadMethodCallException;
  27 use Exception;
  28 use File;
  29 use HtmlArmor;
  30 use ImageGalleryBase;
  31 use ImageGalleryClassNotFoundException;
  32 use InvalidArgumentException;
  33 use LogicException;
  34 use MapCacheLRU;
  35 use MediaHandler;
  36 use MediaWiki\Cache\CacheKeyHelper;
  37 use MediaWiki\Category\TrackingCategories;
  38 use MediaWiki\Config\ServiceOptions;
  39 use MediaWiki\Content\TextContent;
  40 use MediaWiki\Context\RequestContext;
  41 use MediaWiki\Debug\DeprecationHelper;
  42 use MediaWiki\HookContainer\HookContainer;
  43 use MediaWiki\HookContainer\HookRunner;
  44 use MediaWiki\Html\Html;
  45 use MediaWiki\Http\HttpRequestFactory;
  46 use MediaWiki\Language\ILanguageConverter;
  47 use MediaWiki\Language\Language;
  48 use MediaWiki\Language\LanguageCode;
  49 use MediaWiki\Language\RawMessage;
  50 use MediaWiki\Languages\LanguageConverterFactory;
  51 use MediaWiki\Languages\LanguageNameUtils;
  52 use MediaWiki\Linker\Linker;
  53 use MediaWiki\Linker\LinkRenderer;
  54 use MediaWiki\Linker\LinkRendererFactory;
  55 use MediaWiki\Linker\LinkTarget;
  56 use MediaWiki\MainConfigNames;
  57 use MediaWiki\MediaWikiServices;
  58 use MediaWiki\Message\Message;
  59 use MediaWiki\Output\OutputPage;
  60 use MediaWiki\Page\File\BadFileLookup;
  61 use MediaWiki\Page\PageIdentity;
  62 use MediaWiki\Page\PageReference;
  63 use MediaWiki\Preferences\SignatureValidatorFactory;
  64 use MediaWiki\Request\FauxRequest;
  65 use MediaWiki\Revision\RevisionAccessException;
  66 use MediaWiki\Revision\RevisionRecord;
  67 use MediaWiki\Revision\SlotRecord;
  68 use MediaWiki\SpecialPage\SpecialPage;
  69 use MediaWiki\SpecialPage\SpecialPageFactory;
  70 use MediaWiki\Tidy\TidyDriverBase;
  71 use MediaWiki\Title\MalformedTitleException;
  72 use MediaWiki\Title\MediaWikiTitleCodec;
  73 use MediaWiki\Title\NamespaceInfo;
  74 use MediaWiki\Title\Title;
  75 use MediaWiki\Title\TitleFormatter;
  76 use MediaWiki\User\Options\UserOptionsLookup;
  77 use MediaWiki\User\User;
  78 use MediaWiki\User\UserFactory;
  79 use MediaWiki\User\UserIdentity;
  80 use MediaWiki\User\UserNameUtils;
  81 use MediaWiki\Utils\MWTimestamp;
  82 use MediaWiki\Utils\UrlUtils;
  83 use MediaWiki\Xml\Xml;
  84 use Psr\Log\LoggerInterface;
  85 use RuntimeException;
  86 use SectionProfiler;
  87 use StringUtils;
  88 use UnexpectedValueException;
  89 use Wikimedia\Bcp47Code\Bcp47CodeValue;
  90 use Wikimedia\IPUtils;
  91 use Wikimedia\Message\MessageParam;
  92 use Wikimedia\Message\MessageSpecifier;
  93 use Wikimedia\ObjectCache\WANObjectCache;
  94 use Wikimedia\Parsoid\Core\SectionMetadata;
  95 use Wikimedia\Parsoid\Core\TOCData;
  96 use Wikimedia\Parsoid\DOM\Comment;
  97 use Wikimedia\Parsoid\DOM\DocumentFragment;
  98 use Wikimedia\Parsoid\DOM\Element;
  99 use Wikimedia\Parsoid\DOM\Node;
 100 use Wikimedia\Parsoid\Utils\DOMCompat;
 101 use Wikimedia\Parsoid\Utils\DOMUtils;
 102 use Wikimedia\ScopedCallback;
 103
 104 /**
 105  * @defgroup Parser Parser
 106  */
 107
 108 /**
 109  * PHP Parser - Processes wiki markup (which uses a more user-friendly
 110  * syntax, such as "[[link]]" for making links), and provides a one-way
  111  * transformation of that wiki markup into (X)HTML output / markup
 112  * (which in turn the browser understands, and can display).
 113  *
 114  * There are seven main entry points into the Parser class:
 115  *
 116  * - Parser::parse()
 117  *     produces HTML output
 118  * - Parser::preSaveTransform()
 119  *     produces altered wiki markup
 120  * - Parser::preprocess()
 121  *     removes HTML comments and expands templates
 122  * - Parser::cleanSig() and Parser::cleanSigInSig()
 123  *     cleans a signature before saving it to preferences
 124  * - Parser::getSection()
 125  *     return the content of a section from an article for section editing
 126  * - Parser::replaceSection()
 127  *     replaces a section by number inside an article
 128  * - Parser::getPreloadText()
 129  *     removes <noinclude> sections and <includeonly> tags
 130  *
 131  * @warning $wgUser or $wgTitle or $wgRequest or $wgLang. Keep them away!
 132  *
 133  * @par Settings:
 134  * $wgNamespacesWithSubpages
 135  *
 136  * @par Settings only within ParserOptions:
 137  * $wgAllowExternalImages
 138  * $wgAllowSpecialInclusion
 139  * $wgInterwikiMagic
 140  * $wgMaxArticleSize
 141  *
 142  * @ingroup Parser
 143  */
 144 #[\AllowDynamicProperties]
 145 class Parser {
 146         use DeprecationHelper;
 147
 148         # Flags for Parser::setFunctionHook
 149         public const SFH_NO_HASH = 1;
 150         public const SFH_OBJECT_ARGS = 2;
 151
 152         # Constants needed for external link processing
 153         /**
 154          * Everything except bracket, space, or control characters.
  155          * \p{Zs} is the Unicode 'separator, space' category. It covers the space
  156          * 0x20 as well as U+3000 IDEOGRAPHIC SPACE (T21052).
 157          * \x{FFFD} is the Unicode replacement character, which the HTML5 spec
 158          * uses to replace invalid HTML characters.
 159          */
 160         public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
 161         /**
 162          * Simplified expression to match an IPv4 or IPv6 address, or
 163          * at least one character of a host name (embeds Parser::EXT_LINK_URL_CLASS)
 164          */
 165         // phpcs:ignore Generic.Files.LineLength
 166         private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
 167         /** RegExp to make image URLs (embeds IPv6 part of Parser::EXT_LINK_ADDR) */
 168         // phpcs:ignore Generic.Files.LineLength
 169         private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
 170                 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)avif|gif|jpg|jpeg|png|svg|webp)$/Sxu';
 171
 172         /** Regular expression for a non-newline space */
 173         private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
 174
 175         /**
 176          * @var int Preprocess wikitext in transclusion mode
 177          * @deprecated Since 1.36
 178          */
 179         public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
 180
 181         # Allowed values for $this->mOutputType
 182         /** Output type: like Parser::parse() */
 183         public const OT_HTML = 1;
 184         /** Output type: like Parser::preSaveTransform() */
 185         public const OT_WIKI = 2;
 186         /** Output type: like Parser::preprocess() */
 187         public const OT_PREPROCESS = 3;
 188         /**
 189          * Output type: like Parser::extractSections() - portions of the
 190          * original are returned unchanged.
 191          */
 192         public const OT_PLAIN = 4;
 193
 194         /**
 195          * @var string Prefix and suffix for temporary replacement strings
 196          * for the multipass parser.
 197          *
 198          * \x7f should never appear in input as it's disallowed in XML.
 199          * Using it at the front also gives us a little extra robustness
 200          * since it shouldn't match when butted up against identifier-like
 201          * string constructs.
 202          *
 203          * Must not consist of all title characters, or else it will change
 204          * the behavior of <nowiki> in a link.
 205          *
 206          * Must have a character that needs escaping in attributes, otherwise
 207          * someone could put a strip marker in an attribute, to get around
 208          * escaping quote marks, and break out of the attribute. Thus we add
 209          * `'".
 210          */
 211         public const MARKER_SUFFIX = "-QINU`\"'\x7f";
 212         public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
 213
 214         /**
 215          * Internal marker used by parser to track where the table of
 216          * contents should be. Various magic words can change the position
 217          * during the parse.  The table of contents is generated during
 218          * the parse, however skins have the final decision on whether the
 219          * table of contents is injected.  This placeholder element
 220          * identifies where in the page the table of contents should be
 221          * injected, if at all.
 222          * @var string
 223          * @see Keep this in sync with BlockLevelPass::execute() and
 224          *  RemexCompatMunger::isTableOfContentsMarker()
 225          * @internal Skins should *not* directly reference TOC_PLACEHOLDER
 226          * but instead use Parser::replaceTableOfContentsMarker().
 227          */
 228         public const TOC_PLACEHOLDER = '<meta property="mw:PageProp/toc" />';
 229
 230         /**
 231          * Permissive regexp matching TOC_PLACEHOLDER.  This allows for some
 232          * minor modifications to the placeholder to be made by extensions
 233          * without breaking the TOC (T317857); note also that Parsoid's version
 234          * of the placeholder might include additional attributes.
 235          * @var string
 236          */
 237         private const TOC_PLACEHOLDER_REGEX = '/<meta\\b[^>]*\\bproperty\\s*=\\s*"mw:PageProp\\/toc"[^>]*>/';
 238
 239         # Persistent:
 240         /** @var array<string,callable> */
 241         private array $mTagHooks = [];
 242         /** @var array<string,array{0:callable,1:int}> */
 243         private array $mFunctionHooks = [];
 244         /** @var array{0:array<string,string>,1:array<string,string>} */
 245         private array $mFunctionSynonyms = [ 0 => [], 1 => [] ];
 246         /** @var string[] */
 247         private array $mStripList = [];
 248         /** @var array<string,string> */
 249         private array $mVarCache = [];
 250         /** @var array<string,array<string,string[]>> */
 251         private array $mImageParams = [];
 252         /** @var array<string,MagicWordArray> */
 253         private array $mImageParamsMagicArray = [];
 254         /** @deprecated since 1.35 */
 255         public $mMarkerIndex = 0;
 256
 257         // Initialised by initializeVariables()
 258         /** @var MagicWordArray */
 259         private MagicWordArray $mVariables;
 260         private MagicWordArray $mSubstWords;
 261
 262         // Initialised in constructor
 263         /** @var string */
 264         private string $mExtLinkBracketedRegex;
 265         private UrlUtils $urlUtils;
 266         private Preprocessor $mPreprocessor;
 267
 268         // Cleared with clearState():
 269         /** @var ParserOutput */
 270         private ParserOutput $mOutput;
 271         private int $mAutonumber = 0;
 272         private StripState $mStripState;
 273         private LinkHolderArray $mLinkHolders;
 274         private int $mLinkID = 0;
 275         private array $mIncludeSizes;
 276         /**
 277          * @internal
 278          * @var int
 279          */
 280         public $mPPNodeCount;
 281         /**
 282          * @internal
 283          * @var int
 284          */
 285         public $mHighestExpansionDepth;
 286         private array $mTplRedirCache;
 287         /** @internal */
 288         public array $mHeadings;
 289         /** @var array<string,false> */
 290         private array $mDoubleUnderscores;
 291         /**
 292          * Number of expensive parser function calls
 293          * @deprecated since 1.35
 294          */
 295         public $mExpensiveFunctionCount;
 296         private bool $mShowToc;
 297         private bool $mForceTocPosition;
 298         private array $mTplDomCache;
 299         private ?UserIdentity $mUser;
 300
 301         # Temporary
 302         # These are variables reset at least once per parse regardless of $clearState
 303
 304         /**
 305          * @var ParserOptions|null
 306          * @deprecated since 1.35, use Parser::getOptions()
 307          */
 308         private $mOptions;
 309
 310         # Deprecated "dynamic" properties
 311         # These used to be dynamic properties added to the parser, but these
 312         # have been deprecated since 1.42.
 313         /** @deprecated since 1.42: T343229 */
 314         public $scribunto_engine;
 315         /** @deprecated since 1.42: T343230 */
 316         public $extCite;
 317         /** @deprecated since 1.42: T343226 */
 318         public $extTemplateStylesCache;
 319         /** @deprecated since 1.42: T357838 */
 320         public $static_tag_buf;
 321         /** @deprecated since 1.42: T203531 */
 322         public $mExtVariables;
 323         /** @deprecated since 1.42: T203532 */
 324         public $mExtArrays;
 325         /** @deprecated since 1.42: T359887 */
 326         public $mExtHashTables;
 327         /** @deprecated since 1.42: T203563 */
 328         public $mExtLoopsCounter;
 329         /** @deprecated since 1.42: T362664 */
 330         public $proofreadRenderingPages;
 331         /** @deprecated since 1.42: T362693 */
 332         public $mTemplatePath;
 333
 334         /**
 335          * Title context, used for self-link rendering and similar things
 336          *
 337          * @deprecated since 1.35, use Parser::getPage()
 338          */
 339         private Title $mTitle;
 340         /** Output type, one of the OT_xxx constants */
 341         private int $mOutputType;
 342         /** When false, suppress extension tag processing for OT_PREPROCESS */
 343         private bool $mStripExtTags = true;
 344         /**
 345          * Shortcut alias, see Parser::setOutputType()
 346          * @deprecated since 1.35
 347          */
 348         private array $ot;
 349         /** ID to display in {{REVISIONID}} tags */
 350         private ?int $mRevisionId = null;
 351         /** The timestamp of the specified revision ID */
 352         private ?string $mRevisionTimestamp = null;
 353         /** User to display in {{REVISIONUSER}} tag */
 354         private ?string $mRevisionUser = null;
 355         /** Size to display in {{REVISIONSIZE}} variable */
 356         private ?int $mRevisionSize = null;
 357         /** @var int|false For {{PAGESIZE}} on current page */
 358         private $mInputSize = false;
 359
 360         private ?RevisionRecord $mRevisionRecordObject = null;
 361
 362         /**
 363          * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
 364          *
 365          * @since 1.24
 366          */
 367         private ?MapCacheLRU $currentRevisionCache = null;
 368
 369         /**
 370          * @var bool|string Recursive call protection.
 371          * @internal
 372          */
 373         private $mInParse = false;
 374
 375         private SectionProfiler $mProfiler;
 376         private ?LinkRenderer $mLinkRenderer = null;
 377
 378         private MagicWordFactory $magicWordFactory;
 379         private Language $contLang;
 380         private LanguageConverterFactory $languageConverterFactory;
 381         private LanguageNameUtils $languageNameUtils;
 382         private ParserFactory $factory;
 383         private SpecialPageFactory $specialPageFactory;
 384         private TitleFormatter $titleFormatter;
 385         /**
  386          * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
  387          * $mOptions, which used to be public (direct access is now deprecated) and is widely used,
  388          * and also with the local variable $options used for ParserOptions throughout this file.
 389          */
 390         private ServiceOptions $svcOptions;
 391         private LinkRendererFactory $linkRendererFactory;
 392         private NamespaceInfo $nsInfo;
 393         private LoggerInterface $logger;
 394         private BadFileLookup $badFileLookup;
 395         private HookContainer $hookContainer;
 396         private HookRunner $hookRunner;
 397         private TidyDriverBase $tidy;
 398         private WANObjectCache $wanCache;
 399         private UserOptionsLookup $userOptionsLookup;
 400         private UserFactory $userFactory;
 401         private HttpRequestFactory $httpRequestFactory;
 402         private TrackingCategories $trackingCategories;
 403         private SignatureValidatorFactory $signatureValidatorFactory;
 404         private UserNameUtils $userNameUtils;
 405
 406         /**
 407          * @internal For use by ServiceWiring
 408          */
 409         public const CONSTRUCTOR_OPTIONS = [
 410                 // See documentation for the corresponding config options
 411                 // Many of these are only used in (eg) CoreMagicVariables
 412                 MainConfigNames::AllowDisplayTitle,
 413                 MainConfigNames::AllowSlowParserFunctions,
 414                 MainConfigNames::ArticlePath,
 415                 MainConfigNames::EnableScaryTranscluding,
 416                 MainConfigNames::ExtraInterlanguageLinkPrefixes,
 417                 MainConfigNames::FragmentMode,
 418                 MainConfigNames::Localtimezone,
 419                 MainConfigNames::MaxSigChars,
 420                 MainConfigNames::MaxTocLevel,
 421                 MainConfigNames::MiserMode,
 422                 MainConfigNames::RawHtml,
 423                 MainConfigNames::ScriptPath,
 424                 MainConfigNames::Server,
 425                 MainConfigNames::ServerName,
 426                 MainConfigNames::ShowHostnames,
 427                 MainConfigNames::SignatureValidation,
 428                 MainConfigNames::Sitename,
 429                 MainConfigNames::StylePath,
 430                 MainConfigNames::TranscludeCacheExpiry,
 431                 MainConfigNames::PreprocessorCacheThreshold,
 432                 MainConfigNames::ParserEnableLegacyMediaDOM,
 433                 MainConfigNames::EnableParserLimitReporting,
 434                 MainConfigNames::ParserEnableUserLanguage,
 435                 MainConfigNames::ParsoidFragmentSupport,
 436         ];
 437
 438         /**
 439          * Constructing parsers directly is not allowed! Use a ParserFactory.
 440          * @internal
 441          *
 442          * @param ServiceOptions $svcOptions
 443          * @param MagicWordFactory $magicWordFactory
 444          * @param Language $contLang Content language
 445          * @param ParserFactory $factory
 446          * @param UrlUtils $urlUtils
 447          * @param SpecialPageFactory $spFactory
 448          * @param LinkRendererFactory $linkRendererFactory
 449          * @param NamespaceInfo $nsInfo
 450          * @param LoggerInterface $logger
 451          * @param BadFileLookup $badFileLookup
 452          * @param LanguageConverterFactory $languageConverterFactory
 453          * @param LanguageNameUtils $languageNameUtils
 454          * @param HookContainer $hookContainer
 455          * @param TidyDriverBase $tidy
 456          * @param WANObjectCache $wanCache
 457          * @param UserOptionsLookup $userOptionsLookup
 458          * @param UserFactory $userFactory
 459          * @param TitleFormatter $titleFormatter
 460          * @param HttpRequestFactory $httpRequestFactory
 461          * @param TrackingCategories $trackingCategories
 462          * @param SignatureValidatorFactory $signatureValidatorFactory
 463          * @param UserNameUtils $userNameUtils
 464          */
 465         public function __construct(
 466                 ServiceOptions $svcOptions,
 467                 MagicWordFactory $magicWordFactory,
 468                 Language $contLang,
 469                 ParserFactory $factory,
 470                 UrlUtils $urlUtils,
 471                 SpecialPageFactory $spFactory,
 472                 LinkRendererFactory $linkRendererFactory,
 473                 NamespaceInfo $nsInfo,
 474                 LoggerInterface $logger,
 475                 BadFileLookup $badFileLookup,
 476                 LanguageConverterFactory $languageConverterFactory,
 477                 LanguageNameUtils $languageNameUtils,
 478                 HookContainer $hookContainer,
 479                 TidyDriverBase $tidy,
 480                 WANObjectCache $wanCache,
 481                 UserOptionsLookup $userOptionsLookup,
 482                 UserFactory $userFactory,
 483                 TitleFormatter $titleFormatter,
 484                 HttpRequestFactory $httpRequestFactory,
 485                 TrackingCategories $trackingCategories,
 486                 SignatureValidatorFactory $signatureValidatorFactory,
 487                 UserNameUtils $userNameUtils
 488         ) {
 489                 $this->deprecateDynamicPropertiesAccess( '1.42', __CLASS__ );
 490                 $this->deprecatePublicProperty( 'ot', '1.35', __CLASS__ );
 491                 $this->deprecatePublicProperty( 'mTitle', '1.35', __CLASS__ );
 492                 $this->deprecatePublicProperty( 'mOptions', '1.35', __CLASS__ );
 493
 494                 if ( ParserFactory::$inParserFactory === 0 ) {
 495                         // Direct construction of Parser was deprecated in 1.34 and
 496                         // removed in 1.36; use a ParserFactory instead.
 497                         throw new BadMethodCallException( 'Direct construction of Parser not allowed' );
 498                 }
 499                 $svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );
 500                 $this->svcOptions = $svcOptions;
 501
 502                 $this->urlUtils = $urlUtils;
 503                 $this->mExtLinkBracketedRegex = '/\[(((?i)' . $this->urlUtils->validProtocols() . ')' .
 504                         self::EXT_LINK_ADDR .
 505                         self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*)\]/Su';
 506
 507                 $this->magicWordFactory = $magicWordFactory;
 508
 509                 $this->contLang = $contLang;
 510
 511                 $this->factory = $factory;
 512                 $this->specialPageFactory = $spFactory;
 513                 $this->linkRendererFactory = $linkRendererFactory;
 514                 $this->nsInfo = $nsInfo;
 515                 $this->logger = $logger;
 516                 $this->badFileLookup = $badFileLookup;
 517
 518                 $this->languageConverterFactory = $languageConverterFactory;
 519                 $this->languageNameUtils = $languageNameUtils;
 520
 521                 $this->hookContainer = $hookContainer;
 522                 $this->hookRunner = new HookRunner( $hookContainer );
 523
 524                 $this->tidy = $tidy;
 525
 526                 $this->wanCache = $wanCache;
 527                 $this->mPreprocessor = new Preprocessor_Hash(
 528                         $this,
 529                         $this->wanCache,
 530                         [
 531                                 'cacheThreshold' => $svcOptions->get( MainConfigNames::PreprocessorCacheThreshold ),
 532                                 'disableLangConversion' => $languageConverterFactory->isConversionDisabled(),
 533                         ]
 534                 );
 535
 536                 $this->userOptionsLookup = $userOptionsLookup;
 537                 $this->userFactory = $userFactory;
 538                 $this->titleFormatter = $titleFormatter;
 539                 $this->httpRequestFactory = $httpRequestFactory;
 540                 $this->trackingCategories = $trackingCategories;
 541                 $this->signatureValidatorFactory = $signatureValidatorFactory;
 542                 $this->userNameUtils = $userNameUtils;
 543
 544                 // These steps used to be done in "::firstCallInit()"
 545                 // (if you're chasing a reference from some old code)
 546                 CoreParserFunctions::register(
 547                         $this,
 548                         new ServiceOptions( CoreParserFunctions::REGISTER_OPTIONS, $svcOptions )
 549                 );
 550                 CoreTagHooks::register(
 551                         $this,
 552                         new ServiceOptions( CoreTagHooks::REGISTER_OPTIONS, $svcOptions )
 553                 );
 554                 $this->initializeVariables();
 555
 556                 $this->hookRunner->onParserFirstCallInit( $this );
 557                 $this->mTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/Missing' );
 558         }
 559
 560         /**
 561          * Reduce memory usage to reduce the impact of circular references
 562          */
 563         public function __destruct() {
 564                 // @phan-suppress-next-line PhanRedundantCondition Typed property not set in constructor, may be uninitialized
 565                 if ( isset( $this->mLinkHolders ) ) {
 566                         // @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
 567                         unset( $this->mLinkHolders );
 568                 }
 569                 // @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
 570                 foreach ( $this as $name => $value ) {
 571                         unset( $this->$name );
 572                 }
 573         }
 574
 575         /**
 576          * Allow extensions to clean up when the parser is cloned
 577          */
 578         public function __clone() {
 579                 $this->mInParse = false;
 580
 581                 $this->mPreprocessor = clone $this->mPreprocessor;
 582                 $this->mPreprocessor->resetParser( $this );
 583
 584                 $this->hookRunner->onParserCloned( $this );
 585         }
 586
 587         /**
 588          * Used to do various kinds of initialisation on the first call of the
 589          * parser.
 590          * @deprecated since 1.35, this initialization is done in the constructor
 591          *  and manual calls to ::firstCallInit() have no effect.
 592          * @since 1.7
 593          */
 594         public function firstCallInit() {
 595                 /*
 596                  * This method should be hard-deprecated once remaining calls are
 597                  * removed; it no longer does anything.
 598                  */
 599         }
 600
 601         /**
 602          * Clear Parser state
 603          *
 604          * @internal
 605          */
 606         public function clearState() {
 607                 $this->resetOutput();
 608                 $this->mAutonumber = 0;
 609                 $this->mLinkHolders = new LinkHolderArray(
 610                         $this,
 611                         $this->getContentLanguageConverter(),
 612                         $this->getHookContainer()
 613                 );
 614                 $this->mLinkID = 0;
 615                 $this->mRevisionTimestamp = null;
 616                 $this->mRevisionId = null;
 617                 $this->mRevisionUser = null;
 618                 $this->mRevisionSize = null;
 619                 $this->mRevisionRecordObject = null;
 620                 $this->mVarCache = [];
 621                 $this->mUser = null;
 622                 $this->currentRevisionCache = null;
 623
 624                 $this->mStripState = new StripState( $this );
 625
 626                 # Clear these on every parse, T6549
 627                 $this->mTplRedirCache = [];
 628                 $this->mTplDomCache = [];
 629
 630                 $this->mShowToc = true;
 631                 $this->mForceTocPosition = false;
 632                 $this->mIncludeSizes = [
 633                         'post-expand' => 0,
 634                         'arg' => 0,
 635                 ];
 636                 $this->mPPNodeCount = 0;
 637                 $this->mHighestExpansionDepth = 0;
 638                 $this->mHeadings = [];
 639                 $this->mDoubleUnderscores = [];
 640                 $this->mExpensiveFunctionCount = 0;
 641
 642                 $this->mProfiler = new SectionProfiler();
 643
 644                 $this->hookRunner->onParserClearState( $this );
 645         }
 646
 647         /**
 648          * Reset the ParserOutput
 649          * @since 1.34
 650          */
 651         public function resetOutput() {
 652                 $this->mOutput = new ParserOutput;
 653                 $this->mOptions->registerWatcher( [ $this->mOutput, 'recordOption' ] );
 654         }
 655
 656         /**
 657          * Convert wikitext to HTML
 658          * Do not call this function recursively.
 659          *
 660          * @param string $text Text we want to parse
 661          * @param-taint $text escapes_htmlnoent
 662          * @param PageReference $page
 663          * @param ParserOptions $options
 664          * @param bool $linestart
 665          * @param bool $clearState
 666          * @param int|null $revid ID of the revision being rendered. This is used to render
 667          *  REVISION* magic words. 0 means that any current revision will be used. Null means
 668          *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
 669          *  use the current timestamp.
 670          * @return ParserOutput
 671          * @return-taint escaped
 672          * @since 1.10 method is public
 673          */
 674         public function parse(
 675                 $text, PageReference $page, ParserOptions $options,
 676                 $linestart = true, $clearState = true, $revid = null
 677         ) {
 678                 if ( $clearState ) {
 679                         // We use U+007F DELETE to construct strip markers, so we have to make
 680                         // sure that this character does not occur in the input text.
 681                         $text = strtr( $text, "\x7f", "?" );
 682                         $magicScopeVariable = $this->lock();
 683                 }
 684                 // Strip U+0000 NULL (T159174)
 685                 $text = str_replace( "\000", '', $text );
 686
 687                 $this->startParse( $page, $options, self::OT_HTML, $clearState );
 688
 689                 $this->currentRevisionCache = null;
 690                 $this->mInputSize = strlen( $text );
 691                 $this->mOutput->resetParseStartTime();
 692
 693                 $oldRevisionId = $this->mRevisionId;
 694                 $oldRevisionRecordObject = $this->mRevisionRecordObject;
 695                 $oldRevisionTimestamp = $this->mRevisionTimestamp;
 696                 $oldRevisionUser = $this->mRevisionUser;
 697                 $oldRevisionSize = $this->mRevisionSize;
 698                 if ( $revid !== null ) {
 699                         $this->mRevisionId = $revid;
 700                         $this->mRevisionRecordObject = null;
 701                         $this->mRevisionTimestamp = null;
 702                         $this->mRevisionUser = null;
 703                         $this->mRevisionSize = null;
 704                 }
 705
 706                 $text = $this->internalParse( $text );
 707                 $this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );
 708
 709                 $text = $this->internalParseHalfParsed( $text, true, $linestart );
 710
 711                 /**
 712                  * A converted title will be provided in the output object if title and
 713                  * content conversion are enabled, the article text does not contain
 714                  * a conversion-suppressing double-underscore tag, and no
 715                  * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
 716                  * automatic link conversion.
 717                  */
 718                 if ( !$options->getDisableTitleConversion()
 719                         && !isset( $this->mDoubleUnderscores['nocontentconvert'] )
 720                         && !isset( $this->mDoubleUnderscores['notitleconvert'] )
 721                         && $this->mOutput->getDisplayTitle() === false
 722                 ) {
 723                         $titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
 724                         if ( $titleText !== false ) {
 725                                 $titleText = Sanitizer::removeSomeTags( $titleText );
 726                         } else {
 727                                 [ $nsText, $nsSeparator, $mainText ] = $this->getTargetLanguageConverter()->convertSplitTitle( $page );
 728                                 // In the future, those three pieces could be stored separately rather than joined into $titleText,
 729                                 // and OutputPage would format them and join them together, to resolve T314399.
 730                                 $titleText = self::formatPageTitle( $nsText, $nsSeparator, $mainText );
 731                         }
 732                         $this->mOutput->setTitleText( $titleText );
 733                 }
 734
 735                 # Recording timing info. Must be called before finalizeAdaptiveCacheExpiry() and
 736                 # makeLimitReport(), which make use of the timing info.
 737                 $this->mOutput->recordTimeProfile();
 738
 739                 # Compute runtime adaptive expiry if set
 740                 $this->mOutput->finalizeAdaptiveCacheExpiry();
 741
 742                 # Warn if too many heavyweight parser functions were used
 743                 if ( $this->mExpensiveFunctionCount > $options->getExpensiveParserFunctionLimit() ) {
 744                         $this->limitationWarn( 'expensive-parserfunction',
 745                                 $this->mExpensiveFunctionCount,
 746                                 $options->getExpensiveParserFunctionLimit()
 747                         );
 748                 }
 749
 750                 # Information on limits, for the benefit of users who try to skirt them
 751                 if ( $this->svcOptions->get( MainConfigNames::EnableParserLimitReporting ) ) {
 752                         $this->makeLimitReport( $this->mOptions, $this->mOutput );
 753                 }
 754
 755                 $this->mOutput->setFromParserOptions( $options );
 756
 757                 $this->mOutput->setRawText( $text );
 758
 759                 $this->mRevisionId = $oldRevisionId;
 760                 $this->mRevisionRecordObject = $oldRevisionRecordObject;
 761                 $this->mRevisionTimestamp = $oldRevisionTimestamp;
 762                 $this->mRevisionUser = $oldRevisionUser;
 763                 $this->mRevisionSize = $oldRevisionSize;
 764                 $this->mInputSize = false;
 765                 $this->currentRevisionCache = null;
 766
 767                 return $this->mOutput;
 768         }
 769
 770         /**
 771          * Set the limit report data in the current ParserOutput.
 772          * @internal
 773          */
 774         public function makeLimitReport(
 775                 ParserOptions $parserOptions, ParserOutput $parserOutput
 776         ) {
 777                 $maxIncludeSize = $parserOptions->getMaxIncludeSize();
 778
 779                 $cpuTime = $parserOutput->getTimeProfile( 'cpu' );
 780                 if ( $cpuTime !== null ) {
 781                         $parserOutput->setLimitReportData( 'limitreport-cputime',
 782                                 sprintf( "%.3f", $cpuTime )
 783                         );
 784                 }
 785
 786                 $wallTime = $parserOutput->getTimeProfile( 'wall' );
 787                 $parserOutput->setLimitReportData( 'limitreport-walltime',
 788                         sprintf( "%.3f", $wallTime )
 789                 );
 790
 791                 $parserOutput->setLimitReportData( 'limitreport-ppvisitednodes',
 792                         [ $this->mPPNodeCount, $parserOptions->getMaxPPNodeCount() ]
 793                 );
 794                 $parserOutput->setLimitReportData( 'limitreport-postexpandincludesize',
 795                         [ $this->mIncludeSizes['post-expand'], $maxIncludeSize ]
 796                 );
 797                 $parserOutput->setLimitReportData( 'limitreport-templateargumentsize',
 798                         [ $this->mIncludeSizes['arg'], $maxIncludeSize ]
 799                 );
 800                 $parserOutput->setLimitReportData( 'limitreport-expansiondepth',
 801                         [ $this->mHighestExpansionDepth, $parserOptions->getMaxPPExpandDepth() ]
 802                 );
 803                 $parserOutput->setLimitReportData( 'limitreport-expensivefunctioncount',
 804                         [ $this->mExpensiveFunctionCount, $parserOptions->getExpensiveParserFunctionLimit() ]
 805                 );
 806
 807                 foreach ( $this->mStripState->getLimitReport() as [ $key, $value ] ) {
 808                         $parserOutput->setLimitReportData( $key, $value );
 809                 }
 810
 811                 $this->hookRunner->onParserLimitReportPrepare( $this, $parserOutput );
 812
 813                 // Add on template profiling data in human/machine readable way
 814                 $dataByFunc = $this->mProfiler->getFunctionStats();
 815                 uasort( $dataByFunc, static function ( $a, $b ) {
 816                         return $b['real'] <=> $a['real']; // descending order
 817                 } );
 818                 $profileReport = [];
 819                 foreach ( array_slice( $dataByFunc, 0, 10 ) as $item ) {
 820                         $profileReport[] = sprintf( "%6.2f%% %8.3f %6d %s",
 821                                 $item['%real'], $item['real'], $item['calls'],
 822                                 htmlspecialchars( $item['name'] ) );
 823                 }
 824
 825                 $parserOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );
 826
 827                 // Add other cache related metadata
 828                 if ( $this->svcOptions->get( MainConfigNames::ShowHostnames ) ) {
 829                         $parserOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
 830                 }
 831                 $parserOutput->setLimitReportData( 'cachereport-timestamp',
 832                         $parserOutput->getCacheTime() );
 833                 $parserOutput->setLimitReportData( 'cachereport-ttl',
 834                         $parserOutput->getCacheExpiry() );
 835                 $parserOutput->setLimitReportData( 'cachereport-transientcontent',
 836                         $parserOutput->hasReducedExpiry() );
 837         }
 838
 839         /**
 840          * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 841          * can be called from an extension tag hook.
 842          *
 843          * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
 844          * instead, which means that lists and links have not been fully parsed yet,
 845          * and strip markers are still present.
 846          *
 847          * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
 848          *
 849          * Use this function if you're a parser tag hook and you want to parse
 850          * wikitext before or after applying additional transformations, and you
 851          * intend to *return the result as hook output*, which will cause it to go
 852          * through the rest of parsing process automatically.
 853          *
 854          * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 855          * $text are not expanded
 856          *
 857          * @param string $text Text extension wants to have parsed
 858          * @param-taint $text escapes_htmlnoent
 859          * @param PPFrame|false $frame The frame to use for expanding any template variables
 860          * @return string UNSAFE half-parsed HTML
 861          * @return-taint escaped
 862          * @since 1.8
 863          */
 864         public function recursiveTagParse( $text, $frame = false ) {
 865                 $text = $this->internalParse( $text, false, $frame );
 866                 return $text;
 867         }
 868
 869         /**
 870          * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 871          * point can be called from an extension tag hook.
 872          *
 873          * The output of this function is fully-parsed HTML that is safe for output.
 874          * If you're a parser tag hook, you might want to use recursiveTagParse()
 875          * instead.
 876          *
 877          * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 878          * $text are not expanded
 879          *
 880          * @since 1.25
 881          *
 882          * @param string $text Text extension wants to have parsed
 883          * @param-taint $text escapes_htmlnoent
 884          * @param PPFrame|false $frame The frame to use for expanding any template variables
 885          * @return string Fully parsed HTML
 886          * @return-taint escaped
 887          */
 888         public function recursiveTagParseFully( $text, $frame = false ) {
 889                 $text = $this->recursiveTagParse( $text, $frame );
 890                 $text = $this->internalParseHalfParsed( $text, false );
 891                 return $text;
 892         }
 893
 894         /**
 895          * Needed by Parsoid/PHP to ensure all the hooks for extensions
 896          * are run in the right order. The primary differences between this
 897          * and recursiveTagParseFully are:
 898          * (a) absence of $frame
 899          * (b) passing true to internalParseHalfParse so all hooks are run
 900          * (c) running 'ParserAfterParse' hook at the same point in the parsing
 901          *     pipeline when parse() does it. This kinda mimics Parsoid/JS behavior
 902          *     where exttags are processed by the M/w API.
 903          *
 904          * This is a temporary convenience method and will go away as we proceed
 905          * further with Parsoid <-> Parser.php integration.
 906          *
 907          * @internal
 908          * @deprecated
 909          * @param string $text Wikitext source of the extension
 910          * @return string
 911          * @return-taint escaped
 912          */
 913         public function parseExtensionTagAsTopLevelDoc( $text ) {
 914                 $text = $this->recursiveTagParse( $text );
 915                 $this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );
 916                 $text = $this->internalParseHalfParsed( $text, true );
 917                 return $text;
 918         }
 919
 920         /**
 921          * Expand templates and variables in the text, producing valid, static wikitext.
 922          * Also removes comments.
 923          * Do not call this function recursively.
 924          * @param string $text
 925          * @param ?PageReference $page
 926          * @param ParserOptions $options
 927          * @param int|null $revid
 928          * @param PPFrame|false $frame
 929          * @return mixed|string
 930          * @since 1.8
 931          */
 932         public function preprocess(
 933                 $text,
 934                 ?PageReference $page,
 935                 ParserOptions $options,
 936                 $revid = null,
 937                 $frame = false
 938         ) {
 939                 $magicScopeVariable = $this->lock();
 940                 $this->startParse( $page, $options, self::OT_PREPROCESS, true );
 941                 if ( $revid !== null ) {
 942                         $this->mRevisionId = $revid;
 943                 }
 944                 $this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );
 945                 $text = $this->replaceVariables( $text, $frame );
 946                 $text = $this->mStripState->unstripBoth( $text );
 947                 return $text;
 948         }
 949
 950         /**
 951          * Recursive parser entry point that can be called from an extension tag
 952          * hook.
 953          *
 954          * @param string $text Text to be expanded
 955          * @param PPFrame|false $frame The frame to use for expanding any template variables
 956          * @return string
 957          * @since 1.19
 958          */
 959         public function recursivePreprocess( $text, $frame = false ) {
 960                 $text = $this->replaceVariables( $text, $frame );
 961                 $text = $this->mStripState->unstripBoth( $text );
 962                 return $text;
 963         }
 964
 965         /**
 966          * Process the wikitext for the "?preload=" feature. (T7210)
 967          *
 968          * "<noinclude>", "<includeonly>" etc. are parsed as for template
 969          * transclusion, comments, templates, arguments, tags hooks and parser
 970          * functions are untouched.
 971          *
 972          * @param string $text
 973          * @param PageReference $page
 974          * @param ParserOptions $options
 975          * @param array $params
 976          * @return string
 977          * @since 1.17
 978          */
 979         public function getPreloadText( $text, PageReference $page, ParserOptions $options, $params = [] ) {
 980                 $msg = new RawMessage( $text );
 981                 $text = $msg->params( $params )->plain();
 982
 983                 # Parser (re)initialisation
 984                 $magicScopeVariable = $this->lock();
 985                 $this->startParse( $page, $options, self::OT_PLAIN, true );
 986
 987                 $flags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
 988                 $dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
 989                 $text = $this->getPreprocessor()->newFrame()->expand( $dom, $flags );
 990                 $text = $this->mStripState->unstripBoth( $text );
 991                 return $text;
 992         }
 993
 994         /**
 995          * Set the current user.
 996          * Should only be used when doing pre-save transform.
 997          *
 998          * @param UserIdentity|null $user user identity or null (to reset)
 999          * @since 1.17
1000          */
1001         public function setUser( ?UserIdentity $user ) {
1002                 $this->mUser = $user;
1003         }
1004
1005         /**
1006          * Set the context title
1007          *
1008          * @deprecated since 1.37, use setPage() instead.
1009          * @param Title|null $t
1010          * @since 1.12
1011          */
1012         public function setTitle( ?Title $t = null ) {
1013                 $this->setPage( $t );
1014         }
1015
1016         /**
1017          * @since 1.6
1018          * @deprecated since 1.37, use getPage instead.
1019          * @return Title
1020          */
1021         public function getTitle(): Title {
1022                 return $this->mTitle;
1023         }
1024
1025         /**
1026          * Set the page used as context for parsing, e.g. when resolving relative subpage links.
1027          *
1028          * @since 1.37
1029          * @param ?PageReference $t
1030          */
1031         public function setPage( ?PageReference $t = null ) {
1032                 if ( !$t ) {
1033                         $t = Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );
1034                 } else {
1035                         // For now (early 1.37 alpha), always convert to Title, so we don't have to do it over
1036                         // and over again in other methods. Eventually, we will no longer need to have a Title
1037                         // instance internally.
1038                         $t = Title::newFromPageReference( $t );
1039                 }
1040
1041                 if ( $t->hasFragment() ) {
1042                         # Strip the fragment to avoid various odd effects
1043                         $this->mTitle = $t->createFragmentTarget( '' );
1044                 } else {
1045                         $this->mTitle = $t;
1046                 }
1047         }
1048
1049         /**
1050          * Returns the page used as context for parsing, e.g. when resolving relative subpage links.
1051          * @since 1.37
1052          * @return ?PageReference Null if no page is set (deprecated since 1.34)
1053          */
1054         public function getPage(): ?PageReference {
1055                 if ( $this->mTitle->isSpecial( 'Badtitle' ) ) {
1056                         [ , $subPage ] = $this->specialPageFactory->resolveAlias( $this->mTitle->getDBkey() );
1057
1058                         if ( $subPage === 'Missing' ) {
1059                                 wfDeprecated( __METHOD__ . ' without a Title set', '1.34' );
1060                                 return null;
1061                         }
1062                 }
1063
1064                 return $this->mTitle;
1065         }
1066
1067         /**
1068          * Accessor for the output type.
1069          * @return int One of the Parser::OT_... constants
1070          * @since 1.35
1071          */
1072         public function getOutputType(): int {
1073                 return $this->mOutputType;
1074         }
1075
1076         /**
1077          * Mutator for the output type.
1078          * @param int $ot One of the Parser::OT_… constants
1079          * @since 1.8
1080          */
1081         public function setOutputType( $ot ): void {
1082                 $this->mOutputType = $ot;
1083                 # Shortcut alias
1084                 $this->ot = [
1085                         'html' => $ot == self::OT_HTML,
1086                         'wiki' => $ot == self::OT_WIKI,
1087                         'pre' => $ot == self::OT_PREPROCESS,
1088                         'plain' => $ot == self::OT_PLAIN,
1089                 ];
1090         }
1091
1092         /**
1093          * Accessor/mutator for the output type
1094          *
1095          * @param int|null $x New value or null to just get the current one
1096          * @return int
1097          * @deprecated since 1.35, use getOutputType()/setOutputType()
1098          */
1099         public function OutputType( $x = null ) {
1100                 wfDeprecated( __METHOD__, '1.35' );
1101                 return wfSetVar( $this->mOutputType, $x );
1102         }
1103
1104         /**
1105          * @return ParserOutput
1106          * @since 1.14
1107          */
1108         public function getOutput() {
1109                 // @phan-suppress-next-line PhanRedundantCondition False positive, see https://github.com/phan/phan/issues/4720
1110                 if ( !isset( $this->mOutput ) ) {
1111                         wfDeprecated( __METHOD__ . ' before initialization', '1.42' );
1112                         // @phan-suppress-next-line PhanTypeMismatchReturnProbablyReal We don’t want to tell anyone we’re doing this
1113                         return null;
1114                 }
1115                 return $this->mOutput;
1116         }
1117
1118         /**
1119          * @return ParserOptions|null
1120          * @since 1.6
1121          */
1122         public function getOptions() {
1123                 return $this->mOptions;
1124         }
1125
1126         /**
1127          * Mutator for the ParserOptions object
1128          * @param ParserOptions $options The new parser options
1129          * @since 1.35
1130          */
1131         public function setOptions( ParserOptions $options ): void {
1132                 $this->mOptions = $options;
1133         }
1134
1135         /**
1136          * Accessor/mutator for the ParserOptions object
1137          *
1138          * @param ParserOptions|null $x New value or null to just get the current one
1139          * @return ParserOptions Current ParserOptions object
1140          * @deprecated since 1.35, use getOptions() / setOptions()
1141          */
1142         public function Options( $x = null ) {
1143                 wfDeprecated( __METHOD__, '1.35' );
1144                 return wfSetVar( $this->mOptions, $x );
1145         }
1146
1147         /**
1148          * @return int
1149          * @since 1.14
1150          */
1151         public function nextLinkID() {
1152                 return $this->mLinkID++;
1153         }
1154
1155         /**
1156          * @param int $id
1157          * @since 1.8
1158          */
1159         public function setLinkID( $id ) {
1160                 $this->mLinkID = $id;
1161         }
1162
1163         /**
1164          * Get a language object for use in parser functions such as {{FORMATNUM:}}
1165          * @return Language
1166          * @since 1.7
1167          * @deprecated since 1.40; use ::getTargetLanguage() instead.
1168          */
1169         public function getFunctionLang() {
1170                 wfDeprecated( __METHOD__, '1.40' );
1171                 return $this->getTargetLanguage();
1172         }
1173
1174         /**
1175          * Get the target language for the content being parsed. This is usually the
1176          * language that the content is in.
1177          *
1178          * @since 1.19
1179          *
1180          * @return Language
1181          */
1182         public function getTargetLanguage() {
1183                 $target = $this->mOptions->getTargetLanguage();
1184
1185                 if ( $target !== null ) {
1186                         return $target;
1187                 } elseif ( $this->mOptions->getInterfaceMessage() ) {
1188                         return $this->mOptions->getUserLangObj();
1189                 }
1190
1191                 return $this->getTitle()->getPageLanguage();
1192         }
1193
1194         /**
1195          * Get a user either from the user set on Parser if it's set,
1196          * or from the ParserOptions object otherwise.
1197          *
1198          * @since 1.36
1199          * @return UserIdentity
1200          */
1201         public function getUserIdentity(): UserIdentity {
1202                 return $this->mUser ?? $this->getOptions()->getUserIdentity();
1203         }
1204
1205         /**
1206          * Get a preprocessor object
1207          *
1208          * @return Preprocessor
1209          * @since 1.12.0
1210          */
1211         public function getPreprocessor() {
1212                 return $this->mPreprocessor;
1213         }
1214
1215         /**
1216          * Get a LinkRenderer instance to make links with
1217          *
1218          * @since 1.28
1219          * @return LinkRenderer
1220          */
1221         public function getLinkRenderer() {
1222                 // XXX We make the LinkRenderer with current options and then cache it forever
1223                 if ( !$this->mLinkRenderer ) {
1224                         $this->mLinkRenderer = $this->linkRendererFactory->create();
1225                 }
1226
1227                 return $this->mLinkRenderer;
1228         }
1229
1230         /**
1231          * Get the MagicWordFactory that this Parser is using
1232          *
1233          * @since 1.32
1234          * @return MagicWordFactory
1235          */
1236         public function getMagicWordFactory() {
1237                 return $this->magicWordFactory;
1238         }
1239
1240         /**
1241          * Get the content language that this Parser is using
1242          *
1243          * @since 1.32
1244          * @return Language
1245          */
1246         public function getContentLanguage() {
1247                 return $this->contLang;
1248         }
1249
1250         /**
1251          * Get the BadFileLookup instance that this Parser is using
1252          *
1253          * @since 1.35
1254          * @return BadFileLookup
1255          */
1256         public function getBadFileLookup() {
1257                 return $this->badFileLookup;
1258         }
1259
1260         /**
1261          * Replaces all occurrences of HTML-style comments and the given tags
1262          * in the text with a random marker and returns the next text. The output
1263          * parameter $matches will be an associative array filled with data in
1264          * the form:
1265          *
1266          * @code
1267          *   'UNIQ-xxxxx' => [
1268          *     'element',
1269          *     'tag content',
1270          *     [ 'param' => 'x' ],
1271          *     '<element param="x">tag content</element>' ]
1272          * @endcode
1273          *
1274          * @param string[] $elements List of element names. Comments are always extracted.
1275          * @param string $text Source text string.
1276          * @param array[] &$matches Out parameter, Array: extracted tags
1277          * @return string Stripped text
1278          */
1279         public static function extractTagsAndParams( array $elements, $text, &$matches ) {
1280                 static $n = 1;
1281                 $stripped = '';
1282                 $matches = [];
1283
1284                 $taglist = implode( '|', $elements );
1285                 $start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";
1286
1287                 while ( $text != '' ) {
1288                         $p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
1289                         $stripped .= $p[0];
1290                         if ( count( $p ) < 5 ) {
1291                                 break;
1292                         }
1293                         if ( count( $p ) > 5 ) {
1294                                 # comment
1295                                 $element = $p[4];
1296                                 $attributes = '';
1297                                 $close = '';
1298                                 $inside = $p[5];
1299                         } else {
1300                                 # tag
1301                                 [ , $element, $attributes, $close, $inside ] = $p;
1302                         }
1303
1304                         $marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
1305                         $stripped .= $marker;
1306
1307                         if ( $close === '/>' ) {
1308                                 # Empty element tag, <tag />
1309                                 $content = null;
1310                                 $text = $inside;
1311                                 $tail = null;
1312                         } else {
1313                                 if ( $element === '!--' ) {
1314                                         $end = '/(-->)/';
1315                                 } else {
1316                                         $end = "/(<\\/$element\\s*>)/i";
1317                                 }
1318                                 $q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
1319                                 $content = $q[0];
1320                                 if ( count( $q ) < 3 ) {
1321                                         # No end tag -- let it run out to the end of the text.
1322                                         $tail = '';
1323                                         $text = '';
1324                                 } else {
1325                                         [ , $tail, $text ] = $q;
1326                                 }
1327                         }
1328
1329                         $matches[$marker] = [ $element,
1330                                 $content,
1331                                 Sanitizer::decodeTagAttributes( $attributes ),
1332                                 "<$element$attributes$close$content$tail" ];
1333                 }
1334                 return $stripped;
1335         }
1336
1337         /**
1338          * Get a list of strippable XML-like elements
1339          *
1340          * @return array
1341          */
1342         public function getStripList() {
1343                 return $this->mStripList;
1344         }
1345
1346         /**
1347          * @return StripState
1348          * @since 1.34
1349          */
1350         public function getStripState() {
1351                 return $this->mStripState;
1352         }
1353
1354         /**
1355          * Add an item to the strip state
1356          * Returns the unique tag which must be inserted into the stripped text
1357          * The tag will be replaced with the original text in unstrip()
1358          *
1359          * @param string $text
1360          *
1361          * @return string
1362          */
1363         public function insertStripItem( $text ) {
1364                 $marker = self::MARKER_PREFIX . "-item-{$this->mMarkerIndex}-" . self::MARKER_SUFFIX;
1365                 $this->mMarkerIndex++;
1366                 $this->mStripState->addGeneral( $marker, $text );
1367                 return $marker;
1368         }
1369
1370         /**
1371          * Parse the wiki syntax used to render tables.
1372          *
1373          * @param string $text
1374          * @return string
1375          */
        private function handleTables( $text ) {
                $lines = StringUtils::explode( "\n", $text );
                $out = '';
                # The five arrays below are used as parallel stacks: an entry is pushed on
                # each of them when a table is opened and popped when it is closed, so the
                # last entry always describes the innermost table that is still open.
                # "Updating" an entry is done throughout by popping it and pushing a new value.
                $td_history = []; # Is a cell (td, th or caption) currently open?
                $last_tag_history = []; # Save history of last tag activated (td, th or caption)
                $tr_history = []; # Is currently a tr tag open?
                $tr_attributes = []; # history of tr attributes
                $has_opened_tr = []; # Did this table open a <tr> element?
                # NOTE(review): unlike the stacks above this is a single value, so with
                # nested tables it holds the indent of the most recently opened table.
                $indent_level = 0; # indent level of the table

                foreach ( $lines as $outLine ) {
                        # $line is the trimmed copy used for matching and rewriting;
                        # $outLine is what finally gets appended to $out.
                        $line = trim( $outLine );

                        if ( $line === '' ) { # empty line, go to next line
                                $out .= $outLine . "\n";
                                continue;
                        }

                        $first_character = $line[0];
                        $first_two = substr( $line, 0, 2 );
                        $matches = [];

                        if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
                                # First check if we are starting a new table
                                # $matches[1] is the run of leading colons (one indent level each),
                                # $matches[2] is whatever follows "{|" and is treated as attributes.
                                $indent_level = strlen( $matches[1] );

                                $attributes = $this->mStripState->unstripBoth( $matches[2] );
                                $attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

                                $outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
                                # Fresh state for the new table: nothing is open yet
                                $td_history[] = false;
                                $last_tag_history[] = '';
                                $tr_history[] = false;
                                $tr_attributes[] = '';
                                $has_opened_tr[] = false;
                        } elseif ( count( $td_history ) == 0 ) {
                                # Don't do any of the following
                                # (we are outside of any table, so the line is copied verbatim)
                                $out .= $outLine . "\n";
                                continue;
                        } elseif ( $first_two === '|}' ) {
                                # We are ending a table
                                # Closing tags are prepended to $line one after another, so the
                                # one added last (the cell) ends up first in the output.
                                $line = '</table>' . substr( $line, 2 );
                                $last_tag = array_pop( $last_tag_history );

                                # A table that never opened a row gets a single empty one
                                if ( !array_pop( $has_opened_tr ) ) {
                                        $line = "<tr><td></td></tr>{$line}";
                                }

                                if ( array_pop( $tr_history ) ) {
                                        $line = "</tr>{$line}";
                                }

                                if ( array_pop( $td_history ) ) {
                                        $line = "</{$last_tag}>{$line}";
                                }
                                array_pop( $tr_attributes );
                                if ( $indent_level > 0 ) {
                                        # Close the <dl><dd> wrappers that were emitted for the indent
                                        $outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
                                } else {
                                        $outLine = $line;
                                }
                        } elseif ( $first_two === '|-' ) {
                                # Now we have a table row
                                # Strip the "|-" marker together with any further dashes
                                $line = preg_replace( '#^\|-+#', '', $line );

                                # Whats after the tag is now only attributes
                                $attributes = $this->mStripState->unstripBoth( $line );
                                $attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
                                # The attributes are only remembered here; the <tr> itself is
                                # written when the first cell of the row is encountered.
                                array_pop( $tr_attributes );
                                $tr_attributes[] = $attributes;

                                $line = '';
                                $last_tag = array_pop( $last_tag_history );
                                array_pop( $has_opened_tr );
                                $has_opened_tr[] = true;

                                # Close whatever row and cell are still open from the previous row
                                if ( array_pop( $tr_history ) ) {
                                        $line = '</tr>';
                                }

                                if ( array_pop( $td_history ) ) {
                                        $line = "</{$last_tag}>{$line}";
                                }

                                $outLine = $line;
                                $tr_history[] = false;
                                $td_history[] = false;
                                $last_tag_history[] = '';
                        } elseif ( $first_character === '|'
                                || $first_character === '!'
                                || $first_two === '|+'
                        ) {
                                # This might be cell elements, td, th or captions
                                # From here on $first_character identifies the kind of cell:
                                # '|' for td, '!' for th and '+' for caption.
                                if ( $first_two === '|+' ) {
                                        $first_character = '+';
                                        $line = substr( $line, 2 );
                                } else {
                                        $line = substr( $line, 1 );
                                }

                                // Implies both are valid for table headings.
                                // ("!!" is rewritten to "||" so that either separates header cells)
                                if ( $first_character === '!' ) {
                                        $line = StringUtils::replaceMarkup( '!!', '||', $line );
                                }

                                # Split up multiple cells on the same line.
                                # FIXME : This can result in improper nesting of tags processed
                                # by earlier parser steps.
                                $cells = explode( '||', $line );

                                $outLine = '';

                                # Loop through each table cell
                                foreach ( $cells as $cell ) {
                                        # $previous collects the markup that has to precede this cell:
                                        # the closing tag of the preceding cell and/or a row opening tag.
                                        $previous = '';
                                        if ( $first_character !== '+' ) {
                                                # Anything but a caption lives in a row: open one if none
                                                # is open, using the attributes stored by the last "|-".
                                                $tr_after = array_pop( $tr_attributes );
                                                if ( !array_pop( $tr_history ) ) {
                                                        $previous = "<tr{$tr_after}>\n";
                                                }
                                                $tr_history[] = true;
                                                # The stored attributes are reset so they are used only once
                                                $tr_attributes[] = '';
                                                array_pop( $has_opened_tr );
                                                $has_opened_tr[] = true;
                                        }

                                        $last_tag = array_pop( $last_tag_history );

                                        if ( array_pop( $td_history ) ) {
                                                $previous = "</{$last_tag}>\n{$previous}";
                                        }

                                        if ( $first_character === '|' ) {
                                                $last_tag = 'td';
                                        } elseif ( $first_character === '!' ) {
                                                $last_tag = 'th';
                                        } elseif ( $first_character === '+' ) {
                                                $last_tag = 'caption';
                                        } else {
                                                $last_tag = '';
                                        }

                                        $last_tag_history[] = $last_tag;

                                        # A cell could contain both parameters and data
                                        # (split at the first '|' only: attributes before it, content after)
                                        $cell_data = explode( '|', $cell, 2 );

                                        # T2553: Note that a '|' inside an invalid link should not
                                        # be mistaken as delimiting cell parameters
                                        # Bug T153140: Neither should language converter markup.
                                        if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
                                                # "[[" or "-{" before the first '|': the whole cell is content
                                                $cell = "{$previous}<{$last_tag}>" . trim( $cell );
                                        } elseif ( count( $cell_data ) == 1 ) {
                                                // Whitespace in cells is trimmed
                                                $cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
                                        } else {
                                                $attributes = $this->mStripState->unstripBoth( $cell_data[0] );
                                                $attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
                                                // Whitespace in cells is trimmed
                                                $cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
                                        }

                                        $outLine .= $cell;
                                        $td_history[] = true;
                                }
                        }
                        # Lines inside a table that matched none of the branches above reach
                        # this point with $outLine untouched and are copied as they are.
                        $out .= $outLine . "\n";
                }

                # Closing open td, tr && table
                # (one iteration per table that was still open at the end of the input)
                # NOTE(review): the cell is always closed with </td> here, even when the
                # open cell is a th or caption, and no </dd></dl> is written for indented
                # tables; $last_tag_history and $tr_attributes are not popped either.
                # Presumably later clean-up of the HTML copes with this - confirm before
                # relying on it.
                while ( count( $td_history ) > 0 ) {
                        if ( array_pop( $td_history ) ) {
                                $out .= "</td>\n";
                        }
                        if ( array_pop( $tr_history ) ) {
                                $out .= "</tr>\n";
                        }
                        if ( !array_pop( $has_opened_tr ) ) {
                                $out .= "<tr><td></td></tr>\n";
                        }

                        $out .= "</table>\n";
                }

                # Remove trailing line-ending (b/c)
                if ( substr( $out, -1 ) === "\n" ) {
                        $out = substr( $out, 0, -1 );
                }

                # special case: don't return empty table
                # (only this exact string is recognised, i.e. a lone attribute-less table)
                if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
                        $out = '';
                }

                return $out;
        }
1572
1573         /**
1574          * Helper function for parse() that transforms wiki markup into half-parsed
1575          * HTML. Only called for $mOutputType == self::OT_HTML.
1576          *
1577          * @internal
1578          *
1579          * @param string $text The text to parse
1580          * @param-taint $text escapes_html
1581          * @param bool $isMain Whether this is being called from the main parse() function
1582          * @param PPFrame|false $frame A pre-processor frame
1583          *
1584          * @return string
1585          */
1586         public function internalParse( $text, $isMain = true, $frame = false ) {
1587                 $origText = $text;
1588
1589                 # Hook to suspend the parser in this state
1590                 if ( !$this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState ) ) {
1591                         return $text;
1592                 }
1593
1594                 # if $frame is provided, then use $frame for replacing any variables
1595                 if ( $frame ) {
1596                         # use frame depth to infer how include/noinclude tags should be handled
1597                         # depth=0 means this is the top-level document; otherwise it's an included document
1598                         if ( !$frame->depth ) {
1599                                 $flag = 0;
1600                         } else {
1601                                 $flag = Preprocessor::DOM_FOR_INCLUSION;
1602                         }
1603                         $dom = $this->preprocessToDom( $text, $flag );
1604                         $text = $frame->expand( $dom );
1605                 } else {
1606                         # if $frame is not provided, then use old-style replaceVariables
1607                         $text = $this->replaceVariables( $text );
1608                 }
1609
1610                 $text = Sanitizer::internalRemoveHtmlTags(
1611                         $text,
1612                         // Callback from the Sanitizer for expanding items found in
1613                         // HTML attribute values, so they can be safely tested and escaped.
1614                         function ( &$text, $frame = false ) {
1615                                 $text = $this->replaceVariables( $text, $frame );
1616                                 $text = $this->mStripState->unstripBoth( $text );
1617                         },
1618                         false,
1619                         [],
1620                         []
1621                 );
1622                 $this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );
1623
1624                 # Tables need to come after variable replacement for things to work
1625                 # properly; putting them before other transformations should keep
1626                 # exciting things like link expansions from showing up in surprising
1627                 # places.
1628                 $text = $this->handleTables( $text );
1629
1630                 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
1631
1632                 $text = $this->handleDoubleUnderscore( $text );
1633
1634                 $text = $this->handleHeadings( $text );
1635                 $text = $this->handleInternalLinks( $text );
1636                 $text = $this->handleAllQuotes( $text );
1637                 $text = $this->handleExternalLinks( $text );
1638
1639                 # handleInternalLinks may sometimes leave behind
1640                 # absolute URLs, which have to be masked to hide them from handleExternalLinks
1641                 $text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );
1642
1643                 $text = $this->handleMagicLinks( $text );
1644                 $text = $this->finalizeHeadings( $text, $origText, $isMain );
1645
1646                 return $text;
1647         }
1648
1649         /**
1650          * Shorthand for getting a Language Converter for Target language
1651          *
1652          * @since public since 1.38
1653          * @return ILanguageConverter
1654          */
1655         public function getTargetLanguageConverter(): ILanguageConverter {
1656                 return $this->languageConverterFactory->getLanguageConverter(
1657                         $this->getTargetLanguage()
1658                 );
1659         }
1660
1661         /**
1662          * Shorthand for getting a Language Converter for Content language
1663          *
1664          * @return ILanguageConverter
1665          */
1666         private function getContentLanguageConverter(): ILanguageConverter {
1667                 return $this->languageConverterFactory->getLanguageConverter(
1668                         $this->getContentLanguage()
1669                 );
1670         }
1671
        /**
         * Get a HookContainer capable of returning metadata about hooks or running
         * extension hooks.
         *
         * This simply hands out the container held in $this->hookContainer.
         *
         * @since 1.35
         * @return HookContainer
         */
        protected function getHookContainer() {
                return $this->hookContainer;
        }
1682
        /**
         * Get a HookRunner for calling core hooks
         *
         * This simply hands out the runner held in $this->hookRunner, the same
         * object this class uses for its own hook calls.
         *
         * @internal This is for use by core only. Hook interfaces may be removed
         *   without notice.
         * @since 1.35
         * @return HookRunner
         */
        protected function getHookRunner() {
                return $this->hookRunner;
        }
1694
1695         /**
1696          * Helper function for parse() that transforms half-parsed HTML into fully
1697          * parsed HTML.
1698          *
1699          * @param string $text
1700          * @param bool $isMain
1701          * @param bool $linestart
1702          * @return string
1703          */
1704         private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
1705                 $text = $this->mStripState->unstripGeneral( $text );
1706
1707                 $text = BlockLevelPass::doBlockLevels( $text, $linestart );
1708
1709                 $this->replaceLinkHoldersPrivate( $text );
1710
1711                 /**
1712                  * The input doesn't get language converted if
1713                  * a) It's disabled
1714                  * b) Content isn't converted
1715                  * c) It's a conversion table
1716                  * d) it is an interface message (which is in the user language)
1717                  */
1718                 $converter = null;
1719                 if ( !( $this->mOptions->getDisableContentConversion()
1720                         || isset( $this->mDoubleUnderscores['nocontentconvert'] )
1721                         || $this->mOptions->getInterfaceMessage() )
1722                 ) {
1723                         # The position of the convert() call should not be changed. it
1724                         # assumes that the links are all replaced and the only thing left
1725                         # is the <nowiki> mark.
1726                         $converter = $this->getTargetLanguageConverter();
1727                         $text = $converter->convert( $text );
1728                         // TOC will be converted below.
1729                 }
1730                 // Convert the TOC.   This is done *after* the main text
1731                 // so that all the editor-defined conversion rules (by convention
1732                 // defined at the start of the article) are applied to the TOC
1733                 self::localizeTOC(
1734                         $this->mOutput->getTOCData(),
1735                         $this->getTargetLanguage(),
1736                         $converter // null if conversion is to be suppressed.
1737                 );
1738                 if ( $converter ) {
1739                         $this->mOutput->setLanguage( new Bcp47CodeValue(
1740                                 LanguageCode::bcp47( $converter->getPreferredVariant() )
1741                         ) );
1742                 } else {
1743                         $this->mOutput->setLanguage( $this->getTargetLanguage() );
1744                 }
1745
1746                 $text = $this->mStripState->unstripNoWiki( $text );
1747
1748                 $text = $this->mStripState->unstripGeneral( $text );
1749
1750                 $text = $this->tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );
1751
1752                 if ( $isMain ) {
1753                         $this->hookRunner->onParserAfterTidy( $this, $text );
1754                 }
1755
1756                 return $text;
1757         }
1758
        /**
         * Replace special strings like "ISBN xxx" and "RFC xxx" with
         * magic external links.
         *
         * The same pass also picks up free external links (a valid protocol
         * followed by an address). Every match of the pattern is handed to
         * magicLinkCallback(), which decides what to do based on the capture
         * group that matched.
         *
         * @param string $text
         *
         * @return string
         */
        private function handleMagicLinks( $text ) {
                # Building blocks that get interpolated into the pattern below
                $prots = $this->urlUtils->validAbsoluteProtocols();
                $urlChar = self::EXT_LINK_URL_CLASS;
                $addr = self::EXT_LINK_ADDR;
                $space = self::SPACE_NOT_NL; #  non-newline space
                $spdash = "(?:-|$space)"; # a dash or a non-newline space
                $spaces = "$space++"; # possessive match of 1 or more spaces
                # The pattern is delimited by "!" and uses the x (extended) and u (UTF-8)
                # modifiers. It is assembled from two string literals: the first one is
                # single-quoted and therefore taken literally, the second one is
                # double-quoted so that the variables above are interpolated.
                # Groups m[1] and m[2] come first so that existing anchors and HTML tags
                # are matched as a whole and can be skipped by the callback.
                $text = preg_replace_callback(
                        '!(?:                        # Start cases
                                (<a[ \t\r\n>].*?</a>) |    # m[1]: Skip link text
                                (<.*?>) |                  # m[2]: Skip stuff inside HTML elements' . "
                                (\b                        # m[3]: Free external links
                                        (?i:$prots)
                                        ($addr$urlChar*)         # m[4]: Post-protocol path
                                ) |
                                \b(?:RFC|PMID) $spaces     # m[5]: RFC or PMID, capture number
                                        ([0-9]+)\b |
                                \bISBN $spaces (           # m[6]: ISBN, capture number
                                        (?: 97[89] $spdash? )?   #  optional 13-digit ISBN prefix
                                        (?: [0-9]  $spdash? ){9} #  9 digits with opt. delimiters
                                        [0-9Xx]                  #  check digit
                                )\b
                        )!xu",
                        [ $this, 'magicLinkCallback' ],
                        $text
                );
                return $text;
        }
1797
1798         /**
1799          * @param array $m
1800          * @return string HTML
1801          */
1802         private function magicLinkCallback( array $m ) {
1803                 if ( isset( $m[1] ) && $m[1] !== '' ) {
1804                         # Skip anchor
1805                         return $m[0];
1806                 } elseif ( isset( $m[2] ) && $m[2] !== '' ) {
1807                         # Skip HTML element
1808                         return $m[0];
1809                 } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
1810                         # Free external link
1811                         return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
1812                 } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
1813                         # RFC or PMID
1814                         if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
1815                                 if ( !$this->mOptions->getMagicRFCLinks() ) {
1816                                         return $m[0];
1817                                 }
1818                                 $keyword = 'RFC';
1819                                 $urlmsg = 'rfcurl';
1820                                 $cssClass = 'mw-magiclink-rfc';
1821                                 $trackingCat = 'magiclink-tracking-rfc';
1822                                 $id = $m[5];
1823                         } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
1824                                 if ( !$this->mOptions->getMagicPMIDLinks() ) {
1825                                         return $m[0];
1826                                 }
1827                                 $keyword = 'PMID';
1828                                 $urlmsg = 'pubmedurl';
1829                                 $cssClass = 'mw-magiclink-pmid';
1830                                 $trackingCat = 'magiclink-tracking-pmid';
1831                                 $id = $m[5];
1832                         } else {
1833                                 // Should never happen
1834                                 throw new UnexpectedValueException( __METHOD__ . ': unrecognised match type "' .
1835                                         substr( $m[0], 0, 20 ) . '"' );
1836                         }
1837                         $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
1838                         $this->addTrackingCategory( $trackingCat );
1839                         return $this->getLinkRenderer()->makeExternalLink(
1840                                 $url,
1841                                 "{$keyword} {$id}",
1842                                 $this->getTitle(),
1843                                 $cssClass,
1844                                 []
1845                         );
1846                 } elseif ( isset( $m[6] ) && $m[6] !== ''
1847                         && $this->mOptions->getMagicISBNLinks()
1848                 ) {
1849                         # ISBN
1850                         $isbn = $m[6];
1851                         $space = self::SPACE_NOT_NL; #  non-newline space
1852                         $isbn = preg_replace( "/$space/", ' ', $isbn );
1853                         $num = strtr( $isbn, [
1854                                 '-' => '',
1855                                 ' ' => '',
1856                                 'x' => 'X',
1857                         ] );
1858                         $this->addTrackingCategory( 'magiclink-tracking-isbn' );
1859                         return $this->getLinkRenderer()->makeKnownLink(
1860                                 SpecialPage::getTitleFor( 'Booksources', $num ),
1861                                 "ISBN $isbn",
1862                                 [
1863                                         'class' => 'internal mw-magiclink-isbn',
1864                                         'title' => false // suppress title attribute
1865                                 ]
1866                         );
1867                 } else {
1868                         return $m[0];
1869                 }
1870         }
1871
1872         /**
1873          * Make a free external link, given a user-supplied URL
1874          *
1875          * @param string $url
1876          * @param int $numPostProto
1877          *   The number of characters after the protocol.
1878          * @return string HTML
1879          * @internal
1880          */
1881         private function makeFreeExternalLink( $url, $numPostProto ) {
1882                 $trail = '';
1883
1884                 # The characters '<' and '>' (which were escaped by
1885                 # internalRemoveHtmlTags()) should not be included in
1886                 # URLs, per RFC 2396.
1887                 # Make &nbsp; terminate a URL as well (bug T84937)
1888                 $m2 = [];
1889                 if ( preg_match(
1890                         '/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
1891                         $url,
1892                         $m2,
1893                         PREG_OFFSET_CAPTURE
1894                 ) ) {
1895                         $trail = substr( $url, $m2[0][1] ) . $trail;
1896                         $url = substr( $url, 0, $m2[0][1] );
1897                 }
1898
1899                 # Move trailing punctuation to $trail
1900                 $sep = ',;\.:!?';
1901                 # If there is no left bracket, then consider right brackets fair game too
1902                 if ( strpos( $url, '(' ) === false ) {
1903                         $sep .= ')';
1904                 }
1905
1906                 $urlRev = strrev( $url );
1907                 $numSepChars = strspn( $urlRev, $sep );
1908                 # Don't break a trailing HTML entity by moving the ; into $trail
1909                 # This is in hot code, so use substr_compare to avoid having to
1910                 # create a new string object for the comparison
1911                 if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
1912                         # more optimization: instead of running preg_match with a $
1913                         # anchor, which can be slow, do the match on the reversed
1914                         # string starting at the desired offset.
1915                         # un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
1916                         if ( preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars ) ) {
1917                                 $numSepChars--;
1918                         }
1919                 }
1920                 if ( $numSepChars ) {
1921                         $trail = substr( $url, -$numSepChars ) . $trail;
1922                         $url = substr( $url, 0, -$numSepChars );
1923                 }
1924
1925                 # Verify that we still have a real URL after trail removal, and
1926                 # not just lone protocol
1927                 if ( strlen( $trail ) >= $numPostProto ) {
1928                         return $url . $trail;
1929                 }
1930
1931                 $url = Sanitizer::cleanUrl( $url );
1932
1933                 # Is this an external image?
1934                 $text = $this->maybeMakeExternalImage( $url );
1935                 if ( $text === false ) {
1936                         # Not an image, make a link
1937                         $text = $this->getLinkRenderer()->makeExternalLink(
1938                                 $url,
1939                                 $this->getTargetLanguageConverter()->markNoConversion( $url ),
1940                                 $this->getTitle(),
1941                                 'free',
1942                                 $this->getExternalLinkAttribs( $url )
1943                         );
1944                         # Register it in the output object...
1945                         $this->mOutput->addExternalLink( $url );
1946                 }
1947                 return $text . $trail;
1948         }
1949
1950         /**
1951          * Parse headers and return html
1952          *
1953          * @param string $text
1954          * @return string
1955          */
1956         private function handleHeadings( $text ) {
1957                 for ( $i = 6; $i >= 1; --$i ) {
1958                         $h = str_repeat( '=', $i );
1959                         // Trim non-newline whitespace from headings
1960                         // Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
1961                         $text = preg_replace( "/^(?:$h)[ \\t]*(.+?)[ \\t]*(?:$h)\\s*$/m", "<h$i>\\1</h$i>", $text );
1962                 }
1963                 return $text;
1964         }
1965
1966         /**
1967          * Replace single quotes with HTML markup
1968          *
1969          * @param string $text
1970          *
1971          * @return string The altered text
1972          */
1973         private function handleAllQuotes( $text ) {
1974                 $outtext = '';
1975                 $lines = StringUtils::explode( "\n", $text );
1976                 foreach ( $lines as $line ) {
1977                         $outtext .= $this->doQuotes( $line ) . "\n";
1978                 }
1979                 $outtext = substr( $outtext, 0, -1 );
1980                 return $outtext;
1981         }
1982
1983         /**
1984          * Helper function for handleAllQuotes()
1985          *
1986          * @param string $text
1987          *
1988          * @return string
1989          * @internal
1990          */
1991         public function doQuotes( $text ) {
1992                 $arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
1993                 $countarr = count( $arr );
1994                 if ( $countarr == 1 ) {
1995                         return $text;
1996                 }
1997
1998                 // First, do some preliminary work. This may shift some apostrophes from
1999                 // being mark-up to being text. It also counts the number of occurrences
2000                 // of bold and italics mark-ups.
2001                 $numbold = 0;
2002                 $numitalics = 0;
2003                 for ( $i = 1; $i < $countarr; $i += 2 ) {
2004                         $thislen = strlen( $arr[$i] );
2005                         // If there are ever four apostrophes, assume the first is supposed to
2006                         // be text, and the remaining three constitute mark-up for bold text.
2007                         // (T15227: ''''foo'''' turns into ' ''' foo ' ''')
2008                         if ( $thislen == 4 ) {
2009                                 $arr[$i - 1] .= "'";
2010                                 $arr[$i] = "'''";
2011                                 $thislen = 3;
2012                         } elseif ( $thislen > 5 ) {
2013                                 // If there are more than 5 apostrophes in a row, assume they're all
2014                                 // text except for the last 5.
2015                                 // (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
2016                                 $arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
2017                                 $arr[$i] = "'''''";
2018                                 $thislen = 5;
2019                         }
2020                         // Count the number of occurrences of bold and italics mark-ups.
2021                         if ( $thislen == 2 ) {
2022                                 $numitalics++;
2023                         } elseif ( $thislen == 3 ) {
2024                                 $numbold++;
2025                         } elseif ( $thislen == 5 ) {
2026                                 $numitalics++;
2027                                 $numbold++;
2028                         }
2029                 }
2030
2031                 // If there is an odd number of both bold and italics, it is likely
2032                 // that one of the bold ones was meant to be an apostrophe followed
2033                 // by italics. Which one we cannot know for certain, but it is more
2034                 // likely to be one that has a single-letter word before it.
2035                 if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
2036                         $firstsingleletterword = -1;
2037                         $firstmultiletterword = -1;
2038                         $firstspace = -1;
2039                         for ( $i = 1; $i < $countarr; $i += 2 ) {
2040                                 if ( strlen( $arr[$i] ) == 3 ) {
2041                                         $x1 = substr( $arr[$i - 1], -1 );
2042                                         $x2 = substr( $arr[$i - 1], -2, 1 );
2043                                         if ( $x1 === ' ' ) {
2044                                                 if ( $firstspace == -1 ) {
2045                                                         $firstspace = $i;
2046                                                 }
2047                                         } elseif ( $x2 === ' ' ) {
2048                                                 $firstsingleletterword = $i;
2049                                                 // if $firstsingleletterword is set, we don't
2050                                                 // look at the other options, so we can bail early.
2051                                                 break;
2052                                         } elseif ( $firstmultiletterword == -1 ) {
2053                                                 $firstmultiletterword = $i;
2054                                         }
2055                                 }
2056                         }
2057
2058                         // If there is a single-letter word, use it!
2059                         if ( $firstsingleletterword > -1 ) {
2060                                 $arr[$firstsingleletterword] = "''";
2061                                 $arr[$firstsingleletterword - 1] .= "'";
2062                         } elseif ( $firstmultiletterword > -1 ) {
2063                                 // If not, but there's a multi-letter word, use that one.
2064                                 $arr[$firstmultiletterword] = "''";
2065                                 $arr[$firstmultiletterword - 1] .= "'";
2066                         } elseif ( $firstspace > -1 ) {
2067                                 // ... otherwise use the first one that has neither.
2068                                 // (notice that it is possible for all three to be -1 if, for example,
2069                                 // there is only one pentuple-apostrophe in the line)
2070                                 $arr[$firstspace] = "''";
2071                                 $arr[$firstspace - 1] .= "'";
2072                         }
2073                 }
2074
2075                 // Now let's actually convert our apostrophic mush to HTML!
2076                 $output = '';
2077                 $buffer = '';
2078                 $state = '';
2079                 $i = 0;
2080                 foreach ( $arr as $r ) {
2081                         if ( ( $i % 2 ) == 0 ) {
2082                                 if ( $state === 'both' ) {
2083                                         $buffer .= $r;
2084                                 } else {
2085                                         $output .= $r;
2086                                 }
2087                         } else {
2088                                 $thislen = strlen( $r );
2089                                 if ( $thislen == 2 ) {
2090                                         // two quotes - open or close italics
2091                                         if ( $state === 'i' ) {
2092                                                 $output .= '</i>';
2093                                                 $state = '';
2094                                         } elseif ( $state === 'bi' ) {
2095                                                 $output .= '</i>';
2096                                                 $state = 'b';
2097                                         } elseif ( $state === 'ib' ) {
2098                                                 $output .= '</b></i><b>';
2099                                                 $state = 'b';
2100                                         } elseif ( $state === 'both' ) {
2101                                                 $output .= '<b><i>' . $buffer . '</i>';
2102                                                 $state = 'b';
2103                                         } else { // $state can be 'b' or ''
2104                                                 $output .= '<i>';
2105                                                 $state .= 'i';
2106                                         }
2107                                 } elseif ( $thislen == 3 ) {
2108                                         // three quotes - open or close bold
2109                                         if ( $state === 'b' ) {
2110                                                 $output .= '</b>';
2111                                                 $state = '';
2112                                         } elseif ( $state === 'bi' ) {
2113                                                 $output .= '</i></b><i>';
2114                                                 $state = 'i';
2115                                         } elseif ( $state === 'ib' ) {
2116                                                 $output .= '</b>';
2117                                                 $state = 'i';
2118                                         } elseif ( $state === 'both' ) {
2119                                                 $output .= '<i><b>' . $buffer . '</b>';
2120                                                 $state = 'i';
2121                                         } else { // $state can be 'i' or ''
2122                                                 $output .= '<b>';
2123                                                 $state .= 'b';
2124                                         }
2125                                 } elseif ( $thislen == 5 ) {
2126                                         // five quotes - open or close both separately
2127                                         if ( $state === 'b' ) {
2128                                                 $output .= '</b><i>';
2129                                                 $state = 'i';
2130                                         } elseif ( $state === 'i' ) {
2131                                                 $output .= '</i><b>';
2132                                                 $state = 'b';
2133                                         } elseif ( $state === 'bi' ) {
2134                                                 $output .= '</i></b>';
2135                                                 $state = '';
2136                                         } elseif ( $state === 'ib' ) {
2137                                                 $output .= '</b></i>';
2138                                                 $state = '';
2139                                         } elseif ( $state === 'both' ) {
2140                                                 $output .= '<i><b>' . $buffer . '</b></i>';
2141                                                 $state = '';
2142                                         } else { // ($state == '')
2143                                                 $buffer = '';
2144                                                 $state = 'both';
2145                                         }
2146                                 }
2147                         }
2148                         $i++;
2149                 }
2150                 // Now close all remaining tags.  Notice that the order is important.
2151                 if ( $state === 'b' || $state === 'ib' ) {
2152                         $output .= '</b>';
2153                 }
2154                 if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
2155                         $output .= '</i>';
2156                 }
2157                 if ( $state === 'bi' ) {
2158                         $output .= '</b>';
2159                 }
2160                 // There might be lonely ''''', so make sure we have a buffer
2161                 if ( $state === 'both' && $buffer ) {
2162                         $output .= '<b><i>' . $buffer . '</i></b>';
2163                 }
2164                 return $output;
2165         }
2166
2167         /**
2168          * Replace external links (REL)
2169          *
2170          * Note: this is all very hackish and the order of execution matters a lot.
2171          * Make sure to run tests/parser/parserTests.php if you change this code.
2172          *
2173          * @param string $text
2174          * @return string
2175          */
2176         private function handleExternalLinks( $text ) {
2177                 $bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
2178                 // @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
2179                 if ( $bits === false ) {
2180                         throw new RuntimeException( "PCRE failure" );
2181                 }
2182                 $s = array_shift( $bits );
2183
2184                 $i = 0;
2185                 while ( $i < count( $bits ) ) {
2186                         $url = $bits[$i++];
2187                         $i++; // protocol
2188                         $text = $bits[$i++];
2189                         $trail = $bits[$i++];
2190
2191                         # The characters '<' and '>' (which were escaped by
2192                         # internalRemoveHtmlTags()) should not be included in
2193                         # URLs, per RFC 2396.
2194                         $m2 = [];
2195                         if ( preg_match( '/&(lt|gt);/', $url, $m2, PREG_OFFSET_CAPTURE ) ) {
2196                                 $text = substr( $url, $m2[0][1] ) . ' ' . $text;
2197                                 $url = substr( $url, 0, $m2[0][1] );
2198                         }
2199
2200                         # If the link text is an image URL, replace it with an <img> tag
2201                         # This happened by accident in the original parser, but some people used it extensively
2202                         $img = $this->maybeMakeExternalImage( $text );
2203                         if ( $img !== false ) {
2204                                 $text = $img;
2205                         }
2206
2207                         $dtrail = '';
2208
2209                         # Set linktype for CSS
2210                         $linktype = 'text';
2211
2212                         # No link text, e.g. [http://domain.tld/some.link]
2213                         if ( $text == '' ) {
2214                                 # Autonumber
2215                                 $langObj = $this->getTargetLanguage();
2216                                 $text = '[' . $langObj->formatNum( ++$this->mAutonumber ) . ']';
2217                                 $linktype = 'autonumber';
2218                         } else {
2219                                 # Have link text, e.g. [http://domain.tld/some.link text]s
2220                                 # Check for trail
2221                                 [ $dtrail, $trail ] = Linker::splitTrail( $trail );
2222                         }
2223
2224                         // Excluding protocol-relative URLs may avoid many false positives.
2225                         if ( preg_match( '/^(?:' . $this->urlUtils->validAbsoluteProtocols() . ')/', $text ) ) {
2226                                 $text = $this->getTargetLanguageConverter()->markNoConversion( $text );
2227                         }
2228
2229                         $url = Sanitizer::cleanUrl( $url );
2230
2231                         # Use the encoded URL
2232                         # This means that users can paste URLs directly into the text
2233                         # Funny characters like ö aren't valid in URLs anyway
2234                         # This was changed in August 2004
2235                         $s .= $this->getLinkRenderer()->makeExternalLink(
2236                                 $url,
2237                                 // @phan-suppress-next-line SecurityCheck-XSS
2238                                 new HtmlArmor( $text ),
2239                                 $this->getTitle(),
2240                                 $linktype,
2241                                 $this->getExternalLinkAttribs( $url )
2242                         ) . $dtrail . $trail;
2243
2244                         # Register link in the output object.
2245                         $this->mOutput->addExternalLink( $url );
2246                 }
2247
2248                 // @phan-suppress-next-line PhanTypeMismatchReturnNullable False positive from array_shift
2249                 return $s;
2250         }
2251
2252         /**
2253          * Get the rel attribute for a particular external link.
2254          *
2255          * @since 1.21
2256          * @internal
2257          * @param string|false $url Optional URL, to extract the domain from for rel =>
2258          *   nofollow if appropriate
2259          * @param LinkTarget|PageReference|null $title Optional page, for wgNoFollowNsExceptions lookups
2260          * @return string|null Rel attribute for $url
2261          */
2262         public static function getExternalLinkRel( $url = false, $title = null ) {
2263                 $mainConfig = MediaWikiServices::getInstance()->getMainConfig();
2264                 $noFollowLinks = $mainConfig->get( MainConfigNames::NoFollowLinks );
2265                 $noFollowNsExceptions = $mainConfig->get( MainConfigNames::NoFollowNsExceptions );
2266                 $noFollowDomainExceptions = $mainConfig->get( MainConfigNames::NoFollowDomainExceptions );
2267                 $ns = $title ? $title->getNamespace() : false;
2268                 if (
2269                         $noFollowLinks && !in_array( $ns, $noFollowNsExceptions )
2270                         && !wfGetUrlUtils()->matchesDomainList( (string)$url, $noFollowDomainExceptions )
2271                 ) {
2272                         return 'nofollow';
2273                 }
2274                 return null;
2275         }
2276
2277         /**
2278          * Get an associative array of additional HTML attributes appropriate for a
2279          * particular external link.  This currently may include rel => nofollow
2280          * (depending on configuration, namespace, and the URL's domain) and/or a
2281          * target attribute (depending on configuration).
2282          *
2283          * @internal
2284          * @param string $url URL to extract the domain from for rel =>
2285          *   nofollow if appropriate
2286          * @return array Associative array of HTML attributes
2287          */
2288         public function getExternalLinkAttribs( $url ) {
2289                 $attribs = [];
2290                 $rel = self::getExternalLinkRel( $url, $this->getTitle() ) ?? '';
2291
2292                 $target = $this->mOptions->getExternalLinkTarget();
2293                 if ( $target ) {
2294                         $attribs['target'] = $target;
2295                         if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
2296                                 // T133507. New windows can navigate parent cross-origin.
2297                                 // Including noreferrer due to lacking browser
2298                                 // support of noopener. Eventually noreferrer should be removed.
2299                                 if ( $rel !== '' ) {
2300                                         $rel .= ' ';
2301                                 }
2302                                 $rel .= 'noreferrer noopener';
2303                         }
2304                 }
2305                 if ( $rel !== '' ) {
2306                         $attribs['rel'] = $rel;
2307                 }
2308                 return $attribs;
2309         }
2310
2311         /**
2312          * Replace unusual escape codes in a URL with their equivalent characters
2313          *
2314          * This generally follows the syntax defined in RFC 3986, with special
2315          * consideration for HTTP query strings.
2316          *
2317          * @internal
2318          * @param string $url
2319          * @return string
2320          */
2321         public static function normalizeLinkUrl( $url ) {
2322                 # Test for RFC 3986 IPv6 syntax
2323                 $scheme = '[a-z][a-z0-9+.-]*:';
2324                 $userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
2325                 $ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
2326                 if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) &&
2327                         IPUtils::isValid( rawurldecode( $m[1] ) )
2328                 ) {
2329                         $isIPv6 = rawurldecode( $m[1] );
2330                 } else {
2331                         $isIPv6 = false;
2332                 }
2333
2334                 # Make sure unsafe characters are encoded
2335                 $url = preg_replace_callback(
2336                         '/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]+/',
2337                         static fn ( $m ) => rawurlencode( $m[0] ),
2338                         $url
2339                 );
2340
2341                 $ret = '';
2342                 $end = strlen( $url );
2343
2344                 # Fragment part - 'fragment'
2345                 $start = strpos( $url, '#' );
2346                 if ( $start !== false && $start < $end ) {
2347                         $ret = self::normalizeUrlComponent(
2348                                 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}' ) . $ret;
2349                         $end = $start;
2350                 }
2351
2352                 # Query part - 'query' minus &=+;
2353                 $start = strpos( $url, '?' );
2354                 if ( $start !== false && $start < $end ) {
2355                         $ret = self::normalizeUrlComponent(
2356                                 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}&=+;' ) . $ret;
2357                         $end = $start;
2358                 }
2359
2360                 # Path part - 'pchar', remove dot segments
2361                 # (find first '/' after the optional '//' after the scheme)
2362                 $start = strpos( $url, '//' );
2363                 $start = strpos( $url, '/', $start === false ? 0 : $start + 2 );
2364                 if ( $start !== false && $start < $end ) {
2365                         $ret = UrlUtils::removeDotSegments( self::normalizeUrlComponent(
2366                                 substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}/?' ) ) . $ret;
2367                         $end = $start;
2368                 }
2369
2370                 # Scheme and host part - 'pchar'
2371                 # (we assume no userinfo or encoded colons in the host)
2372                 $ret = self::normalizeUrlComponent(
2373                         substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $ret;
2374
2375                 # Fix IPv6 syntax
2376                 if ( $isIPv6 !== false ) {
2377                         $ipv6Host = "%5B({$isIPv6})%5D";
2378                         $ret = preg_replace(
2379                                 "<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
2380                                 "$1[$2]",
2381                                 $ret
2382                         );
2383                 }
2384
2385                 return $ret;
2386         }
2387
2388         private static function normalizeUrlComponent( $component, $unsafe ) {
2389                 $callback = static function ( $matches ) use ( $unsafe ) {
2390                         $char = urldecode( $matches[0] );
2391                         $ord = ord( $char );
2392                         if ( $ord > 32 && $ord < 127 && strpos( $unsafe, $char ) === false ) {
2393                                 # Unescape it
2394                                 return $char;
2395                         } else {
2396                                 # Leave it escaped, but use uppercase for a-f
2397                                 return strtoupper( $matches[0] );
2398                         }
2399                 };
2400                 return preg_replace_callback( '/%[0-9A-Fa-f]{2}/', $callback, $component );
2401         }
2402
2403         /**
2404          * make an image if it's allowed, either through the global
2405          * option, through the exception, or through the on-wiki whitelist
2406          *
2407          * @param string $url
2408          *
2409          * @return string
2410          */
2411         private function maybeMakeExternalImage( $url ) {
2412                 $imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
2413                 $imagesexception = (bool)$imagesfrom;
2414                 $text = false;
2415                 # $imagesfrom could be either a single string or an array of strings, parse out the latter
2416                 if ( $imagesexception && is_array( $imagesfrom ) ) {
2417                         $imagematch = false;
2418                         foreach ( $imagesfrom as $match ) {
2419                                 if ( strpos( $url, $match ) === 0 ) {
2420                                         $imagematch = true;
2421                                         break;
2422                                 }
2423                         }
2424                 } elseif ( $imagesexception ) {
2425                         $imagematch = ( strpos( $url, $imagesfrom ) === 0 );
2426                 } else {
2427                         $imagematch = false;
2428                 }
2429
2430                 if ( $this->mOptions->getAllowExternalImages()
2431                         || ( $imagesexception && $imagematch )
2432                 ) {
2433                         if ( preg_match( self::EXT_IMAGE_REGEX, $url ) ) {
2434                                 # Image found
2435                                 $text = Linker::makeExternalImage( $url );
2436                         }
2437                 }
2438                 if ( !$text && $this->mOptions->getEnableImageWhitelist()
2439                         && preg_match( self::EXT_IMAGE_REGEX, $url )
2440                 ) {
2441                         $whitelist = explode(
2442                                 "\n",
2443                                 wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
2444                         );
2445
2446                         foreach ( $whitelist as $entry ) {
2447                                 # Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments
2448                                 if ( strpos( $entry, '#' ) === 0 || $entry === '' ) {
2449                                         continue;
2450                                 }
2451                                 // @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
2452                                 if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
2453                                         # Image matches a whitelist entry
2454                                         $text = Linker::makeExternalImage( $url );
2455                                         break;
2456                                 }
2457                         }
2458                 }
2459                 return $text;
2460         }
2461
2462         /**
2463          * Process [[ ]] wikilinks
2464          *
2465          * @param string $text
2466          *
2467          * @return string Processed text
2468          */
2469         private function handleInternalLinks( $text ) {
2470                 $this->mLinkHolders->merge( $this->handleInternalLinks2( $text ) );
2471                 return $text;
2472         }
2473
2474         /**
2475          * Process [[ ]] wikilinks (RIL)
2476          * @param string &$s
2477          * @return LinkHolderArray
2478          */
2479         private function handleInternalLinks2( &$s ) {
2480                 static $tc = false, $e1, $e1_img;
2481                 # the % is needed to support urlencoded titles as well
2482                 if ( !$tc ) {
2483                         $tc = Title::legalChars() . '#%';
2484                         # Match a link having the form [[namespace:link|alternate]]trail
2485                         $e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
2486                         # Match cases where there is no "]]", which might still be images
2487                         $e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
2488                 }
2489
2490                 $holders = new LinkHolderArray(
2491                         $this,
2492                         $this->getContentLanguageConverter(),
2493                         $this->getHookContainer() );
2494
2495                 # split the entire text string on occurrences of [[
2496                 $a = StringUtils::explode( '[[', ' ' . $s );
2497                 # get the first element (all text up to first [[), and remove the space we added
2498                 $s = $a->current();
2499                 $a->next();
2500                 $line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
2501                 $s = substr( $s, 1 );
2502
2503                 $nottalk = !$this->getTitle()->isTalkPage();
2504
2505                 $useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
2506                 $e2 = null;
2507                 if ( $useLinkPrefixExtension ) {
2508                         # Match the end of a line for a word that's not followed by whitespace,
2509                         # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
2510                         $charset = $this->contLang->linkPrefixCharset();
2511                         $e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
2512                         $m = [];
2513                         if ( preg_match( $e2, $s, $m ) ) {
2514                                 $first_prefix = $m[2];
2515                         } else {
2516                                 $first_prefix = false;
2517                         }
2518                         $prefix = false;
2519                 } else {
2520                         $first_prefix = false;
2521                         $prefix = '';
2522                 }
2523
2524                 # Some namespaces don't allow subpages
2525                 $useSubpages = $this->nsInfo->hasSubpages(
2526                         $this->getTitle()->getNamespace()
2527                 );
2528
2529                 # Loop for each link
2530                 for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
2531                         # Check for excessive memory usage
2532                         if ( $holders->isBig() ) {
2533                                 # Too big
2534                                 # Do the existence check, replace the link holders and clear the array
2535                                 $holders->replace( $s );
2536                                 $holders->clear();
2537                         }
2538
2539                         if ( $useLinkPrefixExtension ) {
2540                                 // @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal $e2 is set under this condition
2541                                 if ( preg_match( $e2, $s, $m ) ) {
2542                                         [ , $s, $prefix ] = $m;
2543                                 } else {
2544                                         $prefix = '';
2545                                 }
2546                                 # first link
2547                                 if ( $first_prefix ) {
2548                                         $prefix = $first_prefix;
2549                                         $first_prefix = false;
2550                                 }
2551                         }
2552
2553                         $might_be_img = false;
2554
2555                         if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
2556                                 $text = $m[2];
2557                                 # If we get a ] at the beginning of $m[3] that means we have a link that's something like:
2558                                 # [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row fucks up,
2559                                 # the real problem is with the $e1 regex
2560                                 # See T1500.
2561                                 # Still some problems for cases where the ] is meant to be outside punctuation,
2562                                 # and no image is in sight. See T4095.
2563                                 if ( $text !== ''
2564                                         && substr( $m[3], 0, 1 ) === ']'
2565                                         && strpos( $text, '[' ) !== false
2566                                 ) {
2567                                         $text .= ']'; # so that handleExternalLinks($text) works later
2568                                         $m[3] = substr( $m[3], 1 );
2569                                 }
2570                                 # fix up urlencoded title texts
2571                                 if ( strpos( $m[1], '%' ) !== false ) {
2572                                         # Should anchors '#' also be rejected?
2573                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2574                                 }
2575                                 $trail = $m[3];
2576                         } elseif ( preg_match( $e1_img, $line, $m ) ) {
2577                                 # Invalid, but might be an image with a link in its caption
2578                                 $might_be_img = true;
2579                                 $text = $m[2];
2580                                 if ( strpos( $m[1], '%' ) !== false ) {
2581                                         $m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
2582                                 }
2583                                 $trail = "";
2584                         } else { # Invalid form; output directly
2585                                 $s .= $prefix . '[[' . $line;
2586                                 continue;
2587                         }
2588
2589                         // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset preg_match success when reached here
2590                         $origLink = ltrim( $m[1], ' ' );
2591
2592                         # Don't allow internal links to pages containing
2593                         # PROTO: where PROTO is a valid URL protocol; these
2594                         # should be external links.
2595                         if ( preg_match( '/^(?i:' . $this->urlUtils->validProtocols() . ')/', $origLink ) ) {
2596                                 $s .= $prefix . '[[' . $line;
2597                                 continue;
2598                         }
2599
2600                         # Make subpage if necessary
2601                         if ( $useSubpages ) {
2602                                 $link = Linker::normalizeSubpageLink(
2603                                         $this->getTitle(), $origLink, $text
2604                                 );
2605                         } else {
2606                                 $link = $origLink;
2607                         }
2608
2609                         // \x7f isn't a default legal title char, so most likely strip
2610                         // markers will force us into the "invalid form" path above.  But,
2611                         // just in case, let's assert that xmlish tags aren't valid in
2612                         // the title position.
2613                         $unstrip = $this->mStripState->killMarkers( $link );
2614                         $noMarkers = ( $unstrip === $link );
2615
2616                         $nt = $noMarkers ? Title::newFromText( $link ) : null;
2617                         if ( $nt === null ) {
2618                                 $s .= $prefix . '[[' . $line;
2619                                 continue;
2620                         }
2621
2622                         $ns = $nt->getNamespace();
2623                         $iw = $nt->getInterwiki();
2624
2625                         $noforce = ( substr( $origLink, 0, 1 ) !== ':' );
2626
2627                         if ( $might_be_img ) { # if this is actually an invalid link
2628                                 if ( $ns === NS_FILE && $noforce ) { # but might be an image
2629                                         $found = false;
2630                                         while ( true ) {
2631                                                 # look at the next 'line' to see if we can close it there
2632                                                 $a->next();
2633                                                 $next_line = $a->current();
2634                                                 if ( $next_line === false || $next_line === null ) {
2635                                                         break;
2636                                                 }
2637                                                 $m = explode( ']]', $next_line, 3 );
2638                                                 if ( count( $m ) == 3 ) {
2639                                                         # the first ]] closes the inner link, the second the image
2640                                                         $found = true;
2641                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2642                                                         $trail = $m[2];
2643                                                         break;
2644                                                 } elseif ( count( $m ) == 2 ) {
2645                                                         # if there's exactly one ]] that's fine, we'll keep looking
2646                                                         $text .= "[[{$m[0]}]]{$m[1]}";
2647                                                 } else {
2648                                                         # if $next_line is invalid too, we need look no further
2649                                                         $text .= '[[' . $next_line;
2650                                                         break;
2651                                                 }
2652                                         }
2653                                         if ( !$found ) {
2654                                                 # we couldn't find the end of this imageLink, so output it raw
2655                                                 # but don't ignore what might be perfectly normal links in the text we've examined
2656                                                 $holders->merge( $this->handleInternalLinks2( $text ) );
2657                                                 $s .= "{$prefix}[[$link|$text";
2658                                                 # note: no $trail, because without an end, there *is* no trail
2659                                                 continue;
2660                                         }
2661                                 } else { # it's not an image, so output it raw
2662                                         $s .= "{$prefix}[[$link|$text";
2663                                         # note: no $trail, because without an end, there *is* no trail
2664                                         continue;
2665                                 }
2666                         }
2667
2668                         $wasblank = ( $text == '' );
2669                         if ( $wasblank ) {
2670                                 $text = $link;
2671                                 if ( !$noforce ) {
2672                                         # Strip off leading ':'
2673                                         $text = substr( $text, 1 );
2674                                 }
2675                         } else {
2676                                 # T6598 madness. Handle the quotes only if they come from the alternate part
2677                                 # [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
2678                                 # [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
2679                                 #    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
2680                                 $text = $this->doQuotes( $text );
2681                         }
2682
2683                         # Link not escaped by : , create the various objects
2684                         if ( $noforce && !$nt->wasLocalInterwiki() ) {
2685                                 # Interwikis
2686                                 if (
2687                                         $iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
2688                                                 $this->languageNameUtils->getLanguageName(
2689                                                         $iw,
2690                                                         LanguageNameUtils::AUTONYMS,
2691                                                         LanguageNameUtils::DEFINED
2692                                                 )
2693                                                 || in_array( $iw, $this->svcOptions->get( MainConfigNames::ExtraInterlanguageLinkPrefixes ) )
2694                                         )
2695                                 ) {
2696                                         # T26502: duplicates are resolved in ParserOutput
2697                                         $this->mOutput->addLanguageLink( $nt );
2698
2699                                         /**
2700                                          * Strip the whitespace interlanguage links produce, see
2701                                          * T10897, T175416, and T359886.
2702                                          */
2703                                         $s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;
2704                                         continue;
2705                                 }
2706
2707                                 if ( $ns === NS_FILE ) {
2708                                         if ( $wasblank ) {
2709                                                 # if no parameters were passed, $text
2710                                                 # becomes something like "File:Foo.png",
2711                                                 # which we don't want to pass on to the
2712                                                 # image generator
2713                                                 $text = '';
2714                                         } else {
2715                                                 # recursively parse links inside the image caption
2716                                                 # actually, this will parse them in any other parameters, too,
2717                                                 # but it might be hard to fix that, and it doesn't matter ATM
2718                                                 $text = $this->handleExternalLinks( $text );
2719                                                 $holders->merge( $this->handleInternalLinks2( $text ) );
2720                                         }
2721                                         # cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
2722                                         $s .= $prefix . $this->armorLinks(
2723                                                 $this->makeImage( $nt, $text, $holders ) ) . $trail;
2724                                         continue;
2725                                 } elseif ( $ns === NS_CATEGORY ) {
2726                                         # Strip newlines from the left hand context of Category
2727                                         # links.
2728                                         # See T2087, T87753, T174639, T359886
2729                                         $s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;
2730
2731                                         $sortkey = ''; // filled in by CategoryLinksTable
2732                                         if ( !$wasblank ) {
2733                                                 $sortkey = $text;
2734                                         }
2735                                         $this->mOutput->addCategory( $nt, $sortkey );
2736
2737                                         continue;
2738                                 }
2739                         }
2740
2741                         # Self-link checking. For some languages, variants of the title are checked in
2742                         # LinkHolderArray::doVariants() to allow batching the existence checks necessary
2743                         # for linking to a different variant.
2744                         if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) ) {
2745                                 $s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail, '',
2746                                         Sanitizer::escapeIdForLink( $nt->getFragment() ) );
2747                                 continue;
2748                         }
2749
2750                         # NS_MEDIA is a pseudo-namespace for linking directly to a file
2751                         # @todo FIXME: Should do batch file existence checks, see comment below
2752                         if ( $ns === NS_MEDIA ) {
2753                                 # Give extensions a chance to select the file revision for us
2754                                 $options = [];
2755                                 $descQuery = false;
2756                                 $this->hookRunner->onBeforeParserFetchFileAndTitle(
2757                                         // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
2758                                         $this, $nt, $options, $descQuery
2759                                 );
2760                                 # Fetch and register the file (file title may be different via hooks)
2761                                 [ $file, $nt ] = $this->fetchFileAndTitle( $nt, $options );
2762                                 # Cloak with NOPARSE to avoid replacement in handleExternalLinks
2763                                 $s .= $prefix . $this->armorLinks(
2764                                         Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
2765                                 continue;
2766                         }
2767
2768                         # Some titles, such as valid special pages or files in foreign repos, should
2769                         # be shown as bluelinks even though they're not included in the page table
2770                         # @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
2771                         # batch file existence checks for NS_FILE and NS_MEDIA
2772                         if ( $iw == '' && $nt->isAlwaysKnown() ) {
2773                                 $this->mOutput->addLink( $nt );
2774                                 $s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
2775                         } else {
2776                                 # Links will be added to the output link list after checking
2777                                 $s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
2778                         }
2779                 }
2780                 return $holders;
2781         }
2782
2783         /**
2784          * Render a known ("forced-blue") link inline instead of via a link holder.
2785          *
2786          * The trail is split by Linker::splitTrail(): its first part becomes part
2787          * of the link text, the rest is appended after the finished link. The link
2788          * HTML is run through armorLinks() to protect URLs in it from double expansion.
2789          *
2790          * @param LinkTarget $nt Link target
2791          * @param string $text Link text; if empty, the escaped prefixed title is used
2792          * @param string $trail Text following the link
2793          * @param string $prefix Text placed inside the link, in front of $text
2794          * @return string Mix of HTML and wikitext
2795          */
2796         private function makeKnownLinkHolder( LinkTarget $nt, $text = '', $trail = '', $prefix = '' ) {
2797                 [ $linkedTrail, $restOfTrail ] = Linker::splitTrail( $trail );
2798
2799                 // Without explicit link text, show the prefixed title (escaped for HTML)
2800                 $label = $text == ''
2801                         ? htmlspecialchars( $this->titleFormatter->getPrefixedText( $nt ) )
2802                         : $text;
2803
2804                 $renderer = $this->getLinkRenderer();
2805                 $anchor = $renderer->makeKnownLink( $nt, new HtmlArmor( $prefix . $label . $linkedTrail ) );
2806
2807                 return $this->armorLinks( $anchor ) . $restOfTrail;
2808         }
2809
2810         /**
2811          * Prefix every URL protocol found in a chunk with a NOPARSE marker.
2812          *
2813          * The chunk goes through further parsing steps before inline URL expansion;
2814          * the marker protects links already in it, notably bracketed ones.
2815          *
2816          * @param string $text More-or-less HTML
2817          * @return string The same HTML with NOPARSE markers inserted
2818          */
2819         private function armorLinks( $text ) {
2820                 $protocolPattern = '/\b((?i)' . $this->urlUtils->validProtocols() . ')/';
2821                 $armoredProtocol = self::MARKER_PREFIX . 'NOPARSE$1';
2822                 return preg_replace( $protocolPattern, $armoredProtocol, $text );
2823         }
2824
2825         /**
2826          * Make lists from lines starting with ':', '*', '#', etc. (DBL), via BlockLevelPass.
2827          *
2828          * @param string $text Text passed on unchanged to BlockLevelPass::doBlockLevels()
2829          * @param bool $linestart Whether or not this is at the start of a line.
2830          * @internal
2831          * @return string The lists rendered as HTML
2832          * @deprecated since 1.35, will not be supported in future parsers (every call reports it)
2833          */
2834         public function doBlockLevels( $text, $linestart ) {
2835                 wfDeprecated( __METHOD__, '1.35' );
2836                 return BlockLevelPass::doBlockLevels( $text, $linestart );
2837         }
2838
2839         /**
2840          * Look up the value of a magic variable (like PAGENAME), computing it on
2841          * first use and keeping it in mVarCache for later calls.
2842          *
2843          * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
2844          * @param PPFrame|false $frame Handed to the ParserGetVariableValueSwitch hook
2845          *
2846          * @return string
2847          */
2848         private function expandMagicVariable( $index, $frame = false ) {
2849                 // Some values need message or data lookups that are expensive to
2850                 // repeat, so anything already computed is served from the cache.
2851                 $cached = $this->mVarCache[$index] ?? null;
2852                 if ( $cached !== null ) {
2853                         return $cached;
2854                 }
2855
2856                 $ts = new MWTimestamp( $this->mOptions->getTimestamp() /* TS_MW */ );
2857                 if ( $this->hookContainer->isRegistered( 'ParserGetVariableValueTs' ) ) {
2858                         // The hook gets the time as TS_UNIX; what it leaves there is used from here on
2859                         $unixTime = $ts->getTimestamp( TS_UNIX );
2860                         $this->hookRunner->onParserGetVariableValueTs( $this, $unixTime );
2861                         $ts = new MWTimestamp( $unixTime );
2862                 }
2863
2864                 $value = CoreMagicVariables::expand(
2865                         $this, $index, $ts, $this->svcOptions, $this->logger
2866                 );
2867                 if ( $value === null ) {
2868                         // Not a defined core magic word, so ask the hook. It is handed a
2869                         // throwaway array instead of unrestricted access to mVarCache.
2870                         $scratchCache = [];
2871                         $this->hookRunner->onParserGetVariableValueSwitch(
2872                                 // @phan-suppress-next-line PhanTypeMismatchArgument $value is passed as null but returned as string
2873                                 $this, $scratchCache, $index, $value, $frame
2874                         );
2875                         // Assert that the hook returned a non-null value for this MV
2876                         '@phan-var string $value';
2877                 }
2878
2879                 // Cache the result whether it came from core or from the hook
2880                 $this->mVarCache[$index] = $value;
2881
2882                 return $value;
2883         }
2884
2885         /**
2886          * Load the magic word arrays used to recognise magic variables (like
2887          * CURRENTMONTHNAME) and substitution modifiers.
2888          */
2889         private function initializeVariables() {
2890                 $factory = $this->magicWordFactory;
2891
2892                 $this->mVariables = $factory->newArray( $factory->getVariableIDs() );
2893                 $this->mSubstWords = $factory->getSubstArray();
2894         }
2895
2896         /**
2897          * Build the preprocessor DOM for a piece of wikitext.
2898          *
2899          * The resulting tree must be a function of the input text and the flags
2900          * only. In particular it must come out the same in OT_HTML and OT_WIKI
2901          * mode, otherwise T6899 regresses.
2902          *
2903          * Any flag added to $flags here, and any other parameter that can change
2904          * the DOM tree produced for a given text, has to travel through the section
2905          * identifier in the section edit link so that it reaches extractSections().
2906          *
2907          * @see Preprocessor::preprocessToObj()
2908          * @param string $text Wikitext
2909          * @param int $flags Bit field of Preprocessor::DOM_* constants
2910          * @return PPNode
2911          * @since 1.23 method is public
2912          */
2913         public function preprocessToDom( $text, $flags = 0 ) {
2914                 $preprocessor = $this->getPreprocessor();
2915                 return $preprocessor->preprocessToObj( $text, $flags );
2916         }
2917
2918         /**
2919          * Replace magic variables, templates, and template arguments with the appropriate
2920          * text. Templates are substituted recursively, taking care to avoid infinite loops.
2921          *
2922          * Note that the substitution depends on value of $mOutputType:
2923          *  self::OT_WIKI: only {{subst:}} templates
2924          *  self::OT_PREPROCESS: templates but not extension tags
2925          *  self::OT_HTML: all templates and extension tags
2926          *
2927          * @param string $text The text to transform
2928          * @param false|PPFrame|array $frame Object describing the arguments passed to the
2929          *   template; false creates a new frame. Passing the arguments as an associative
2930          *   array (the usual case before MW1.12) still works but is deprecated since 1.43.
2931          * @param bool $argsOnly Only do argument (triple-brace) expansion, not double-brace expansion.
2932          * @param bool $stripExtTags When true, put extension tags in general strip state; when
2933          *   false extension tags are skipped during OT_PREPROCESS
2934          * @return string Expanded text; $text itself if it is empty or exceeds the max include size
2935          * @since 1.24 method is public
2936          */
2937         public function replaceVariables( $text, $frame = false, $argsOnly = false, $stripExtTags = true ) {
2938                 # Nothing to do for empty text; text over the include size limit is left alone too
2939                 $textSize = strlen( $text );
2940                 if ( $textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize() ) {
2941                         return $text;
2942                 }
2943
2944                 if ( $frame === false ) {
2945                         $frame = $this->getPreprocessor()->newFrame();
2946                 } elseif ( !( $frame instanceof PPFrame ) ) {
2947                         wfDeprecated(
2948                                 __METHOD__ . " called using plain parameters instead of " .
2949                                 "a PPFrame instance. Creating custom frame.",
2950                                 '1.43'
2951                         );
2952                         $frame = $this->getPreprocessor()->newCustomFrame( $frame );
2953                 }
2954
2955                 $dom = $this->preprocessToDom( $text );
2956                 $flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;
2957                 // $stripExtTags applies to this expansion only; the finally block puts the
2958                 // previous setting back even when expand() throws.
2959                 $previousStripExtTags = $this->mStripExtTags;
2960                 $this->mStripExtTags = $stripExtTags;
2961                 try {
2962                         return $frame->expand( $dom, $flags );
2963                 } finally {
2964                         $this->mStripExtTags = $previousStripExtTags;
2965                 }
2966         }
2967
2968         /**
2969          * Flag that a parser limit has been hit: adds the warning message to the output
2970          * and the matching tracking category. Warns the user at most once per limitation type.
2971          *
2972          * The results are shown during preview and run through the Parser (see EditPage.php).
2973          *
2974          * @param string $limitationType Should be one of:
2975          *   'expensive-parserfunction' (corresponding messages:
2976          *       'expensive-parserfunction-warning',
2977          *       'expensive-parserfunction-category')
2978          *   'post-expand-template-argument' (corresponding messages:
2979          *       'post-expand-template-argument-warning',
2980          *       'post-expand-template-argument-category')
2981          *   'post-expand-template-inclusion' (corresponding messages:
2982          *       'post-expand-template-inclusion-warning',
2983          *       'post-expand-template-inclusion-category')
2984          *   'node-count-exceeded' (corresponding messages:
2985          *       'node-count-exceeded-warning',
2986          *       'node-count-exceeded-category')
2987          *   'expansion-depth-exceeded' (corresponding messages:
2988          *       'expansion-depth-exceeded-warning',
2989          *       'expansion-depth-exceeded-category')
2990          * @param string|int|null $current Current value
2991          * @param string|int|null $max Maximum allowed, when an explicit limit has been
2992          *       exceeded, provide the values (optional)
2993          * @internal
2994          */
2995         public function limitationWarn( $limitationType, $current = '', $max = '' ) {
2996                 # $current and $max are always passed; a message that does not need them is unharmed.
2997                 # No ->inLanguage( $this->mOptions->getUserLangObj() ) here: the warning is shown
2998                 # only during preview, and that would split the parser cache unnecessarily.
2999                 $warningKey = $limitationType . '-warning';
3000                 $categoryKey = $limitationType . '-category';
3001                 $this->mOutput->addWarningMsg(
3002                         $warningKey, Message::numParam( $current ), Message::numParam( $max )
3003                 );
3004                 $this->addTrackingCategory( $categoryKey );
3005         }
3006
3007         /**
3008          * Return the text of a template, after recursively
3009          * replacing any variables or templates within the template.
3010          *
3011          * @param array $piece The parts of the template
3012          *   $piece['title']: the title, i.e. the part before the |
3013          *   $piece['parts']: the parameter array
3014          *   $piece['lineStart']: whether the brace was at the start of a line
3015          * @param PPFrame $frame The current frame, contains template arguments
3016          * @throws Exception
3017          * @return string|array The text of the template
3018          * @internal
3019          */
3020         public function braceSubstitution( array $piece, PPFrame $frame ) {
3021                 // Flags
3022
3023                 // $text has been filled
3024                 $found = false;
3025                 $text = '';
3026                 // wiki markup in $text should be escaped
3027                 $nowiki = false;
3028                 // $text is HTML, armour it against wikitext transformation
3029                 $isHTML = false;
3030                 // Force interwiki transclusion to be done in raw mode not rendered
3031                 $forceRawInterwiki = false;
3032                 // $text is a DOM node needing expansion in a child frame
3033                 $isChildObj = false;
3034                 // $text is a DOM node needing expansion in the current frame
3035                 $isLocalObj = false;
3036
3037                 # Title object, where $text came from
3038                 $title = false;
3039
3040                 # $part1 is the bit before the first |, and must contain only title characters.
3041                 # Various prefixes will be stripped from it later.
3042                 $titleWithSpaces = $frame->expand( $piece['title'] );
3043                 $part1 = trim( $titleWithSpaces );
3044                 $titleText = false;
3045
3046                 # Original title text preserved for various purposes
3047                 $originalTitle = $part1;
3048
3049                 # $args is a list of argument nodes, starting from index 0, not including $part1
3050                 $args = $piece['parts'];
3051
3052                 $profileSection = null; // profile templates
3053
3054                 $sawDeprecatedTemplateEquals = false; // T91154
3055
3056                 # SUBST
3057                 // @phan-suppress-next-line PhanImpossibleCondition
3058                 if ( !$found ) {
3059                         $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3060                         $part1 = trim( $part1 );
3061
3062                         # Possibilities for substMatch: "subst", "safesubst" or FALSE
3063                         # Decide whether to expand template or keep wikitext as-is.
3064                         if ( $this->ot['wiki'] ) {
3065                                 if ( $substMatch === false ) {
3066                                         $literal = true;  # literal when in PST with no prefix
3067                                 } else {
3068                                         $literal = false; # expand when in PST with subst: or safesubst:
3069                                 }
3070                         } else {
3071                                 if ( $substMatch == 'subst' ) {
3072                                         $literal = true;  # literal when not in PST with plain subst:
3073                                 } else {
3074                                         $literal = false; # expand when not in PST with safesubst: or no prefix
3075                                 }
3076                         }
3077                         if ( $literal ) {
3078                                 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3079                                 $isLocalObj = true;
3080                                 $found = true;
3081                         }
3082                 }
3083
3084                 # Variables
3085                 if ( !$found && $args->getLength() == 0 ) {
3086                         $id = $this->mVariables->matchStartToEnd( $part1 );
3087                         if ( $id !== false ) {
3088                                 if ( strpos( $part1, ':' ) !== false ) {
3089                                         wfDeprecatedMsg(
3090                                                 'Registering a magic variable with a name including a colon',
3091                                                 '1.39', false, false
3092                                         );
3093                                 }
3094                                 $text = $this->expandMagicVariable( $id, $frame );
3095                                 $found = true;
3096                         }
3097                 }
3098
3099                 # MSG, MSGNW and RAW
3100                 if ( !$found ) {
3101                         # Check for MSGNW:
3102                         $mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
3103                         if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3104                                 $nowiki = true;
3105                         } else {
3106                                 # Remove obsolete MSG:
3107                                 $mwMsg = $this->magicWordFactory->get( 'msg' );
3108                                 $mwMsg->matchStartAndRemove( $part1 );
3109                         }
3110
3111                         # Check for RAW:
3112                         $mwRaw = $this->magicWordFactory->get( 'raw' );
3113                         if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3114                                 $forceRawInterwiki = true;
3115                         }
3116                 }
3117
3118                 # Parser functions
3119                 if ( !$found ) {
3120                         $colonPos = strpos( $part1, ':' );
3121                         if ( $colonPos !== false ) {
3122                                 $func = substr( $part1, 0, $colonPos );
3123                                 $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
3124                                 $argsLength = $args->getLength();
3125                                 for ( $i = 0; $i < $argsLength; $i++ ) {
3126                                         $funcArgs[] = $args->item( $i );
3127                                 }
3128
3129                                 $result = $this->callParserFunction( $frame, $func, $funcArgs );
3130
3131                                 // Extract any forwarded flags
3132                                 if ( isset( $result['title'] ) ) {
3133                                         $title = $result['title'];
3134                                 }
3135                                 if ( isset( $result['found'] ) ) {
3136                                         $found = $result['found'];
3137                                 }
3138                                 if ( array_key_exists( 'text', $result ) ) {
3139                                         // a string or null
3140                                         $text = $result['text'];
3141                                 }
3142                                 if ( isset( $result['nowiki'] ) ) {
3143                                         $nowiki = $result['nowiki'];
3144                                 }
3145                                 if ( isset( $result['isHTML'] ) ) {
3146                                         $isHTML = $result['isHTML'];
3147                                 }
3148                                 if ( isset( $result['forceRawInterwiki'] ) ) {
3149                                         $forceRawInterwiki = $result['forceRawInterwiki'];
3150                                 }
3151                                 if ( isset( $result['isChildObj'] ) ) {
3152                                         $isChildObj = $result['isChildObj'];
3153                                 }
3154                                 if ( isset( $result['isLocalObj'] ) ) {
3155                                         $isLocalObj = $result['isLocalObj'];
3156                                 }
3157                         }
3158                 }
3159
3160                 # Finish mangling title and then check for loops.
3161                 # Set $title to a Title object and $titleText to the PDBK
3162                 if ( !$found ) {
3163                         $ns = NS_TEMPLATE;
3164                         # Split the title into page and subpage
3165                         $subpage = '';
3166                         $relative = Linker::normalizeSubpageLink(
3167                                 $this->getTitle(), $part1, $subpage
3168                         );
3169                         if ( $part1 !== $relative ) {
3170                                 $part1 = $relative;
3171                                 $ns = $this->getTitle()->getNamespace();
3172                         }
3173                         $title = Title::newFromText( $part1, $ns );
3174                         if ( $title ) {
3175                                 $titleText = $title->getPrefixedText();
3176                                 # Check for language variants if the template is not found
3177                                 if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
3178                                         $this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
3179                                 }
3180                                 # Do recursion depth check
3181                                 $limit = $this->mOptions->getMaxTemplateDepth();
3182                                 if ( $frame->depth >= $limit ) {
3183                                         $found = true;
3184                                         $text = '<span class="error">'
3185                                                 . wfMessage( 'parser-template-recursion-depth-warning' )
3186                                                         ->numParams( $limit )->inContentLanguage()->text()
3187                                                 . '</span>';
3188                                 }
3189                         }
3190                 }
3191
3192                 # Load from database
3193                 if ( !$found && $title ) {
3194                         $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3195                         if ( !$title->isExternal() ) {
3196                                 if ( $title->isSpecialPage()
3197                                         && $this->mOptions->getAllowSpecialInclusion()
3198                                         && $this->ot['html']
3199                                 ) {
3200                                         $specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
3201                                         // Pass the template arguments as URL parameters.
3202                                         // "uselang" will have no effect since the Language object
3203                                         // is forced to the one defined in ParserOptions.
3204                                         $pageArgs = [];
3205                                         $argsLength = $args->getLength();
3206                                         for ( $i = 0; $i < $argsLength; $i++ ) {
3207                                                 $bits = $args->item( $i )->splitArg();
3208                                                 if ( strval( $bits['index'] ) === '' ) {
3209                                                         $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3210                                                         $value = trim( $frame->expand( $bits['value'] ) );
3211                                                         $pageArgs[$name] = $value;
3212                                                 }
3213                                         }
3214
3215                                         // Create a new context to execute the special page, that is expensive
3216                                         if ( $this->incrementExpensiveFunctionCount() ) {
3217                                                 $context = new RequestContext;
3218                                                 $context->setTitle( $title );
3219                                                 $context->setRequest( new FauxRequest( $pageArgs ) );
3220                                                 if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
3221                                                         $context->setUser( $this->userFactory->newFromUserIdentity( $this->getUserIdentity() ) );
3222                                                 } else {
3223                                                         // If this page is cached, then we better not be per user.
3224                                                         $context->setUser( User::newFromName( '127.0.0.1', false ) );
3225                                                 }
3226                                                 $context->setLanguage( $this->mOptions->getUserLangObj() );
3227                                                 $ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
3228                                                 if ( $ret ) {
3229                                                         $text = $context->getOutput()->getHTML();
3230                                                         $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3231                                                         $found = true;
3232                                                         $isHTML = true;
3233                                                         if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
3234                                                                 $this->mOutput->updateRuntimeAdaptiveExpiry(
3235                                                                         $specialPage->maxIncludeCacheTime()
3236                                                                 );
3237                                                         }
3238                                                 }
3239                                         }
3240                                 } elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
3241                                         $found = false; # access denied
3242                                         $this->logger->debug(
3243                                                 __METHOD__ .
3244                                                 ": template inclusion denied for " . $title->getPrefixedDBkey()
3245                                         );
3246                                 } else {
3247                                         [ $text, $title ] = $this->getTemplateDom( $title );
3248                                         if ( $text !== false ) {
3249                                                 $found = true;
3250                                                 $isChildObj = true;
3251                                                 if (
3252                                                         $title->getNamespace() === NS_TEMPLATE &&
3253                                                         $title->getDBkey() === '=' &&
3254                                                         $originalTitle === '='
3255                                                 ) {
3256                                                         // Note that we won't get here if `=` is evaluated
3257                                                         // (in the future) as a parser function, nor if
3258                                                         // the Template namespace is given explicitly,
3259                                                         // ie `{{Template:=}}`.  Only `{{=}}` triggers.
3260                                                         $sawDeprecatedTemplateEquals = true; // T91154
3261                                                 }
3262                                         }
3263                                 }
3264
3265                                 # If the title is valid but undisplayable, make a link to it
3266                                 if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3267                                         $text = "[[:$titleText]]";
3268                                         $found = true;
3269                                 }
3270                         } elseif ( $title->isTrans() ) {
3271                                 # Interwiki transclusion
3272                                 if ( $this->ot['html'] && !$forceRawInterwiki ) {
3273                                         $text = $this->interwikiTransclude( $title, 'render' );
3274                                         $isHTML = true;
3275                                 } else {
3276                                         $text = $this->interwikiTransclude( $title, 'raw' );
3277                                         # Preprocess it like a template
3278                                         $text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
3279                                         $isChildObj = true;
3280                                 }
3281                                 $found = true;
3282                         }
3283
3284                         # Do infinite loop check
3285                         # This has to be done after redirect resolution to avoid infinite loops via redirects
3286                         if ( !$frame->loopCheck( $title ) ) {
3287                                 $found = true;
3288                                 $text = '<span class="error">'
3289                                         . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3290                                         . '</span>';
3291                                 $this->addTrackingCategory( 'template-loop-category' );
3292                                 $this->mOutput->addWarningMsg(
3293                                         'template-loop-warning',
3294                                         Message::plaintextParam( $titleText )
3295                                 );
3296                                 $this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
3297                         }
3298                 }
3299
3300                 # If we haven't found text to substitute by now, we're done
3301                 # Recover the source wikitext and return it
3302                 if ( !$found ) {
3303                         $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3304                         if ( $profileSection ) {
3305                                 $this->mProfiler->scopedProfileOut( $profileSection );
3306                         }
3307                         return [ 'object' => $text ];
3308                 }
3309
3310                 # Expand DOM-style return values in a child frame
3311                 if ( $isChildObj ) {
3312                         # Clean up argument array
3313                         $newFrame = $frame->newChild( $args, $title );
3314
3315                         if ( $nowiki ) {
3316                                 $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3317                         } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3318                                 # Expansion is eligible for the empty-frame cache
3319                                 $text = $newFrame->cachedExpand( $titleText, $text );
3320                         } else {
3321                                 # Uncached expansion
3322                                 $text = $newFrame->expand( $text );
3323                         }
3324                 }
3325                 if ( $isLocalObj && $nowiki ) {
3326                         $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3327                         $isLocalObj = false;
3328                 }
3329
3330                 if ( $profileSection ) {
3331                         $this->mProfiler->scopedProfileOut( $profileSection );
3332                 }
3333                 if (
3334                         $sawDeprecatedTemplateEquals &&
3335                         $this->mStripState->unstripBoth( $text ) !== '='
3336                 ) {
3337                         // T91154: {{=}} is deprecated when it doesn't expand to `=`;
3338                         // use {{Template:=}} if you must.
3339                         $this->addTrackingCategory( 'template-equals-category' );
3340                         $this->mOutput->addWarningMsg( 'template-equals-warning' );
3341                 }
3342
3343                 # Replace raw HTML by a placeholder
3344                 if ( $isHTML ) {
3345                         // @phan-suppress-next-line SecurityCheck-XSS
3346                         $text = $this->insertStripItem( $text );
3347                 } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3348                         # Escape nowiki-style return values
3349                         // @phan-suppress-next-line SecurityCheck-DoubleEscaped
3350                         $text = wfEscapeWikiText( $text );
3351                 } elseif ( is_string( $text )
3352                         && !$piece['lineStart']
3353                         && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3354                 ) {
3355                         # T2529: if the template begins with a table or block-level
3356                         # element, it should be treated as beginning a new line.
3357                         # This behavior is somewhat controversial.
3358                         $text = "\n" . $text;
3359                 }
3360
3361                 if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3362                         # Error, oversize inclusion
3363                         if ( $titleText !== false ) {
3364                                 # Make a working, properly escaped link if possible (T25588)
3365                                 $text = "[[:$titleText]]";
3366                         } else {
3367                                 # This will probably not be a working link, but at least it may
3368                                 # provide some hint of where the problem is
3369                                 $originalTitle = preg_replace( '/^:/', '', $originalTitle );
3370                                 $text = "[[:$originalTitle]]";
3371                         }
3372                         $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3373                                 . 'post-expand include size too large -->' );
3374                         $this->limitationWarn( 'post-expand-template-inclusion' );
3375                 }
3376
3377                 if ( $isLocalObj ) {
3378                         $ret = [ 'object' => $text ];
3379                 } else {
3380                         $ret = [ 'text' => $text ];
3381                 }
3382
3383                 return $ret;
3384         }
3385
3386         /**
3387          * Call a parser function and return an array with text and flags.
3388          *
3389          * The returned array will always contain a boolean 'found', indicating
3390          * whether the parser function was found or not. It may also contain the
3391          * following:
3392          *  text: string|object, resulting wikitext or PP DOM object
3393          *  isHTML: bool, $text is HTML, armour it against wikitext transformation
3394          *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
3395          *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
3396          *  nowiki: bool, wiki markup in $text should be escaped
3397          *
3398          * @since 1.21
3399          * @param PPFrame $frame The current frame, contains template arguments
3400          * @param string $function Function name
3401          * @param array $args Arguments to the function
3402          * @return array
3403          */
3404         public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
3405                 # Case sensitive functions
3406                 if ( isset( $this->mFunctionSynonyms[1][$function] ) ) {
3407                         $function = $this->mFunctionSynonyms[1][$function];
3408                 } else {
3409                         # Case insensitive functions
3410                         $function = $this->contLang->lc( $function );
3411                         if ( isset( $this->mFunctionSynonyms[0][$function] ) ) {
3412                                 $function = $this->mFunctionSynonyms[0][$function];
3413                         } else {
3414                                 return [ 'found' => false ];
3415                         }
3416                 }
3417
3418                 [ $callback, $flags ] = $this->mFunctionHooks[$function];
3419
3420                 $allArgs = [ $this ];
3421                 if ( $flags & self::SFH_OBJECT_ARGS ) {
3422                         # Convert arguments to PPNodes and collect for appending to $allArgs
3423                         $funcArgs = [];
3424                         foreach ( $args as $k => $v ) {
3425                                 if ( $v instanceof PPNode || $k === 0 ) {
3426                                         $funcArgs[] = $v;
3427                                 } else {
3428                                         $funcArgs[] = $this->mPreprocessor->newPartNodeArray( [ $k => $v ] )->item( 0 );
3429                                 }
3430                         }
3431
3432                         # Add a frame parameter, and pass the arguments as an array
3433                         $allArgs[] = $frame;
3434                         $allArgs[] = $funcArgs;
3435                 } else {
3436                         # Convert arguments to plain text and append to $allArgs
3437                         foreach ( $args as $k => $v ) {
3438                                 if ( $v instanceof PPNode ) {
3439                                         $allArgs[] = trim( $frame->expand( $v ) );
3440                                 } elseif ( is_int( $k ) && $k >= 0 ) {
3441                                         $allArgs[] = trim( $v );
3442                                 } else {
3443                                         $allArgs[] = trim( "$k=$v" );
3444                                 }
3445                         }
3446                 }
3447
3448                 $result = $callback( ...$allArgs );
3449
3450                 # The interface for function hooks allows them to return a wikitext
3451                 # string or an array containing the string and any flags. This mungs
3452                 # things around to match what this method should return.
3453                 if ( !is_array( $result ) ) {
3454                         $result = [
3455                                 'found' => true,
3456                                 'text' => $result,
3457                         ];
3458                 } else {
3459                         if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
3460                                 $result['text'] = $result[0];
3461                         }
3462                         unset( $result[0] );
3463                         $result += [
3464                                 'found' => true,
3465                         ];
3466                 }
3467
3468                 $noparse = true;
3469                 $preprocessFlags = 0;
3470                 if ( isset( $result['noparse'] ) ) {
3471                         $noparse = $result['noparse'];
3472                 }
3473                 if ( isset( $result['preprocessFlags'] ) ) {
3474                         $preprocessFlags = $result['preprocessFlags'];
3475                 }
3476
3477                 if ( !$noparse ) {
3478                         $result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
3479                         $result['isChildObj'] = true;
3480                 }
3481
3482                 return $result;
3483         }
3484
3485         /**
3486          * Get the semi-parsed DOM representation of a template with a given title,
3487          * and its redirect destination title. Cached.
3488          *
3489          * @param LinkTarget $title
3490          *
3491          * @return array
3492          * @since 1.12
3493          */
3494         public function getTemplateDom( LinkTarget $title ) {
3495                 $cacheTitle = $title;
3496                 $titleKey = CacheKeyHelper::getKeyForPage( $title );
3497
3498                 if ( isset( $this->mTplRedirCache[$titleKey] ) ) {
3499                         [ $ns, $dbk ] = $this->mTplRedirCache[$titleKey];
3500                         $title = Title::makeTitle( $ns, $dbk );
3501                         $titleKey = CacheKeyHelper::getKeyForPage( $title );
3502                 }
3503                 if ( isset( $this->mTplDomCache[$titleKey] ) ) {
3504                         return [ $this->mTplDomCache[$titleKey], $title ];
3505                 }
3506
3507                 # Cache miss, go to the database
3508                 [ $text, $title ] = $this->fetchTemplateAndTitle( $title );
3509
3510                 if ( $text === false ) {
3511                         $this->mTplDomCache[$titleKey] = false;
3512                         return [ false, $title ];
3513                 }
3514
3515                 $dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
3516                 $this->mTplDomCache[$titleKey] = $dom;
3517
3518                 if ( !$title->isSamePageAs( $cacheTitle ) ) {
3519                         $this->mTplRedirCache[ CacheKeyHelper::getKeyForPage( $cacheTitle ) ] =
3520                                 [ $title->getNamespace(), $title->getDBkey() ];
3521                 }
3522
3523                 return [ $dom, $title ];
3524         }
3525
3526         /**
3527          * Fetch the current revision of a given title as a RevisionRecord.
3528          * Note that the revision (and even the title) may not exist in the database,
3529          * so everything contributing to the output of the parser should use this method
3530          * where possible, rather than getting the revisions themselves. This
3531          * method also caches its results, so using it benefits performance.
3532          *
3533          * This can return null if the callback returns false
3534          *
3535          * @since 1.35
3536          * @param LinkTarget $link
3537          * @return RevisionRecord|null
3538          */
3539         public function fetchCurrentRevisionRecordOfTitle( LinkTarget $link ) {
3540                 $cacheKey = CacheKeyHelper::getKeyForPage( $link );
3541                 if ( !$this->currentRevisionCache ) {
3542                         $this->currentRevisionCache = new MapCacheLRU( 100 );
3543                 }
3544                 if ( !$this->currentRevisionCache->has( $cacheKey ) ) {
3545                         $title = Title::newFromLinkTarget( $link ); // hook signature compat
3546                         $revisionRecord =
3547                                 // Defaults to Parser::statelessFetchRevisionRecord()
3548                                 call_user_func(
3549                                         $this->mOptions->getCurrentRevisionRecordCallback(),
3550                                         $title,
3551                                         $this
3552                                 );
3553                         if ( $revisionRecord === false ) {
3554                                 // Parser::statelessFetchRevisionRecord() can return false;
3555                                 // normalize it to null.
3556                                 $revisionRecord = null;
3557                         }
3558                         $this->currentRevisionCache->set( $cacheKey, $revisionRecord );
3559                 }
3560                 return $this->currentRevisionCache->get( $cacheKey );
3561         }
3562
3563         /**
3564          * @param LinkTarget $link
3565          * @return bool
3566          * @since 1.34
3567          * @internal
3568          */
3569         public function isCurrentRevisionOfTitleCached( LinkTarget $link ) {
3570                 $key = CacheKeyHelper::getKeyForPage( $link );
3571                 return (
3572                         $this->currentRevisionCache &&
3573                         $this->currentRevisionCache->has( $key )
3574                 );
3575         }
3576
3577         /**
3578          * Wrapper around RevisionLookup::getKnownCurrentRevision
3579          *
3580          * @since 1.34
3581          * @param LinkTarget $link
3582          * @param Parser|null $parser
3583          * @return RevisionRecord|false False if missing
3584          */
3585         public static function statelessFetchRevisionRecord( LinkTarget $link, $parser = null ) {
3586                 if ( $link instanceof PageIdentity ) {
3587                         // probably a Title, just use it.
3588                         $page = $link;
3589                 } else {
3590                         // XXX: use RevisionStore::getPageForLink()!
3591                         //      ...but get the info for the current revision at the same time?
3592                         //      Should RevisionStore::getKnownCurrentRevision accept a LinkTarget?
3593                         $page = Title::newFromLinkTarget( $link );
3594                 }
3595
3596                 $revRecord = MediaWikiServices::getInstance()
3597                         ->getRevisionLookup()
3598                         ->getKnownCurrentRevision( $page );
3599                 return $revRecord;
3600         }
3601
3602         /**
3603          * Fetch the unparsed text of a template and register a reference to it.
3604          * @param LinkTarget $link
3605          * @return array ( string or false, Title )
3606          * @since 1.11
3607          */
3608         public function fetchTemplateAndTitle( LinkTarget $link ) {
3609                 // Use Title for compatibility with callbacks and return type
3610                 $title = Title::newFromLinkTarget( $link );
3611
3612                 // Defaults to Parser::statelessFetchTemplate()
3613                 $templateCb = $this->mOptions->getTemplateCallback();
3614                 $stuff = $templateCb( $title, $this );
3615                 $revRecord = $stuff['revision-record'] ?? null;
3616
3617                 $text = $stuff['text'];
3618                 if ( is_string( $stuff['text'] ) ) {
3619                         // We use U+007F DELETE to distinguish strip markers from regular text
3620                         $text = strtr( $text, "\x7f", "?" );
3621                 }
3622                 $finalTitle = $stuff['finalTitle'] ?? $title;
3623                 foreach ( ( $stuff['deps'] ?? [] ) as $dep ) {
3624                         $this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );
3625                         if ( $dep['title']->equals( $this->getTitle() ) && $revRecord instanceof RevisionRecord ) {
3626                                 // Self-transclusion; final result may change based on the new page version
3627                                 try {
3628                                         $sha1 = $revRecord->getSha1();
3629                                 } catch ( RevisionAccessException $e ) {
3630                                         $sha1 = null;
3631                                 }
3632                                 $this->setOutputFlag( ParserOutputFlags::VARY_REVISION_SHA1, 'Self transclusion' );
3633                                 $this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
3634                         }
3635                 }
3636
3637                 return [ $text, $finalTitle ];
3638         }
3639
3640         /**
3641          * Static function to get a template
3642          * Can be overridden via ParserOptions::setTemplateCallback().
3643          *
3644          * @param LinkTarget $page
3645          * @param Parser|false $parser
3646          *
3647          * @return array
3648          * @since 1.12
3649          */
3650         public static function statelessFetchTemplate( $page, $parser = false ) {
3651                 $title = Title::castFromLinkTarget( $page ); // for compatibility with return type
3652                 $text = $skip = false;
3653                 $finalTitle = $title;
3654                 $deps = [];
3655                 $revRecord = null;
3656                 $contextTitle = $parser ? $parser->getTitle() : null;
3657
3658                 # Loop to fetch the article, with up to 2 redirects
3659
3660                 # Note that $title (including redirect targets) could be
3661                 # external; we do allow hooks a chance to redirect the
3662                 # external title to a local one (which might be useful), but
3663                 # are careful not to add external titles to the dependency
3664                 # list. (T362221)
3665
3666                 $services = MediaWikiServices::getInstance();
3667                 $revLookup = $services->getRevisionLookup();
3668                 $hookRunner = new HookRunner( $services->getHookContainer() );
3669                 for ( $i = 0; $i < 3 && is_object( $title ); $i++ ) {
3670                         # Give extensions a chance to select the revision instead
3671                         $revRecord = null; # Assume no hook
3672                         $origTitle = $title;
3673                         $titleChanged = false;
3674                         $hookRunner->onBeforeParserFetchTemplateRevisionRecord(
                                # The $title is not a PageIdentity, as it may
3676                                 # contain fragments or even represent an attempt to transclude
3677                                 # a broken or otherwise-missing Title, which the hook may
3678                                 # fix up.  Similarly, the $contextTitle may represent a special
3679                                 # page or other page which "exists" as a parsing context but
3680                                 # is not in the DB.
3681                                 $contextTitle, $title,
3682                                 $skip, $revRecord
3683                         );
3684
3685                         if ( $skip ) {
3686                                 $text = false;
3687                                 if ( !$title->isExternal() ) {
3688                                         $deps[] = [
3689                                                 'title' => $title,
3690                                                 'page_id' => $title->getArticleID(),
3691                                                 'rev_id' => null
3692                                         ];
3693                                 }
3694                                 break;
3695                         }
3696                         # Get the revision
3697                         if ( !$revRecord ) {
3698                                 if ( $parser ) {
3699                                         $revRecord = $parser->fetchCurrentRevisionRecordOfTitle( $title );
3700                                 } else {
3701                                         $revRecord = $revLookup->getRevisionByTitle( $title );
3702                                 }
3703                         }
3704                         if ( $revRecord ) {
3705                                 # Update title, as $revRecord may have been changed by hook
3706                                 $title = Title::newFromLinkTarget(
3707                                         $revRecord->getPageAsLinkTarget()
3708                                 );
3709                                 // Assuming title is not external if we've got a $revRecord
3710                                 $deps[] = [
3711                                         'title' => $title,
3712                                         'page_id' => $revRecord->getPageId(),
3713                                         'rev_id' => $revRecord->getId(),
3714                                 ];
3715                         } elseif ( !$title->isExternal() ) {
3716                                 $deps[] = [
3717                                         'title' => $title,
3718                                         'page_id' => $title->getArticleID(),
3719                                         'rev_id' => null,
3720                                 ];
3721                         }
3722                         if ( !$title->equals( $origTitle ) ) {
3723                                 # If we fetched a rev from a different title, register
3724                                 # the original title too...
3725                                 if ( !$origTitle->isExternal() ) {
3726                                         $deps[] = [
3727                                                 'title' => $origTitle,
3728                                                 'page_id' => $origTitle->getArticleID(),
3729                                                 'rev_id' => null,
3730                                         ];
3731                                 }
3732                                 $titleChanged = true;
3733                         }
3734                         # If there is no current revision, there is no page
3735                         if ( $revRecord === null || $revRecord->getId() === null ) {
3736                                 $linkCache = $services->getLinkCache();
3737                                 $linkCache->addBadLinkObj( $title );
3738                         }
3739                         if ( $revRecord ) {
3740                                 if ( $titleChanged && !$revRecord->hasSlot( SlotRecord::MAIN ) ) {
3741                                         // We've added this (missing) title to the dependencies;
3742                                         // give the hook another chance to redirect it to an
3743                                         // actual page.
3744                                         $text = false;
3745                                         $finalTitle = $title;
3746                                         continue;
3747                                 }
3748                                 if ( $revRecord->hasSlot( SlotRecord::MAIN ) ) { // T276476
3749                                         $content = $revRecord->getContent( SlotRecord::MAIN );
3750                                         $text = $content ? $content->getWikitextForTransclusion() : null;
3751                                 } else {
3752                                         $text = false;
3753                                 }
3754
3755                                 if ( $text === false || $text === null ) {
3756                                         $text = false;
3757                                         break;
3758                                 }
3759                         } elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
3760                                 $message = wfMessage( $services->getContentLanguage()->
3761                                         lcfirst( $title->getText() ) )->inContentLanguage();
3762                                 if ( !$message->exists() ) {
3763                                         $text = false;
3764                                         break;
3765                                 }
3766                                 $text = $message->plain();
3767                                 break;
3768                         } else {
3769                                 break;
3770                         }
3771                         // @phan-suppress-next-line PhanPossiblyUndeclaredVariable Only reached when content is set
3772                         if ( !$content ) {
3773                                 break;
3774                         }
3775                         # Redirect?
3776                         $finalTitle = $title;
3777                         $title = $content->getRedirectTarget();
3778                 }
3779
3780                 $retValues = [
3781                         // previously, when this also returned a Revision object, we set
3782                         // 'revision-record' to false instead of null if it was unavailable,
3783                         // so that callers could use isset and then rely on the revision-record
3784                         // key instead of the revision key, even if there was no corresponding
3785                         // object - we continue to set to false here for backwards compatibility
3786                         'revision-record' => $revRecord ?: false,
3787                         'text' => $text,
3788                         'finalTitle' => $finalTitle,
3789                         'deps' => $deps
3790                 ];
3791                 return $retValues;
3792         }
3793
3794         /**
3795          * Fetch a file and its title and register a reference to it.
3796          * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
3797          * @param LinkTarget $link
3798          * @param array $options Array of options to RepoGroup::findFile
3799          * @return array ( File or false, Title of file )
3800          * @since 1.18
3801          */
3802         public function fetchFileAndTitle( LinkTarget $link, array $options = [] ) {
3803                 $file = $this->fetchFileNoRegister( $link, $options );
3804
3805                 $time = $file ? $file->getTimestamp() : false;
3806                 $sha1 = $file ? $file->getSha1() : false;
3807                 # Register the file as a dependency...
3808                 $this->mOutput->addImage( $link, $time, $sha1 );
3809                 if ( $file && !$link->isSameLinkAs( $file->getTitle() ) ) {
3810                         # Update fetched file title after resolving redirects, etc.
3811                         $link = $file->getTitle();
3812                         $this->mOutput->addImage( $link, $time, $sha1 );
3813                 }
3814
3815                 $title = Title::newFromLinkTarget( $link ); // for return type compat
3816                 return [ $file, $title ];
3817         }
3818
3819         /**
3820          * Helper function for fetchFileAndTitle.
3821          *
3822          * Also useful if you need to fetch a file but not use it yet,
3823          * for example to get the file's handler.
3824          *
3825          * @param LinkTarget $link
3826          * @param array $options Array of options to RepoGroup::findFile
3827          * @return File|false
3828          */
3829         protected function fetchFileNoRegister( LinkTarget $link, array $options = [] ) {
3830                 if ( isset( $options['broken'] ) ) {
3831                         $file = false; // broken thumbnail forced by hook
3832                 } else {
3833                         $repoGroup = MediaWikiServices::getInstance()->getRepoGroup();
3834                         if ( isset( $options['sha1'] ) ) { // get by (sha1,timestamp)
3835                                 $file = $repoGroup->findFileFromKey( $options['sha1'], $options );
3836                         } else { // get by (name,timestamp)
3837                                 $file = $repoGroup->findFile( $link, $options );
3838                         }
3839                 }
3840                 return $file;
3841         }
3842
3843         /**
3844          * Transclude an interwiki link.
3845          *
3846          * @param LinkTarget $link
3847          * @param string $action Usually one of (raw, render)
3848          *
3849          * @return string
3850          * @internal
3851          */
3852         public function interwikiTransclude( LinkTarget $link, $action ) {
3853                 if ( !$this->svcOptions->get( MainConfigNames::EnableScaryTranscluding ) ) {
3854                         return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
3855                 }
3856
3857                 // TODO: extract relevant functionality from Title
3858                 $title = Title::newFromLinkTarget( $link );
3859
3860                 $url = $title->getFullURL( [ 'action' => $action ] );
3861                 if ( strlen( $url ) > 1024 ) {
3862                         return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
3863                 }
3864
3865                 $wikiId = $title->getTransWikiID(); // remote wiki ID or false
3866
3867                 $fname = __METHOD__;
3868
3869                 $cache = $this->wanCache;
3870                 $data = $cache->getWithSetCallback(
3871                         $cache->makeGlobalKey(
3872                                 'interwiki-transclude',
3873                                 ( $wikiId !== false ) ? $wikiId : 'external',
3874                                 sha1( $url )
3875                         ),
3876                         $this->svcOptions->get( MainConfigNames::TranscludeCacheExpiry ),
3877                         function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
3878                                 $req = $this->httpRequestFactory->create( $url, [], $fname );
3879
3880                                 $status = $req->execute(); // Status object
3881                                 if ( !$status->isOK() ) {
3882                                         $ttl = $cache::TTL_UNCACHEABLE;
3883                                 } elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
3884                                         $ttl = min( $cache::TTL_LAGGED, $ttl );
3885                                 }
3886
3887                                 return [
3888                                         'text' => $status->isOK() ? $req->getContent() : null,
3889                                         'code' => $req->getStatus()
3890                                 ];
3891                         },
3892                         [
3893                                 'checkKeys' => ( $wikiId !== false )
3894                                         ? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
3895                                         : [],
3896                                 'pcGroup' => 'interwiki-transclude:5',
3897                                 'pcTTL' => $cache::TTL_PROC_LONG
3898                         ]
3899                 );
3900
3901                 if ( is_string( $data['text'] ) ) {
3902                         $text = $data['text'];
3903                 } elseif ( $data['code'] != 200 ) {
3904                         // Though we failed to fetch the content, this status is useless.
3905                         $text = wfMessage( 'scarytranscludefailed-httpstatus' )
3906                                 ->params( $url, $data['code'] )->inContentLanguage()->text();
3907                 } else {
3908                         $text = wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
3909                 }
3910
3911                 return $text;
3912         }
3913
3914         /**
3915          * Triple brace replacement -- used for template arguments
3916          *
3917          * @param array $piece
3918          * @param PPFrame $frame
3919          *
3920          * @return array
3921          * @internal
3922          */
3923         public function argSubstitution( array $piece, PPFrame $frame ) {
3924                 $error = false;
3925                 $parts = $piece['parts'];
3926                 $nameWithSpaces = $frame->expand( $piece['title'] );
3927                 $argName = trim( $nameWithSpaces );
3928                 $object = false;
3929                 $text = $frame->getArgument( $argName );
3930                 if ( $text === false && $parts->getLength() > 0
3931                         && ( $this->ot['html']
3932                                 || $this->ot['pre']
3933                                 || ( $this->ot['wiki'] && $frame->isTemplate() )
3934                         )
3935                 ) {
3936                         # No match in frame, use the supplied default
3937                         $object = $parts->item( 0 )->getChildren();
3938                 }
3939                 if ( !$this->incrementIncludeSize( 'arg', strlen( $text ) ) ) {
3940                         $error = '<!-- WARNING: argument omitted, expansion size too large -->';
3941                         $this->limitationWarn( 'post-expand-template-argument' );
3942                 }
3943
3944                 if ( $text === false && $object === false ) {
3945                         # No match anywhere
3946                         $object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
3947                 }
3948                 if ( $error !== false ) {
3949                         $text .= $error;
3950                 }
3951                 if ( $object !== false ) {
3952                         $ret = [ 'object' => $object ];
3953                 } else {
3954                         $ret = [ 'text' => $text ];
3955                 }
3956
3957                 return $ret;
3958         }
3959
3960         /**
3961          * @param string $lowerTagName
3962          * @return bool
3963          */
3964         public function tagNeedsNowikiStrippedInTagPF( string $lowerTagName ): bool {
3965                 $parsoidSiteConfig = MediaWikiServices::getInstance()->getParsoidSiteConfig();
3966                 return $parsoidSiteConfig->tagNeedsNowikiStrippedInTagPF( $lowerTagName );
3967         }
3968
3969         /**
3970          * Return the text to be used for a given extension tag.
3971          * This is the ghost of strip().
3972          *
3973          * @param array $params Associative array of parameters:
3974          *     name       PPNode for the tag name
3975          *     attr       PPNode for unparsed text where tag attributes are thought to be
3976          *     attributes Optional associative array of parsed attributes
3977          *     inner      Contents of extension element
3978          *     noClose    Original text did not have a close tag
3979          * @param PPFrame $frame
3980          * @param bool $processNowiki Process nowiki tags by running the nowiki tag handler
3981          *     Normally, nowikis are only processed for the HTML output type. With this
3982          *     arg set to true, they are processed (and converted to a nowiki strip marker)
3983          *     for all output types.
3984          * @return string
3985          * @internal
3986          * @since 1.12
3987          */
3988         public function extensionSubstitution( array $params, PPFrame $frame, bool $processNowiki = false ) {
3989                 static $errorStr = '<span class="error">';
3990
3991                 $name = $frame->expand( $params['name'] );
3992                 if ( str_starts_with( $name, $errorStr ) ) {
3993                         // Probably expansion depth or node count exceeded. Just punt the
3994                         // error up.
3995                         return $name;
3996                 }
3997
3998                 // Parse attributes from XML-like wikitext syntax
3999                 $attrText = !isset( $params['attr'] ) ? '' : $frame->expand( $params['attr'] );
4000                 if ( str_starts_with( $attrText, $errorStr ) ) {
4001                         // See above
4002                         return $attrText;
4003                 }
4004
4005                 // We can't safely check if the expansion for $content resulted in an
4006                 // error, because the content could happen to be the error string
4007                 // (T149622).
4008                 $content = !isset( $params['inner'] ) ? null : $frame->expand( $params['inner'] );
4009
4010                 $marker = self::MARKER_PREFIX . "-$name-"
4011                         . sprintf( '%08X', $this->mMarkerIndex++ ) . self::MARKER_SUFFIX;
4012
4013                 $normalizedName = strtolower( $name );
4014                 $isNowiki = $normalizedName === 'nowiki';
4015                 $markerType = $isNowiki ? 'nowiki' : 'general';
4016                 if ( $this->ot['html'] || ( $processNowiki && $isNowiki ) ) {
4017                         $attributes = Sanitizer::decodeTagAttributes( $attrText );
4018                         // Merge in attributes passed via {{#tag:}} parser function
4019                         if ( isset( $params['attributes'] ) ) {
4020                                 $attributes += $params['attributes'];
4021                         }
4022
4023                         if ( isset( $this->mTagHooks[$normalizedName] ) ) {
4024                                 // Note that $content may be null here, for example if the
4025                                 // tag is self-closed.
4026                                 $output = call_user_func_array( $this->mTagHooks[$normalizedName],
4027                                         [ $content, $attributes, $this, $frame ] );
4028                         } else {
4029                                 $output = '<span class="error">Invalid tag extension name: ' .
4030                                         htmlspecialchars( $normalizedName ) . '</span>';
4031                         }
4032
4033                         if ( is_array( $output ) ) {
4034                                 // Extract flags
4035                                 $flags = $output;
4036                                 $output = $flags[0];
4037                                 if ( isset( $flags['markerType'] ) ) {
4038                                         $markerType = $flags['markerType'];
4039                                 }
4040                         }
4041                 } else {
4042                         // We're substituting a {{subst:#tag:}} parser function.
4043                         // Convert the attributes it passed into the XML-like string.
4044                         if ( isset( $params['attributes'] ) ) {
4045                                 foreach ( $params['attributes'] as $attrName => $attrValue ) {
4046                                         $attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
4047                                                 htmlspecialchars( $this->getStripState()->unstripBoth( $attrValue ), ENT_COMPAT ) . '"';
4048                                 }
4049                         }
4050                         if ( $content === null ) {
4051                                 $output = "<$name$attrText/>";
4052                         } else {
4053                                 $close = $params['close'] === null ? '' : $frame->expand( $params['close'] );
4054                                 if ( str_starts_with( $close, $errorStr ) ) {
4055                                         // See above
4056                                         return $close;
4057                                 }
4058                                 $output = "<$name$attrText>$content$close";
4059                         }
4060                         if ( !$this->mStripExtTags ) {
4061                                 if ( $this->svcOptions->get( MainConfigNames::ParsoidFragmentSupport ) === 'v2' ) {
4062                                         $markerType = 'exttag';
4063                                 } else {
4064                                         $markerType = 'none';
4065                                 }
4066                         }
4067                 }
4068
4069                 if ( $markerType === 'none' ) {
4070                         return $output;
4071                 } elseif ( $markerType === 'nowiki' ) {
4072                         $this->mStripState->addNoWiki( $marker, $output );
4073                 } elseif ( $markerType === 'general' ) {
4074                         $this->mStripState->addGeneral( $marker, $output );
4075                 } elseif ( $markerType === 'exttag' ) {
4076                         $this->mStripState->addExtTag( $marker, $output );
4077                 } else {
4078                         throw new UnexpectedValueException( __METHOD__ . ': invalid marker type' );
4079                 }
4080                 return $marker;
4081         }
4082
4083         /**
4084          * Increment an include size counter
4085          *
4086          * @param string $type The type of expansion
4087          * @param int $size The size of the text
4088          * @return bool False if this inclusion would take it over the maximum, true otherwise
4089          */
4090         private function incrementIncludeSize( $type, $size ) {
4091                 if ( $this->mIncludeSizes[$type] + $size > $this->mOptions->getMaxIncludeSize() ) {
4092                         return false;
4093                 } else {
4094                         $this->mIncludeSizes[$type] += $size;
4095                         return true;
4096                 }
4097         }
4098
4099         /**
4100          * @return bool False if the limit has been exceeded
4101          * @since 1.13
4102          */
4103         public function incrementExpensiveFunctionCount() {
4104                 $this->mExpensiveFunctionCount++;
4105                 return $this->mExpensiveFunctionCount <= $this->mOptions->getExpensiveParserFunctionLimit();
4106         }
4107
4108         /**
4109          * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
4110          * Fills $this->mDoubleUnderscores, returns the modified text
4111          *
4112          * @param string $text
4113          * @return string
4114          */
4115         private function handleDoubleUnderscore( $text ) {
4116                 # The position of __TOC__ needs to be recorded
4117                 $mw = $this->magicWordFactory->get( 'toc' );
4118                 if ( $mw->match( $text ) ) {
4119                         $this->mShowToc = true;
4120                         $this->mForceTocPosition = true;
4121
4122                         # Set a placeholder. At the end we'll fill it in with the TOC.
4123                         $text = $mw->replace( self::TOC_PLACEHOLDER, $text, 1 );
4124
4125                         # Only keep the first one.
4126                         $text = $mw->replace( '', $text );
4127                         # For consistency with all other double-underscores
4128                         # (see below)
4129                         $this->mOutput->setUnsortedPageProperty( 'toc' );
4130                 }
4131
4132                 # Now match and remove the rest of them
4133                 $mwa = $this->magicWordFactory->getDoubleUnderscoreArray();
4134                 $this->mDoubleUnderscores = $mwa->matchAndRemove( $text );
4135
4136                 if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
4137                         $this->mOutput->setNoGallery( true );
4138                 }
4139                 if ( isset( $this->mDoubleUnderscores['notoc'] ) && !$this->mForceTocPosition ) {
4140                         $this->mShowToc = false;
4141                 }
4142                 if ( isset( $this->mDoubleUnderscores['hiddencat'] )
4143                         && $this->getTitle()->getNamespace() === NS_CATEGORY
4144                 ) {
4145                         $this->addTrackingCategory( 'hidden-category-category' );
4146                 }
4147                 # (T10068) Allow control over whether robots index a page.
4148                 # __INDEX__ always overrides __NOINDEX__, see T16899
4149                 if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->getTitle()->canUseNoindex() ) {
4150                         $this->mOutput->setIndexPolicy( 'noindex' );
4151                         $this->addTrackingCategory( 'noindex-category' );
4152                 }
4153                 if ( isset( $this->mDoubleUnderscores['index'] ) && $this->getTitle()->canUseNoindex() ) {
4154                         $this->mOutput->setIndexPolicy( 'index' );
4155                         $this->addTrackingCategory( 'index-category' );
4156                 }
4157
4158                 # Cache all double underscores in the database
4159                 foreach ( $this->mDoubleUnderscores as $key => $val ) {
4160                         $this->mOutput->setUnsortedPageProperty( $key );
4161                 }
4162
4163                 return $text;
4164         }
4165
4166         /**
4167          * @see TrackingCategories::addTrackingCategory()
4168          * @param string $msg Message key
4169          * @return bool Whether the addition was successful
4170          * @since 1.19 method is public
4171          */
4172         public function addTrackingCategory( $msg ) {
4173                 return $this->trackingCategories->addTrackingCategory(
4174                         $this->mOutput, $msg, $this->getPage()
4175                 );
4176         }
4177
4178         /**
4179          * Helper function to correctly set the target language and title of
4180          * a message based on the parser context. Most uses of system messages
4181          * inside extensions or parser functions should use this method (instead
4182          * of directly using `wfMessage`) to ensure that the cache is not
4183          * polluted.
4184          *
4185          * @param string $msg The localization message key
4186          * @phpcs:ignore Generic.Files.LineLength
4187          * @param MessageParam|MessageSpecifier|string|int|float|list<MessageParam|MessageSpecifier|string|int|float> ...$params
4188          *   See Message::params()
4189          * @return Message
4190          * @since 1.40
4191          * @see https://phabricator.wikimedia.org/T202481
4192          */
4193         public function msg( string $msg, ...$params ): Message {
4194                 return wfMessage( $msg, ...$params )
4195                         ->inLanguage( $this->getTargetLanguage() )
4196                         ->page( $this->getPage() );
4197         }
4198
4199         private function cleanUpTocLine( Node $container ) {
4200                 '@phan-var Element|DocumentFragment $container';  // @var Element|DocumentFragment $container
4201                 # Strip out HTML
4202                 # Allowed tags are:
4203                 # * <sup> and <sub> (T10393)
4204                 # * <i> (T28375)
4205                 # * <b> (r105284)
4206                 # * <bdi> (T74884)
4207                 # * <span dir="rtl"> and <span dir="ltr"> (T37167)
4208                 # * <s> and <strike> (T35715)
4209                 # * <q> (T251672)
4210                 # We strip any parameter from accepted tags, except dir="rtl|ltr" from <span>,
4211                 # to allow setting directionality in toc items.
4212                 $allowedTags = [ 'span', 'sup', 'sub', 'bdi', 'i', 'b', 's', 'strike', 'q' ];
4213                 $node = $container->firstChild;
4214                 while ( $node !== null ) {
4215                         $next = $node->nextSibling;
4216                         if ( $node instanceof Element ) {
4217                                 $nodeName = DOMCompat::nodeName( $node );
4218                                 if ( in_array( $nodeName, [ 'style', 'script' ], true ) ) {
4219                                         # Remove any <style> or <script> tags (T198618)
4220                                         DOMCompat::remove( $node );
4221                                 } elseif ( in_array( $nodeName, $allowedTags, true ) ) {
4222                                         // Keep tag, remove attributes
4223                                         $removeAttrs = [];
4224                                         foreach ( $node->attributes as $attr ) {
4225                                                 if (
4226                                                         $nodeName === 'span' && $attr->name === 'dir'
4227                                                         && ( $attr->value === 'rtl' || $attr->value === 'ltr' )
4228                                                 ) {
4229                                                         // Keep <span dir="rtl"> and <span dir="ltr">
4230                                                         continue;
4231                                                 }
4232                                                 $removeAttrs[] = $attr;
4233                                         }
4234                                         foreach ( $removeAttrs as $attr ) {
4235                                                 $node->removeAttributeNode( $attr );
4236                                         }
4237                                         $this->cleanUpTocLine( $node );
4238                                         # Strip '<span></span>', which is the result from the above if
4239                                         # <span id="foo"></span> is used to produce an additional anchor
4240                                         # for a section.
4241                                         if ( $nodeName === 'span' && !$node->hasChildNodes() ) {
4242                                                 DOMCompat::remove( $node );
4243                                         }
4244                                 } else {
4245                                         // Strip tag
4246                                         $next = $node->firstChild;
4247                                         // phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
4248                                         while ( $childNode = $node->firstChild ) {
4249                                                 $node->parentNode->insertBefore( $childNode, $node );
4250                                         }
4251                                         DOMCompat::remove( $node );
4252                                 }
4253                         } elseif ( $node instanceof Comment ) {
4254                                 // Extensions may add comments to headings;
4255                                 // these shouldn't appear in the ToC either.
4256                                 DOMCompat::remove( $node );
4257                         }
4258                         $node = $next;
4259                 }
4260         }
4261
4262         /**
4263          * This function accomplishes several tasks:
4264          * 1) Auto-number headings if that option is enabled
4265          * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
4266          * 3) Add a Table of contents on the top for users who have enabled the option
4267          * 4) Auto-anchor headings
4268          *
4269          * It loops through all headlines, collects the necessary data, then splits up the
4270          * string and re-inserts the newly formatted headlines.
4271          *
4272          * @param string $text
4273          * @param string $origText Original, untouched wikitext
4274          * @param bool $isMain
4275          * @return string
4276          */
4277         private function finalizeHeadings( $text, $origText, $isMain = true ) {
4278                 # Inhibit editsection links if requested in the page
4279                 if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
4280                         $maybeShowEditLink = false;
4281                 } else {
4282                         $maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
4283                 }
4284
4285                 # Get all headlines for numbering them and adding funky stuff like [edit]
4286                 # links - this is for later, but we need the number of headlines right now
4287                 # NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
4288                 # be trimmed here since whitespace in HTML headings is significant.
4289                 $matches = [];
4290                 $numMatches = preg_match_all(
4291                         '/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
4292                         $text,
4293                         $matches
4294                 );
4295
4296                 # if there are fewer than 4 headlines in the article, do not show TOC
4297                 # unless it's been explicitly enabled.
4298                 $enoughToc = $this->mShowToc &&
4299                         ( ( $numMatches >= 4 ) || $this->mForceTocPosition );
4300
4301                 # Allow user to stipulate that a page should have a "new section"
4302                 # link added via __NEWSECTIONLINK__
4303                 if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
4304                         $this->mOutput->setNewSection( true );
4305                 }
4306
4307                 # Allow user to remove the "new section"
4308                 # link via __NONEWSECTIONLINK__
4309                 if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
4310                         $this->mOutput->setHideNewSection( true );
4311                 }
4312
4313                 # if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
4314                 # override above conditions and always show TOC above first header
4315                 if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
4316                         $this->mShowToc = true;
4317                         $enoughToc = true;
4318                 }
4319
4320                 # headline counter
4321                 $headlineCount = 0;
4322                 $haveTocEntries = false;
4323
4324                 # Ugh .. the TOC should have neat indentation levels which can be
4325                 # passed to the skin functions. These are determined here
4326                 $full = '';
4327                 $head = [];
4328                 $level = 0;
4329                 $tocData = new TOCData();
4330                 $markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
4331                 $baseTitleText = $this->getTitle()->getPrefixedDBkey();
4332                 $oldType = $this->mOutputType;
4333                 $this->setOutputType( self::OT_WIKI );
4334                 $frame = $this->getPreprocessor()->newFrame();
4335                 $root = $this->preprocessToDom( $origText );
4336                 $node = $root->getFirstChild();
4337                 $cpOffset = 0;
4338                 $refers = [];
4339
4340                 $headlines = $numMatches !== false ? $matches[3] : [];
4341
4342                 $maxTocLevel = $this->svcOptions->get( MainConfigNames::MaxTocLevel );
4343                 $domDocument = DOMUtils::parseHTML( '' );
4344                 foreach ( $headlines as $headline ) {
4345                         $isTemplate = false;
4346                         $titleText = false;
4347                         $sectionIndex = false;
4348                         $markerMatches = [];
4349                         if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
4350                                 $serial = (int)$markerMatches[1];
4351                                 [ $titleText, $sectionIndex ] = $this->mHeadings[$serial];
4352                                 $isTemplate = ( $titleText != $baseTitleText );
4353                                 $headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
4354                         }
4355
4356                         $sectionMetadata = SectionMetadata::fromLegacy( [
4357                                 "fromtitle" => $titleText ?: null,
4358                                 "index" => $sectionIndex === false
4359                                         ? '' : ( ( $isTemplate ? 'T-' : '' ) . $sectionIndex )
4360                         ] );
4361                         $tocData->addSection( $sectionMetadata );
4362
4363                         $oldLevel = $level;
4364                         $level = (int)$matches[1][$headlineCount];
4365                         $tocData->processHeading( $oldLevel, $level, $sectionMetadata );
4366
4367                         if ( $tocData->getCurrentTOCLevel() < $maxTocLevel ) {
4368                                 $haveTocEntries = true;
4369                         }
4370
4371                         # The safe header is a version of the header text safe to use for links
4372
4373                         # Remove link placeholders by the link text.
4374                         #     <!--LINK number-->
4375                         # turns into
4376                         #     link text with suffix
4377                         # Do this before unstrip since link text can contain strip markers
4378                         $safeHeadline = $this->replaceLinkHoldersText( $headline );
4379
4380                         # Avoid insertion of weird stuff like <math> by expanding the relevant sections
4381                         $safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );
4382
4383                         // Run Tidy to convert wikitext entities to HTML entities (T355386),
4384                         // conveniently also giving us a way to handle French spaces (T324763)
4385                         $safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );
4386
4387                         // Wrap the safe headline to parse the heading attributes
4388                         // Literal HTML tags should be sanitized at this point
4389                         // cleanUpTocLine will strip the headline tag
4390                         $wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $safeHeadline . "</h$level>";
4391
4392                         // Parse the heading contents as HTML. This makes it easier to strip out some HTML tags,
4393                         // and ensures that we generate balanced HTML at the end (T218330).
4394                         $headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $wrappedHeadline );
4395
4396                         // Extract a user defined id on the heading
4397                         // A heading is expected as the first child and could be asserted
4398                         $h = $headlineDom->firstChild;
4399                         $headingId = ( $h instanceof Element && DOMUtils::isHeading( $h ) ) ?
4400                                 DOMCompat::getAttribute( $h, 'id' ) : null;
4401
4402                         $this->cleanUpTocLine( $headlineDom );
4403
4404                         // Serialize back to HTML
4405                         $tocline = trim( DOMUtils::getFragmentInnerHTML( $headlineDom ) );
4406
4407                         # For the anchor, strip out HTML-y stuff period
4408                         $safeHeadline = trim( $headlineDom->textContent );
4409
4410                         # Save headline for section edit hint before it's normalized for the link
4411                         $headlineHint = htmlspecialchars( $safeHeadline );
4412
4413                         $safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
4414                         $safeHeadline = self::normalizeSectionName( $safeHeadline );
4415
4416                         if ( $headingId !== null && $headingId !== '' ) {
4417                                 $safeHeadline = $headingId;
4418                         }
4419
4420                         $fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
4421                         $linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
4422                         $safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
4423                         if ( $fallbackHeadline === $safeHeadline ) {
4424                                 # No reason to have both (in fact, we can't)
4425                                 $fallbackHeadline = false;
4426                         }
4427
4428                         # HTML IDs must be case-insensitively unique for IE compatibility (T12721).
4429                         $arrayKey = strtolower( $safeHeadline );
4430                         if ( $fallbackHeadline === false ) {
4431                                 $fallbackArrayKey = false;
4432                         } else {
4433                                 $fallbackArrayKey = strtolower( $fallbackHeadline );
4434                         }
4435
4436                         # Create the anchor for linking from the TOC to the section
4437                         $anchor = $safeHeadline;
4438                         $fallbackAnchor = $fallbackHeadline;
4439                         if ( isset( $refers[$arrayKey] ) ) {
4440                                 for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
4441                                 $anchor .= "_$i";
4442                                 $linkAnchor .= "_$i";
4443                                 $refers["{$arrayKey}_$i"] = true;
4444                         } else {
4445                                 $refers[$arrayKey] = true;
4446                         }
4447                         if ( $fallbackHeadline !== false && isset( $refers[$fallbackArrayKey] ) ) {
4448                                 for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
4449                                 $fallbackAnchor .= "_$i";
4450                                 $refers["{$fallbackArrayKey}_$i"] = true;
4451                         } else {
4452                                 $refers[$fallbackArrayKey] = true;
4453                         }
4454
4455                         # Add the section to the section tree
4456                         # Find the DOM node for this header
4457                         $noOffset = ( $isTemplate || $sectionIndex === false );
4458                         while ( $node && !$noOffset ) {
4459                                 if ( $node->getName() === 'h' ) {
4460                                         $bits = $node->splitHeading();
4461                                         if ( $bits['i'] == $sectionIndex ) {
4462                                                 break;
4463                                         }
4464                                 }
4465                                 $cpOffset += mb_strlen(
4466                                         $this->mStripState->unstripBoth(
4467                                                 $frame->expand( $node, PPFrame::RECOVER_ORIG )
4468                                         )
4469                                 );
4470                                 $node = $node->getNextSibling();
4471                         }
4472                         $sectionMetadata->line = $tocline;
4473                         $sectionMetadata->codepointOffset = ( $noOffset ? null : $cpOffset );
4474                         $sectionMetadata->anchor = $anchor;
4475                         $sectionMetadata->linkAnchor = $linkAnchor;
4476
4477                         if ( $maybeShowEditLink && $sectionIndex !== false ) {
4478                                 // Output edit section links as markers with styles that can be customized by skins
4479                                 if ( $isTemplate ) {
4480                                         # Put a T flag in the section identifier, to indicate to extractSections()
4481                                         # that sections inside <includeonly> should be counted.
4482                                         $editsectionPage = $titleText;
4483                                         $editsectionSection = "T-$sectionIndex";
4484                                 } else {
4485                                         $editsectionPage = $this->getTitle()->getPrefixedText();
4486                                         $editsectionSection = $sectionIndex;
4487                                 }
4488                                 // Construct a pseudo-HTML tag as a placeholder for the section edit link. It is replaced in
4489                                 // MediaWiki\OutputTransform\Stages\HandleSectionLinks with the real link.
4490                                 //
4491                                 // Any HTML markup in the input has already been escaped,
4492                                 // so we don't have to worry about a user trying to input one of these markers directly.
4493                                 //
4494                                 // We put the page and section in attributes to stop the language converter from
4495                                 // converting them, but put the headline hint in tag content
4496                                 // because it is supposed to be able to convert that.
4497                                 $editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage, ENT_COMPAT );
4498                                 $editlink .= '" section="' . htmlspecialchars( $editsectionSection, ENT_COMPAT ) . '"';
4499                                 $editlink .= '>' . $headlineHint . '</mw:editsection>';
4500                         } else {
4501                                 $editlink = '';
4502                         }
4503                         // Reconstruct the original <h#> tag with added attributes. It is replaced in
4504                         // MediaWiki\OutputTransform\Stages\HandleSectionLinks to add anchors and stuff.
4505                         //
4506                         // data-mw-... attributes are forbidden in Sanitizer::isReservedDataAttribute(),
4507                         // so we don't have to worry about a user trying to input one of these markers directly.
4508                         //
4509                         // We put the anchors in attributes to stop the language converter from converting them.
4510                         $head[$headlineCount] = "<h$level" . Html::expandAttributes( [
4511                                 'data-mw-anchor' => $anchor,
4512                                 'data-mw-fallback-anchor' => $fallbackAnchor,
4513                         ] ) . $matches['attrib'][$headlineCount] . $headline . $editlink . "</h$level>";
4514
4515                         $headlineCount++;
4516                 }
4517
4518                 $this->setOutputType( $oldType );
4519
4520                 # Never ever show TOC if no headers (or suppressed)
4521                 $suppressToc = $this->mOptions->getSuppressTOC();
4522                 if ( !$haveTocEntries ) {
4523                         $enoughToc = false;
4524                 }
4525                 $addTOCPlaceholder = false;
4526
4527                 if ( $isMain && !$suppressToc ) {
4528                         // We generally output the section information via the API
4529                         // even if there isn't "enough" of a ToC to merit showing
4530                         // it -- but the "suppress TOC" parser option is set when
4531                         // any sections that might be found aren't "really there"
4532                         // (ie, JavaScript content that might have spurious === or
4533                         // <h2>: T307691) so we will *not* set section information
4534                         // in that case.
4535                         $this->mOutput->setTOCData( $tocData );
4536
4537                         // T294950: Record a suggestion that the TOC should be shown.
4538                         // Skins are free to ignore this suggestion and implement their
4539                         // own criteria for showing/suppressing TOC (T318186).
4540                         if ( $enoughToc ) {
4541                                 $this->mOutput->setOutputFlag( ParserOutputFlags::SHOW_TOC );
4542                                 if ( !$this->mForceTocPosition ) {
4543                                         $addTOCPlaceholder = true;
4544                                 }
4545                         }
4546
4547                         // If __NOTOC__ is used on the page (and not overridden by
4548                         // __TOC__ or __FORCETOC__) set the NO_TOC flag to tell
4549                         // the skin that although the section information is
4550                         // valid, it should perhaps not be presented as a Table Of
4551                         // Contents.
4552                         if ( !$this->mShowToc ) {
4553                                 $this->mOutput->setOutputFlag( ParserOutputFlags::NO_TOC );
4554                         }
4555                 }
4556
4557                 # split up and insert constructed headlines
4558                 $blocks = preg_split( '/<h[1-6]\b[^>]*>.*?<\/h[1-6]>/is', $text );
4559                 $i = 0;
4560
4561                 // build an array of document sections
4562                 $sections = [];
4563                 foreach ( $blocks as $block ) {
4564                         // $head is zero-based, sections aren't.
4565                         if ( empty( $head[$i - 1] ) ) {
4566                                 $sections[$i] = $block;
4567                         } else {
4568                                 $sections[$i] = $head[$i - 1] . $block;
4569                         }
4570
4571                         $i++;
4572                 }
4573
4574                 if ( $addTOCPlaceholder ) {
4575                         // append the TOC at the beginning
4576                         // Top anchor now in skin
4577                         // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset At least one element when enoughToc is true
4578                         $sections[0] .= self::TOC_PLACEHOLDER . "\n";
4579                 }
4580
4581                 $full .= implode( '', $sections );
4582
4583                 return $full;
4584         }
4585
4586         /**
4587          * Localize the TOC into the given target language; this includes
4588          * invoking the language converter on the headings.
4589          * @param ?TOCData $tocData The Table of Contents
4590          * @param Language $lang The target language
4591          * @param ?ILanguageConverter $converter The target language converter, or
4592          *   null if language conversion is to be suppressed.
4593          * @internal
4594          */
4595         private static function localizeTOC(
4596                 ?TOCData $tocData, Language $lang, ?ILanguageConverter $converter
4597         ) {
4598                 if ( $tocData === null ) {
4599                         return; // Nothing to do
4600                 }
4601                 foreach ( $tocData->getSections() as $s ) {
4602                         // Localize heading
4603                         if ( $converter ) {
4604                                 // T331316: don't use 'convert' or 'convertTo' as these reset
4605                                 // the language converter state.
4606                                 $s->line = $converter->convertTo(
4607                                         $s->line, $converter->getPreferredVariant(), false
4608                                 );
4609                         }
4610                         // Localize numbering
4611                         $dot = '.';
4612                         $pieces = explode( $dot, $s->number );
4613                         $numbering = '';
4614                         foreach ( $pieces as $i => $p ) {
4615                                 if ( $i > 0 ) {
4616                                         $numbering .= $dot;
4617                                 }
4618                                 $numbering .= $lang->formatNum( $p );
4619                         }
4620                         $s->number = $numbering;
4621                 }
4622         }
4623
4624         /**
4625          * Transform wiki markup when saving a page by doing "\r\n" -> "\n"
4626          * conversion, substituting signatures, {{subst:}} templates, etc.
4627          *
4628          * @param string $text The text to transform
4629          * @param PageReference $page the current article
4630          * @param UserIdentity $user the current user
4631          * @param ParserOptions $options Parsing options
4632          * @param bool $clearState Whether to clear the parser state first
4633          * @return string The altered wiki markup
4634          * @since 1.3
4635          */
4636         public function preSaveTransform(
4637                 $text,
4638                 PageReference $page,
4639                 UserIdentity $user,
4640                 ParserOptions $options,
4641                 $clearState = true
4642         ) {
4643                 if ( $clearState ) {
4644                         $magicScopeVariable = $this->lock();
4645                 }
4646                 $this->startParse( $page, $options, self::OT_WIKI, $clearState );
4647                 $this->setUser( $user );
4648
4649                 // Strip U+0000 NULL (T159174)
4650                 $text = str_replace( "\000", '', $text );
4651
4652                 // We still normalize line endings (including trimming trailing whitespace) for
4653                 // backwards-compatibility with other code that just calls PST, but this should already
4654                 // be handled in TextContent subclasses
4655                 $text = TextContent::normalizeLineEndings( $text );
4656
4657                 if ( $options->getPreSaveTransform() ) {
4658                         $text = $this->pstPass2( $text, $user );
4659                 }
4660                 $text = $this->mStripState->unstripBoth( $text );
4661
4662                 // Trim trailing whitespace again, because the previous steps can introduce it.
4663                 $text = rtrim( $text );
4664
4665                 $this->hookRunner->onParserPreSaveTransformComplete( $this, $text );
4666
4667                 $this->setUser( null ); # Reset
4668
4669                 return $text;
4670         }
4671
4672         /**
4673          * Pre-save transform helper function
4674          *
4675          * @param string $text
4676          * @param UserIdentity $user
4677          *
4678          * @return string
4679          */
4680         private function pstPass2( $text, UserIdentity $user ) {
4681                 # Note: This is the timestamp saved as hardcoded wikitext to the database, we use
4682                 # $this->contLang here in order to give everyone the same signature and use the default one
4683                 # rather than the one selected in each user's preferences.  (see also T14815)
4684                 $ts = $this->mOptions->getTimestamp();
4685                 $timestamp = MWTimestamp::getLocalInstance( $ts );
4686                 $ts = $timestamp->format( 'YmdHis' );
4687                 $tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();
4688
4689                 $d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";
4690
4691                 # Variable replacement
4692                 # Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
4693                 $text = $this->replaceVariables( $text );
4694
4695                 # This works almost by chance, as the replaceVariables are done before the getUserSig(),
4696                 # which may corrupt this parser instance via its wfMessage()->text() call-
4697
4698                 # Signatures
4699                 if ( strpos( $text, '~~~' ) !== false ) {
4700                         $sigText = $this->getUserSig( $user );
4701                         $text = strtr( $text, [
4702                                 '~~~~~' => $d,
4703                                 '~~~~' => "$sigText $d",
4704                                 '~~~' => $sigText
4705                         ] );
4706                         # The main two signature forms used above are time-sensitive
4707                         $this->setOutputFlag( ParserOutputFlags::USER_SIGNATURE, 'User signature detected' );
4708                 }
4709
4710                 # Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
4711                 $tc = '[' . Title::legalChars() . ']';
4712                 $nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!
4713
4714                 // [[ns:page (context)|]]
4715                 $p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
4716                 // [[ns:page（context）|]] (double-width brackets, added in r40257)
4717                 $p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
4718                 // [[ns:page (context), context|]] (using single, double-width or Arabic comma)
4719                 $p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，|، )$tc+|)\\|]]/";
4720                 // [[|page]] (reverse pipe trick: add context from page title)
4721                 $p2 = "/\[\[\\|($tc+)]]/";
4722
4723                 # try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
4724                 $text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
4725                 $text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
4726                 $text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );
4727
4728                 $t = $this->getTitle()->getText();
4729                 $m = [];
4730                 if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
4731                         $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4732                 } elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
4733                         $text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
4734                 } else {
4735                         # if there's no context, don't bother duplicating the title
4736                         $text = preg_replace( $p2, '[[\\1]]', $text );
4737                 }
4738
4739                 return $text;
4740         }
4741
4742         /**
4743          * Fetch the user's signature text, if any, and normalize to
4744          * validated, ready-to-insert wikitext.
4745          * If you have pre-fetched the nickname or the fancySig option, you can
4746          * specify them here to save a database query.
4747          * Do not reuse this parser instance after calling getUserSig(),
4748          * as it may have changed.
4749          *
4750          * @param UserIdentity $user
4751          * @param string|false $nickname Nickname to use or false to use user's default nickname
4752          * @param bool|null $fancySig whether the nicknname is the complete signature
4753          *    or null to use default value
4754          * @return string
4755          * @since 1.6
4756          */
4757         public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
4758                 $username = $user->getName();
4759
4760                 # If not given, retrieve from the user object.
4761                 if ( $nickname === false ) {
4762                         $nickname = $this->userOptionsLookup->getOption( $user, 'nickname' );
4763                 }
4764
4765                 $fancySig ??= $this->userOptionsLookup->getBoolOption( $user, 'fancysig' );
4766
4767                 if ( $nickname === null || $nickname === '' ) {
4768                         // Empty value results in the default signature (even when fancysig is enabled)
4769                         $nickname = $username;
4770                 } elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( MainConfigNames::MaxSigChars ) ) {
4771                         $nickname = $username;
4772                         $this->logger->debug( __METHOD__ . ": $username has overlong signature." );
4773                 } elseif ( $fancySig !== false ) {
4774                         # Sig. might contain markup; validate this
4775                         $isValid = $this->validateSig( $nickname ) !== false;
4776
4777                         # New validator
4778                         $sigValidation = $this->svcOptions->get( MainConfigNames::SignatureValidation );
4779                         if ( $isValid && $sigValidation === 'disallow' ) {
4780                                 $parserOpts = new ParserOptions(
4781                                         $this->mOptions->getUserIdentity(),
4782                                         $this->contLang
4783                                 );
4784                                 $validator = $this->signatureValidatorFactory
4785                                         ->newSignatureValidator( $user, null, $parserOpts );
4786                                 $isValid = !$validator->validateSignature( $nickname );
4787                         }
4788
4789                         if ( $isValid ) {
4790                                 # Validated; clean up (if needed) and return it
4791                                 return $this->cleanSig( $nickname, true );
4792                         } else {
4793                                 # Failed to validate; fall back to the default
4794                                 $nickname = $username;
4795                                 $this->logger->debug( __METHOD__ . ": $username has invalid signature." );
4796                         }
4797                 }
4798
4799                 # Make sure nickname doesnt get a sig in a sig
4800                 $nickname = self::cleanSigInSig( $nickname );
4801
4802                 # If we're still here, make it a link to the user page
4803                 $userText = wfEscapeWikiText( $username );
4804                 $nickText = wfEscapeWikiText( $nickname );
4805                 if ( $this->userNameUtils->isTemp( $username ) ) {
4806                         $msgName = 'signature-temp';
4807                 } elseif ( $user->isRegistered() ) {
4808                         $msgName = 'signature';
4809                 } else {
4810                         $msgName = 'signature-anon';
4811                 }
4812
4813                 return wfMessage( $msgName, $userText, $nickText )->inContentLanguage()
4814                         ->page( $this->getPage() )->text();
4815         }
4816
4817         /**
4818          * Check that the user's signature contains no bad XML
4819          *
4820          * @param string $text
4821          * @return string|false An expanded string, or false if invalid.
4822          * @since 1.6
4823          */
4824         public function validateSig( $text ) {
4825                 return Xml::isWellFormedXmlFragment( $text ) ? $text : false;
4826         }
4827
4828         /**
4829          * Clean up signature text
4830          *
4831          * 1) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
4832          * 2) Substitute all transclusions
4833          *
4834          * @param string $text
4835          * @param bool $parsing Whether we're cleaning (preferences save) or parsing
4836          * @return string Signature text
4837          * @since 1.6
4838          */
4839         public function cleanSig( $text, $parsing = false ) {
4840                 if ( !$parsing ) {
4841                         $magicScopeVariable = $this->lock();
4842                         $this->startParse(
4843                                 $this->mTitle,
4844                                 ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
4845                                 self::OT_PREPROCESS,
4846                                 true
4847                         );
4848                 }
4849
4850                 # Option to disable this feature
4851                 if ( !$this->mOptions->getCleanSignatures() ) {
4852                         return $text;
4853                 }
4854
4855                 # @todo FIXME: Regex doesn't respect extension tags or nowiki
4856                 #  => Move this logic to braceSubstitution()
4857                 $substWord = $this->magicWordFactory->get( 'subst' );
4858                 $substRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
4859                 $substText = '{{' . $substWord->getSynonym( 0 );
4860
4861                 $text = preg_replace( $substRegex, $substText, $text );
4862                 $text = self::cleanSigInSig( $text );
4863                 $dom = $this->preprocessToDom( $text );
4864                 $frame = $this->getPreprocessor()->newFrame();
4865                 $text = $frame->expand( $dom );
4866
4867                 if ( !$parsing ) {
4868                         $text = $this->mStripState->unstripBoth( $text );
4869                 }
4870
4871                 return $text;
4872         }
4873
4874         /**
4875          * Strip 3, 4 or 5 tildes out of signatures.
4876          *
4877          * @param string $text
4878          * @return string Signature text with /~{3,5}/ removed
4879          * @since 1.7
4880          */
4881         public static function cleanSigInSig( $text ) {
4882                 $text = preg_replace( '/~{3,5}/', '', $text );
4883                 return $text;
4884         }
4885
4886         /**
4887          * Replace table of contents marker in parsed HTML.
4888          *
4889          * Used to remove or replace the marker.  This method should be
4890          * used instead of direct access to Parser::TOC_PLACEHOLDER, since
4891          * in the future the placeholder might have additional attributes
4892          * attached which should be ignored when the replacement is made.
4893          *
4894          * @since 1.38
4895          * @stable
4896          *
4897          * @param string $text Parsed HTML
4898          * @param string $toc HTML table of contents string, or else an empty
4899          *   string to remove the marker.
4900          * @return string Result HTML
4901          */
4902         public static function replaceTableOfContentsMarker( $text, $toc ) {
4903                 $replaced = false;
4904                 // remove the additional metas. while not strictly necessary, this also ensures idempotence if we run
4905                 // the pass more than once on a given content and TOC markers are not inserted by $toc. At the same time,
4906                 // if $toc inserts TOC markers (which, as of 2024-05, it shouldn't be able to), these are preserved by the
4907                 // fact that we run a single pass with a callback (rather than doing a first replacement with the $toc and
4908                 // a replacement of leftover markers as a second pass).
4909                 $callback = static function ( array $matches ) use( &$replaced, $toc ): string {
4910                         if ( !$replaced ) {
4911                                 $replaced = true;
4912                                 return $toc;
4913                         }
4914                         return '';
4915                 };
4916
4917                 return preg_replace_callback( self::TOC_PLACEHOLDER_REGEX, $callback, $text );
4918         }
4919
4920         /**
4921          * Set up some variables which are usually set up in parse()
4922          * so that an external function can call some class members with confidence
4923          *
4924          * @param ?PageReference $page
4925          * @param ParserOptions $options
4926          * @param int $outputType One of the Parser::OT_… constants
4927          * @param bool $clearState
4928          * @param int|null $revId
4929          * @since 1.3
4930          */
4931         public function startExternalParse( ?PageReference $page, ParserOptions $options,
4932                 $outputType, $clearState = true, $revId = null
4933         ) {
4934                 $this->startParse( $page, $options, $outputType, $clearState );
4935                 if ( $revId !== null ) {
4936                         $this->mRevisionId = $revId;
4937                 }
4938         }
4939
4940         /**
4941          * @param ?PageReference $page
4942          * @param ParserOptions $options
4943          * @param int $outputType
4944          * @param bool $clearState
4945          */
4946         private function startParse( ?PageReference $page, ParserOptions $options,
4947                 $outputType, $clearState = true
4948         ) {
4949                 $this->setPage( $page );
4950                 $this->mOptions = $options;
4951                 $this->setOutputType( $outputType );
4952                 if ( $clearState ) {
4953                         $this->clearState();
4954                 }
4955         }
4956
4957         /**
4958          * Wrapper for preprocess()
4959          *
4960          * @param string $text The text to preprocess
4961          * @param ParserOptions $options
4962          * @param ?PageReference $page The context page
4963          * @return string
4964          * @since 1.3
4965          */
4966         public function transformMsg( $text, ParserOptions $options, ?PageReference $page = null ) {
4967                 static $executing = false;
4968
4969                 # Guard against infinite recursion
4970                 if ( $executing ) {
4971                         return $text;
4972                 }
4973                 $executing = true;
4974
4975                 $text = $this->preprocess( $text, $page ?? $this->mTitle, $options );
4976
4977                 $executing = false;
4978                 return $text;
4979         }
4980
4981         /**
4982          * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
4983          * The callback should have the following form:
4984          *    function myParserHook( $text, array $params, Parser $parser, PPFrame $frame ) { ... }
4985          *
4986          * Transform and return $text. Use $parser for any required context, e.g. use
4987          * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
4988          *
4989          * Hooks may return extended information by returning an array, of which the
4990          * first numbered element (index 0) must be the return string. The following other
4991          * keys are used:
4992          *  - 'markerType': used by some core tag hooks to override which strip
4993          *    array their results are placed in, 'general' or 'nowiki'.
4994          *
4995          * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
4996          * @param callable $callback The callback to use for the tag
4997          * @return callable|null The old value of the mTagHooks array associated with the hook
4998          * @since 1.3
4999          */
5000         public function setHook( $tag, callable $callback ) {
5001                 $tag = strtolower( $tag );
5002                 if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
5003                         throw new InvalidArgumentException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
5004                 }
5005                 $oldVal = $this->mTagHooks[$tag] ?? null;
5006                 $this->mTagHooks[$tag] = $callback;
5007                 if ( !in_array( $tag, $this->mStripList ) ) {
5008                         $this->mStripList[] = $tag;
5009                 }
5010
5011                 return $oldVal;
5012         }
5013
5014         /**
5015          * Remove all tag hooks
5016          * @since 1.12
5017          */
5018         public function clearTagHooks() {
5019                 $this->mTagHooks = [];
5020                 $this->mStripList = [];
5021         }
5022
5023         /**
5024          * Create a function, e.g. {{sum:1|2|3}}
5025          * The callback function should have the form:
5026          *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
5027          *
5028          * Or with Parser::SFH_OBJECT_ARGS:
5029          *    function myParserFunction( $parser, $frame, $args ) { ... }
5030          *
5031          * The callback may either return the text result of the function, or an array with the text
5032          * in element 0, and a number of flags in the other elements. The names of the flags are
5033          * specified in the keys. Valid flags are:
5034          *   found                     The text returned is valid, stop processing the template. This
5035          *                             is on by default.
5036          *   nowiki                    Wiki markup in the return value should be escaped
5037          *   isHTML                    The returned text is HTML, armour it against wikitext transformation
5038          *
5039          * @param string $id The magic word ID
5040          * @param callable $callback The callback function (and object) to use
5041          * @param int $flags A combination of the following flags:
5042          *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
5043          *
5044          *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
5045          *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
5046          *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
5047          *     the arguments, and to control the way they are expanded.
5048          *
5049          *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
5050          *     arguments, for instance:
5051          *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
5052          *
5053          *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
5054          *     future versions. Please call $frame->expand() on it anyway so that your code keeps
5055          *     working if/when this is changed.
5056          *
5057          *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
5058          *     expansion.
5059          *
5060          *     Please read the documentation in includes/parser/Preprocessor.php for more information
5061          *     about the methods available in PPFrame and PPNode.
5062          *
5063          * @return string|callable|null The old callback function for this name, if any
5064          * @since 1.6
5065          */
5066         public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
5067                 $oldVal = $this->mFunctionHooks[$id][0] ?? null;
5068                 $this->mFunctionHooks[$id] = [ $callback, $flags ];
5069
5070                 # Add to function cache
5071                 $mw = $this->magicWordFactory->get( $id );
5072
5073                 $synonyms = $mw->getSynonyms();
5074                 $sensitive = intval( $mw->isCaseSensitive() );
5075
5076                 foreach ( $synonyms as $syn ) {
5077                         # Case
5078                         if ( !$sensitive ) {
5079                                 $syn = $this->contLang->lc( $syn );
5080                         }
5081                         # Add leading hash
5082                         if ( !( $flags & self::SFH_NO_HASH ) ) {
5083                                 $syn = '#' . $syn;
5084                         }
5085                         # Remove trailing colon
5086                         if ( substr( $syn, -1, 1 ) === ':' ) {
5087                                 $syn = substr( $syn, 0, -1 );
5088                         }
5089                         $this->mFunctionSynonyms[$sensitive][$syn] = $id;
5090                 }
5091                 return $oldVal;
5092         }
5093
        /**
         * Get all registered function hook identifiers
         *
         * These are the keys of the function hook registry, i.e. the $id values
         * under which callbacks were stored by setFunctionHook().
         *
         * @return array List of function hook IDs
         * @since 1.8
         */
        public function getFunctionHooks() {
                return array_keys( $this->mFunctionHooks );
        }
5103
        /**
         * Replace "<!--LINK-->" link placeholders with actual links, in the buffer
         * Placeholders created in Linker::link()
         *
         * Public wrapper that only forwards to replaceLinkHoldersPrivate().
         *
         * @param string &$text Buffer passed by reference to the replacement routine
         * @deprecated since 1.34; should not be used outside parser class.
         */
        public function replaceLinkHolders( &$text ) {
                $this->replaceLinkHoldersPrivate( $text );
        }
5114
        /**
         * Replace "<!--LINK-->" link placeholders with actual links, in the buffer
         * Placeholders created in Linker::link()
         *
         * Forwards to the replace() method of the parser's link holder collection.
         *
         * @param string &$text Buffer passed by reference to the link holder collection
         */
        private function replaceLinkHoldersPrivate( &$text ) {
                $this->mLinkHolders->replace( $text );
        }
5124
        /**
         * Replace "<!--LINK-->" link placeholders with plain text of links
         * (not HTML-formatted).
         *
         * Forwards to the replaceText() method of the parser's link holder
         * collection and returns its result.
         *
         * @param string $text
         * @return string
         */
        private function replaceLinkHoldersText( $text ) {
                return $this->mLinkHolders->replaceText( $text );
        }
5135
        /**
         * Renders an image gallery from a text with one line per image.
         * text labels may be given by using |-style alternative text. E.g.
         *   Image:one.jpg|The number "1"
         *   Image:tree.jpg|A tree
         * given as text will return the HTML of a gallery with two images,
         * labeled 'The number "1"' and
         * 'A tree'.
         *
         * Each line is split at the first pipe into a file title and an optional
         * option string. The option string is parsed with recursiveTagParse() and
         * then split on pipes (LanguageConverter "-{ }-" markup is kept intact).
         * Pieces that match a known image magic word (alt, link, or a parameter
         * from the file handler's parameter map) are treated as options; any other
         * piece becomes the label, with the last such piece winning.
         *
         * @param string $text Gallery body, one image per line
         * @param array $params Gallery tag attributes. The keys read directly here are
         *   'mode', 'showfilename', 'caption', 'perrow', 'widths' and 'heights'; the
         *   whole array is also passed to Sanitizer::validateTagAttributes() and to
         *   the gallery's setAdditionalOptions().
         * @return string HTML
         * @internal
         */
        public function renderImageGallery( $text, array $params ) {
                // Gallery mode; false requests the default from the factory.
                $mode = false;
                if ( isset( $params['mode'] ) ) {
                        $mode = $params['mode'];
                }

                try {
                        $ig = ImageGalleryBase::factory( $mode );
                } catch ( ImageGalleryClassNotFoundException $e ) {
                        // If invalid type set, fallback to default.
                        $ig = ImageGalleryBase::factory( false );
                }

                $ig->setContextTitle( $this->getTitle() );
                $ig->setShowBytes( false );
                $ig->setShowDimensions( false );
                $ig->setShowFilename( false );
                $ig->setParser( $this );
                $ig->setHideBadImages();
                $ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

                // Mere presence of the 'showfilename' key enables it; its value is not inspected.
                if ( isset( $params['showfilename'] ) ) {
                        $ig->setShowFilename( true );
                } else {
                        $ig->setShowFilename( false );
                }
                if ( isset( $params['caption'] ) ) {
                        // NOTE: We aren't passing a frame here or below.  Frame info
                        // is currently opaque to Parsoid, which acts on OT_PREPROCESS.
                        // See T107332#4030581
                        $caption = $this->recursiveTagParse( $params['caption'] );
                        $ig->setCaptionHtml( $caption );
                }
                if ( isset( $params['perrow'] ) ) {
                        $ig->setPerRow( $params['perrow'] );
                }
                if ( isset( $params['widths'] ) ) {
                        $ig->setWidths( $params['widths'] );
                }
                if ( isset( $params['heights'] ) ) {
                        $ig->setHeights( $params['heights'] );
                }
                $ig->setAdditionalOptions( $params );

                $enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );

                $lines = StringUtils::explode( "\n", $text );
                foreach ( $lines as $line ) {
                        # match lines like these:
                        # Image:someimage.jpg|This is some image
                        # $matches[1] is the part before the first pipe, $matches[3] (if any)
                        # is everything after it.
                        $matches = [];
                        preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
                        # Skip empty lines
                        # (more generally: any line the pattern above does not match)
                        if ( count( $matches ) == 0 ) {
                                continue;
                        }

                        # URL-decode the title part when the line contains a percent sign
                        if ( strpos( $matches[0], '%' ) !== false ) {
                                $matches[1] = rawurldecode( $matches[1] );
                        }
                        $title = Title::newFromText( $matches[1], NS_FILE );
                        if ( $title === null ) {
                                # Bogus title. Ignore these so we don't bomb out later.
                                continue;
                        }

                        # We need to get what handler the file uses, to figure out parameters.
                        # Note, a hook can override the file name, and choose an entirely different
                        # file (which potentially could be of a different type and have different handler).
                        $options = [];
                        $descQuery = false;
                        $this->hookRunner->onBeforeParserFetchFileAndTitle(
                                // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
                                $this, $title, $options, $descQuery
                        );
                        # Don't register it now, as TraditionalImageGallery does that later.
                        $file = $this->fetchFileNoRegister( $title, $options );
                        $handler = $file ? $file->getHandler() : false;

                        # Magic word ID => parameter name. The two gallery-internal entries are
                        # always available; handler parameters are added only when a handler exists.
                        $paramMap = [
                                'img_alt' => 'gallery-internal-alt',
                                'img_link' => 'gallery-internal-link',
                        ];
                        if ( $handler ) {
                                $paramMap += $handler->getParamMap();
                                // We don't want people to specify per-image widths.
                                // Additionally the width parameter would need special casing anyhow.
                                unset( $paramMap['img_width'] );
                        }

                        $mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

                        $label = '';
                        $alt = null;
                        $handlerOptions = [];
                        $imageOptions = [];
                        # Whether an explicit alt option was seen on this line
                        $hasAlt = false;

                        if ( isset( $matches[3] ) ) {
                                // look for an |alt= definition while trying not to break existing
                                // captions with multiple pipes (|) in it, until a more sensible grammar
                                // is defined for images in galleries

                                // FIXME: Doing recursiveTagParse at this stage is a bit odd,
                                // and different from makeImage.
                                $matches[3] = $this->recursiveTagParse( $matches[3] );
                                // Protect LanguageConverter markup
                                $parameterMatches = StringUtils::delimiterExplode(
                                        '-{', '}-',
                                        '|',
                                        $matches[3],
                                        true /* nested */
                                );

                                foreach ( $parameterMatches as $parameterMatch ) {
                                        [ $magicName, $match ] = $mwArray->matchVariableStartToEnd( trim( $parameterMatch ) );
                                        if ( !$magicName ) {
                                                // Last pipe wins.
                                                $label = $parameterMatch;
                                                continue;
                                        }

                                        $paramName = $paramMap[$magicName];
                                        switch ( $paramName ) {
                                                case 'gallery-internal-alt':
                                                        $hasAlt = true;
                                                        $alt = $this->stripAltText( $match, false );
                                                        break;
                                                case 'gallery-internal-link':
                                                        $linkValue = $this->stripAltText( $match, false );
                                                        if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
                                                                // Result of LanguageConverter::markNoConversion
                                                                // invoked on an external link.
                                                                // Drop the leading "-{R|" (4 bytes) and trailing "}-" (2 bytes).
                                                                $linkValue = substr( $linkValue, 4, -2 );
                                                        }
                                                        [ $type, $target ] = $this->parseLinkParameter( $linkValue );
                                                        if ( $type ) {
                                                                if ( $type === 'no-link' ) {
                                                                        $target = true;
                                                                }
                                                                $imageOptions[$type] = $target;
                                                        }
                                                        break;
                                                default:
                                                        // Must be a handler specific parameter.
                                                        // (Such names only enter $paramMap when $handler is set.)
                                                        if ( $handler->validateParam( $paramName, $match ) ) {
                                                                $handlerOptions[$paramName] = $match;
                                                        } else {
                                                                // Guess not, consider it as caption.
                                                                $this->logger->debug(
                                                                        "$parameterMatch failed parameter validation" );
                                                                $label = $parameterMatch;
                                                        }
                                        }
                                }
                        }

                        // Match makeImage when !$hasVisibleCaption
                        // Without an explicit alt, use the stripped label; with no label either,
                        // use the title text in legacy media DOM mode and leave alt null otherwise.
                        if ( !$hasAlt ) {
                                if ( $label !== '' ) {
                                        $alt = $this->stripAltText( $label, false );
                                } else {
                                        if ( $enableLegacyMediaDOM ) {
                                                $alt = $title->getText();
                                        }
                                }
                        }
                        $imageOptions['title'] = $this->stripAltText( $label, false );

                        // Match makeImage which sets this unconditionally
                        $handlerOptions['targetlang'] = $this->getTargetLanguage()->getCode();

                        $ig->add(
                                $title, $label, $alt, '', $handlerOptions,
                                ImageGalleryBase::LOADING_DEFAULT, $imageOptions
                        );
                }
                $html = $ig->toHTML();
                // The hook receives the gallery object and the rendered HTML before it is returned.
                $this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
                return $html;
        }
5331
5332         /**
5333          * @param MediaHandler|false $handler
5334          * @return array
5335          */
5336         private function getImageParams( $handler ) {
5337                 if ( $handler ) {
5338                         $handlerClass = get_class( $handler );
5339                 } else {
5340                         $handlerClass = '';
5341                 }
5342                 if ( !isset( $this->mImageParams[$handlerClass] ) ) {
5343                         # Initialise static lists
5344                         static $internalParamNames = [
5345                                 'horizAlign' => [ 'left', 'right', 'center', 'none' ],
5346                                 'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
5347                                         'bottom', 'text-bottom' ],
5348                                 'frame' => [ 'thumbnail', 'framed', 'frameless', 'border',
5349                                         // These parameters take arguments, so to ensure literals
5350                                         // have precedence, keep them listed last (T372935):
5351                                         'manualthumb', 'upright', 'link', 'alt', 'class' ],
5352                         ];
5353                         static $internalParamMap;
5354                         if ( !$internalParamMap ) {
5355                                 $internalParamMap = [];
5356                                 foreach ( $internalParamNames as $type => $names ) {
5357                                         foreach ( $names as $name ) {
5358                                                 // For grep: img_left, img_right, img_center, img_none,
5359                                                 // img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
5360                                                 // img_bottom, img_text_bottom,
5361                                                 // img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
5362                                                 // img_border, img_link, img_alt, img_class
5363                                                 $magicName = str_replace( '-', '_', "img_$name" );
5364                                                 $internalParamMap[$magicName] = [ $type, $name ];
5365                                         }
5366                                 }
5367                         }
5368
5369                         # Add handler params
5370                         # Since img_width is one of these, it is important it is listed
5371                         # *after* the literal parameter names above (T372935).
5372                         $paramMap = $internalParamMap;
5373                         if ( $handler ) {
5374                                 $handlerParamMap = $handler->getParamMap();
5375                                 foreach ( $handlerParamMap as $magic => $paramName ) {
5376                                         $paramMap[$magic] = [ 'handler', $paramName ];
5377                                 }
5378                         } else {
5379                                 // Parse the size for non-existent files.  See T273013
5380                                 $paramMap[ 'img_width' ] = [ 'handler', 'width' ];
5381                         }
5382                         $this->mImageParams[$handlerClass] = $paramMap;
5383                         $this->mImageParamsMagicArray[$handlerClass] =
5384                                 $this->magicWordFactory->newArray( array_keys( $paramMap ) );
5385                 }
5386                 return [ $this->mImageParams[$handlerClass], $this->mImageParamsMagicArray[$handlerClass] ];
5387         }
5388
        /**
         * Parse image options text and use it to make an image
         *
         * The options text is split on "|" (LanguageConverter "-{ ... }-" markup is
         * kept intact), and each part is matched against the image parameter magic
         * words. Parts that validate are stored under their parameter type (frame,
         * handler, horizAlign or vertAlign); a part that does not validate is treated
         * as the caption, and if there are several such parts the last one wins. The
         * collected frame and handler parameters are then handed to
         * Linker::makeImageLink().
         *
         * @param LinkTarget $link Target of the file link. fetchFileAndTitle() may
         *   replace it with a different target.
         * @param string $options Option text, with options separated by "|"
         * @param LinkHolderArray|false $holders Passed through to stripAltText(), which
         *   uses it to expand link placeholders in alt, class, link and manualthumb
         *   values and in the caption when that is used as alt/title text
         * @return string HTML
         * @since 1.5
         */
        public function makeImage( LinkTarget $link, $options, $holders = false ) {
                # Check if the options text is of the form "options|alt text"
                # Options are:
                #  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
                #  * left       no resizing, just left align. label is used for alt= only
                #  * right      same, but right aligned
                #  * none       same, but not aligned
                #  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
                #  * center     center the image
                #  * framed     Keep original image size, no magnify-button.
                #  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
                #  * upright    reduce width for upright images, rounded to full __0 px
                #  * border     draw a 1px border around the image
                #  * alt        Text for HTML alt attribute (defaults to empty)
                #  * class      Set a class for img node
                #  * link       Set the target of the image link. Can be external, interwiki, or local
                # vertical-align values (no % or length right now):
                #  * baseline
                #  * sub
                #  * super
                #  * top
                #  * text-top
                #  * middle
                #  * bottom
                #  * text-bottom

                # Protect LanguageConverter markup when splitting into parts
                $parts = StringUtils::delimiterExplode(
                        '-{', '}-', '|', $options, true /* allow nesting */
                );

                # Give extensions a chance to select the file revision for us
                # Note: from here on $options no longer holds the option text (that now
                # lives in $parts); it is reused as the array the hook may fill in and
                # that is passed to fetchFileAndTitle() and read for 'time' below.
                $options = [];
                $descQuery = false;
                $title = Title::castFromLinkTarget( $link ); // hook signature compat
                $this->hookRunner->onBeforeParserFetchFileAndTitle(
                        // @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
                        $this, $title, $options, $descQuery
                );
                # Fetch and register the file (file title may be different via hooks)
                [ $file, $link ] = $this->fetchFileAndTitle( $link, $options );

                # Get parameter map
                # $handler is false when there is no file
                $handler = $file ? $file->getHandler() : false;

                [ $paramMap, $mwArray ] = $this->getImageParams( $handler );

                if ( !$file ) {
                        $this->addTrackingCategory( 'broken-file-category' );
                }

                # Process the input parameters
                $caption = '';
                $params = [ 'frame' => [], 'handler' => [],
                        'horizAlign' => [], 'vertAlign' => [] ];
                # Set once the first format option (manualthumb/frameless/framed/thumbnail)
                # has been seen; later format options are then rejected
                $seenformat = false;
                foreach ( $parts as $part ) {
                        [ $magicName, $value ] = $mwArray->matchVariableStartToEnd( trim( $part ) );
                        $validated = false;
                        if ( isset( $paramMap[$magicName] ) ) {
                                [ $type, $paramName ] = $paramMap[$magicName];

                                # Special case; width and height come in one variable together
                                if ( $type === 'handler' && $paramName === 'width' ) {
                                        // The 'px' suffix has already been localized by img_width
                                        $parsedWidthParam = $this->parseWidthParam( $value, true, true );
                                        // Parsoid applies data-(width|height) attributes to broken
                                        // media spans, for client use.  See T273013
                                        // Without a handler, any value greater than zero is accepted.
                                        $validateFunc = static function ( $name, $value ) use ( $handler ) {
                                                return $handler
                                                        ? $handler->validateParam( $name, $value )
                                                        : $value > 0;
                                        };
                                        if ( isset( $parsedWidthParam['width'] ) ) {
                                                $width = $parsedWidthParam['width'];
                                                if ( $validateFunc( 'width', $width ) ) {
                                                        $params[$type]['width'] = $width;
                                                        $validated = true;
                                                }
                                        }
                                        if ( isset( $parsedWidthParam['height'] ) ) {
                                                $height = $parsedWidthParam['height'];
                                                if ( $validateFunc( 'height', $height ) ) {
                                                        $params[$type]['height'] = $height;
                                                        $validated = true;
                                                }
                                        }
                                        # else no validation -- T15436
                                } else {
                                        if ( $type === 'handler' ) {
                                                # Validate handler parameter
                                                # NOTE(review): $handler is dereferenced here without a
                                                # check although it can be false; presumably
                                                # getImageParams() maps no handler parameter other than
                                                # 'width' when there is no handler -- confirm.
                                                $validated = $handler->validateParam( $paramName, $value );
                                        } else {
                                                # Validate internal parameters
                                                switch ( $paramName ) {
                                                        case 'alt':
                                                        case 'class':
                                                                $validated = true;
                                                                $value = $this->stripAltText( $value, $holders );
                                                                break;
                                                        case 'link':
                                                                # parseLinkParameter() replaces the parameter name
                                                                # with the link type; a null type means the value
                                                                # was not a valid target, so the part stays
                                                                # unvalidated.
                                                                [ $paramName, $value ] =
                                                                        $this->parseLinkParameter(
                                                                                $this->stripAltText( $value, $holders )
                                                                        );
                                                                if ( $paramName ) {
                                                                        $validated = true;
                                                                        if ( $paramName === 'no-link' ) {
                                                                                $value = true;
                                                                        }
                                                                }
                                                                break;
                                                        case 'manualthumb':
                                                                # @todo FIXME: Possibly check validity here for
                                                                # manualthumb? downstream behavior seems odd with
                                                                # missing manual thumbs.
                                                                $value = $this->stripAltText( $value, $holders );
                                                                // fall through
                                                        case 'frameless':
                                                        case 'framed':
                                                        case 'thumbnail':
                                                                // use first appearing option, discard others.
                                                                $validated = !$seenformat;
                                                                $seenformat = true;
                                                                break;
                                                        default:
                                                                # Most other things appear to be empty or numeric...
                                                                $validated = ( $value === false || is_numeric( trim( $value ) ) );
                                                }
                                        }

                                        if ( $validated ) {
                                                $params[$type][$paramName] = $value;
                                        }
                                }
                        }
                        # Anything that is not a recognised, valid option is taken as the
                        # caption; a later such part overwrites an earlier one.
                        if ( !$validated ) {
                                $caption = $part;
                        }
                }

                # Process alignment parameters
                # The first alignment option stored in each group is the one used
                if ( $params['horizAlign'] !== [] ) {
                        $params['frame']['align'] = array_key_first( $params['horizAlign'] );
                }
                if ( $params['vertAlign'] !== [] ) {
                        $params['frame']['valign'] = array_key_first( $params['vertAlign'] );
                }

                $params['frame']['caption'] = $caption;

                $enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );

                # Will the image be presented in a frame, with the caption below?
                // @phan-suppress-next-line PhanImpossibleCondition
                $hasVisibleCaption = isset( $params['frame']['framed'] )
                        // @phan-suppress-next-line PhanImpossibleCondition
                        || isset( $params['frame']['thumbnail'] )
                        // @phan-suppress-next-line PhanImpossibleCondition
                        || isset( $params['frame']['manualthumb'] );

                # In the old days, [[Image:Foo|text...]] would set alt text.  Later it
                # came to also set the caption, ordinary text after the image -- which
                # makes no sense, because that just repeats the text multiple times in
                # screen readers.  It *also* came to set the title attribute.
                # Now that we have an alt attribute, we should not set the alt text to
                # equal the caption: that's worse than useless, it just repeats the
                # text.  This is the framed/thumbnail case.  If there's no caption, we
                # use the unnamed parameter for alt text as well, just for the time be-
                # ing, if the unnamed param is set and the alt param is not.
                # For the future, we need to figure out if we want to tweak this more,
                # e.g., introducing a title= parameter for the title; ignoring the un-
                # named parameter entirely for images without a caption; adding an ex-
                # plicit caption= parameter and preserving the old magic unnamed para-
                # meter for BC; ...
                if ( $hasVisibleCaption ) {
                        if (
                                // @phan-suppress-next-line PhanImpossibleCondition
                                $caption === '' && !isset( $params['frame']['alt'] ) &&
                                $enableLegacyMediaDOM
                        ) {
                                # No caption or alt text, add the filename as the alt text so
                                # that screen readers at least get some description of the image
                                $params['frame']['alt'] = $link->getText();
                        }
                        # Do not set $params['frame']['title'] because tooltips are unnecessary
                        # for framed images, the caption is visible
                } else {
                        // @phan-suppress-next-line PhanImpossibleCondition
                        if ( !isset( $params['frame']['alt'] ) ) {
                                # No alt text, use the "caption" for the alt text
                                if ( $caption !== '' ) {
                                        $params['frame']['alt'] = $this->stripAltText( $caption, $holders );
                                } elseif ( $enableLegacyMediaDOM ) {
                                        # No caption, fall back to using the filename for the
                                        # alt text
                                        $params['frame']['alt'] = $link->getText();
                                }
                        }
                        # Use the "caption" for the tooltip text
                        $params['frame']['title'] = $this->stripAltText( $caption, $holders );
                }
                $params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

                // hook signature compat again, $link may have changed
                $title = Title::castFromLinkTarget( $link );
                $this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );

                # Linker does the rest
                # 'time' is only present if something filled it into $options above
                $time = $options['time'] ?? false;
                // @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
                $ret = Linker::makeImageLink( $this, $link, $file, $params['frame'], $params['handler'],
                        $time, $descQuery, $this->mOptions->getThumbSize() );

                # Give the handler a chance to modify the parser object
                if ( $handler ) {
                        $handler->parserTransformHook( $this, $file );
                }
                if ( $file ) {
                        $this->modifyImageHtml( $file, $params, $ret );
                }

                return $ret;
        }
5622
5623         /**
5624          * Parse the value of 'link' parameter in image syntax (`[[File:Foo.jpg|link=<value>]]`).
5625          *
5626          * Adds an entry to appropriate link tables.
5627          *
5628          * @since 1.32
5629          * @param string $value
5630          * @return array of `[ type, target ]`, where:
5631          *   - `type` is one of:
5632          *     - `null`: Given value is not a valid link target, use default
5633          *     - `'no-link'`: Given value is empty, do not generate a link
5634          *     - `'link-url'`: Given value is a valid external link
5635          *     - `'link-title'`: Given value is a valid internal link
5636          *   - `target` is:
5637          *     - When `type` is `null` or `'no-link'`: `false`
5638          *     - When `type` is `'link-url'`: URL string corresponding to given value
5639          *     - When `type` is `'link-title'`: Title object corresponding to given value
5640          */
5641         private function parseLinkParameter( $value ) {
5642                 $chars = self::EXT_LINK_URL_CLASS;
5643                 $addr = self::EXT_LINK_ADDR;
5644                 $prots = $this->urlUtils->validProtocols();
5645                 $type = null;
5646                 $target = false;
5647                 if ( $value === '' ) {
5648                         $type = 'no-link';
5649                 } elseif ( preg_match( "/^((?i)$prots)/", $value ) ) {
5650                         if ( preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
5651                                 $this->mOutput->addExternalLink( $value );
5652                                 $type = 'link-url';
5653                                 $target = $value;
5654                         }
5655                 } else {
5656                         // Percent-decode link arguments for consistency with wikilink
5657                         // handling (T216003#7836261).
5658                         //
5659                         // There's slight concern here though.  The |link= option supports
5660                         // two formats, link=Test%22test vs link=[[Test%22test]], both of
5661                         // which are about to be decoded.
5662                         //
5663                         // In the former case, the decoding here is straightforward and
5664                         // desirable.
5665                         //
5666                         // In the latter case, there's a potential for double decoding,
5667                         // because the wikilink syntax has a higher precedence and has
5668                         // already been parsed as a link before we get here.  $value
5669                         // has had stripAltText() called on it, which in turn calls
5670                         // replaceLinkHoldersText() on the link.  So, the text we're
5671                         // getting at this point has already been percent decoded.
5672                         //
5673                         // The problematic case is if %25 is in the title, since that
5674                         // decodes to %, which could combine with trailing characters.
5675                         // However, % is not a valid link title character, so it would
5676                         // not parse as a link and the string we received here would
5677                         // still contain the encoded %25.
5678                         //
5679                         // Hence, double decoded is not an issue.  See the test,
5680                         // "Should not double decode the link option"
5681                         if ( strpos( $value, '%' ) !== false ) {
5682                                 $value = rawurldecode( $value );
5683                         }
5684                         $linkTitle = Title::newFromText( $value );
5685                         if ( $linkTitle ) {
5686                                 $this->mOutput->addLink( $linkTitle );
5687                                 $type = 'link-title';
5688                                 $target = $linkTitle;
5689                         }
5690                 }
5691                 return [ $type, $target ];
5692         }
5693
        /**
         * Give hooks a chance to modify image thumbnail HTML
         *
         * Runs the ParserModifyImageHTML hook with this parser, the file, the
         * parameters and the HTML. The HTML is taken by reference here so that a
         * change made by the hook can reach the caller (assumes the hook runner
         * also takes it by reference -- confirm).
         *
         * @param File $file File the HTML was generated for
         * @param array $params Image parameters; makeImage() passes its whole
         *   parameter array, which has 'frame' and 'handler' sub-arrays
         * @param string &$html Image HTML
         */
        public function modifyImageHtml( File $file, array $params, string &$html ) {
                $this->hookRunner->onParserModifyImageHTML( $this, $file, $params, $html );
        }
5704
5705         /**
5706          * @param string $caption
5707          * @param LinkHolderArray|false $holders
5708          * @return string
5709          */
5710         private function stripAltText( $caption, $holders ) {
5711                 # Strip bad stuff out of the title (tooltip).  We can't just use
5712                 # replaceLinkHoldersText() here, because if this function is called
5713                 # from handleInternalLinks2(), mLinkHolders won't be up-to-date.
5714                 if ( $holders ) {
5715                         $tooltip = $holders->replaceText( $caption );
5716                 } else {
5717                         $tooltip = $this->replaceLinkHoldersText( $caption );
5718                 }
5719
5720                 # make sure there are no placeholders in thumbnail attributes
5721                 # that are later expanded to html- so expand them now and
5722                 # remove the tags
5723                 $tooltip = $this->mStripState->unstripBoth( $tooltip );
5724                 # Compatibility hack!  In HTML certain entity references not terminated
5725                 # by a semicolon are decoded (but not if we're in an attribute; that's
5726                 # how link URLs get away without properly escaping & in queries).
5727                 # But wikitext has always required semicolon-termination of entities,
5728                 # so encode & where needed to avoid decode of semicolon-less entities.
5729                 # See T209236 and
5730                 # https://www.w3.org/TR/html5/syntax.html#named-character-references
5731                 # T210437 discusses moving this workaround to Sanitizer::stripAllTags.
5732                 $tooltip = preg_replace( "/
5733                         &                       # 1. entity prefix
5734                         (?=                     # 2. followed by:
5735                         (?:                     #  a. one of the legacy semicolon-less named entities
5736                                 A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
5737                                 C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
5738                                 GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
5739                                 O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
5740                                 U(?:acute|circ|grave|uml)|Yacute|
5741                                 a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
5742                                 c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
5743                                 divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
5744                                 frac(?:1(?:2|4)|34)|
5745                                 gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
5746                                 i(?:acute|circ|excl|grave|quest|uml)|laquo|
5747                                 lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
5748                                 m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
5749                                 not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
5750                                 o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
5751                                 p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
5752                                 s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
5753                                 u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
5754                         )
5755                         (?:[^;]|$))     #  b. and not followed by a semicolon
5756                         # S = study, for efficiency
5757                         /Sx", '&amp;', $tooltip );
5758                 $tooltip = Sanitizer::stripAllTags( $tooltip );
5759
5760                 return $tooltip;
5761         }
5762
5763         /**
5764          * Callback from the Sanitizer for expanding items found in HTML attribute
5765          * values, so they can be safely tested and escaped.
5766          *
5767          * @param string &$text
5768          * @param PPFrame|false $frame
5769          * @return string
5770          * @deprecated since 1.35, internal callback should not have been public
5771          */
5772         public function attributeStripCallback( &$text, $frame = false ) {
5773                 wfDeprecated( __METHOD__, '1.35' );
5774                 $text = $this->replaceVariables( $text, $frame );
5775                 $text = $this->mStripState->unstripBoth( $text );
5776                 return $text;
5777         }
5778
        /**
         * Accessor
         *
         * @return array The keys of $this->mTagHooks (presumably the names of the
         *   registered tag hooks -- confirm against where mTagHooks is filled)
         * @since 1.6
         */
        public function getTags(): array {
                return array_keys( $this->mTagHooks );
        }
5788
        /**
         * Accessor for $this->mFunctionSynonyms, returned as stored.
         *
         * NOTE(review): the meaning of the two top-level entries (0 and 1) is not
         * visible from this method -- document it once confirmed.
         *
         * @since 1.32
         * @return array{0:array<string,string>,1:array<string,string>}
         */
        public function getFunctionSynonyms() {
                return $this->mFunctionSynonyms;
        }
5796
        /**
         * Return the result of UrlUtils::validProtocols() for this parser.
         *
         * This is the same value that parseLinkParameter() embeds in its regular
         * expressions to recognise external link targets.
         *
         * @since 1.32
         * @return string
         */
        public function getUrlProtocols() {
                return $this->urlUtils->validProtocols();
        }
5804
5805         /**
5806          * Break wikitext input into sections, and either pull or replace
5807          * some particular section's text.
5808          *
5809          * External callers should use the getSection and replaceSection methods.
5810          *
5811          * @param string $text Page wikitext
5812          * @param string|int $sectionId A section identifier string of the form:
5813          *   "<flag1> - <flag2> - ... - <section number>"
5814          *
5815          * Currently the only recognised flag is "T", which means the target section number
5816          * was derived during a template inclusion parse, in other words this is a template
5817          * section edit link. If no flags are given, it was an ordinary section edit link.
5818          * This flag is required to avoid a section numbering mismatch when a section is
5819          * enclosed by "<includeonly>" (T8563).
5820          *
5821          * The section number 0 pulls the text before the first heading; other numbers will
5822          * pull the given section along with its lower-level subsections. If the section is
5823          * not found, $mode=get will return $newtext, and $mode=replace will return $text.
5824          *
5825          * Section 0 is always considered to exist, even if it only contains the empty
5826          * string. If $text is the empty string and section 0 is replaced, $newText is
5827          * returned.
5828          *
5829          * @param string $mode One of "get" or "replace"
5830          * @param string|false $newText Replacement text for section data.
5831          * @param PageReference|null $page
5832          * @return string For "get", the extracted section text.
5833          *   for "replace", the whole page with the section replaced.
5834          */
5835         private function extractSections( $text, $sectionId, $mode, $newText, ?PageReference $page = null ) {
5836                 $magicScopeVariable = $this->lock();
5837                 $this->startParse(
5838                         $page,
5839                         ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
5840                         self::OT_PLAIN,
5841                         true
5842                 );
5843                 $outText = '';
5844                 $frame = $this->getPreprocessor()->newFrame();
5845
5846                 # Process section extraction flags
5847                 $flags = 0;
5848                 $sectionParts = explode( '-', $sectionId );
5849                 // The section ID may either be a magic string such as 'new' (which should be treated as 0),
5850                 // or a numbered section ID in the format of "T-<section index>".
5851                 // Explicitly coerce the section index into a number accordingly. (T323373)
5852                 $sectionIndex = (int)array_pop( $sectionParts );
5853                 foreach ( $sectionParts as $part ) {
5854                         if ( $part === 'T' ) {
5855                                 $flags |= Preprocessor::DOM_FOR_INCLUSION;
5856                         }
5857                 }
5858
5859                 # Check for empty input
5860                 if ( strval( $text ) === '' ) {
5861                         # Only sections 0 and T-0 exist in an empty document
5862                         if ( $sectionIndex === 0 ) {
5863                                 if ( $mode === 'get' ) {
5864                                         return '';
5865                                 }
5866
5867                                 return $newText;
5868                         } else {
5869                                 if ( $mode === 'get' ) {
5870                                         return $newText;
5871                                 }
5872
5873                                 return $text;
5874                         }
5875                 }
5876
5877                 # Preprocess the text
5878                 $root = $this->preprocessToDom( $text, $flags );
5879
5880                 # <h> nodes indicate section breaks
5881                 # They can only occur at the top level, so we can find them by iterating the root's children
5882                 $node = $root->getFirstChild();
5883
5884                 # Find the target section
5885                 if ( $sectionIndex === 0 ) {
5886                         # Section zero doesn't nest, level=big
5887                         $targetLevel = 1000;
5888                 } else {
5889                         while ( $node ) {
5890                                 if ( $node->getName() === 'h' ) {
5891                                         $bits = $node->splitHeading();
5892                                         if ( $bits['i'] == $sectionIndex ) {
5893                                                 $targetLevel = $bits['level'];
5894                                                 break;
5895                                         }
5896                                 }
5897                                 if ( $mode === 'replace' ) {
5898                                         $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5899                                 }
5900                                 $node = $node->getNextSibling();
5901                         }
5902                 }
5903
5904                 if ( !$node ) {
5905                         # Not found
5906                         if ( $mode === 'get' ) {
5907                                 return $newText;
5908                         } else {
5909                                 return $text;
5910                         }
5911                 }
5912
5913                 # Find the end of the section, including nested sections
5914                 do {
5915                         if ( $node->getName() === 'h' ) {
5916                                 $bits = $node->splitHeading();
5917                                 $curLevel = $bits['level'];
5918                                 // @phan-suppress-next-line PhanPossiblyUndeclaredVariable False positive
5919                                 if ( $bits['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
5920                                         break;
5921                                 }
5922                         }
5923                         if ( $mode === 'get' ) {
5924                                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5925                         }
5926                         $node = $node->getNextSibling();
5927                 } while ( $node );
5928
5929                 # Write out the remainder (in replace mode only)
5930                 if ( $mode === 'replace' ) {
5931                         # Output the replacement text
5932                         # Add two newlines on -- trailing whitespace in $newText is conventionally
5933                         # stripped by the editor, so we need both newlines to restore the paragraph gap
5934                         # Only add trailing whitespace if there is newText
5935                         if ( $newText != "" ) {
5936                                 $outText .= $newText . "\n\n";
5937                         }
5938
5939                         while ( $node ) {
5940                                 $outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
5941                                 $node = $node->getNextSibling();
5942                         }
5943                 }
5944
5945                 # Re-insert stripped tags
5946                 $outText = rtrim( $this->mStripState->unstripBoth( $outText ) );
5947
5948                 return $outText;
5949         }
5950
        /**
         * This function returns the text of a section, specified by $sectionId.
         * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
         * the first section before any such heading (section 0).
         *
         * If a section contains subsections, these are also returned.
         *
         * This is a wrapper around extractSections() in 'get' mode, with
         * $defaultText passed as the text to return when the section is not found.
         *
         * @param string $text Text to look in
         * @param string|int $sectionId Section identifier as a number or string
         * (e.g. 0, 1 or 'T-1').
         * @param string|false $defaultText Default to return if section is not found
         *
         * @return string Text of the requested section
         * @since 1.7
         */
        public function getSection( $text, $sectionId, $defaultText = '' ) {
                return $this->extractSections( $text, $sectionId, 'get', $defaultText );
        }
5969
/**
 * Return $oldText with the content of the section identified by $sectionId
 * replaced by $newText. If the target section does not exist, $oldText is
 * returned unchanged.
 *
 * The work is delegated to extractSections() in 'replace' mode.
 *
 * @param string $oldText Former text of the article
 * @param string|int $sectionId Section identifier as a number or string
 * (e.g. 0, 1 or 'T-1').
 * @param string|false $newText Replacing text
 *
 * @return string Modified text
 * @since 1.7
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$modifiedText = $this->extractSections( $oldText, $sectionId, 'replace', $newText );

	return $modifiedText;
}
5986
/**
 * Get an array of preprocessor section information.
 *
 * Preprocessor sections are those identified by wikitext-style syntax, not
 * HTML-style syntax. Templates are not expanded, so sections created by
 * templates or parser functions are not included. This is the same
 * definition of a section as used by section editing, but not the same as
 * the one used by TOC generation.
 *
 * These sections are typically smaller than those acted on by getSection()
 * and replaceSection() since they are not nested. Section nesting could be
 * reconstructed from the heading levels.
 *
 * The return value is a list of associative arrays, one per section, each
 * containing the following keys:
 *
 *  - index: An integer identifying the section.
 *  - level: The heading level, e.g. 1 for <h1>. For the section before the
 *    first heading, this will be 0.
 *  - offset: The byte offset within the wikitext at which the section starts
 *  - heading: The wikitext for the header which introduces the section,
 *    including equals signs. For the section before the first heading, this
 *    will be an empty string.
 *  - text: The complete text of the section.
 *
 * @param string $text
 * @return array[]
 * @internal
 */
public function getFlatSectionInfo( $text ) {
	// Held only for its lifetime: the lock is released when it goes out of scope.
	$magicScopeVariable = $this->lock();
	$options = ParserOptions::newFromUser( RequestContext::getMain()->getUser() );
	$this->startParse( null, $options, self::OT_PLAIN, true );

	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );

	$sections = [];
	// The implicit section that precedes the first heading
	$currentSection = [
		'index' => 0,
		'level' => 0,
		'offset' => 0,
		'heading' => '',
		'text' => ''
	];
	$offset = 0;

	for ( $node = $root->getFirstChild(); $node; $node = $node->getNextSibling() ) {
		$nodeText = $frame->expand( $node, PPFrame::RECOVER_ORIG );
		if ( $node->getName() !== 'h' ) {
			// Ordinary content belongs to whichever section is currently open
			$currentSection['text'] .= $nodeText;
		} else {
			// A heading closes the open section and starts a new one,
			// whose text begins with the heading itself
			$bits = $node->splitHeading();
			$sections[] = $currentSection;
			$currentSection = [
				'index' => $bits['i'],
				'level' => $bits['level'],
				'offset' => $offset,
				'heading' => $nodeText,
				'text' => $nodeText
			];
		}
		$offset += strlen( $nodeText );
	}

	// Flush the section that was still open when the nodes ran out
	$sections[] = $currentSection;
	return $sections;
}
6058
/**
 * Get the ID of the revision being parsed.
 *
 * The return value will be one of:
 *   - a) Positive: a specific revision ID (current or old)
 *   - b) Zero: the revision ID is specified by getCurrentRevisionRecordCallback()
 *   - c) Null: the parse is for preview mode and there is no revision
 *
 * @return int|null
 * @since 1.13
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;

	return $revisionId;
}
6073
/**
 * Get the revision record object for $this->mRevisionId
 *
 * The result is remembered in $this->mRevisionRecordObject, so later calls
 * return the stored object without consulting the callback again.
 *
 * @return RevisionRecord|null Either a RevisionRecord object or null
 * @since 1.35
 */
public function getRevisionRecordObject() {
	$cached = $this->mRevisionRecordObject;
	if ( $cached ) {
		return $cached;
	}

	// NOTE: the callback is consulted even if mRevisionId is null, which is
	// useful when parsing a revision that has not yet been saved. If it
	// hands back a saved revision while we are in preview mode, that
	// result is discarded below.
	// NOTE: This callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. We can't just
	// skip it if mRevisionId is set.
	$rev = call_user_func(
		$this->mOptions->getCurrentRevisionRecordCallback(),
		$this->getTitle(),
		$this
	);

	if ( !$rev ) {
		// The revision record callback returns `false` (not null) to
		// indicate that the revision is missing.  (See for example
		// Parser::statelessFetchRevisionRecord(), the default callback.)
		// This API expects `null` instead. (T251952)
		return null;
	}

	$wantedId = $this->mRevisionId;

	if ( $wantedId === null && $rev->getId() ) {
		// Preview mode (mRevisionId is null), yet the callback returned an
		// existing revision. It is probably the page's current revision,
		// which is not what we want here, so ignore it. The callback still
		// has to be called so that an unsaved revision can be injected,
		// e.g. for self-transclusion previews.
		return null;
	}

	// If the parse is for a new revision, then the callback should have
	// already been set to force the object and should match mRevisionId.
	// If not, try to fetch by mRevisionId instead.
	if ( $wantedId && $rev->getId() != $wantedId ) {
		$revisionLookup = MediaWikiServices::getInstance()->getRevisionLookup();
		$rev = $revisionLookup->getRevisionById( $wantedId );
	}

	$this->mRevisionRecordObject = $rev;

	return $rev;
}
6128
/**
 * Get the timestamp associated with the current revision, adjusted for
 * the default server-local timestamp
 *
 * The adjusted value is stored in $this->mRevisionTimestamp and reused by
 * later calls.
 *
 * @return string TS_MW timestamp
 * @since 1.9
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Use specified revision timestamp, falling back to the current timestamp
		$revObject = $this->getRevisionRecordObject();
		if ( $revObject && $revObject->getTimestamp() ) {
			$timestamp = $revObject->getTimestamp();
		} else {
			$timestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $timestamp ); // unadjusted time zone

		# The cryptic '' timezone parameter tells to use the site-default
		# timezone offset instead of the user settings.
		# Since this value will be saved into the parser cache, served
		# to other users, and potentially even used inside links and such,
		# it needs to be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $timestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
6156
/**
 * Get the name of the user that edited the last revision
 *
 * A non-null result is stored in $this->mRevisionUser and reused by later
 * calls.
 *
 * @return string|null User name, or null when neither the revision record
 *   nor the wiki/preview fallback supplies one
 * @since 1.15
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revObject = $this->getRevisionRecordObject();
	if ( $revObject && $revObject->getUser() ) {
		$this->mRevisionUser = $revObject->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		# if this template is subst: the revision id will be blank,
		# so just use the current user's name
		$this->mRevisionUser = $this->getUserIdentity()->getName();
	}

	# When neither branch applied, $this->mRevisionUser is still null here
	return $this->mRevisionUser;
}
6180
/**
 * Get the size of the revision
 *
 * The value is computed on first use and kept in $this->mRevisionSize.
 *
 * @return int|null Revision size
 * @since 1.22
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize !== null ) {
		return $this->mRevisionSize;
	}

	$revObject = $this->getRevisionRecordObject();

	# if this variable is subst: the revision id will be blank,
	# so just use the parser input size, because the own substitution
	# will change the size.
	$this->mRevisionSize = $revObject
		? $revObject->getSize()
		: $this->mInputSize;

	return $this->mRevisionSize;
}
6202
/**
 * Accessor for the 'defaultsort' page property.
 * Falls back to the empty string if none is set.
 *
 * This value is treated as a prefix, so the empty string is equivalent
 * to sorting by page name.
 *
 * @return string
 * @since 1.9
 * @deprecated since 1.38, use
 * $parser->getOutput()->getPageProperty('defaultsort') ?? ''
 */
public function getDefaultSort() {
	wfDeprecated( __METHOD__, '1.38' );

	$sortKey = $this->mOutput->getPageProperty( 'defaultsort' );

	return $sortKey ?? '';
}
6220
/**
 * Turn already-stripped heading text into a section name.
 *
 * Applies, in this order: Sanitizer::normalizeSectionNameWhitespace(),
 * Sanitizer::decodeCharReferences() and self::normalizeSectionName().
 *
 * @param string $text
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	return self::normalizeSectionName(
		Sanitizer::decodeCharReferences(
			Sanitizer::normalizeSectionNameWhitespace( $text )
		)
	);
}
6227
/**
 * Build a '#'-prefixed anchor from a section name, escaped with
 * Sanitizer::escapeIdForLink().
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private static function makeAnchor( $sectionName ) {
	$escapedId = Sanitizer::escapeIdForLink( $sectionName );

	return '#' . $escapedId;
}
6231
/**
 * Build a '#'-prefixed anchor from a section name, using the fallback ID
 * escaping when the second entry of the FragmentMode setting is 'legacy',
 * and the regular link escaping otherwise.
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( MainConfigNames::FragmentMode );
	$hasLegacyFallback = ( $fragmentMode[1] ?? null ) === 'legacy';

	// ForAttribute() and ForLink() are the same for legacy encoding
	$id = $hasLegacyFallback
		? Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK )
		: Sanitizer::escapeIdForLink( $sectionName );

	return "#$id";
}
6243
/**
 * Try to guess the section anchor name based on a wikitext fragment
 * presumably extracted from a heading, for example "Header" from
 * "== Header ==".
 *
 * @param string $text
 * @return string Anchor (starting with '#')
 * @since 1.12
 */
public function guessSectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6259
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead, if possible. For use in redirects, since various versions
 * of Microsoft browsers interpret Location: headers as something other
 * than UTF-8, resulting in breakage.
 *
 * @param string $text The section name
 * @return string Anchor (starting with '#')
 * @since 1.17
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Strip out wikitext links (they break the anchor)
	$stripped = $this->stripSectionName( $text );

	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
6276
/**
 * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
 *
 * @param string $text Section name (plain text)
 * @return string Anchor (starting with '#')
 * @since 1.31
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
6287
/**
 * Apply the same normalization as code making links to this section would
 *
 * The text is parsed as the fragment of a title string ("#$text"); if the
 * title parser rejects it, the text is returned untouched.
 *
 * @param string $text
 * @return string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	/** @var MediaWikiTitleCodec $titleParser */
	$titleParser = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $titleParser';

	try {
		return $titleParser->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}
}
6307
/**
 * Strips a text string of wikitext for use in a section anchor
 *
 * Accepts a text string, removes the wikitext markup from it and leaves
 * only the resultant text (i.e. the result of [[User:WikiSysop|Sysop]]
 * would be "Sysop" and the result of [[User:WikiSysop]] would be
 * "User:WikiSysop") - this is intended to create valid section anchors by
 * mimicking the output of the parser when headings are parsed.
 *
 * @param string $text Text string to be stripped of wikitext
 * for use in a Section anchor
 * @return string Filtered text string
 * @since 1.12
 */
public function stripSectionName( $text ) {
	# Strip internal link markup: piped links keep their label,
	# unpiped links keep their target
	$pipedLink = '/\[\[:?([^[|]+)\|([^[]+)\]\]/';
	$plainLink = '/\[\[:?([^[]+)\|?\]\]/';
	$text = preg_replace( $pipedLink, '$2', $text );
	$text = preg_replace( $plainLink, '$1', $text );

	# Strip external link markup
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$externalLink = '/\[(?i:' . $this->urlUtils->validProtocols() . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLink, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
6342
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback. For example, with input:
 *
 *  aaa<MARKER>bbb
 *
 * This will call the callback function twice, with 'aaa' and 'bbb'. Those
 * two strings will be replaced with the value returned by the callback in
 * each case. The markers themselves are copied to the output untouched.
 *
 * @param string $s
 * @param callable $callback
 *
 * @return string
 * @internal
 * @since 1.12
 */
public function markerSkipCallback( $s, callable $callback ) {
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$out = '';
	$pos = 0;

	while ( $pos < $length ) {
		$markerStart = strpos( $s, self::MARKER_PREFIX, $pos );
		if ( $markerStart === false ) {
			// No further marker: everything that is left is plain text
			$out .= call_user_func( $callback, substr( $s, $pos ) );
			break;
		}

		// Plain text in front of the marker (possibly the empty string)
		$out .= call_user_func( $callback, substr( $s, $pos, $markerStart - $pos ) );

		$markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
		if ( $markerEnd === false ) {
			// Marker prefix without a suffix: copy the remainder verbatim
			$out .= substr( $s, $markerStart );
			break;
		}

		// Copy the complete marker verbatim and continue right after it
		$markerEnd += $suffixLength;
		$out .= substr( $s, $markerStart, $markerEnd - $markerStart );
		$pos = $markerEnd;
	}

	return $out;
}
6384
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to the killMarkers() method of the current strip state.
 *
 * @param string $text
 * @return string
 * @since 1.19
 */
public function killMarkers( $text ) {
	$stripState = $this->mStripState;

	return $stripState->killMarkers( $text );
}
6395
/**
 * Parse a width param of imagelink like 300px or 200x300px
 *
 * @param string $value
 * @param bool $parseHeight Whether the WIDTHxHEIGHT form is accepted
 * @param bool $localized Defaults to false; set to true if the $value
 *   has already been matched against `img_width` to localize the `px`
 *   suffix.
 *
 * @return array Empty if $value is '' or matches neither accepted form;
 *   otherwise holds an integer 'width' and, for the WIDTHxHEIGHT form,
 *   an integer 'height'
 * @since 1.20
 * @internal
 */
public function parseWidthParam( $value, $parseHeight = true, bool $localized = false ) {
	if ( $value === '' ) {
		return [];
	}

	if ( !$localized ) {
		// Strip a localized 'px' suffix (T374311)
		$mwArray = $this->magicWordFactory->newArray( [ 'img_width' ] );
		[ $magicWord, $newValue ] = $mwArray->matchVariableStartToEnd( $value );
		if ( $magicWord ) {
			$value = $newValue;
		}
	}

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	$m = [];
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(px)?\s*$/', $value, $m ) ) {
		$dimensions = [
			'width' => intval( $m[1] ),
			'height' => intval( $m[2] ),
		];
		$trailingPx = $m[3] ?? false;
	} elseif ( preg_match( '/^([0-9]*)\s*(px)?\s*$/', $value, $m ) ) {
		$dimensions = [ 'width' => intval( $m[1] ) ];
		$trailingPx = $m[2] ?? false;
	} else {
		return [];
	}

	// A "px" captured by the pattern itself is reported via a tracking category
	if ( $trailingPx ) {
		$this->addTrackingCategory( 'double-px-category' );
	}

	return $dimensions;
}
6441
/**
 * Lock the current instance of the parser.
 *
 * This is meant to stop someone from calling the parser
 * recursively and messing up all the strip state.
 *
 * @return ScopedCallback The lock will be released once the return value goes out of scope.
 */
protected function lock() {
	$currentOwner = $this->mInParse;
	if ( $currentOwner ) {
		throw new LogicException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $currentOwner );
	}

	// Remember the backtrace of whoever takes the lock, so that a second
	// locking attempt can report the lock owner for easier debugging
	$this->mInParse = ( new RuntimeException )->getTraceAsString();

	// Resetting the flag is what releases the lock
	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
6467
/**
 * Will entry points such as parse() throw an exception due to the parser
 * already being active?
 *
 * @since 1.39
 * @return bool True while $this->mInParse holds a truthy value
 */
public function isLocked() {
	$lockOwner = $this->mInParse;

	return (bool)$lockOwner;
}
6478
/**
 * Strip outer <p></p> tag from the HTML source of a single paragraph.
 *
 * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
 * or if there is more than one <p/> tag in the input HTML.
 *
 * @param string $html
 * @return string
 * @since 1.24
 */
public static function stripOuterParagraph( $html ) {
	$m = [];
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $m ) ) {
		// Not wrapped in a single attribute-less <p>...</p>
		return $html;
	}

	$inner = $m[1];
	if ( strpos( $inner, '</p>' ) !== false ) {
		// A closing tag inside the match means there are several paragraphs
		return $html;
	}

	return $inner;
}
6497
/**
 * Add HTML tags marking the parts of a page title, to be displayed in the first heading of the page.
 *
 * The namespace and separator spans are only emitted when $nsText is not
 * the empty string; the main-text span is always emitted.
 *
 * @internal
 * @since 1.39
 * @param string|HtmlArmor $nsText
 * @param string|HtmlArmor $nsSeparator
 * @param string|HtmlArmor $mainText
 * @return string HTML
 */
public static function formatPageTitle( $nsText, $nsSeparator, $mainText ): string {
	$spans = [];

	if ( $nsText !== '' ) {
		$spans[] = '<span class="mw-page-title-namespace">' . HtmlArmor::getHtml( $nsText ) . '</span>';
		$spans[] = '<span class="mw-page-title-separator">' . HtmlArmor::getHtml( $nsSeparator ) . '</span>';
	}
	$spans[] = '<span class="mw-page-title-main">' . HtmlArmor::getHtml( $mainText ) . '</span>';

	return implode( '', $spans );
}
6517
/**
 * Strip everything but the <body> from the provided string
 *
 * Removes the leading part of the text up to and including the first
 * opening <body ...> tag, and a trailing </body></html> pair (with
 * optional whitespace between and after the two tags). Text that has
 * neither is returned unchanged.
 *
 * @param string $text
 * @return string
 * @unstable
 */
public static function extractBody( string $text ): string {
	// preg_replace() returns null when PCRE reports an error (for example
	// an exhausted backtrack limit on very large input). Returning that
	// null would violate the declared string return type, so each step
	// falls back to the text it was given.
	$text = preg_replace( '!^.*?<body[^>]*>!s', '', $text, 1 ) ?? $text;
	$text = preg_replace( '!</body>\s*</html>\s*$!', '', $text, 1 ) ?? $text;

	return $text;
}
6529
/**
 * Sets up the PHP implementation of OOUI for use in this request
 * and instructs OutputPage to enable OOUI for itself.
 *
 * @since 1.26
 * @deprecated since 1.35, use $parser->getOutput()->setEnableOOUI() instead.
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );

	OutputPage::setupOOUI();

	$parserOutput = $this->mOutput;
	$parserOutput->setEnableOOUI( true );
}
6542
/**
 * Sets the flag on the parser output and writes a debug log entry naming
 * the flag, the page title and the reason.
 * Note that there is a copy of this method in CoreMagicVariables as well.
 *
 * @param string $flag
 * @param string $reason
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setOutputFlag( $flag );

	$title = $this->getTitle();
	$name = $title->getPrefixedText();
	$message = __METHOD__ . ": set $flag flag on '$name'; $reason";

	$this->logger->debug( $message );
}
6554 }
6555
/**
 * Register the un-namespaced name `Parser` as an alias of
 * MediaWiki\Parser\Parser.
 *
 * @deprecated class alias since 1.42
 */
class_alias( Parser::class, 'Parser' );