Merge "ResourceLoader: Deprecate ResourceLoader::makeConfigSetScript"
[mediawiki.git] / includes / parser / Parser.php
blob186f0344d43e4ecf5aeb4828fdc65751281798eb
1 <?php
2 /**
3 * PHP parser that converts wiki markup to HTML.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
21 * @ingroup Parser
24 namespace MediaWiki\Parser;
26 use BadMethodCallException;
27 use Exception;
28 use File;
29 use HtmlArmor;
30 use ImageGalleryBase;
31 use ImageGalleryClassNotFoundException;
32 use InvalidArgumentException;
33 use LogicException;
34 use MapCacheLRU;
35 use MediaHandler;
36 use MediaWiki\Cache\CacheKeyHelper;
37 use MediaWiki\Category\TrackingCategories;
38 use MediaWiki\Config\ServiceOptions;
39 use MediaWiki\Content\TextContent;
40 use MediaWiki\Context\RequestContext;
41 use MediaWiki\Debug\DeprecationHelper;
42 use MediaWiki\HookContainer\HookContainer;
43 use MediaWiki\HookContainer\HookRunner;
44 use MediaWiki\Html\Html;
45 use MediaWiki\Http\HttpRequestFactory;
46 use MediaWiki\Language\ILanguageConverter;
47 use MediaWiki\Language\Language;
48 use MediaWiki\Language\LanguageCode;
49 use MediaWiki\Language\RawMessage;
50 use MediaWiki\Languages\LanguageConverterFactory;
51 use MediaWiki\Languages\LanguageNameUtils;
52 use MediaWiki\Linker\Linker;
53 use MediaWiki\Linker\LinkRenderer;
54 use MediaWiki\Linker\LinkRendererFactory;
55 use MediaWiki\Linker\LinkTarget;
56 use MediaWiki\MainConfigNames;
57 use MediaWiki\MediaWikiServices;
58 use MediaWiki\Message\Message;
59 use MediaWiki\Output\OutputPage;
60 use MediaWiki\Page\File\BadFileLookup;
61 use MediaWiki\Page\PageIdentity;
62 use MediaWiki\Page\PageReference;
63 use MediaWiki\Preferences\SignatureValidatorFactory;
64 use MediaWiki\Request\FauxRequest;
65 use MediaWiki\Revision\RevisionAccessException;
66 use MediaWiki\Revision\RevisionRecord;
67 use MediaWiki\Revision\SlotRecord;
68 use MediaWiki\SpecialPage\SpecialPage;
69 use MediaWiki\SpecialPage\SpecialPageFactory;
70 use MediaWiki\Tidy\TidyDriverBase;
71 use MediaWiki\Title\MalformedTitleException;
72 use MediaWiki\Title\MediaWikiTitleCodec;
73 use MediaWiki\Title\NamespaceInfo;
74 use MediaWiki\Title\Title;
75 use MediaWiki\Title\TitleFormatter;
76 use MediaWiki\User\Options\UserOptionsLookup;
77 use MediaWiki\User\User;
78 use MediaWiki\User\UserFactory;
79 use MediaWiki\User\UserIdentity;
80 use MediaWiki\User\UserNameUtils;
81 use MediaWiki\Utils\MWTimestamp;
82 use MediaWiki\Utils\UrlUtils;
83 use MediaWiki\Xml\Xml;
84 use Psr\Log\LoggerInterface;
85 use RuntimeException;
86 use SectionProfiler;
87 use StringUtils;
88 use UnexpectedValueException;
89 use Wikimedia\Bcp47Code\Bcp47CodeValue;
90 use Wikimedia\IPUtils;
91 use Wikimedia\Message\MessageParam;
92 use Wikimedia\Message\MessageSpecifier;
93 use Wikimedia\ObjectCache\WANObjectCache;
94 use Wikimedia\Parsoid\Core\SectionMetadata;
95 use Wikimedia\Parsoid\Core\TOCData;
96 use Wikimedia\Parsoid\DOM\Comment;
97 use Wikimedia\Parsoid\DOM\DocumentFragment;
98 use Wikimedia\Parsoid\DOM\Element;
99 use Wikimedia\Parsoid\DOM\Node;
100 use Wikimedia\Parsoid\Utils\DOMCompat;
101 use Wikimedia\Parsoid\Utils\DOMUtils;
102 use Wikimedia\ScopedCallback;
105 * @defgroup Parser Parser
109 * PHP Parser - Processes wiki markup (which uses a more user-friendly
110 * syntax, such as "[[link]]" for making links), and provides a one-way
111 * transformation of that wiki markup into (X)HTML output / markup
112 * (which in turn the browser understands, and can display).
114 * There are seven main entry points into the Parser class:
116 * - Parser::parse()
117 * produces HTML output
118 * - Parser::preSaveTransform()
119 * produces altered wiki markup
120 * - Parser::preprocess()
121 * removes HTML comments and expands templates
122 * - Parser::cleanSig() and Parser::cleanSigInSig()
123 * cleans a signature before saving it to preferences
124 * - Parser::getSection()
125 * return the content of a section from an article for section editing
126 * - Parser::replaceSection()
127 * replaces a section by number inside an article
128 * - Parser::getPreloadText()
129 * removes <noinclude> sections and <includeonly> tags
131 * @warning Keep the globals $wgUser, $wgTitle, $wgRequest and $wgLang away from this class!
133 * @par Settings:
134 * $wgNamespacesWithSubpages
136 * @par Settings only within ParserOptions:
137 * $wgAllowExternalImages
138 * $wgAllowSpecialInclusion
139 * $wgInterwikiMagic
140 * $wgMaxArticleSize
142 * @ingroup Parser
144 #[\AllowDynamicProperties]
145 class Parser {
146 use DeprecationHelper;
148 # Flags for Parser::setFunctionHook
149 public const SFH_NO_HASH = 1;
150 public const SFH_OBJECT_ARGS = 2;
152 # Constants needed for external link processing
154 * Everything except bracket, space, or control characters.
155 * \p{Zs} is the Unicode 'separator, space' category. It covers the space U+0020
156 * as well as U+3000 IDEOGRAPHIC SPACE (see T21052).
157 * \x{FFFD} is the Unicode replacement character, which the HTML5 spec
158 * uses to replace invalid HTML characters.
160 public const EXT_LINK_URL_CLASS = '[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]';
162 * Simplified expression to match an IPv4 or IPv6 address, or
163 * at least one character of a host name (embeds Parser::EXT_LINK_URL_CLASS)
165 // phpcs:ignore Generic.Files.LineLength
166 private const EXT_LINK_ADDR = '(?:[0-9.]+|\\[(?i:[0-9a-f:.]+)\\]|[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}])';
167 /** RegExp to make image URLs (embeds IPv6 part of Parser::EXT_LINK_ADDR) */
168 // phpcs:ignore Generic.Files.LineLength
169 private const EXT_IMAGE_REGEX = '/^(http:\/\/|https:\/\/)((?:\\[(?i:[0-9a-f:.]+)\\])?[^][<>"\\x00-\\x20\\x7F\p{Zs}\x{FFFD}]+)
170 \\/([A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF]+)\\.((?i)avif|gif|jpg|jpeg|png|svg|webp)$/Sxu';
172 /** Regular expression for a non-newline space */
173 private const SPACE_NOT_NL = '(?:\t|&nbsp;|&\#0*160;|&\#[Xx]0*[Aa]0;|\p{Zs})';
176 * @var int Preprocess wikitext in transclusion mode
177 * @deprecated Since 1.36
179 public const PTD_FOR_INCLUSION = Preprocessor::DOM_FOR_INCLUSION;
181 # Allowed values for $this->mOutputType
182 /** Output type: like Parser::parse() */
183 public const OT_HTML = 1;
184 /** Output type: like Parser::preSaveTransform() */
185 public const OT_WIKI = 2;
186 /** Output type: like Parser::preprocess() */
187 public const OT_PREPROCESS = 3;
189 * Output type: like Parser::extractSections() - portions of the
190 * original are returned unchanged.
192 public const OT_PLAIN = 4;
195 * @var string Prefix and suffix for temporary replacement strings
196 * for the multipass parser.
198 * \x7f should never appear in input as it's disallowed in XML.
199 * Using it at the front also gives us a little extra robustness
200 * since it shouldn't match when butted up against identifier-like
201 * string constructs.
203 * Must not consist of all title characters, or else it will change
204 * the behavior of <nowiki> in a link.
206 * Must have a character that needs escaping in attributes, otherwise
207 * someone could put a strip marker in an attribute, to get around
208 * escaping quote marks, and break out of the attribute. Thus we add
209 * `'".
211 public const MARKER_SUFFIX = "-QINU`\"'\x7f";
212 public const MARKER_PREFIX = "\x7f'\"`UNIQ-";
215 * Internal marker used by parser to track where the table of
216 * contents should be. Various magic words can change the position
217 * during the parse. The table of contents is generated during
218 * the parse, however skins have the final decision on whether the
219 * table of contents is injected. This placeholder element
220 * identifies where in the page the table of contents should be
221 * injected, if at all.
222 * @var string
223 * @see Keep this in sync with BlockLevelPass::execute() and
224 * RemexCompatMunger::isTableOfContentsMarker()
225 * @internal Skins should *not* directly reference TOC_PLACEHOLDER
226 * but instead use Parser::replaceTableOfContentsMarker().
228 public const TOC_PLACEHOLDER = '<meta property="mw:PageProp/toc" />';
231 * Permissive regexp matching TOC_PLACEHOLDER. This allows for some
232 * minor modifications to the placeholder to be made by extensions
233 * without breaking the TOC (T317857); note also that Parsoid's version
234 * of the placeholder might include additional attributes.
235 * @var string
237 private const TOC_PLACEHOLDER_REGEX = '/<meta\\b[^>]*\\bproperty\\s*=\\s*"mw:PageProp\\/toc"[^>]*>/';
239 # Persistent:
240 /** @var array<string,callable> */
241 private array $mTagHooks = [];
242 /** @var array<string,array{0:callable,1:int}> */
243 private array $mFunctionHooks = [];
244 /** @var array{0:array<string,string>,1:array<string,string>} */
245 private array $mFunctionSynonyms = [ 0 => [], 1 => [] ];
246 /** @var string[] */
247 private array $mStripList = [];
248 /** @var array<string,string> */
249 private array $mVarCache = [];
250 /** @var array<string,array<string,string[]>> */
251 private array $mImageParams = [];
252 /** @var array<string,MagicWordArray> */
253 private array $mImageParamsMagicArray = [];
254 /** @deprecated since 1.35 */
255 public $mMarkerIndex = 0;
257 // Initialised by initializeVariables()
258 /** @var MagicWordArray */
259 private MagicWordArray $mVariables;
260 private MagicWordArray $mSubstWords;
262 // Initialised in constructor
263 /** @var string */
264 private string $mExtLinkBracketedRegex;
265 private UrlUtils $urlUtils;
266 private Preprocessor $mPreprocessor;
268 // Cleared with clearState():
269 /** @var ParserOutput */
270 private ParserOutput $mOutput;
271 private int $mAutonumber = 0;
272 private StripState $mStripState;
273 private LinkHolderArray $mLinkHolders;
274 private int $mLinkID = 0;
275 private array $mIncludeSizes;
277 * @internal
278 * @var int
280 public $mPPNodeCount;
282 * @internal
283 * @var int
285 public $mHighestExpansionDepth;
286 private array $mTplRedirCache;
287 /** @internal */
288 public array $mHeadings;
289 /** @var array<string,false> */
290 private array $mDoubleUnderscores;
292 * Number of expensive parser function calls
293 * @deprecated since 1.35
295 public $mExpensiveFunctionCount;
296 private bool $mShowToc;
297 private bool $mForceTocPosition;
298 private array $mTplDomCache;
299 private ?UserIdentity $mUser;
301 # Temporary
302 # These are variables reset at least once per parse regardless of $clearState
305 * @var ParserOptions|null
306 * @deprecated since 1.35, use Parser::getOptions()
308 private $mOptions;
310 # Deprecated "dynamic" properties
311 # These used to be dynamic properties added to the parser, but these
312 # have been deprecated since 1.42.
313 /** @deprecated since 1.42: T343229 */
314 public $scribunto_engine;
315 /** @deprecated since 1.42: T343230 */
316 public $extCite;
317 /** @deprecated since 1.42: T343226 */
318 public $extTemplateStylesCache;
319 /** @deprecated since 1.42: T357838 */
320 public $static_tag_buf;
321 /** @deprecated since 1.42: T203531 */
322 public $mExtVariables;
323 /** @deprecated since 1.42: T203532 */
324 public $mExtArrays;
325 /** @deprecated since 1.42: T359887 */
326 public $mExtHashTables;
327 /** @deprecated since 1.42: T203563 */
328 public $mExtLoopsCounter;
329 /** @deprecated since 1.42: T362664 */
330 public $proofreadRenderingPages;
331 /** @deprecated since 1.42: T362693 */
332 public $mTemplatePath;
335 * Title context, used for self-link rendering and similar things
337 * @deprecated since 1.35, use Parser::getPage()
339 private Title $mTitle;
340 /** Output type, one of the OT_xxx constants */
341 private int $mOutputType;
342 /** When false, suppress extension tag processing for OT_PREPROCESS */
343 private bool $mStripExtTags = true;
345 * Shortcut alias, see Parser::setOutputType()
346 * @deprecated since 1.35
348 private array $ot;
349 /** ID to display in {{REVISIONID}} tags */
350 private ?int $mRevisionId = null;
351 /** The timestamp of the specified revision ID */
352 private ?string $mRevisionTimestamp = null;
353 /** User to display in {{REVISIONUSER}} tag */
354 private ?string $mRevisionUser = null;
355 /** Size to display in {{REVISIONSIZE}} variable */
356 private ?int $mRevisionSize = null;
357 /** @var int|false For {{PAGESIZE}} on current page */
358 private $mInputSize = false;
360 private ?RevisionRecord $mRevisionRecordObject = null;
363 * A cache of the current revisions of titles. Keys are $title->getPrefixedDbKey()
365 * @since 1.24
367 private ?MapCacheLRU $currentRevisionCache = null;
370 * @var bool|string Recursive call protection.
371 * @internal
373 private $mInParse = false;
375 private SectionProfiler $mProfiler;
376 private ?LinkRenderer $mLinkRenderer = null;
378 private MagicWordFactory $magicWordFactory;
379 private Language $contLang;
380 private LanguageConverterFactory $languageConverterFactory;
381 private LanguageNameUtils $languageNameUtils;
382 private ParserFactory $factory;
383 private SpecialPageFactory $specialPageFactory;
384 private TitleFormatter $titleFormatter;
386 * This is called $svcOptions instead of $options like elsewhere to avoid confusion with
387 * $mOptions, which was historically public and is widely used, and also with the local
388 * variable $options used for ParserOptions throughout this file.
390 private ServiceOptions $svcOptions;
391 private LinkRendererFactory $linkRendererFactory;
392 private NamespaceInfo $nsInfo;
393 private LoggerInterface $logger;
394 private BadFileLookup $badFileLookup;
395 private HookContainer $hookContainer;
396 private HookRunner $hookRunner;
397 private TidyDriverBase $tidy;
398 private WANObjectCache $wanCache;
399 private UserOptionsLookup $userOptionsLookup;
400 private UserFactory $userFactory;
401 private HttpRequestFactory $httpRequestFactory;
402 private TrackingCategories $trackingCategories;
403 private SignatureValidatorFactory $signatureValidatorFactory;
404 private UserNameUtils $userNameUtils;
407 * @internal For use by ServiceWiring
409 public const CONSTRUCTOR_OPTIONS = [
410 // See documentation for the corresponding config options
411 // Many of these are only used in (eg) CoreMagicVariables
412 MainConfigNames::AllowDisplayTitle,
413 MainConfigNames::AllowSlowParserFunctions,
414 MainConfigNames::ArticlePath,
415 MainConfigNames::EnableScaryTranscluding,
416 MainConfigNames::ExtraInterlanguageLinkPrefixes,
417 MainConfigNames::FragmentMode,
418 MainConfigNames::Localtimezone,
419 MainConfigNames::MaxSigChars,
420 MainConfigNames::MaxTocLevel,
421 MainConfigNames::MiserMode,
422 MainConfigNames::RawHtml,
423 MainConfigNames::ScriptPath,
424 MainConfigNames::Server,
425 MainConfigNames::ServerName,
426 MainConfigNames::ShowHostnames,
427 MainConfigNames::SignatureValidation,
428 MainConfigNames::Sitename,
429 MainConfigNames::StylePath,
430 MainConfigNames::TranscludeCacheExpiry,
431 MainConfigNames::PreprocessorCacheThreshold,
432 MainConfigNames::ParserEnableLegacyMediaDOM,
433 MainConfigNames::EnableParserLimitReporting,
434 MainConfigNames::ParserEnableUserLanguage,
435 MainConfigNames::ParsoidFragmentSupport,
/**
 * Wire a Parser up from its injected services, then register the core
 * parser functions, the core tag hooks and the magic variables.
 *
 * Constructing parsers directly is not allowed! Use a ParserFactory.
 * @internal
 *
 * @param ServiceOptions $svcOptions Must provide every key listed in self::CONSTRUCTOR_OPTIONS
 * @param MagicWordFactory $magicWordFactory
 * @param Language $contLang Content language
 * @param ParserFactory $factory
 * @param UrlUtils $urlUtils
 * @param SpecialPageFactory $spFactory
 * @param LinkRendererFactory $linkRendererFactory
 * @param NamespaceInfo $nsInfo
 * @param LoggerInterface $logger
 * @param BadFileLookup $badFileLookup
 * @param LanguageConverterFactory $languageConverterFactory
 * @param LanguageNameUtils $languageNameUtils
 * @param HookContainer $hookContainer
 * @param TidyDriverBase $tidy
 * @param WANObjectCache $wanCache
 * @param UserOptionsLookup $userOptionsLookup
 * @param UserFactory $userFactory
 * @param TitleFormatter $titleFormatter
 * @param HttpRequestFactory $httpRequestFactory
 * @param TrackingCategories $trackingCategories
 * @param SignatureValidatorFactory $signatureValidatorFactory
 * @param UserNameUtils $userNameUtils
 */
public function __construct(
	ServiceOptions $svcOptions,
	MagicWordFactory $magicWordFactory,
	Language $contLang,
	ParserFactory $factory,
	UrlUtils $urlUtils,
	SpecialPageFactory $spFactory,
	LinkRendererFactory $linkRendererFactory,
	NamespaceInfo $nsInfo,
	LoggerInterface $logger,
	BadFileLookup $badFileLookup,
	LanguageConverterFactory $languageConverterFactory,
	LanguageNameUtils $languageNameUtils,
	HookContainer $hookContainer,
	TidyDriverBase $tidy,
	WANObjectCache $wanCache,
	UserOptionsLookup $userOptionsLookup,
	UserFactory $userFactory,
	TitleFormatter $titleFormatter,
	HttpRequestFactory $httpRequestFactory,
	TrackingCategories $trackingCategories,
	SignatureValidatorFactory $signatureValidatorFactory,
	UserNameUtils $userNameUtils
) {
	// Deprecation shims: dynamic properties (1.42) and the members that
	// stopped being public in 1.35.
	$this->deprecateDynamicPropertiesAccess( '1.42', __CLASS__ );
	foreach ( [ 'ot', 'mTitle', 'mOptions' ] as $formerlyPublic ) {
		$this->deprecatePublicProperty( $formerlyPublic, '1.35', __CLASS__ );
	}

	if ( ParserFactory::$inParserFactory === 0 ) {
		// Direct construction of Parser was deprecated in 1.34 and
		// removed in 1.36; use a ParserFactory instead.
		throw new BadMethodCallException( 'Direct construction of Parser not allowed' );
	}
	$svcOptions->assertRequiredOptions( self::CONSTRUCTOR_OPTIONS );

	// Plain service injection; none of these assignments depends on another.
	$this->svcOptions = $svcOptions;
	$this->urlUtils = $urlUtils;
	$this->magicWordFactory = $magicWordFactory;
	$this->contLang = $contLang;
	$this->factory = $factory;
	$this->specialPageFactory = $spFactory;
	$this->linkRendererFactory = $linkRendererFactory;
	$this->nsInfo = $nsInfo;
	$this->logger = $logger;
	$this->badFileLookup = $badFileLookup;
	$this->languageConverterFactory = $languageConverterFactory;
	$this->languageNameUtils = $languageNameUtils;
	$this->hookContainer = $hookContainer;
	$this->hookRunner = new HookRunner( $hookContainer );
	$this->tidy = $tidy;
	$this->wanCache = $wanCache;
	$this->userOptionsLookup = $userOptionsLookup;
	$this->userFactory = $userFactory;
	$this->titleFormatter = $titleFormatter;
	$this->httpRequestFactory = $httpRequestFactory;
	$this->trackingCategories = $trackingCategories;
	$this->signatureValidatorFactory = $signatureValidatorFactory;
	$this->userNameUtils = $userNameUtils;

	// Bracketed external link pattern: "[" protocol + address + URL
	// characters, optional spaces, link text, "]".
	$protocolAlternation = $urlUtils->validProtocols();
	$this->mExtLinkBracketedRegex = '/\[(((?i)' . $protocolAlternation . ')' .
		self::EXT_LINK_ADDR .
		self::EXT_LINK_URL_CLASS . '*)\p{Zs}*([^\]\\x00-\\x08\\x0a-\\x1F\\x{FFFD}]*)\]/Su';

	$preprocessorSettings = [
		'cacheThreshold' => $svcOptions->get( MainConfigNames::PreprocessorCacheThreshold ),
		'disableLangConversion' => $languageConverterFactory->isConversionDisabled(),
	];
	$this->mPreprocessor = new Preprocessor_Hash( $this, $wanCache, $preprocessorSettings );

	// These steps used to be done in "::firstCallInit()"
	// (if you're chasing a reference from some old code)
	CoreParserFunctions::register(
		$this,
		new ServiceOptions( CoreParserFunctions::REGISTER_OPTIONS, $svcOptions )
	);
	CoreTagHooks::register(
		$this,
		new ServiceOptions( CoreTagHooks::REGISTER_OPTIONS, $svcOptions )
	);
	$this->initializeVariables();

	$this->hookRunner->onParserFirstCallInit( $this );

	// Placeholder title used until a parse supplies a real page.
	$this->mTitle = Title::makeTitle( NS_SPECIAL, 'Badtitle/Missing' );
}
/**
 * Drop every property on destruction, to reduce memory usage and the
 * impact of circular references.
 */
public function __destruct() {
	// The link holder array is a typed property that is only assigned by
	// clearState(), so it may still be uninitialized here.
	// @phan-suppress-next-line PhanRedundantCondition Typed property not set in constructor, may be uninitialized
	if ( isset( $this->mLinkHolders ) ) {
		// @phan-suppress-next-line PhanTypeObjectUnsetDeclaredProperty
		unset( $this->mLinkHolders );
	}

	// @phan-suppress-next-line PhanTypeSuspiciousNonTraversableForeach
	foreach ( $this as $propertyName => $unused ) {
		unset( $this->$propertyName );
	}
}
/**
 * Detach a cloned parser from its source: clear the in-parse flag, give
 * the clone its own preprocessor, and let extensions clean up through
 * the ParserCloned hook.
 */
public function __clone() {
	$this->mInParse = false;

	// The copied preprocessor is re-pointed at this clone.
	$ownPreprocessor = clone $this->mPreprocessor;
	$this->mPreprocessor = $ownPreprocessor;
	$ownPreprocessor->resetParser( $this );

	$this->hookRunner->onParserCloned( $this );
}
/**
 * Formerly performed various kinds of initialisation on the first call
 * of the parser.
 *
 * @deprecated since 1.35, this initialization is done in the constructor
 *  and manual calls to ::firstCallInit() have no effect.
 * @since 1.7
 */
public function firstCallInit() {
	// Intentionally empty: this method no longer does anything. It should
	// be hard-deprecated once the remaining calls are removed.
}
/**
 * Reset all per-parse state to its initial values, then run the
 * ParserClearState hook.
 *
 * @internal
 */
public function clearState() {
	$this->resetOutput();

	$this->mAutonumber = 0;
	$converter = $this->getContentLanguageConverter();
	$hooks = $this->getHookContainer();
	$this->mLinkHolders = new LinkHolderArray( $this, $converter, $hooks );
	$this->mLinkID = 0;

	// Forget everything known about the revision being rendered.
	$this->mRevisionTimestamp = null;
	$this->mRevisionId = null;
	$this->mRevisionUser = null;
	$this->mRevisionSize = null;
	$this->mRevisionRecordObject = null;

	$this->mVarCache = [];
	$this->mUser = null;
	$this->currentRevisionCache = null;

	$this->mStripState = new StripState( $this );

	# Clear these on every parse, T6549
	$this->mTplRedirCache = $this->mTplDomCache = [];

	// Table of contents: shown by default, position not forced.
	$this->mShowToc = true;
	$this->mForceTocPosition = false;

	// Counters that feed the limit report.
	$this->mIncludeSizes = [
		'post-expand' => 0,
		'arg' => 0,
	];
	$this->mPPNodeCount = 0;
	$this->mHighestExpansionDepth = 0;
	$this->mHeadings = [];
	$this->mDoubleUnderscores = [];
	$this->mExpensiveFunctionCount = 0;

	$this->mProfiler = new SectionProfiler();

	$this->hookRunner->onParserClearState( $this );
}
/**
 * Replace the current ParserOutput with a fresh one and register its
 * recordOption() method as a watcher on the current ParserOptions.
 *
 * @since 1.34
 */
public function resetOutput() {
	$freshOutput = new ParserOutput;
	$this->mOutput = $freshOutput;
	$this->mOptions->registerWatcher( [ $freshOutput, 'recordOption' ] );
}
/**
 * Convert wikitext to HTML.
 * Do not call this function recursively.
 *
 * The revision-related members are saved before the parse and restored
 * afterwards, so overriding them through $revid only affects this call.
 *
 * @param string $text Text we want to parse
 * @param-taint $text escapes_htmlnoent
 * @param PageReference $page
 * @param ParserOptions $options
 * @param bool $linestart
 * @param bool $clearState
 * @param int|null $revid ID of the revision being rendered. This is used to render
 *  REVISION* magic words. 0 means that any current revision will be used. Null means
 *  that {{REVISIONID}}/{{REVISIONUSER}} will be empty and {{REVISIONTIMESTAMP}} will
 *  use the current timestamp.
 * @return ParserOutput
 * @return-taint escaped
 * @since 1.10 method is public
 */
public function parse(
	$text, PageReference $page, ParserOptions $options,
	$linestart = true, $clearState = true, $revid = null
) {
	if ( $clearState ) {
		// We use U+007F DELETE to construct strip markers, so we have to make
		// sure that this character does not occur in the input text.
		$text = strtr( $text, "\x7f", "?" );
		// Never read again; presumably a scope guard that releases the
		// lock when it goes out of scope, so it must stay assigned.
		$magicScopeVariable = $this->lock();
	}
	// Strip U+0000 NULL (T159174)
	$text = str_replace( "\000", '', $text );

	$this->startParse( $page, $options, self::OT_HTML, $clearState );

	$this->currentRevisionCache = null;
	$this->mInputSize = strlen( $text );
	$this->mOutput->resetParseStartTime();

	// Remember the revision context so it can be put back before returning.
	$savedRevisionState = [
		$this->mRevisionId,
		$this->mRevisionRecordObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize
	];
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
		$this->mRevisionRecordObject = null;
		$this->mRevisionTimestamp = null;
		$this->mRevisionUser = null;
		$this->mRevisionSize = null;
	}

	$text = $this->internalParse( $text );
	$this->hookRunner->onParserAfterParse( $this, $text, $this->mStripState );

	$text = $this->internalParseHalfParsed( $text, true, $linestart );

	/**
	 * A converted title will be provided in the output object if title and
	 * content conversion are enabled, the article text does not contain
	 * a conversion-suppressing double-underscore tag, and no
	 * {{DISPLAYTITLE:...}} is present. DISPLAYTITLE takes precedence over
	 * automatic link conversion.
	 */
	$titleConversionSuppressed = $options->getDisableTitleConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| isset( $this->mDoubleUnderscores['notitleconvert'] )
		|| $this->mOutput->getDisplayTitle() !== false;
	if ( !$titleConversionSuppressed ) {
		$titleText = $this->getTargetLanguageConverter()->getConvRuleTitle();
		if ( $titleText === false ) {
			[ $nsText, $nsSeparator, $mainText ] = $this->getTargetLanguageConverter()->convertSplitTitle( $page );
			// In the future, those three pieces could be stored separately rather than joined into $titleText,
			// and OutputPage would format them and join them together, to resolve T314399.
			$titleText = self::formatPageTitle( $nsText, $nsSeparator, $mainText );
		} else {
			$titleText = Sanitizer::removeSomeTags( $titleText );
		}
		$this->mOutput->setTitleText( $titleText );
	}

	# Recording timing info. Must be called before finalizeAdaptiveCacheExpiry() and
	# makeLimitReport(), which make use of the timing info.
	$this->mOutput->recordTimeProfile();

	# Compute runtime adaptive expiry if set
	$this->mOutput->finalizeAdaptiveCacheExpiry();

	# Warn if too many heavyweight parser functions were used
	if ( $this->mExpensiveFunctionCount > $options->getExpensiveParserFunctionLimit() ) {
		$this->limitationWarn(
			'expensive-parserfunction',
			$this->mExpensiveFunctionCount,
			$options->getExpensiveParserFunctionLimit()
		);
	}

	# Information on limits, for the benefit of users who try to skirt them
	if ( $this->svcOptions->get( MainConfigNames::EnableParserLimitReporting ) ) {
		$this->makeLimitReport( $this->mOptions, $this->mOutput );
	}

	$this->mOutput->setFromParserOptions( $options );

	$this->mOutput->setRawText( $text );

	// Put the saved revision context back and drop per-parse caches.
	[
		$this->mRevisionId,
		$this->mRevisionRecordObject,
		$this->mRevisionTimestamp,
		$this->mRevisionUser,
		$this->mRevisionSize
	] = $savedRevisionState;
	$this->mInputSize = false;
	$this->currentRevisionCache = null;

	return $this->mOutput;
}
/**
 * Set the limit report data in the given ParserOutput: timing, the
 * preprocessor and expansion counters paired with their configured
 * maxima, strip state figures, the ten slowest profiled sections and
 * cache metadata.
 *
 * @internal
 * @param ParserOptions $parserOptions Source of the configured limits
 * @param ParserOutput $parserOutput Receives the report data
 */
public function makeLimitReport(
	ParserOptions $parserOptions, ParserOutput $parserOutput
) {
	$maxIncludeSize = $parserOptions->getMaxIncludeSize();

	// CPU time is only reported when a value was recorded.
	$cpuTime = $parserOutput->getTimeProfile( 'cpu' );
	if ( $cpuTime !== null ) {
		$parserOutput->setLimitReportData( 'limitreport-cputime', sprintf( "%.3f", $cpuTime ) );
	}

	// Unlike CPU time, wall time is formatted unconditionally.
	$wallTime = $parserOutput->getTimeProfile( 'wall' );
	$parserOutput->setLimitReportData( 'limitreport-walltime', sprintf( "%.3f", $wallTime ) );

	// Each entry is a [ value used, configured maximum ] pair.
	$usageAgainstLimit = [
		'limitreport-ppvisitednodes' =>
			[ $this->mPPNodeCount, $parserOptions->getMaxPPNodeCount() ],
		'limitreport-postexpandincludesize' =>
			[ $this->mIncludeSizes['post-expand'], $maxIncludeSize ],
		'limitreport-templateargumentsize' =>
			[ $this->mIncludeSizes['arg'], $maxIncludeSize ],
		'limitreport-expansiondepth' =>
			[ $this->mHighestExpansionDepth, $parserOptions->getMaxPPExpandDepth() ],
		'limitreport-expensivefunctioncount' =>
			[ $this->mExpensiveFunctionCount, $parserOptions->getExpensiveParserFunctionLimit() ],
	];
	foreach ( $usageAgainstLimit as $reportKey => $pair ) {
		$parserOutput->setLimitReportData( $reportKey, $pair );
	}

	foreach ( $this->mStripState->getLimitReport() as [ $key, $value ] ) {
		$parserOutput->setLimitReportData( $key, $value );
	}

	$this->hookRunner->onParserLimitReportPrepare( $this, $parserOutput );

	// Add on template profiling data in human/machine readable way
	$dataByFunc = $this->mProfiler->getFunctionStats();
	// Slowest sections first
	uasort( $dataByFunc, static fn ( $a, $b ) => $b['real'] <=> $a['real'] );
	$profileReport = array_values( array_map(
		static function ( $item ) {
			return sprintf(
				"%6.2f%% %8.3f %6d %s",
				$item['%real'],
				$item['real'],
				$item['calls'],
				htmlspecialchars( $item['name'] )
			);
		},
		array_slice( $dataByFunc, 0, 10 )
	) );
	$parserOutput->setLimitReportData( 'limitreport-timingprofile', $profileReport );

	// Add other cache related metadata
	if ( $this->svcOptions->get( MainConfigNames::ShowHostnames ) ) {
		$parserOutput->setLimitReportData( 'cachereport-origin', wfHostname() );
	}
	$parserOutput->setLimitReportData( 'cachereport-timestamp', $parserOutput->getCacheTime() );
	$parserOutput->setLimitReportData( 'cachereport-ttl', $parserOutput->getCacheExpiry() );
	$parserOutput->setLimitReportData( 'cachereport-transientcontent', $parserOutput->hasReducedExpiry() );
}
/**
 * Half-parse wikitext to half-parsed HTML. This recursive parser entry point
 * can be called from an extension tag hook.
 *
 * The output of this function IS NOT SAFE PARSED HTML; it is "half-parsed"
 * instead, which means that lists and links have not been fully parsed yet,
 * and strip markers are still present.
 *
 * Use recursiveTagParseFully() to fully parse wikitext to output-safe HTML.
 *
 * Use this function if you're a parser tag hook and you want to parse
 * wikitext before or after applying additional transformations, and you
 * intend to *return the result as hook output*, which will cause it to go
 * through the rest of parsing process automatically.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param PPFrame|false $frame The frame to use for expanding any template variables
 * @return string UNSAFE half-parsed HTML
 * @return-taint escaped
 * @since 1.8
 */
public function recursiveTagParse( $text, $frame = false ) {
	// Thin wrapper: internalParse() with its second argument fixed to false.
	return $this->internalParse( $text, false, $frame );
}
/**
 * Fully parse wikitext to fully parsed HTML. This recursive parser entry
 * point can be called from an extension tag hook.
 *
 * The output of this function is fully-parsed HTML that is safe for output.
 * If you're a parser tag hook, you might want to use recursiveTagParse()
 * instead.
 *
 * If $frame is not provided, then template variables (e.g., {{{1}}}) within
 * $text are not expanded
 *
 * @since 1.25
 *
 * @param string $text Text extension wants to have parsed
 * @param-taint $text escapes_htmlnoent
 * @param PPFrame|false $frame The frame to use for expanding any template variables
 * @return string Fully parsed HTML
 * @return-taint escaped
 */
public function recursiveTagParseFully( $text, $frame = false ) {
	// Two stages: half-parse first, then finish the half-parsed result.
	$halfParsed = $this->recursiveTagParse( $text, $frame );
	return $this->internalParseHalfParsed( $halfParsed, false );
}
/**
 * Needed by Parsoid/PHP to ensure all the hooks for extensions
 * are run in the right order. The primary differences between this
 * and recursiveTagParseFully are:
 * (a) absence of $frame
 * (b) passing true to internalParseHalfParsed so all hooks are run
 * (c) running 'ParserAfterParse' hook at the same point in the parsing
 *     pipeline when parse() does it. This kinda mimics Parsoid/JS behavior
 *     where exttags are processed by the M/w API.
 *
 * This is a temporary convenience method and will go away as we proceed
 * further with Parsoid <-> Parser.php integration.
 *
 * @internal
 * @deprecated
 * @param string $text Wikitext source of the extension
 * @return string
 * @return-taint escaped
 */
public function parseExtensionTagAsTopLevelDoc( $text ) {
	$halfParsed = $this->recursiveTagParse( $text );
	// The hook receives the half-parsed text in a variable, between the
	// two parsing stages, exactly as parse() does.
	$this->hookRunner->onParserAfterParse( $this, $halfParsed, $this->mStripState );
	return $this->internalParseHalfParsed( $halfParsed, true );
}
/**
 * Expand templates and variables in the text, producing valid, static
 * wikitext. Comments are removed as well.
 * This function must not be called recursively.
 *
 * @param string $text
 * @param ?PageReference $page
 * @param ParserOptions $options
 * @param int|null $revid Stored as the revision ID when not null
 * @param PPFrame|false $frame
 * @return mixed|string
 * @since 1.8
 */
public function preprocess(
	$text,
	?PageReference $page,
	ParserOptions $options,
	$revid = null,
	$frame = false
) {
	// The value returned by lock() is kept in a local for the whole call.
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PREPROCESS, true );
	if ( $revid !== null ) {
		$this->mRevisionId = $revid;
	}
	$this->hookRunner->onParserBeforePreprocess( $this, $text, $this->mStripState );
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
/**
 * Recursive preprocessing entry point, callable from an extension tag hook.
 * Expands variables in $text and then unstrips the result.
 *
 * @param string $text Text to be expanded
 * @param PPFrame|false $frame The frame to use for expanding any template variables
 * @return string
 * @since 1.19
 */
public function recursivePreprocess( $text, $frame = false ) {
	$expanded = $this->replaceVariables( $text, $frame );
	return $this->mStripState->unstripBoth( $expanded );
}
/**
 * Process the wikitext for the "?preload=" feature. (T7210)
 *
 * "<noinclude>", "<includeonly>" etc. are parsed as for template
 * transclusion; comments, templates, arguments, tags hooks and parser
 * functions are untouched.
 *
 * @param string $text
 * @param PageReference $page
 * @param ParserOptions $options
 * @param array $params Parameters substituted into $text via RawMessage
 * @return string
 * @since 1.17
 */
public function getPreloadText( $text, PageReference $page, ParserOptions $options, $params = [] ) {
	$text = ( new RawMessage( $text ) )->params( $params )->plain();

	# Parser (re)initialisation
	$magicScopeVariable = $this->lock();
	$this->startParse( $page, $options, self::OT_PLAIN, true );

	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$expandFlags = PPFrame::NO_ARGS | PPFrame::NO_TEMPLATES;
	$expanded = $this->getPreprocessor()->newFrame()->expand( $dom, $expandFlags );
	return $this->mStripState->unstripBoth( $expanded );
}
/**
 * Set the user for the current parse.
 * This should only be used when doing pre-save transform.
 *
 * @param UserIdentity|null $user User identity, or null to reset
 * @since 1.17
 */
public function setUser( ?UserIdentity $user ) {
	$this->mUser = $user;
}
/**
 * Set the context title. Delegates to setPage().
 *
 * @deprecated since 1.37, use setPage() instead.
 * @param Title|null $t
 * @since 1.12
 */
public function setTitle( ?Title $t = null ) {
	$this->setPage( $t );
}
/**
 * Get the Title currently stored as the parsing context.
 *
 * @since 1.6
 * @deprecated since 1.37, use getPage instead.
 * @return Title
 */
public function getTitle(): Title {
	return $this->mTitle;
}
/**
 * Set the page used as context for parsing, e.g. when resolving relative
 * subpage links.
 *
 * @since 1.37
 * @param ?PageReference $t Page to use; a "Badtitle/Parser" special page
 *   title is substituted when this is empty
 */
public function setPage( ?PageReference $t = null ) {
	// For now (early 1.37 alpha), always convert to Title, so we don't have
	// to do it over and over again in other methods. Eventually, we will no
	// longer need to have a Title instance internally.
	$title = $t
		? Title::newFromPageReference( $t )
		: Title::makeTitle( NS_SPECIAL, 'Badtitle/Parser' );

	# Strip the fragment to avoid various odd effects
	$this->mTitle = $title->hasFragment()
		? $title->createFragmentTarget( '' )
		: $title;
}
/**
 * Returns the page used as context for parsing, e.g. when resolving relative
 * subpage links.
 *
 * @since 1.37
 * @return ?PageReference Null if no page is set (deprecated since 1.34)
 */
public function getPage(): ?PageReference {
	if ( !$this->mTitle->isSpecial( 'Badtitle' ) ) {
		return $this->mTitle;
	}

	// A Badtitle special page whose subpage is 'Missing' is reported as
	// "no page set".
	[ , $subPage ] = $this->specialPageFactory->resolveAlias( $this->mTitle->getDBkey() );
	if ( $subPage !== 'Missing' ) {
		return $this->mTitle;
	}

	wfDeprecated( __METHOD__ . ' without a Title set', '1.34' );
	return null;
}
/**
 * Get the current output type.
 *
 * @return int One of the Parser::OT_... constants
 * @since 1.35
 */
public function getOutputType(): int {
	return $this->mOutputType;
}
/**
 * Set the output type and rebuild the $this->ot shortcut flags from it.
 *
 * @param int $ot One of the Parser::OT_… constants
 * @since 1.8
 */
public function setOutputType( $ot ): void {
	$this->mOutputType = $ot;

	# Shortcut alias: one boolean per output type
	$typesByAlias = [
		'html' => self::OT_HTML,
		'wiki' => self::OT_WIKI,
		'pre' => self::OT_PREPROCESS,
		'plain' => self::OT_PLAIN,
	];
	$flags = [];
	foreach ( $typesByAlias as $alias => $type ) {
		$flags[$alias] = $ot == $type;
	}
	$this->ot = $flags;
}
/**
 * Accessor/mutator for the output type.
 *
 * When a new value is given it is applied through setOutputType(), so the
 * $this->ot shortcut flags stay in sync with the stored output type.
 *
 * @param int|null $x New value or null to just get the current one
 * @return int The output type that was set before this call
 * @deprecated since 1.35, use getOutputType()/setOutputType()
 */
public function OutputType( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$previous = $this->mOutputType;
	if ( $x !== null ) {
		// Writing mOutputType directly would leave $this->ot stale.
		$this->setOutputType( $x );
	}
	return $previous;
}
/**
 * Get the ParserOutput of the current parse.
 * Calling this before the output is initialized is deprecated and yields null.
 *
 * @return ParserOutput
 * @since 1.14
 */
public function getOutput() {
	// @phan-suppress-next-line PhanRedundantCondition False positive, see https://github.com/phan/phan/issues/4720
	if ( isset( $this->mOutput ) ) {
		return $this->mOutput;
	}
	wfDeprecated( __METHOD__ . ' before initialization', '1.42' );
	// @phan-suppress-next-line PhanTypeMismatchReturnProbablyReal We don’t want to tell anyone we’re doing this
	return null;
}
/**
 * Get the ParserOptions object currently stored on the parser.
 *
 * @return ParserOptions|null
 * @since 1.6
 */
public function getOptions() {
	return $this->mOptions;
}
/**
 * Replace the ParserOptions object used by the parser.
 *
 * @param ParserOptions $options The new parser options
 * @since 1.35
 */
public function setOptions( ParserOptions $options ): void {
	$this->mOptions = $options;
}
/**
 * Combined getter/setter for the ParserOptions object.
 *
 * @param ParserOptions|null $x New value or null to just get the current one
 * @return ParserOptions Current ParserOptions object
 * @deprecated since 1.35, use getOptions() / setOptions()
 */
public function Options( $x = null ) {
	wfDeprecated( __METHOD__, '1.35' );
	$result = wfSetVar( $this->mOptions, $x );
	return $result;
}
/**
 * Return the current link ID and advance the counter by one.
 *
 * @return int
 * @since 1.14
 */
public function nextLinkID() {
	$current = $this->mLinkID++;
	return $current;
}
/**
 * Set the link ID counter to the given value.
 *
 * @param int $id
 * @since 1.8
 */
public function setLinkID( $id ) {
	$this->mLinkID = $id;
}
/**
 * Get a language object for use in parser functions such as {{FORMATNUM:}}.
 * Emits a deprecation warning and forwards to getTargetLanguage().
 *
 * @return Language
 * @since 1.7
 * @deprecated since 1.40; use ::getTargetLanguage() instead.
 */
public function getFunctionLang() {
	wfDeprecated( __METHOD__, '1.40' );
	return $this->getTargetLanguage();
}
/**
 * Get the target language for the content being parsed. This is usually the
 * language that the content is in.
 *
 * Resolution order: the target language set in the parser options, then the
 * user language for interface messages, then the page language of the title.
 *
 * @since 1.19
 *
 * @return Language
 */
public function getTargetLanguage() {
	$explicit = $this->mOptions->getTargetLanguage();
	if ( $explicit !== null ) {
		return $explicit;
	}

	if ( $this->mOptions->getInterfaceMessage() ) {
		return $this->mOptions->getUserLangObj();
	}

	return $this->getTitle()->getPageLanguage();
}
/**
 * Get the user set on the Parser if there is one, and otherwise the user
 * from the ParserOptions object.
 *
 * @since 1.36
 * @return UserIdentity
 */
public function getUserIdentity(): UserIdentity {
	if ( $this->mUser !== null ) {
		return $this->mUser;
	}
	return $this->getOptions()->getUserIdentity();
}
/**
 * Get the preprocessor object held by this parser.
 *
 * @return Preprocessor
 * @since 1.12.0
 */
public function getPreprocessor() {
	return $this->mPreprocessor;
}
/**
 * Get a LinkRenderer instance to make links with.
 * The instance is created on first use and reused afterwards.
 *
 * @since 1.28
 * @return LinkRenderer
 */
public function getLinkRenderer() {
	if ( $this->mLinkRenderer ) {
		return $this->mLinkRenderer;
	}

	// XXX We make the LinkRenderer with current options and then cache it forever
	$this->mLinkRenderer = $this->linkRendererFactory->create();
	return $this->mLinkRenderer;
}
/**
 * Get the MagicWordFactory used by this Parser.
 *
 * @since 1.32
 * @return MagicWordFactory
 */
public function getMagicWordFactory() {
	return $this->magicWordFactory;
}
/**
 * Get the content language used by this Parser.
 *
 * @since 1.32
 * @return Language
 */
public function getContentLanguage() {
	return $this->contLang;
}
/**
 * Get the BadFileLookup instance used by this Parser.
 *
 * @since 1.35
 * @return BadFileLookup
 */
public function getBadFileLookup() {
	return $this->badFileLookup;
}
/**
 * Replaces all occurrences of HTML-style comments and the given tags
 * in the text with a random marker and returns the new text. The output
 * parameter $matches will be an associative array filled with data in
 * the form:
 *
 * @code
 *   'UNIQ-xxxxx' => [
 *     'element',
 *     'tag content',
 *     [ 'param' => 'x' ],
 *     '<element param="x">tag content</element>' ]
 * @endcode
 *
 * Element names are matched literally and case-insensitively. An element
 * without a closing tag runs to the end of the text.
 *
 * @param string[] $elements List of element names. Comments are always extracted.
 * @param string $text Source text string.
 * @param array[] &$matches Out parameter, Array: extracted tags
 * @return string Stripped text
 */
public static function extractTagsAndParams( array $elements, $text, &$matches ) {
	// Marker counter shared by all calls, so markers never repeat within a request.
	static $n = 1;
	$stripped = '';
	$matches = [];

	// Quote the element names so that regex metacharacters (or the "/"
	// delimiter) in a name cannot alter or break the pattern.
	$quoted = array_map(
		static function ( $element ) {
			return preg_quote( $element, '/' );
		},
		$elements
	);
	// With no elements, an empty alternation would match empty-named tags
	// such as "<>". "(?!)" never matches, so only comments are extracted.
	$taglist = $quoted ? implode( '|', $quoted ) : '(?!)';
	$start = "/<($taglist)(\\s+[^>]*?|\\s*?)(\/?>)|<(!--)/i";

	while ( $text != '' ) {
		$p = preg_split( $start, $text, 2, PREG_SPLIT_DELIM_CAPTURE );
		$stripped .= $p[0];
		if ( count( $p ) < 5 ) {
			// No further tag or comment opener in the remaining text
			break;
		}
		if ( count( $p ) > 5 ) {
			# comment
			$element = $p[4];
			$attributes = '';
			$close = '';
			$inside = $p[5];
		} else {
			# tag
			[ , $element, $attributes, $close, $inside ] = $p;
		}

		$marker = self::MARKER_PREFIX . "-$element-" . sprintf( '%08X', $n++ ) . self::MARKER_SUFFIX;
		$stripped .= $marker;

		if ( $close === '/>' ) {
			# Empty element tag, <tag />
			$content = null;
			$text = $inside;
			$tail = null;
		} else {
			if ( $element === '!--' ) {
				$end = '/(-->)/';
			} else {
				// $element is the name as written in the text; quote it as well.
				$end = '/(<\\/' . preg_quote( $element, '/' ) . '\\s*>)/i';
			}
			$q = preg_split( $end, $inside, 2, PREG_SPLIT_DELIM_CAPTURE );
			$content = $q[0];
			if ( count( $q ) < 3 ) {
				# No end tag -- let it run out to the end of the text.
				$tail = '';
				$text = '';
			} else {
				[ , $tail, $text ] = $q;
			}
		}

		$matches[$marker] = [ $element,
			$content,
			Sanitizer::decodeTagAttributes( $attributes ),
			"<$element$attributes$close$content$tail" ];
	}
	return $stripped;
}
/**
 * Get the list of strippable XML-like elements.
 *
 * @return array
 */
public function getStripList() {
	return $this->mStripList;
}
/**
 * Get the StripState object held by this parser.
 *
 * @return StripState
 * @since 1.34
 */
public function getStripState() {
	return $this->mStripState;
}
/**
 * Add an item to the strip state.
 * Returns the unique tag which must be inserted into the stripped text.
 * The tag will be replaced with the original text in unstrip().
 *
 * @param string $text
 *
 * @return string
 */
public function insertStripItem( $text ) {
	// Take the current index for this marker, then advance the counter.
	$index = $this->mMarkerIndex++;
	$marker = self::MARKER_PREFIX . "-item-{$index}-" . self::MARKER_SUFFIX;
	$this->mStripState->addGeneral( $marker, $text );
	return $marker;
}
/**
 * Parse the wiki syntax used to render tables.
 *
 * The text is processed line by line. Per-table state (open cell, last cell
 * tag, open row, pending row attributes, whether a row was opened) is kept
 * on parallel stacks, with one entry per currently open table.
 *
 * Tables still open at the end of the text are closed automatically, and
 * the open cell is closed with the tag that opened it (td, th or caption).
 *
 * @param string $text
 * @return string
 */
private function handleTables( $text ) {
	$lines = StringUtils::explode( "\n", $text );
	$out = '';
	$td_history = []; # Is currently a td tag open?
	$last_tag_history = []; # Save history of last tag activated (td, th or caption)
	$tr_history = []; # Is currently a tr tag open?
	$tr_attributes = []; # history of tr attributes
	$has_opened_tr = []; # Did this table open a <tr> element?
	$indent_level = 0; # indent level of the table

	foreach ( $lines as $outLine ) {
		$line = trim( $outLine );

		if ( $line === '' ) { # empty line, go to next line
			$out .= $outLine . "\n";
			continue;
		}

		$first_character = $line[0];
		$first_two = substr( $line, 0, 2 );
		$matches = [];

		if ( preg_match( '/^(:*)\s*\{\|(.*)$/', $line, $matches ) ) {
			# First check if we are starting a new table
			$indent_level = strlen( $matches[1] );

			$attributes = $this->mStripState->unstripBoth( $matches[2] );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'table' );

			$outLine = str_repeat( '<dl><dd>', $indent_level ) . "<table{$attributes}>";
			$td_history[] = false;
			$last_tag_history[] = '';
			$tr_history[] = false;
			$tr_attributes[] = '';
			$has_opened_tr[] = false;
		} elseif ( count( $td_history ) == 0 ) {
			# Don't do any of the following
			$out .= $outLine . "\n";
			continue;
		} elseif ( $first_two === '|}' ) {
			# We are ending a table
			$line = '</table>' . substr( $line, 2 );
			$last_tag = array_pop( $last_tag_history );

			if ( !array_pop( $has_opened_tr ) ) {
				$line = "<tr><td></td></tr>{$line}";
			}

			if ( array_pop( $tr_history ) ) {
				$line = "</tr>{$line}";
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}
			array_pop( $tr_attributes );
			if ( $indent_level > 0 ) {
				$outLine = rtrim( $line ) . str_repeat( '</dd></dl>', $indent_level );
			} else {
				$outLine = $line;
			}
		} elseif ( $first_two === '|-' ) {
			# Now we have a table row
			$line = preg_replace( '#^\|-+#', '', $line );

			# Whats after the tag is now only attributes
			$attributes = $this->mStripState->unstripBoth( $line );
			$attributes = Sanitizer::fixTagAttributes( $attributes, 'tr' );
			array_pop( $tr_attributes );
			$tr_attributes[] = $attributes;

			$line = '';
			$last_tag = array_pop( $last_tag_history );
			array_pop( $has_opened_tr );
			$has_opened_tr[] = true;

			if ( array_pop( $tr_history ) ) {
				$line = '</tr>';
			}

			if ( array_pop( $td_history ) ) {
				$line = "</{$last_tag}>{$line}";
			}

			$outLine = $line;
			$tr_history[] = false;
			$td_history[] = false;
			$last_tag_history[] = '';
		} elseif ( $first_character === '|'
			|| $first_character === '!'
			|| $first_two === '|+'
		) {
			# This might be cell elements, td, th or captions
			if ( $first_two === '|+' ) {
				$first_character = '+';
				$line = substr( $line, 2 );
			} else {
				$line = substr( $line, 1 );
			}

			// Implies both are valid for table headings.
			if ( $first_character === '!' ) {
				$line = StringUtils::replaceMarkup( '!!', '||', $line );
			}

			# Split up multiple cells on the same line.
			# FIXME : This can result in improper nesting of tags processed
			# by earlier parser steps.
			$cells = explode( '||', $line );

			$outLine = '';

			# Loop through each table cell
			foreach ( $cells as $cell ) {
				$previous = '';
				if ( $first_character !== '+' ) {
					$tr_after = array_pop( $tr_attributes );
					if ( !array_pop( $tr_history ) ) {
						$previous = "<tr{$tr_after}>\n";
					}
					$tr_history[] = true;
					$tr_attributes[] = '';
					array_pop( $has_opened_tr );
					$has_opened_tr[] = true;
				}

				$last_tag = array_pop( $last_tag_history );

				if ( array_pop( $td_history ) ) {
					$previous = "</{$last_tag}>\n{$previous}";
				}

				if ( $first_character === '|' ) {
					$last_tag = 'td';
				} elseif ( $first_character === '!' ) {
					$last_tag = 'th';
				} elseif ( $first_character === '+' ) {
					$last_tag = 'caption';
				} else {
					$last_tag = '';
				}

				$last_tag_history[] = $last_tag;

				# A cell could contain both parameters and data
				$cell_data = explode( '|', $cell, 2 );

				# T2553: Note that a '|' inside an invalid link should not
				# be mistaken as delimiting cell parameters
				# Bug T153140: Neither should language converter markup.
				if ( preg_match( '/\[\[|-\{/', $cell_data[0] ) === 1 ) {
					$cell = "{$previous}<{$last_tag}>" . trim( $cell );
				} elseif ( count( $cell_data ) == 1 ) {
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$last_tag}>" . trim( $cell_data[0] );
				} else {
					$attributes = $this->mStripState->unstripBoth( $cell_data[0] );
					$attributes = Sanitizer::fixTagAttributes( $attributes, $last_tag );
					// Whitespace in cells is trimmed
					$cell = "{$previous}<{$last_tag}{$attributes}>" . trim( $cell_data[1] );
				}

				$outLine .= $cell;
				$td_history[] = true;
			}
		}
		$out .= $outLine . "\n";
	}

	# Closing open td, tr && table
	while ( count( $td_history ) > 0 ) {
		// $last_tag_history is kept in step with $td_history, so pop both.
		// Close the open cell with the tag that opened it rather than
		// always assuming a <td>.
		$last_tag = array_pop( $last_tag_history );
		if ( array_pop( $td_history ) ) {
			$out .= "</{$last_tag}>\n";
		}
		if ( array_pop( $tr_history ) ) {
			$out .= "</tr>\n";
		}
		if ( !array_pop( $has_opened_tr ) ) {
			$out .= "<tr><td></td></tr>\n";
		}

		$out .= "</table>\n";
	}

	# Remove trailing line-ending (b/c)
	if ( substr( $out, -1 ) === "\n" ) {
		$out = substr( $out, 0, -1 );
	}

	# special case: don't return empty table
	if ( $out === "<table>\n<tr><td></td></tr>\n</table>" ) {
		$out = '';
	}

	return $out;
}
1574 * Helper function for parse() that transforms wiki markup into half-parsed
1575 * HTML. Only called for $mOutputType == self::OT_HTML.
1577 * @internal
1579 * @param string $text The text to parse
1580 * @param-taint $text escapes_html
1581 * @param bool $isMain Whether this is being called from the main parse() function
1582 * @param PPFrame|false $frame A pre-processor frame
1584 * @return string
1586 public function internalParse( $text, $isMain = true, $frame = false ) {
// Pipeline order: variable/template expansion, HTML tag sanitising, tables,
// horizontal rules, double underscores, headings, internal links, quotes,
// external links, magic links, and finally finalizeHeadings().
// The unmodified input is kept because finalizeHeadings() receives it below.
1587 $origText = $text;
1589 # Hook to suspend the parser in this state
// A false return from the hook stops processing and returns the current $text.
1590 if ( !$this->hookRunner->onParserBeforeInternalParse( $this, $text, $this->mStripState ) ) {
1591 return $text;
1594 # if $frame is provided, then use $frame for replacing any variables
1595 if ( $frame ) {
1596 # use frame depth to infer how include/noinclude tags should be handled
1597 # depth=0 means this is the top-level document; otherwise it's an included document
1598 if ( !$frame->depth ) {
1599 $flag = 0;
1600 } else {
1601 $flag = Preprocessor::DOM_FOR_INCLUSION;
1603 $dom = $this->preprocessToDom( $text, $flag );
1604 $text = $frame->expand( $dom );
1605 } else {
1606 # if $frame is not provided, then use old-style replaceVariables
1607 $text = $this->replaceVariables( $text );
1610 $text = Sanitizer::internalRemoveHtmlTags(
1611 $text,
1612 // Callback from the Sanitizer for expanding items found in
1613 // HTML attribute values, so they can be safely tested and escaped.
1614 function ( &$text, $frame = false ) {
1615 $text = $this->replaceVariables( $text, $frame );
1616 $text = $this->mStripState->unstripBoth( $text );
1618 false,
1622 $this->hookRunner->onInternalParseBeforeLinks( $this, $text, $this->mStripState );
1624 # Tables need to come after variable replacement for things to work
1625 # properly; putting them before other transformations should keep
1626 # exciting things like link expansions from showing up in surprising
1627 # places.
1628 $text = $this->handleTables( $text );
// Four or more dashes at the start of the text or of a line become <hr />.
1630 $text = preg_replace( '/(^|\n)-----*/', '\\1<hr />', $text );
1632 $text = $this->handleDoubleUnderscore( $text );
1634 $text = $this->handleHeadings( $text );
1635 $text = $this->handleInternalLinks( $text );
1636 $text = $this->handleAllQuotes( $text );
1637 $text = $this->handleExternalLinks( $text );
1639 # handleInternalLinks may sometimes leave behind
1640 # absolute URLs, which have to be masked to hide them from handleExternalLinks
// The masking marker is removed here, after handleExternalLinks() has run.
1641 $text = str_replace( self::MARKER_PREFIX . 'NOPARSE', '', $text );
1643 $text = $this->handleMagicLinks( $text );
1644 $text = $this->finalizeHeadings( $text, $origText, $isMain );
1646 return $text;
/**
 * Shorthand for getting a Language Converter for the target language.
 *
 * @since public since 1.38
 * @return ILanguageConverter
 */
public function getTargetLanguageConverter(): ILanguageConverter {
	$targetLanguage = $this->getTargetLanguage();
	return $this->languageConverterFactory->getLanguageConverter( $targetLanguage );
}
/**
 * Shorthand for getting a Language Converter for the content language.
 *
 * @return ILanguageConverter
 */
private function getContentLanguageConverter(): ILanguageConverter {
	$contentLanguage = $this->getContentLanguage();
	return $this->languageConverterFactory->getLanguageConverter( $contentLanguage );
}
/**
 * Get a HookContainer capable of returning metadata about hooks or running
 * extension hooks.
 *
 * @since 1.35
 * @return HookContainer
 */
protected function getHookContainer() {
	return $this->hookContainer;
}
/**
 * Get a HookRunner for calling core hooks.
 *
 * @internal This is for use by core only. Hook interfaces may be removed
 *   without notice.
 * @since 1.35
 * @return HookRunner
 */
protected function getHookRunner() {
	return $this->hookRunner;
}
/**
 * Helper function for parse() that transforms half-parsed HTML into fully
 * parsed HTML.
 *
 * @param string $text
 * @param bool $isMain Whether to run the ParserAfterTidy hook
 * @param bool $linestart Passed through to BlockLevelPass::doBlockLevels()
 * @return string
 */
private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
	$text = $this->mStripState->unstripGeneral( $text );

	$text = BlockLevelPass::doBlockLevels( $text, $linestart );

	$this->replaceLinkHoldersPrivate( $text );

	/**
	 * The input doesn't get language converted if
	 * a) It's disabled
	 * b) Content isn't converted
	 * c) It's a conversion table
	 * d) it is an interface message (which is in the user language)
	 */
	$skipConversion = $this->mOptions->getDisableContentConversion()
		|| isset( $this->mDoubleUnderscores['nocontentconvert'] )
		|| $this->mOptions->getInterfaceMessage();

	$converter = null;
	if ( !$skipConversion ) {
		# The position of the convert() call should not be changed. it
		# assumes that the links are all replaced and the only thing left
		# is the <nowiki> mark.
		$converter = $this->getTargetLanguageConverter();
		$text = $converter->convert( $text );
		// TOC will be converted below.
	}

	// Convert the TOC. This is done *after* the main text
	// so that all the editor-defined conversion rules (by convention
	// defined at the start of the article) are applied to the TOC
	self::localizeTOC(
		$this->mOutput->getTOCData(),
		$this->getTargetLanguage(),
		$converter // null if conversion is to be suppressed.
	);

	// The output language is the converter's preferred variant when
	// conversion ran, and the plain target language otherwise.
	$outputLanguage = $converter
		? new Bcp47CodeValue( LanguageCode::bcp47( $converter->getPreferredVariant() ) )
		: $this->getTargetLanguage();
	$this->mOutput->setLanguage( $outputLanguage );

	$text = $this->mStripState->unstripNoWiki( $text );

	$text = $this->mStripState->unstripGeneral( $text );

	$text = $this->tidy->tidy( $text, [ Sanitizer::class, 'armorFrenchSpaces' ] );

	if ( $isMain ) {
		$this->hookRunner->onParserAfterTidy( $this, $text );
	}

	return $text;
}
1760 * Replace special strings like "ISBN xxx" and "RFC xxx" with
1761 * magic external links.
1763 * DML
1765 * @param string $text
1767 * @return string
1769 private function handleMagicLinks( $text ) {
// Builds one alternation pattern and hands every match to magicLinkCallback().
// The first two alternatives (anchors and other HTML elements) are matched so
// that the callback can skip them.
1770 $prots = $this->urlUtils->validAbsoluteProtocols();
1771 $urlChar = self::EXT_LINK_URL_CLASS;
1772 $addr = self::EXT_LINK_ADDR;
1773 $space = self::SPACE_NOT_NL; # non-newline space
1774 $spdash = "(?:-|$space)"; # a dash or a non-newline space
1775 $spaces = "$space++"; # possessive match of 1 or more spaces
// The pattern starts as a single-quoted string and continues as a double-quoted
// one, so only the second part interpolates the variables defined above.
1776 $text = preg_replace_callback(
1777 '!(?: # Start cases
1778 (<a[ \t\r\n>].*?</a>) | # m[1]: Skip link text
1779 (<.*?>) | # m[2]: Skip stuff inside HTML elements' . "
1780 (\b # m[3]: Free external links
1781 (?i:$prots)
1782 ($addr$urlChar*) # m[4]: Post-protocol path
1784 \b(?:RFC|PMID) $spaces # m[5]: RFC or PMID, capture number
1785 ([0-9]+)\b |
1786 \bISBN $spaces ( # m[6]: ISBN, capture number
1787 (?: 97[89] $spdash? )? # optional 13-digit ISBN prefix
1788 (?: [0-9] $spdash? ){9} # 9 digits with opt. delimiters
1789 [0-9Xx] # check digit
1791 )!xu",
1792 [ $this, 'magicLinkCallback' ],
1793 $text
1795 return $text;
1799 * @param array $m
1800 * @return string HTML
1802 private function magicLinkCallback( array $m ) {
// Callback for the pattern built in handleMagicLinks(). $m holds that
// pattern's capture groups: m[1] anchor, m[2] other HTML element, m[3]/m[4]
// free external link, m[5] RFC/PMID number, m[6] ISBN.
// Anything not turned into a link is returned unchanged as $m[0].
1803 if ( isset( $m[1] ) && $m[1] !== '' ) {
1804 # Skip anchor
1805 return $m[0];
1806 } elseif ( isset( $m[2] ) && $m[2] !== '' ) {
1807 # Skip HTML element
1808 return $m[0];
1809 } elseif ( isset( $m[3] ) && $m[3] !== '' ) {
1810 # Free external link
// The length of the post-protocol part is passed as $numPostProto.
1811 return $this->makeFreeExternalLink( $m[0], strlen( $m[4] ) );
1812 } elseif ( isset( $m[5] ) && $m[5] !== '' ) {
1813 # RFC or PMID
// m[5] is shared by both keywords, so the keyword is read from the start of the match.
1814 if ( substr( $m[0], 0, 3 ) === 'RFC' ) {
1815 if ( !$this->mOptions->getMagicRFCLinks() ) {
1816 return $m[0];
1818 $keyword = 'RFC';
1819 $urlmsg = 'rfcurl';
1820 $cssClass = 'mw-magiclink-rfc';
1821 $trackingCat = 'magiclink-tracking-rfc';
1822 $id = $m[5];
1823 } elseif ( substr( $m[0], 0, 4 ) === 'PMID' ) {
1824 if ( !$this->mOptions->getMagicPMIDLinks() ) {
1825 return $m[0];
1827 $keyword = 'PMID';
1828 $urlmsg = 'pubmedurl';
1829 $cssClass = 'mw-magiclink-pmid';
1830 $trackingCat = 'magiclink-tracking-pmid';
1831 $id = $m[5];
1832 } else {
1833 // Should never happen
1834 throw new UnexpectedValueException( __METHOD__ . ': unrecognised match type "' .
1835 substr( $m[0], 0, 20 ) . '"' );
// The link URL comes from the interface message named by $urlmsg, in the content language.
1837 $url = wfMessage( $urlmsg, $id )->inContentLanguage()->text();
1838 $this->addTrackingCategory( $trackingCat );
1839 return $this->getLinkRenderer()->makeExternalLink(
1840 $url,
1841 "{$keyword} {$id}",
1842 $this->getTitle(),
1843 $cssClass,
// An ISBN match is linked only when the magic ISBN option is enabled;
// otherwise control reaches the final else and the match is returned as is.
1846 } elseif ( isset( $m[6] ) && $m[6] !== ''
1847 && $this->mOptions->getMagicISBNLinks()
1849 # ISBN
1850 $isbn = $m[6];
1851 $space = self::SPACE_NOT_NL; # non-newline space
1852 $isbn = preg_replace( "/$space/", ' ', $isbn );
// $num is the Booksources subpage: dashes and spaces removed, 'x' upper-cased.
1853 $num = strtr( $isbn, [
1854 '-' => '',
1855 ' ' => '',
1856 'x' => 'X',
1857 ] );
1858 $this->addTrackingCategory( 'magiclink-tracking-isbn' );
1859 return $this->getLinkRenderer()->makeKnownLink(
1860 SpecialPage::getTitleFor( 'Booksources', $num ),
1861 "ISBN $isbn",
1863 'class' => 'internal mw-magiclink-isbn',
1864 'title' => false // suppress title attribute
1867 } else {
1868 return $m[0];
/**
 * Make a free external link, given a user-supplied URL.
 *
 * Text from the first escaped '<', '>' or non-breaking space onwards, and
 * trailing punctuation, are split off the URL and appended unlinked after
 * the link. If that leaves only the protocol, the input is returned as is.
 *
 * @param string $url
 * @param int $numPostProto
 *   The number of characters after the protocol.
 * @return string HTML
 * @internal
 */
private function makeFreeExternalLink( $url, $numPostProto ) {
	$trail = '';

	# The characters '<' and '>' (which were escaped by
	# internalRemoveHtmlTags()) should not be included in
	# URLs, per RFC 2396.
	# Make &nbsp; terminate a URL as well (bug T84937)
	$m2 = [];
	$hasTerminator = preg_match(
		'/&(lt|gt|nbsp|#x0*(3[CcEe]|[Aa]0)|#0*(60|62|160));/',
		$url,
		$m2,
		PREG_OFFSET_CAPTURE
	);
	if ( $hasTerminator ) {
		$cutAt = $m2[0][1];
		$trail = substr( $url, $cutAt ) . $trail;
		$url = substr( $url, 0, $cutAt );
	}

	# Move trailing punctuation to $trail
	$sep = ',;\.:!?';
	# If there is no left bracket, then consider right brackets fair game too
	if ( strpos( $url, '(' ) === false ) {
		$sep .= ')';
	}

	$urlRev = strrev( $url );
	$numSepChars = strspn( $urlRev, $sep );
	# Don't break a trailing HTML entity by moving the ; into $trail
	# This is in hot code, so use substr_compare to avoid having to
	# create a new string object for the comparison
	if ( $numSepChars && substr_compare( $url, ";", -$numSepChars, 1 ) === 0 ) {
		# more optimization: instead of running preg_match with a $
		# anchor, which can be slow, do the match on the reversed
		# string starting at the desired offset.
		# un-reversed regexp is: /&([a-z]+|#x[\da-f]+|#\d+)$/i
		$endsInEntity = preg_match( '/\G([a-z]+|[\da-f]+x#|\d+#)&/i', $urlRev, $m2, 0, $numSepChars );
		if ( $endsInEntity ) {
			$numSepChars--;
		}
	}
	if ( $numSepChars ) {
		$trail = substr( $url, -$numSepChars ) . $trail;
		$url = substr( $url, 0, -$numSepChars );
	}

	# Verify that we still have a real URL after trail removal, and
	# not just lone protocol
	if ( strlen( $trail ) >= $numPostProto ) {
		return $url . $trail;
	}

	$url = Sanitizer::cleanUrl( $url );

	# Is this an external image?
	$html = $this->maybeMakeExternalImage( $url );
	if ( $html !== false ) {
		return $html . $trail;
	}

	# Not an image, make a link
	$html = $this->getLinkRenderer()->makeExternalLink(
		$url,
		$this->getTargetLanguageConverter()->markNoConversion( $url ),
		$this->getTitle(),
		'free',
		$this->getExternalLinkAttribs( $url )
	);
	# Register it in the output object...
	$this->mOutput->addExternalLink( $url );

	return $html . $trail;
}
/**
 * Convert wikitext heading lines ("== Title ==") into <h1>..<h6> elements.
 * Levels are handled from 6 down to 1, so the longest run of equals signs
 * is matched first.
 *
 * @param string $text
 * @return string
 */
private function handleHeadings( $text ) {
	foreach ( [ 6, 5, 4, 3, 2, 1 ] as $level ) {
		$equals = str_repeat( '=', $level );
		// Trim non-newline whitespace from headings
		// Using \s* will break for: "==\n===\n" and parse as <h2>=</h2>
		$pattern = "/^(?:$equals)[ \\t]*(.+?)[ \\t]*(?:$equals)\\s*$/m";
		$replacement = "<h{$level}>\\1</h{$level}>";
		$text = preg_replace( $pattern, $replacement, $text );
	}
	return $text;
}
/**
 * Replace apostrophe (single quote) markup with HTML, one line at a time.
 * Each line is passed through doQuotes() separately.
 *
 * @param string $text
 *
 * @return string The altered text
 */
private function handleAllQuotes( $text ) {
	$result = '';
	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		$result .= $this->doQuotes( $line ) . "\n";
	}
	// Drop the newline appended after the last line.
	return substr( $result, 0, -1 );
}
/**
 * Helper function for handleAllQuotes()
 *
 * Converts the runs of two or more apostrophes found in a single line of
 * wikitext into <i> and <b> tags. Tags still open at the end of the line
 * are closed.
 *
 * @param string $text A single line of wikitext
 *
 * @return string The line with apostrophe mark-up replaced by HTML tags
 * @internal
 */
public function doQuotes( $text ) {
	// With PREG_SPLIT_DELIM_CAPTURE the even-indexed elements are plain text
	// and the odd-indexed elements are the apostrophe runs between them.
	$arr = preg_split( "/(''+)/", $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	$countarr = count( $arr );
	if ( $countarr == 1 ) {
		// No run of two or more apostrophes: nothing to do.
		return $text;
	}

	// First, do some preliminary work. This may shift some apostrophes from
	// being mark-up to being text. It also counts the number of occurrences
	// of bold and italics mark-ups.
	$numbold = 0;
	$numitalics = 0;
	for ( $i = 1; $i < $countarr; $i += 2 ) {
		$thislen = strlen( $arr[$i] );
		// If there are ever four apostrophes, assume the first is supposed to
		// be text, and the remaining three constitute mark-up for bold text.
		// (T15227: ''''foo'''' turns into ' ''' foo ' ''')
		if ( $thislen == 4 ) {
			$arr[$i - 1] .= "'";
			$arr[$i] = "'''";
			$thislen = 3;
		} elseif ( $thislen > 5 ) {
			// If there are more than 5 apostrophes in a row, assume they're all
			// text except for the last 5.
			// (T15227: ''''''foo'''''' turns into ' ''''' foo ' ''''')
			$arr[$i - 1] .= str_repeat( "'", $thislen - 5 );
			$arr[$i] = "'''''";
			$thislen = 5;
		}
		// Count the number of occurrences of bold and italics mark-ups.
		if ( $thislen == 2 ) {
			$numitalics++;
		} elseif ( $thislen == 3 ) {
			$numbold++;
		} elseif ( $thislen == 5 ) {
			$numitalics++;
			$numbold++;
		}
	}

	// If there is an odd number of both bold and italics, it is likely
	// that one of the bold ones was meant to be an apostrophe followed
	// by italics. Which one we cannot know for certain, but it is more
	// likely to be one that has a single-letter word before it.
	if ( ( $numbold % 2 == 1 ) && ( $numitalics % 2 == 1 ) ) {
		$firstsingleletterword = -1;
		$firstmultiletterword = -1;
		$firstspace = -1;
		for ( $i = 1; $i < $countarr; $i += 2 ) {
			if ( strlen( $arr[$i] ) == 3 ) {
				// Last and second-to-last characters of the preceding text.
				$x1 = substr( $arr[$i - 1], -1 );
				$x2 = substr( $arr[$i - 1], -2, 1 );
				if ( $x1 === ' ' ) {
					if ( $firstspace == -1 ) {
						$firstspace = $i;
					}
				} elseif ( $x2 === ' ' ) {
					$firstsingleletterword = $i;
					// if $firstsingleletterword is set, we don't
					// look at the other options, so we can bail early.
					break;
				} elseif ( $firstmultiletterword == -1 ) {
					$firstmultiletterword = $i;
				}
			}
		}

		// If there is a single-letter word, use it!
		if ( $firstsingleletterword > -1 ) {
			$arr[$firstsingleletterword] = "''";
			$arr[$firstsingleletterword - 1] .= "'";
		} elseif ( $firstmultiletterword > -1 ) {
			// If not, but there's a multi-letter word, use that one.
			$arr[$firstmultiletterword] = "''";
			$arr[$firstmultiletterword - 1] .= "'";
		} elseif ( $firstspace > -1 ) {
			// ... otherwise use the first one that has neither.
			// (notice that it is possible for all three to be -1 if, for example,
			// there is only one pentuple-apostrophe in the line)
			$arr[$firstspace] = "''";
			$arr[$firstspace - 1] .= "'";
		}
	}

	// Now let's actually convert our apostrophic mush to HTML!
	// $state records the currently open tags in opening order ('', 'i', 'b',
	// 'ib', 'bi'), or 'both' after a five-apostrophe run whose nesting order
	// is not yet known; text seen while in 'both' is held in $buffer.
	$output = '';
	$buffer = '';
	$state = '';
	foreach ( $arr as $i => $r ) {
		if ( ( $i % 2 ) == 0 ) {
			if ( $state === 'both' ) {
				$buffer .= $r;
			} else {
				$output .= $r;
			}
			continue;
		}

		$thislen = strlen( $r );
		if ( $thislen == 2 ) {
			// two quotes - open or close italics
			if ( $state === 'i' ) {
				$output .= '</i>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i>';
				$state = 'b';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i><b>';
				$state = 'b';
			} elseif ( $state === 'both' ) {
				$output .= '<b><i>' . $buffer . '</i>';
				$state = 'b';
			} else { // $state can be 'b' or ''
				$output .= '<i>';
				$state .= 'i';
			}
		} elseif ( $thislen == 3 ) {
			// three quotes - open or close bold
			if ( $state === 'b' ) {
				$output .= '</b>';
				$state = '';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b><i>';
				$state = 'i';
			} elseif ( $state === 'ib' ) {
				$output .= '</b>';
				$state = 'i';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b>';
				$state = 'i';
			} else { // $state can be 'i' or ''
				$output .= '<b>';
				$state .= 'b';
			}
		} elseif ( $thislen == 5 ) {
			// five quotes - open or close both separately
			if ( $state === 'b' ) {
				$output .= '</b><i>';
				$state = 'i';
			} elseif ( $state === 'i' ) {
				$output .= '</i><b>';
				$state = 'b';
			} elseif ( $state === 'bi' ) {
				$output .= '</i></b>';
				$state = '';
			} elseif ( $state === 'ib' ) {
				$output .= '</b></i>';
				$state = '';
			} elseif ( $state === 'both' ) {
				$output .= '<i><b>' . $buffer . '</b></i>';
				$state = '';
			} else { // ($state == '')
				$buffer = '';
				$state = 'both';
			}
		}
	}

	// Now close all remaining tags. Notice that the order is important.
	if ( $state === 'b' || $state === 'ib' ) {
		$output .= '</b>';
	}
	if ( $state === 'i' || $state === 'bi' || $state === 'ib' ) {
		$output .= '</i>';
	}
	if ( $state === 'bi' ) {
		$output .= '</b>';
	}
	// There might be lonely ''''', so make sure we have a buffer.
	// Compare against '' explicitly: a buffer of "0" is falsy in PHP and
	// would otherwise be dropped from the output.
	if ( $state === 'both' && $buffer !== '' ) {
		$output .= '<b><i>' . $buffer . '</i></b>';
	}
	return $output;
}
/**
 * Replace external links (REL)
 *
 * Note: this is all very hackish and the order of execution matters a lot.
 * Make sure to run tests/parser/parserTests.php if you change this code.
 *
 * @param string $text
 * @return string
 */
private function handleExternalLinks( $text ) {
	$bits = preg_split( $this->mExtLinkBracketedRegex, $text, -1, PREG_SPLIT_DELIM_CAPTURE );
	// @phan-suppress-next-line PhanTypeComparisonFromArray See phan issue #3161
	if ( $bits === false ) {
		throw new RuntimeException( "PCRE failure" );
	}

	# Everything before the first bracketed link is copied through as-is
	$html = array_shift( $bits );

	# The remaining elements are consumed four at a time:
	# URL, protocol (unused here), link text, and the text that follows the link
	$total = count( $bits );
	for ( $pos = 0; $pos < $total; $pos += 4 ) {
		$url = $bits[$pos];
		$label = $bits[$pos + 2];
		$trail = $bits[$pos + 3];

		# The characters '<' and '>' (which were escaped by
		# internalRemoveHtmlTags()) should not be included in
		# URLs, per RFC 2396.
		$entity = [];
		if ( preg_match( '/&(lt|gt);/', $url, $entity, PREG_OFFSET_CAPTURE ) ) {
			$cut = $entity[0][1];
			$label = substr( $url, $cut ) . ' ' . $label;
			$url = substr( $url, 0, $cut );
		}

		# If the link text is an image URL, replace it with an <img> tag
		# This happened by accident in the original parser, but some people used it extensively
		$img = $this->maybeMakeExternalImage( $label );
		if ( $img !== false ) {
			$label = $img;
		}

		if ( $label == '' ) {
			# No link text, e.g. [http://domain.tld/some.link]: autonumber it
			$label = '[' . $this->getTargetLanguage()->formatNum( ++$this->mAutonumber ) . ']';
			# Link type for CSS
			$linktype = 'autonumber';
			$dtrail = '';
		} else {
			# Have link text, e.g. [http://domain.tld/some.link text]s
			# Check for trail
			$linktype = 'text';
			[ $dtrail, $trail ] = Linker::splitTrail( $trail );
		}

		// Excluding protocol-relative URLs may avoid many false positives.
		if ( preg_match( '/^(?:' . $this->urlUtils->validAbsoluteProtocols() . ')/', $label ) ) {
			$label = $this->getTargetLanguageConverter()->markNoConversion( $label );
		}

		$url = Sanitizer::cleanUrl( $url );

		# Use the encoded URL
		# This means that users can paste URLs directly into the text
		# Funny characters like ö aren't valid in URLs anyway
		# This was changed in August 2004
		$html .= $this->getLinkRenderer()->makeExternalLink(
			$url,
			// @phan-suppress-next-line SecurityCheck-XSS
			new HtmlArmor( $label ),
			$this->getTitle(),
			$linktype,
			$this->getExternalLinkAttribs( $url )
		) . $dtrail . $trail;

		# Register link in the output object.
		$this->mOutput->addExternalLink( $url );
	}

	// @phan-suppress-next-line PhanTypeMismatchReturnNullable False positive from array_shift
	return $html;
}
/**
 * Get the rel attribute for a particular external link.
 *
 * Returns 'nofollow' when NoFollowLinks is enabled, the page (if any) is not
 * in one of the NoFollowNsExceptions namespaces, and the URL does not match
 * the NoFollowDomainExceptions list.
 *
 * @since 1.21
 * @internal
 * @param string|false $url Optional URL, to extract the domain from for rel =>
 *  nofollow if appropriate
 * @param LinkTarget|PageReference|null $title Optional page, for wgNoFollowNsExceptions lookups
 * @return string|null Rel attribute for $url
 */
public static function getExternalLinkRel( $url = false, $title = null ) {
	$mainConfig = MediaWikiServices::getInstance()->getMainConfig();
	$noFollowLinks = $mainConfig->get( MainConfigNames::NoFollowLinks );
	$noFollowNsExceptions = $mainConfig->get( MainConfigNames::NoFollowNsExceptions );
	$noFollowDomainExceptions = $mainConfig->get( MainConfigNames::NoFollowDomainExceptions );

	// A namespace exception can only apply when a page was actually given.
	// Looking up a placeholder `false` with a loose in_array() would match
	// namespace 0 (NS_MAIN) and wrongly suppress nofollow.
	$nsIsExempt = $title
		? in_array( $title->getNamespace(), $noFollowNsExceptions )
		: false;

	if (
		$noFollowLinks && !$nsIsExempt
		&& !wfGetUrlUtils()->matchesDomainList( (string)$url, $noFollowDomainExceptions )
	) {
		return 'nofollow';
	}
	return null;
}
/**
 * Build the extra HTML attributes for an external link.
 *
 * The result may contain a 'rel' entry (nofollow, depending on configuration,
 * namespace and the URL's domain, plus "noreferrer noopener" for targets that
 * are not _self, _parent or _top) and/or a 'target' entry (depending on
 * configuration).
 *
 * @internal
 * @param string $url URL to extract the domain from for rel =>
 *  nofollow if appropriate
 * @return array Associative array of HTML attributes
 */
public function getExternalLinkAttribs( $url ) {
	$attribs = [];
	$relTokens = [];

	$baseRel = self::getExternalLinkRel( $url, $this->getTitle() ) ?? '';
	if ( $baseRel !== '' ) {
		$relTokens[] = $baseRel;
	}

	$target = $this->mOptions->getExternalLinkTarget();
	if ( $target ) {
		$attribs['target'] = $target;
		if ( !in_array( $target, [ '_self', '_parent', '_top' ] ) ) {
			// T133507. New windows can navigate parent cross-origin.
			// Including noreferrer due to lacking browser
			// support of noopener. Eventually noreferrer should be removed.
			$relTokens[] = 'noreferrer noopener';
		}
	}

	if ( $relTokens ) {
		$attribs['rel'] = implode( ' ', $relTokens );
	}
	return $attribs;
}
/**
 * Replace unusual escape codes in a URL with their equivalent characters
 *
 * This generally follows the syntax defined in RFC 3986, with special
 * consideration for HTTP query strings. The URL is processed from the right:
 * fragment, then query, then path, then scheme and host.
 *
 * @internal
 * @param string $url
 * @return string
 */
public static function normalizeLinkUrl( $url ) {
	# Test for RFC 3986 IPv6 syntax
	$scheme = '[a-z][a-z0-9+.-]*:';
	$userinfo = '(?:[a-z0-9\-._~!$&\'()*+,;=:]|%[0-9a-f]{2})*';
	$ipv6Host = '\\[((?:[0-9a-f:]|%3[0-A]|%[46][1-6])+)\\]';
	$isIPv6 = false;
	if ( preg_match( "<^(?:{$scheme})?//(?:{$userinfo}@)?{$ipv6Host}(?:[:/?#].*|)$>i", $url, $m ) ) {
		$decodedHost = rawurldecode( $m[1] );
		if ( IPUtils::isValid( $decodedHost ) ) {
			$isIPv6 = $decodedHost;
		}
	}

	# Make sure unsafe characters are encoded
	$url = preg_replace_callback(
		'/[\x00-\x20"<>\[\\\\\]^`{|}\x7F-\xFF]+/',
		static fn ( $m ) => rawurlencode( $m[0] ),
		$url
	);

	$normalized = '';
	$end = strlen( $url );

	# Fragment part - 'fragment'
	# Query part - 'query' minus &=+;
	$trailingParts = [
		'#' => '"#%<>[\]^`{|}',
		'?' => '"#%<>[\]^`{|}&=+;',
	];
	foreach ( $trailingParts as $delimiter => $unsafe ) {
		$start = strpos( $url, $delimiter );
		if ( $start !== false && $start < $end ) {
			$normalized = self::normalizeUrlComponent(
				substr( $url, $start, $end - $start ), $unsafe ) . $normalized;
			$end = $start;
		}
	}

	# Path part - 'pchar', remove dot segments
	# (find first '/' after the optional '//' after the scheme)
	$doubleSlash = strpos( $url, '//' );
	$start = strpos( $url, '/', $doubleSlash === false ? 0 : $doubleSlash + 2 );
	if ( $start !== false && $start < $end ) {
		$path = self::normalizeUrlComponent(
			substr( $url, $start, $end - $start ), '"#%<>[\]^`{|}/?' );
		$normalized = UrlUtils::removeDotSegments( $path ) . $normalized;
		$end = $start;
	}

	# Scheme and host part - 'pchar'
	# (we assume no userinfo or encoded colons in the host)
	$normalized = self::normalizeUrlComponent(
		substr( $url, 0, $end ), '"#%<>[\]^`{|}/?' ) . $normalized;

	# Fix IPv6 syntax: the brackets were percent-encoded above, put them back
	if ( $isIPv6 !== false ) {
		$ipv6Host = "%5B({$isIPv6})%5D";
		$normalized = preg_replace(
			"<^((?:{$scheme})?//(?:{$userinfo}@)?){$ipv6Host}(?=[:/?#]|$)>i",
			"$1[$2]",
			$normalized
		);
	}

	return $normalized;
}
/**
 * Normalize the percent-escapes found in one component of a URL.
 *
 * An escape is replaced by the character it encodes when that character has
 * a byte value between 33 and 126 and is not listed in $unsafe; every other
 * escape is kept, with its hex digits upper-cased.
 *
 * @param string $component Part of a URL
 * @param string $unsafe Characters that must stay percent-encoded
 * @return string
 */
private static function normalizeUrlComponent( $component, $unsafe ) {
	return preg_replace_callback(
		'/%[0-9A-Fa-f]{2}/',
		static function ( $matches ) use ( $unsafe ) {
			$escape = $matches[0];
			$decoded = urldecode( $escape );
			$code = ord( $decoded );
			$mustStayEscaped = $code <= 32 || $code >= 127
				|| strpos( $unsafe, $decoded ) !== false;

			# Leave it escaped (but use uppercase for a-f), or unescape it
			return $mustStayEscaped ? strtoupper( $escape ) : $decoded;
		},
		$component
	);
}
/**
 * Make an image if it's allowed, either through the global
 * option, through the exception, or through the on-wiki whitelist
 *
 * @param string $url
 *
 * @return string|false Image HTML, or false if no image was made for $url
 */
private function maybeMakeExternalImage( $url ) {
	# The option could be either a single string or an array of strings;
	# treat both as a list of allowed URL prefixes
	$imagesfrom = $this->mOptions->getAllowExternalImagesFrom();
	$allowedPrefixes = $imagesfrom ? (array)$imagesfrom : [];

	$prefixMatches = false;
	foreach ( $allowedPrefixes as $allowedPrefix ) {
		if ( strpos( $url, $allowedPrefix ) === 0 ) {
			$prefixMatches = true;
			break;
		}
	}

	$html = false;
	if ( ( $this->mOptions->getAllowExternalImages() || $prefixMatches )
		&& preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		# Image found
		$html = Linker::makeExternalImage( $url );
	}

	if ( $html
		|| !$this->mOptions->getEnableImageWhitelist()
		|| !preg_match( self::EXT_IMAGE_REGEX, $url )
	) {
		return $html;
	}

	$whitelist = explode(
		"\n",
		wfMessage( 'external_image_whitelist' )->inContentLanguage()->text()
	);
	foreach ( $whitelist as $entry ) {
		# Sanitize the regex fragment, make it case-insensitive, ignore blank entries/comments
		if ( $entry === '' || strpos( $entry, '#' ) === 0 ) {
			continue;
		}
		// @phan-suppress-next-line SecurityCheck-ReDoS preg_quote is not wanted here
		if ( preg_match( '/' . str_replace( '/', '\\/', $entry ) . '/i', $url ) ) {
			# Image matches a whitelist entry
			return Linker::makeExternalImage( $url );
		}
	}

	return $html;
}
/**
 * Process [[ ]] wikilinks
 *
 * Runs handleInternalLinks2() on the text and merges the link holders it
 * returns into $this->mLinkHolders.
 *
 * @param string $text
 *
 * @return string Processed text
 */
private function handleInternalLinks( $text ) {
	# $text is passed by reference and rewritten in place
	$newHolders = $this->handleInternalLinks2( $text );
	$this->mLinkHolders->merge( $newHolders );

	return $text;
}
/**
 * Process [[ ]] wikilinks (RIL)
 *
 * The text is split on "[[" and each piece is examined in turn; $s is
 * rebuilt piece by piece with links replaced by rendered HTML or by
 * link holders.
 *
 * @param string &$s Wikitext, rewritten in place
 * @return LinkHolderArray Holders created for the links found in $s
 */
private function handleInternalLinks2( &$s ) {
	static $tc = false, $e1, $e1_img;
	# the % is needed to support urlencoded titles as well
	if ( !$tc ) {
		$tc = Title::legalChars() . '#%';
		# Match a link having the form [[namespace:link|alternate]]trail
		$e1 = "/^([{$tc}]+)(?:\\|(.+?))?]](.*)\$/sD";
		# Match cases where there is no "]]", which might still be images
		$e1_img = "/^([{$tc}]+)\\|(.*)\$/sD";
	}

	$holders = new LinkHolderArray(
		$this,
		$this->getContentLanguageConverter(),
		$this->getHookContainer()
	);

	# split the entire text string on occurrences of [[
	$a = StringUtils::explode( '[[', ' ' . $s );
	# get the first element (all text up to first [[), and remove the space we added
	$s = $a->current();
	$a->next();
	$line = $a->current(); # Workaround for broken ArrayIterator::next() that returns "void"
	$s = substr( $s, 1 );

	$nottalk = !$this->getTitle()->isTalkPage();

	$useLinkPrefixExtension = $this->getTargetLanguage()->linkPrefixExtension();
	$e2 = null;
	if ( $useLinkPrefixExtension ) {
		# Match the end of a line for a word that's not followed by whitespace,
		# e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched
		$charset = $this->contLang->linkPrefixCharset();
		$e2 = "/^((?>.*[^$charset]|))(.+)$/sDu";
		$m = [];
		$first_prefix = preg_match( $e2, $s, $m ) ? $m[2] : false;
		$prefix = false;
	} else {
		$first_prefix = false;
		$prefix = '';
	}

	# Some namespaces don't allow subpages
	$useSubpages = $this->nsInfo->hasSubpages(
		$this->getTitle()->getNamespace()
	);

	# Loop for each link
	for ( ; $line !== false && $line !== null; $a->next(), $line = $a->current() ) {
		# Check for excessive memory usage
		if ( $holders->isBig() ) {
			# Too big
			# Do the existence check, replace the link holders and clear the array
			$holders->replace( $s );
			$holders->clear();
		}

		if ( $useLinkPrefixExtension ) {
			// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal $e2 is set under this condition
			if ( preg_match( $e2, $s, $m ) ) {
				[ , $s, $prefix ] = $m;
			} else {
				$prefix = '';
			}
			# first link
			if ( $first_prefix ) {
				$prefix = $first_prefix;
				$first_prefix = false;
			}
		}

		$might_be_img = false;

		if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
			$text = $m[2];
			# If we get a ] at the beginning of $m[3] that means we have a link that's something like:
			# [[Image:Foo.jpg|[http://example.com desc]]] <- having three ] in a row breaks things,
			# the real problem is with the $e1 regex
			# See T1500.
			# Still some problems for cases where the ] is meant to be outside punctuation,
			# and no image is in sight. See T4095.
			if ( $text !== ''
				&& substr( $m[3], 0, 1 ) === ']'
				&& strpos( $text, '[' ) !== false
			) {
				$text .= ']'; # so that handleExternalLinks($text) works later
				$m[3] = substr( $m[3], 1 );
			}
			# fix up urlencoded title texts
			if ( strpos( $m[1], '%' ) !== false ) {
				# Should anchors '#' also be rejected?
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = $m[3];
		} elseif ( preg_match( $e1_img, $line, $m ) ) {
			# Invalid, but might be an image with a link in its caption
			$might_be_img = true;
			$text = $m[2];
			if ( strpos( $m[1], '%' ) !== false ) {
				$m[1] = str_replace( [ '<', '>' ], [ '&lt;', '&gt;' ], rawurldecode( $m[1] ) );
			}
			$trail = "";
		} else { # Invalid form; output directly
			$s .= $prefix . '[[' . $line;
			continue;
		}

		// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset preg_match success when reached here
		$origLink = ltrim( $m[1], ' ' );

		# Don't allow internal links to pages containing
		# PROTO: where PROTO is a valid URL protocol; these
		# should be external links.
		if ( preg_match( '/^(?i:' . $this->urlUtils->validProtocols() . ')/', $origLink ) ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		# Make subpage if necessary
		$link = $useSubpages
			? Linker::normalizeSubpageLink( $this->getTitle(), $origLink, $text )
			: $origLink;

		// \x7f isn't a default legal title char, so most likely strip
		// markers will force us into the "invalid form" path above. But,
		// just in case, let's assert that xmlish tags aren't valid in
		// the title position.
		$unstrip = $this->mStripState->killMarkers( $link );
		$noMarkers = ( $unstrip === $link );

		$nt = $noMarkers ? Title::newFromText( $link ) : null;
		if ( $nt === null ) {
			$s .= $prefix . '[[' . $line;
			continue;
		}

		$ns = $nt->getNamespace();
		$iw = $nt->getInterwiki();

		$noforce = ( substr( $origLink, 0, 1 ) !== ':' );

		if ( $might_be_img ) { # if this is actually an invalid link
			if ( !( $ns === NS_FILE && $noforce ) ) {
				# it's not an image, so output it raw
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}

			# but might be an image
			$found = false;
			while ( true ) {
				# look at the next 'line' to see if we can close it there
				$a->next();
				$next_line = $a->current();
				if ( $next_line === false || $next_line === null ) {
					break;
				}
				$pieces = explode( ']]', $next_line, 3 );
				$pieceCount = count( $pieces );
				if ( $pieceCount == 3 ) {
					# the first ]] closes the inner link, the second the image
					$found = true;
					$text .= "[[{$pieces[0]}]]{$pieces[1]}";
					$trail = $pieces[2];
					break;
				} elseif ( $pieceCount == 2 ) {
					# if there's exactly one ]] that's fine, we'll keep looking
					$text .= "[[{$pieces[0]}]]{$pieces[1]}";
				} else {
					# if $next_line is invalid too, we need look no further
					$text .= '[[' . $next_line;
					break;
				}
			}
			if ( !$found ) {
				# we couldn't find the end of this imageLink, so output it raw
				# but don't ignore what might be perfectly normal links in the text we've examined
				$holders->merge( $this->handleInternalLinks2( $text ) );
				$s .= "{$prefix}[[$link|$text";
				# note: no $trail, because without an end, there *is* no trail
				continue;
			}
		}

		$wasblank = ( $text == '' );
		if ( $wasblank ) {
			$text = $link;
			if ( !$noforce ) {
				# Strip off leading ':'
				$text = substr( $text, 1 );
			}
		} else {
			# T6598 madness. Handle the quotes only if they come from the alternate part
			# [[Lista d''e paise d''o munno]] -> <a href="...">Lista d''e paise d''o munno</a>
			# [[Criticism of Harry Potter|Criticism of ''Harry Potter'']]
			#    -> <a href="Criticism of Harry Potter">Criticism of <i>Harry Potter</i></a>
			$text = $this->doQuotes( $text );
		}

		# Link not escaped by : , create the various objects
		if ( $noforce && !$nt->wasLocalInterwiki() ) {
			# Interwikis
			if (
				$iw && $this->mOptions->getInterwikiMagic() && $nottalk && (
					$this->languageNameUtils->getLanguageName(
						$iw,
						LanguageNameUtils::AUTONYMS,
						LanguageNameUtils::DEFINED
					)
					|| in_array( $iw, $this->svcOptions->get( MainConfigNames::ExtraInterlanguageLinkPrefixes ) )
				)
			) {
				# T26502: duplicates are resolved in ParserOutput
				$this->mOutput->addLanguageLink( $nt );

				/**
				 * Strip the whitespace interlanguage links produce, see
				 * T10897, T175416, and T359886.
				 */
				$s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;
				continue;
			}

			if ( $ns === NS_FILE ) {
				if ( $wasblank ) {
					# if no parameters were passed, $text
					# becomes something like "File:Foo.png",
					# which we don't want to pass on to the
					# image generator
					$text = '';
				} else {
					# recursively parse links inside the image caption
					# actually, this will parse them in any other parameters, too,
					# but it might be hard to fix that, and it doesn't matter ATM
					$text = $this->handleExternalLinks( $text );
					$holders->merge( $this->handleInternalLinks2( $text ) );
				}
				# cloak any absolute URLs inside the image markup, so handleExternalLinks() won't touch them
				$s .= $prefix . $this->armorLinks(
					$this->makeImage( $nt, $text, $holders ) ) . $trail;
				continue;
			}

			if ( $ns === NS_CATEGORY ) {
				# Strip newlines from the left hand context of Category
				# links.
				# See T2087, T87753, T174639, T359886
				$s = preg_replace( '/\n\s*$/', '', $s . $prefix ) . $trail;

				# An empty sort key is filled in by CategoryLinksTable
				$sortkey = $wasblank ? '' : $text;
				$this->mOutput->addCategory( $nt, $sortkey );

				continue;
			}
		}

		# Self-link checking. For some languages, variants of the title are checked in
		# LinkHolderArray::doVariants() to allow batching the existence checks necessary
		# for linking to a different variant.
		if ( $ns !== NS_SPECIAL && $nt->equals( $this->getTitle() ) ) {
			$s .= $prefix . Linker::makeSelfLinkObj( $nt, $text, '', $trail, '',
				Sanitizer::escapeIdForLink( $nt->getFragment() ) );
			continue;
		}

		# NS_MEDIA is a pseudo-namespace for linking directly to a file
		# @todo FIXME: Should do batch file existence checks, see comment below
		if ( $ns === NS_MEDIA ) {
			# Give extensions a chance to select the file revision for us
			$options = [];
			$descQuery = false;
			$this->hookRunner->onBeforeParserFetchFileAndTitle(
				// @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
				$this, $nt, $options, $descQuery
			);
			# Fetch and register the file (file title may be different via hooks)
			[ $file, $nt ] = $this->fetchFileAndTitle( $nt, $options );
			# Cloak with NOPARSE to avoid replacement in handleExternalLinks
			$s .= $prefix . $this->armorLinks(
				Linker::makeMediaLinkFile( $nt, $file, $text ) ) . $trail;
			continue;
		}

		# Some titles, such as valid special pages or files in foreign repos, should
		# be shown as bluelinks even though they're not included in the page table
		# @todo FIXME: isAlwaysKnown() can be expensive for file links; we should really do
		# batch file existence checks for NS_FILE and NS_MEDIA
		if ( $iw == '' && $nt->isAlwaysKnown() ) {
			$this->mOutput->addLink( $nt );
			$s .= $this->makeKnownLinkHolder( $nt, $text, $trail, $prefix );
		} else {
			# Links will be added to the output link list after checking
			$s .= $holders->makeHolder( $nt, $text, $trail, $prefix );
		}
	}

	return $holders;
}
/**
 * Render a forced-blue link inline; protect against double expansion of
 * URLs if we're in a mode that prepends full URL prefixes to internal links.
 * Since this little disaster has to split off the trail text to avoid
 * breaking URLs in the following text without breaking trails on the
 * wiki links, it's been made into a horrible function.
 *
 * @param LinkTarget $nt
 * @param string $text Link text; when empty, the escaped prefixed title is used
 * @param string $trail
 * @param string $prefix
 * @return string HTML-wikitext mix oh yuck
 */
private function makeKnownLinkHolder( LinkTarget $nt, $text = '', $trail = '', $prefix = '' ) {
	# Part of the trail goes inside the link, the rest is appended after it
	[ $insideLink, $afterLink ] = Linker::splitTrail( $trail );

	$label = $text;
	if ( $label == '' ) {
		$label = htmlspecialchars( $this->titleFormatter->getPrefixedText( $nt ) );
	}

	$anchor = $this->getLinkRenderer()->makeKnownLink(
		$nt,
		new HtmlArmor( $prefix . $label . $insideLink )
	);

	return $this->armorLinks( $anchor ) . $afterLink;
}
/**
 * Insert a NOPARSE hacky thing into any inline links in a chunk that's
 * going to go through further parsing steps before inline URL expansion.
 *
 * Not needed quite as much as it used to be since free links are a bit
 * more sensible these days. But bracketed links are still an issue.
 *
 * @param string $text More-or-less HTML
 * @return string Less-or-more HTML with NOPARSE bits
 */
private function armorLinks( $text ) {
	# Any valid protocol that starts at a word boundary, matched case-insensitively
	$protocolPattern = '/\b((?i)' . $this->urlUtils->validProtocols() . ')/';
	# Prefix the matched protocol with the marker
	$replacement = self::MARKER_PREFIX . "NOPARSE$1";

	return preg_replace( $protocolPattern, $replacement, $text );
}
/**
 * Make lists from lines starting with ':', '*', '#', etc. (DBL)
 *
 * Emits a deprecation notice and delegates to BlockLevelPass::doBlockLevels().
 *
 * @param string $text
 * @param bool $linestart Whether or not this is at the start of a line.
 * @internal
 * @return string The lists rendered as HTML
 * @deprecated since 1.35, will not be supported in future parsers
 */
public function doBlockLevels( $text, $linestart ) {
	wfDeprecated( __METHOD__, '1.35' );

	$rendered = BlockLevelPass::doBlockLevels( $text, $linestart );
	return $rendered;
}
/**
 * Return value of a magic variable (like PAGENAME)
 *
 * Values are cached per identifier in $this->mVarCache.
 *
 * @param string $index Magic variable identifier as mapped in MagicWordFactory::$mVariableIDs
 * @param PPFrame|false $frame
 *
 * @return string
 */
private function expandMagicVariable( $index, $frame = false ) {
	# Some of these require message or data lookups and can be
	# expensive to check many times.
	$cached = $this->mVarCache[$index] ?? null;
	if ( $cached !== null ) {
		return $cached;
	}

	$ts = new MWTimestamp( $this->mOptions->getTimestamp() /* TS_MW */ );
	if ( $this->hookContainer->isRegistered( 'ParserGetVariableValueTs' ) ) {
		# Let the hook adjust the timestamp, which it receives as a Unix timestamp
		$unixTime = $ts->getTimestamp( TS_UNIX );
		$this->hookRunner->onParserGetVariableValueTs( $this, $unixTime );
		$ts = new MWTimestamp( $unixTime );
	}

	$value = CoreMagicVariables::expand(
		$this, $index, $ts, $this->svcOptions, $this->logger
	);

	if ( $value === null ) {
		// Not a defined core magic word
		// Don't give this hook unrestricted access to mVarCache
		$fakeCache = [];
		$this->hookRunner->onParserGetVariableValueSwitch(
			// @phan-suppress-next-line PhanTypeMismatchArgument $value is passed as null but returned as string
			$this, $fakeCache, $index, $value, $frame
		);
		// Cache the value returned by the hook by falling through here.
		// Assert that the hook returned a non-null value for this MV
		'@phan-var string $value';
	}

	$this->mVarCache[$index] = $value;

	return $value;
}
/**
 * Initialize the magic variables (like CURRENTMONTHNAME) and
 * substitution modifiers.
 *
 * Populates $this->mVariables and $this->mSubstWords from the magic word factory.
 */
private function initializeVariables() {
	$factory = $this->magicWordFactory;

	$this->mVariables = $factory->newArray( $factory->getVariableIDs() );
	$this->mSubstWords = $factory->getSubstArray();
}
/**
 * Get the document object model for the given wikitext
 *
 * @see Preprocessor::preprocessToObj()
 *
 * The generated DOM tree must depend only on the input text and the flags.
 * The DOM tree must be the same in OT_HTML and OT_WIKI mode, to avoid a
 * regression of T6899.
 *
 * Any flag added to the $flags parameter here, or any other parameter liable to cause a
 * change in the DOM tree for a given text, must be passed through the section identifier
 * in the section edit link and thus back to extractSections().
 *
 * @param string $text Wikitext
 * @param int $flags Bit field of Preprocessor::DOM_* constants
 * @return PPNode
 * @since 1.23 method is public
 */
public function preprocessToDom( $text, $flags = 0 ) {
	$preprocessor = $this->getPreprocessor();

	return $preprocessor->preprocessToObj( $text, $flags );
}
/**
 * Replace magic variables, templates, and template arguments
 * with the appropriate text. Templates are substituted recursively,
 * taking care to avoid infinite loops.
 *
 * Note that the substitution depends on value of $mOutputType:
 *  self::OT_WIKI: only {{subst:}} templates
 *  self::OT_PREPROCESS: templates but not extension tags
 *  self::OT_HTML: all templates and extension tags
 *
 * Empty text, and text longer than the MaxIncludeSize parser option, is
 * returned unchanged.
 *
 * @param string $text The text to transform
 * @param false|PPFrame|array $frame Object describing the arguments passed to the
 *   template. Arguments may also be provided as an associative array, as
 *   was the usual case before MW1.12. Providing arguments this way may be
 *   useful for extensions wishing to perform variable replacement
 *   explicitly.
 * @param bool $argsOnly Only do argument (triple-brace) expansion, not
 *   double-brace expansion.
 * @param bool $stripExtTags When true, put extension tags in general strip state; when
 *   false extension tags are skipped during OT_PREPROCESS
 * @return string
 * @since 1.24 method is public
 */
public function replaceVariables( $text, $frame = false, $argsOnly = false, $stripExtTags = true ) {
	# Is there any text? Also, Prevent too big inclusions!
	$textSize = strlen( $text );
	if ( $textSize < 1 || $textSize > $this->mOptions->getMaxIncludeSize() ) {
		return $text;
	}

	if ( $frame === false ) {
		$frame = $this->getPreprocessor()->newFrame();
	} elseif ( !( $frame instanceof PPFrame ) ) {
		wfDeprecated(
			__METHOD__ . " called using plain parameters instead of " .
			"a PPFrame instance. Creating custom frame.",
			'1.43'
		);
		$frame = $this->getPreprocessor()->newCustomFrame( $frame );
	}

	$dom = $this->preprocessToDom( $text );
	$flags = $argsOnly ? PPFrame::NO_TEMPLATES : 0;

	# Override mStripExtTags for the duration of the expansion only. The
	# previous value is restored even if expansion throws, so that a failed
	# expansion does not leave the override in place on this parser.
	$previousStripExtTags = $this->mStripExtTags;
	$this->mStripExtTags = $stripExtTags;
	try {
		$text = $frame->expand( $dom, $flags );
	} finally {
		$this->mStripExtTags = $previousStripExtTags;
	}

	return $text;
}
/**
 * Warn the user when a parser limitation is reached
 * Will warn at most once the user per limitation type
 *
 * The results are shown during preview and run through the Parser (See EditPage.php)
 *
 * The warning message key is "$limitationType-warning" and the tracking
 * category key is "$limitationType-category".
 *
 * @param string $limitationType Should be one of:
 *   'expensive-parserfunction' (corresponding messages:
 *       'expensive-parserfunction-warning',
 *       'expensive-parserfunction-category')
 *   'post-expand-template-argument' (corresponding messages:
 *       'post-expand-template-argument-warning',
 *       'post-expand-template-argument-category')
 *   'post-expand-template-inclusion' (corresponding messages:
 *       'post-expand-template-inclusion-warning',
 *       'post-expand-template-inclusion-category')
 *   'node-count-exceeded' (corresponding messages:
 *       'node-count-exceeded-warning',
 *       'node-count-exceeded-category')
 *   'expansion-depth-exceeded' (corresponding messages:
 *       'expansion-depth-exceeded-warning',
 *       'expansion-depth-exceeded-category')
 * @param string|int|null $current Current value
 * @param string|int|null $max Maximum allowed, when an explicit limit has been
 *   exceeded, provide the values (optional)
 * @internal
 */
public function limitationWarn( $limitationType, $current = '', $max = '' ) {
	# does no harm if $current and $max are present but are unnecessary for the message
	# Not doing ->inLanguage( $this->mOptions->getUserLangObj() ), since this is shown
	# only during preview, and that would split the parser cache unnecessarily.
	$warningKey = $limitationType . '-warning';
	$categoryKey = $limitationType . '-category';

	$this->mOutput->addWarningMsg(
		$warningKey,
		Message::numParam( $current ),
		Message::numParam( $max )
	);
	$this->addTrackingCategory( $categoryKey );
}
3008 * Return the text of a template, after recursively
3009 * replacing any variables or templates within the template.
3011 * @param array $piece The parts of the template
3012 * $piece['title']: the title, i.e. the part before the |
3013 * $piece['parts']: the parameter array
3014 * $piece['lineStart']: whether the brace was at the start of a line
3015 * @param PPFrame $frame The current frame, contains template arguments
3016 * @throws Exception
3017 * @return string|array The text of the template
3018 * @internal
// Review overview: expands one double-brace construct. The stages below run in
// order, and each one is skipped once $found has been set:
//   1. subst: / safesubst: prefix handling (may keep the construct as literal wikitext)
//   2. magic variables (only when the construct has no arguments)
//   3. msgnw: / msg: / raw: prefixes
//   4. parser functions ("name:arg" syntax), via callParserFunction()
//   5. title resolution plus the template recursion-depth check
//   6. loading: special page inclusion, local template DOM, or interwiki
//      transclusion, followed by the template loop check
// After that the result is expanded, raw HTML is hidden behind a strip item,
// nowiki output is escaped, and the post-expand include size is accounted for.
// The return value is [ 'object' => ... ] when nothing was found or $isLocalObj
// is set, and [ 'text' => ... ] otherwise.
3020 public function braceSubstitution( array $piece, PPFrame $frame ) {
3021 // Flags
3023 // $text has been filled
3024 $found = false;
3025 $text = '';
3026 // wiki markup in $text should be escaped
3027 $nowiki = false;
3028 // $text is HTML, armour it against wikitext transformation
3029 $isHTML = false;
3030 // Force interwiki transclusion to be done in raw mode not rendered
3031 $forceRawInterwiki = false;
3032 // $text is a DOM node needing expansion in a child frame
3033 $isChildObj = false;
3034 // $text is a DOM node needing expansion in the current frame
3035 $isLocalObj = false;
3037 # Title object, where $text came from
3038 $title = false;
3040 # $part1 is the bit before the first |, and must contain only title characters.
3041 # Various prefixes will be stripped from it later.
3042 $titleWithSpaces = $frame->expand( $piece['title'] );
3043 $part1 = trim( $titleWithSpaces );
3044 $titleText = false;
3046 # Original title text preserved for various purposes
3047 $originalTitle = $part1;
3049 # $args is a list of argument nodes, starting from index 0, not including $part1
3050 $args = $piece['parts'];
3052 $profileSection = null; // profile templates
3054 $sawDeprecatedTemplateEquals = false; // T91154
3056 # SUBST
3057 // @phan-suppress-next-line PhanImpossibleCondition
3058 if ( !$found ) {
3059 $substMatch = $this->mSubstWords->matchStartAndRemove( $part1 );
3060 $part1 = trim( $part1 );
3062 # Possibilities for substMatch: "subst", "safesubst" or FALSE
3063 # Decide whether to expand template or keep wikitext as-is.
3064 if ( $this->ot['wiki'] ) {
3065 if ( $substMatch === false ) {
3066 $literal = true; # literal when in PST with no prefix
3067 } else {
3068 $literal = false; # expand when in PST with subst: or safesubst:
3070 } else {
3071 if ( $substMatch == 'subst' ) {
3072 $literal = true; # literal when not in PST with plain subst:
3073 } else {
3074 $literal = false; # expand when not in PST with safesubst: or no prefix
3077 if ( $literal ) {
// Rebuild the source of the construct from the untrimmed title and the
// argument nodes; it is returned as an 'object' via $isLocalObj.
3078 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3079 $isLocalObj = true;
3080 $found = true;
3084 # Variables
3085 if ( !$found && $args->getLength() == 0 ) {
3086 $id = $this->mVariables->matchStartToEnd( $part1 );
3087 if ( $id !== false ) {
3088 if ( strpos( $part1, ':' ) !== false ) {
3089 wfDeprecatedMsg(
3090 'Registering a magic variable with a name including a colon',
3091 '1.39', false, false
3094 $text = $this->expandMagicVariable( $id, $frame );
3095 $found = true;
3099 # MSG, MSGNW and RAW
3100 if ( !$found ) {
3101 # Check for MSGNW:
3102 $mwMsgnw = $this->magicWordFactory->get( 'msgnw' );
3103 if ( $mwMsgnw->matchStartAndRemove( $part1 ) ) {
3104 $nowiki = true;
3105 } else {
3106 # Remove obsolete MSG:
3107 $mwMsg = $this->magicWordFactory->get( 'msg' );
3108 $mwMsg->matchStartAndRemove( $part1 );
3111 # Check for RAW:
3112 $mwRaw = $this->magicWordFactory->get( 'raw' );
3113 if ( $mwRaw->matchStartAndRemove( $part1 ) ) {
3114 $forceRawInterwiki = true;
3118 # Parser functions
3119 if ( !$found ) {
3120 $colonPos = strpos( $part1, ':' );
3121 if ( $colonPos !== false ) {
3122 $func = substr( $part1, 0, $colonPos );
// Argument 0 is the trimmed text after the colon; the remaining arguments
// are passed on as unexpanded nodes.
3123 $funcArgs = [ trim( substr( $part1, $colonPos + 1 ) ) ];
3124 $argsLength = $args->getLength();
3125 for ( $i = 0; $i < $argsLength; $i++ ) {
3126 $funcArgs[] = $args->item( $i );
3129 $result = $this->callParserFunction( $frame, $func, $funcArgs );
3131 // Extract any forwarded flags
3132 if ( isset( $result['title'] ) ) {
3133 $title = $result['title'];
3135 if ( isset( $result['found'] ) ) {
3136 $found = $result['found'];
3138 if ( array_key_exists( 'text', $result ) ) {
3139 // a string or null
3140 $text = $result['text'];
3142 if ( isset( $result['nowiki'] ) ) {
3143 $nowiki = $result['nowiki'];
3145 if ( isset( $result['isHTML'] ) ) {
3146 $isHTML = $result['isHTML'];
3148 if ( isset( $result['forceRawInterwiki'] ) ) {
3149 $forceRawInterwiki = $result['forceRawInterwiki'];
3151 if ( isset( $result['isChildObj'] ) ) {
3152 $isChildObj = $result['isChildObj'];
3154 if ( isset( $result['isLocalObj'] ) ) {
3155 $isLocalObj = $result['isLocalObj'];
3160 # Finish mangling title and then check for loops.
3161 # Set $title to a Title object and $titleText to the PDBK
3162 if ( !$found ) {
3163 $ns = NS_TEMPLATE;
3164 # Split the title into page and subpage
3165 $subpage = '';
3166 $relative = Linker::normalizeSubpageLink(
3167 $this->getTitle(), $part1, $subpage
// If normalisation changed the text, the title is resolved in the current
// page's namespace instead of the Template namespace.
3169 if ( $part1 !== $relative ) {
3170 $part1 = $relative;
3171 $ns = $this->getTitle()->getNamespace();
3173 $title = Title::newFromText( $part1, $ns );
3174 if ( $title ) {
3175 $titleText = $title->getPrefixedText();
3176 # Check for language variants if the template is not found
3177 if ( $this->getTargetLanguageConverter()->hasVariants() && $title->getArticleID() == 0 ) {
3178 $this->getTargetLanguageConverter()->findVariantLink( $part1, $title, true );
3180 # Do recursion depth check
3181 $limit = $this->mOptions->getMaxTemplateDepth();
3182 if ( $frame->depth >= $limit ) {
3183 $found = true;
3184 $text = '<span class="error">'
3185 . wfMessage( 'parser-template-recursion-depth-warning' )
3186 ->numParams( $limit )->inContentLanguage()->text()
3187 . '</span>';
3192 # Load from database
3193 if ( !$found && $title ) {
3194 $profileSection = $this->mProfiler->scopedProfileIn( $title->getPrefixedDBkey() );
3195 if ( !$title->isExternal() ) {
3196 if ( $title->isSpecialPage()
3197 && $this->mOptions->getAllowSpecialInclusion()
3198 && $this->ot['html']
3200 $specialPage = $this->specialPageFactory->getPage( $title->getDBkey() );
3201 // Pass the template arguments as URL parameters.
3202 // "uselang" will have no effect since the Language object
3203 // is forced to the one defined in ParserOptions.
3204 $pageArgs = [];
3205 $argsLength = $args->getLength();
3206 for ( $i = 0; $i < $argsLength; $i++ ) {
3207 $bits = $args->item( $i )->splitArg();
// Only arguments whose 'index' is empty are forwarded, keyed by their
// expanded, trimmed name.
3208 if ( strval( $bits['index'] ) === '' ) {
3209 $name = trim( $frame->expand( $bits['name'], PPFrame::STRIP_COMMENTS ) );
3210 $value = trim( $frame->expand( $bits['value'] ) );
3211 $pageArgs[$name] = $value;
3215 // Create a new context to execute the special page, that is expensive
3216 if ( $this->incrementExpensiveFunctionCount() ) {
3217 $context = new RequestContext;
3218 $context->setTitle( $title );
3219 $context->setRequest( new FauxRequest( $pageArgs ) );
3220 if ( $specialPage && $specialPage->maxIncludeCacheTime() === 0 ) {
3221 $context->setUser( $this->userFactory->newFromUserIdentity( $this->getUserIdentity() ) );
3222 } else {
3223 // If this page is cached, then we better not be per user.
3224 $context->setUser( User::newFromName( '127.0.0.1', false ) );
3226 $context->setLanguage( $this->mOptions->getUserLangObj() );
3227 $ret = $this->specialPageFactory->capturePath( $title, $context, $this->getLinkRenderer() );
3228 if ( $ret ) {
3229 $text = $context->getOutput()->getHTML();
3230 $this->mOutput->addOutputPageMetadata( $context->getOutput() );
3231 $found = true;
3232 $isHTML = true;
3233 if ( $specialPage && $specialPage->maxIncludeCacheTime() !== false ) {
3234 $this->mOutput->updateRuntimeAdaptiveExpiry(
3235 $specialPage->maxIncludeCacheTime()
3240 } elseif ( $this->nsInfo->isNonincludable( $title->getNamespace() ) ) {
3241 $found = false; # access denied
3242 $this->logger->debug(
3243 __METHOD__ .
3244 ": template inclusion denied for " . $title->getPrefixedDBkey()
3246 } else {
// getTemplateDom() returns the DOM together with the title it resolved to,
// so $title may change here.
3247 [ $text, $title ] = $this->getTemplateDom( $title );
3248 if ( $text !== false ) {
3249 $found = true;
3250 $isChildObj = true;
3251 if (
3252 $title->getNamespace() === NS_TEMPLATE &&
3253 $title->getDBkey() === '=' &&
3254 $originalTitle === '='
3256 // Note that we won't get here if `=` is evaluated
3257 // (in the future) as a parser function, nor if
3258 // the Template namespace is given explicitly,
3259 // ie `{{Template:=}}`. Only `{{=}}` triggers.
3260 $sawDeprecatedTemplateEquals = true; // T91154
3265 # If the title is valid but undisplayable, make a link to it
3266 if ( !$found && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3267 $text = "[[:$titleText]]";
3268 $found = true;
3270 } elseif ( $title->isTrans() ) {
3271 # Interwiki transclusion
3272 if ( $this->ot['html'] && !$forceRawInterwiki ) {
3273 $text = $this->interwikiTransclude( $title, 'render' );
3274 $isHTML = true;
3275 } else {
3276 $text = $this->interwikiTransclude( $title, 'raw' );
3277 # Preprocess it like a template
3278 $text = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
3279 $isChildObj = true;
3281 $found = true;
3284 # Do infinite loop check
3285 # This has to be done after redirect resolution to avoid infinite loops via redirects
3286 if ( !$frame->loopCheck( $title ) ) {
3287 $found = true;
3288 $text = '<span class="error">'
3289 . wfMessage( 'parser-template-loop-warning', $titleText )->inContentLanguage()->text()
3290 . '</span>';
3291 $this->addTrackingCategory( 'template-loop-category' );
3292 $this->mOutput->addWarningMsg(
3293 'template-loop-warning',
3294 Message::plaintextParam( $titleText )
3296 $this->logger->debug( __METHOD__ . ": template loop broken at '$titleText'" );
3300 # If we haven't found text to substitute by now, we're done
3301 # Recover the source wikitext and return it
3302 if ( !$found ) {
3303 $text = $frame->virtualBracketedImplode( '{{', '|', '}}', $titleWithSpaces, $args );
3304 if ( $profileSection ) {
3305 $this->mProfiler->scopedProfileOut( $profileSection );
3307 return [ 'object' => $text ];
3310 # Expand DOM-style return values in a child frame
3311 if ( $isChildObj ) {
3312 # Clean up argument array
3313 $newFrame = $frame->newChild( $args, $title );
3315 if ( $nowiki ) {
3316 $text = $newFrame->expand( $text, PPFrame::RECOVER_ORIG );
3317 } elseif ( $titleText !== false && $newFrame->isEmpty() ) {
3318 # Expansion is eligible for the empty-frame cache
3319 $text = $newFrame->cachedExpand( $titleText, $text );
3320 } else {
3321 # Uncached expansion
3322 $text = $newFrame->expand( $text );
3325 if ( $isLocalObj && $nowiki ) {
3326 $text = $frame->expand( $text, PPFrame::RECOVER_ORIG );
3327 $isLocalObj = false;
3330 if ( $profileSection ) {
3331 $this->mProfiler->scopedProfileOut( $profileSection );
3333 if (
3334 $sawDeprecatedTemplateEquals &&
3335 $this->mStripState->unstripBoth( $text ) !== '='
3337 // T91154: {{=}} is deprecated when it doesn't expand to `=`;
3338 // use {{Template:=}} if you must.
3339 $this->addTrackingCategory( 'template-equals-category' );
3340 $this->mOutput->addWarningMsg( 'template-equals-warning' );
3343 # Replace raw HTML by a placeholder
3344 if ( $isHTML ) {
3345 // @phan-suppress-next-line SecurityCheck-XSS
3346 $text = $this->insertStripItem( $text );
3347 } elseif ( $nowiki && ( $this->ot['html'] || $this->ot['pre'] ) ) {
3348 # Escape nowiki-style return values
3349 // @phan-suppress-next-line SecurityCheck-DoubleEscaped
3350 $text = wfEscapeWikiText( $text );
3351 } elseif ( is_string( $text )
3352 && !$piece['lineStart']
3353 && preg_match( '/^(?:{\\||:|;|#|\*)/', $text )
3355 # T2529: if the template begins with a table or block-level
3356 # element, it should be treated as beginning a new line.
3357 # This behavior is somewhat controversial.
3358 $text = "\n" . $text;
// Only string results are counted against the post-expand include size.
// When the limit is exceeded the text is replaced by a link plus a warning
// comment hidden in a strip item.
3361 if ( is_string( $text ) && !$this->incrementIncludeSize( 'post-expand', strlen( $text ) ) ) {
3362 # Error, oversize inclusion
3363 if ( $titleText !== false ) {
3364 # Make a working, properly escaped link if possible (T25588)
3365 $text = "[[:$titleText]]";
3366 } else {
3367 # This will probably not be a working link, but at least it may
3368 # provide some hint of where the problem is
3369 $originalTitle = preg_replace( '/^:/', '', $originalTitle );
3370 $text = "[[:$originalTitle]]";
3372 $text .= $this->insertStripItem( '<!-- WARNING: template omitted, '
3373 . 'post-expand include size too large -->' );
3374 $this->limitationWarn( 'post-expand-template-inclusion' );
3377 if ( $isLocalObj ) {
3378 $ret = [ 'object' => $text ];
3379 } else {
3380 $ret = [ 'text' => $text ];
3383 return $ret;
/**
 * Look up a parser function by name, invoke it, and normalise what it returns.
 *
 * The returned array always contains a boolean 'found', indicating whether
 * the parser function was found or not. It may also contain the following:
 *  text: string|object, resulting wikitext or PP DOM object
 *  isHTML: bool, $text is HTML, armour it against wikitext transformation
 *  isChildObj: bool, $text is a DOM node needing expansion in a child frame
 *  isLocalObj: bool, $text is a DOM node needing expansion in the current frame
 *  nowiki: bool, wiki markup in $text should be escaped
 *
 * @since 1.21
 * @param PPFrame $frame The current frame, contains template arguments
 * @param string $function Function name
 * @param array $args Arguments to the function
 * @return array
 */
public function callParserFunction( PPFrame $frame, $function, array $args = [] ) {
	# Resolve the synonym: the case-sensitive table is consulted first, then
	# the case-insensitive one using the content-language lowercase form.
	$hookId = $this->mFunctionSynonyms[1][$function] ?? null;
	if ( $hookId === null ) {
		$lowered = $this->contLang->lc( $function );
		$hookId = $this->mFunctionSynonyms[0][$lowered] ?? null;
		if ( $hookId === null ) {
			return [ 'found' => false ];
		}
	}

	[ $callback, $flags ] = $this->mFunctionHooks[$hookId];

	if ( $flags & self::SFH_OBJECT_ARGS ) {
		# Object-args hooks get ( parser, frame, list of nodes ). Argument 0
		# and anything that already is a PPNode are passed through untouched;
		# everything else is wrapped in a part node.
		$nodeArgs = [];
		foreach ( $args as $key => $arg ) {
			$nodeArgs[] = ( $arg instanceof PPNode || $key === 0 )
				? $arg
				: $this->mPreprocessor->newPartNodeArray( [ $key => $arg ] )->item( 0 );
		}
		$callArgs = [ $this, $frame, $nodeArgs ];
	} else {
		# Plain hooks get ( parser, arg0, arg1, ... ) as trimmed strings
		$callArgs = [ $this ];
		foreach ( $args as $key => $arg ) {
			if ( $arg instanceof PPNode ) {
				$plain = $frame->expand( $arg );
			} elseif ( is_int( $key ) && $key >= 0 ) {
				$plain = $arg;
			} else {
				$plain = "$key=$arg";
			}
			$callArgs[] = trim( $plain );
		}
	}

	$result = $callback( ...$callArgs );

	# Function hooks may return either a wikitext string or an array holding
	# the string plus flags. Bring both shapes into the array form this
	# method promises.
	if ( is_array( $result ) ) {
		if ( isset( $result[0] ) && !isset( $result['text'] ) ) {
			$result['text'] = $result[0];
		}
		unset( $result[0] );
		$result += [ 'found' => true ];
	} else {
		$result = [
			'found' => true,
			'text' => $result,
		];
	}

	# Unless the hook says otherwise, its text is not preprocessed here
	$skipPreprocess = $result['noparse'] ?? true;
	$preprocessFlags = $result['preprocessFlags'] ?? 0;
	if ( !$skipPreprocess ) {
		$result['text'] = $this->preprocessToDom( $result['text'], $preprocessFlags );
		$result['isChildObj'] = true;
	}

	return $result;
}
/**
 * Get the semi-parsed DOM representation of a template with a given title,
 * and its redirect destination title. Cached.
 *
 * Two per-parser caches are involved: mTplRedirCache maps a requested title
 * to the title it resolved to, and mTplDomCache maps a title key to the
 * preprocessed DOM (or to false when the template text could not be fetched).
 *
 * @param LinkTarget $title
 *
 * @return array Two-element list: the DOM (false if no text was available)
 *   and the title the text was fetched from
 * @since 1.12
 */
public function getTemplateDom( LinkTarget $title ) {
	$cacheTitle = $title;
	$titleKey = CacheKeyHelper::getKeyForPage( $title );

	# Follow a previously recorded redirect before consulting the DOM cache
	if ( isset( $this->mTplRedirCache[$titleKey] ) ) {
		[ $ns, $dbk ] = $this->mTplRedirCache[$titleKey];
		$title = Title::makeTitle( $ns, $dbk );
		$titleKey = CacheKeyHelper::getKeyForPage( $title );
	}
	# Note that a cached `false` also counts as a hit here
	if ( isset( $this->mTplDomCache[$titleKey] ) ) {
		return [ $this->mTplDomCache[$titleKey], $title ];
	}

	# Cache miss, go to the database
	[ $text, $title ] = $this->fetchTemplateAndTitle( $title );

	if ( $text === false ) {
		$this->mTplDomCache[$titleKey] = false;
		return [ false, $title ];
	}

	$dom = $this->preprocessToDom( $text, Preprocessor::DOM_FOR_INCLUSION );
	$this->mTplDomCache[$titleKey] = $dom;

	if ( !$title->isSamePageAs( $cacheTitle ) ) {
		$this->mTplRedirCache[ CacheKeyHelper::getKeyForPage( $cacheTitle ) ] =
			[ $title->getNamespace(), $title->getDBkey() ];
		# The redirect entry above sends later lookups of $cacheTitle to the
		# resolved title's key, so the DOM has to be reachable under that key
		# too. Otherwise the next request for the same redirected template
		# misses the DOM cache and fetches and preprocesses the text again.
		$this->mTplDomCache[ CacheKeyHelper::getKeyForPage( $title ) ] = $dom;
	}

	return [ $dom, $title ];
}
/**
 * Fetch the current revision of a given title as a RevisionRecord.
 *
 * Note that the revision (and even the title) may not exist in the database,
 * so everything contributing to the output of the parser should use this
 * method where possible, rather than getting the revisions themselves. The
 * result is kept in a 100-entry LRU cache keyed by page, so using this
 * method also benefits performance.
 *
 * This can return null if the callback returns false.
 *
 * @since 1.35
 * @param LinkTarget $link
 * @return RevisionRecord|null
 */
public function fetchCurrentRevisionRecordOfTitle( LinkTarget $link ) {
	$cacheKey = CacheKeyHelper::getKeyForPage( $link );
	if ( !$this->currentRevisionCache ) {
		// Created lazily on first use
		$this->currentRevisionCache = new MapCacheLRU( 100 );
	}

	if ( $this->currentRevisionCache->has( $cacheKey ) ) {
		return $this->currentRevisionCache->get( $cacheKey );
	}

	// The callback is handed a Title for hook signature compatibility
	$title = Title::newFromLinkTarget( $link );
	// Defaults to Parser::statelessFetchRevisionRecord()
	$fetchRevision = $this->mOptions->getCurrentRevisionRecordCallback();
	$revisionRecord = $fetchRevision( $title, $this );

	// Parser::statelessFetchRevisionRecord() can return false;
	// it is stored as null.
	$this->currentRevisionCache->set(
		$cacheKey,
		$revisionRecord === false ? null : $revisionRecord
	);

	return $this->currentRevisionCache->get( $cacheKey );
}
/**
 * Tell whether the current-revision cache already holds an entry for a page.
 *
 * @param LinkTarget $link
 * @return bool False when the cache has not been created yet or has no
 *   entry for the page
 * @since 1.34
 * @internal
 */
public function isCurrentRevisionOfTitleCached( LinkTarget $link ) {
	$key = CacheKeyHelper::getKeyForPage( $link );
	if ( !$this->currentRevisionCache ) {
		return false;
	}

	return $this->currentRevisionCache->has( $key );
}
/**
 * Wrapper around RevisionLookup::getKnownCurrentRevision
 *
 * @since 1.34
 * @param LinkTarget $link
 * @param Parser|null $parser Not used by this implementation
 * @return RevisionRecord|false False if missing
 */
public static function statelessFetchRevisionRecord( LinkTarget $link, $parser = null ) {
	// A link that already is a PageIdentity (probably a Title) is used as-is.
	// XXX: use RevisionStore::getPageForLink()!
	//      ...but get the info for the current revision at the same time?
	//      Should RevisionStore::getKnownCurrentRevision accept a LinkTarget?
	$page = ( $link instanceof PageIdentity )
		? $link
		: Title::newFromLinkTarget( $link );

	return MediaWikiServices::getInstance()
		->getRevisionLookup()
		->getKnownCurrentRevision( $page );
}
/**
 * Fetch the unparsed text of a template and register a reference to it.
 *
 * The text comes from the template callback configured in the parser
 * options. Every dependency it reports is registered on the parser output.
 *
 * @param LinkTarget $link
 * @return array ( string or false, Title )
 * @since 1.11
 */
public function fetchTemplateAndTitle( LinkTarget $link ) {
	// Use Title for compatibility with callbacks and return type
	$title = Title::newFromLinkTarget( $link );

	// Defaults to Parser::statelessFetchTemplate()
	$fetchTemplate = $this->mOptions->getTemplateCallback();
	$stuff = $fetchTemplate( $title, $this );
	$revRecord = $stuff['revision-record'] ?? null;

	$text = $stuff['text'];
	if ( is_string( $text ) ) {
		// We use U+007F DELETE to distinguish strip markers from regular text
		$text = strtr( $text, "\x7f", "?" );
	}

	$deps = $stuff['deps'] ?? [];
	foreach ( $deps as $dep ) {
		$this->mOutput->addTemplate( $dep['title'], $dep['page_id'], $dep['rev_id'] );

		$isSelfTransclusion = $dep['title']->equals( $this->getTitle() )
			&& $revRecord instanceof RevisionRecord;
		if ( !$isSelfTransclusion ) {
			continue;
		}

		// Self-transclusion; final result may change based on the new page version
		try {
			$sha1 = $revRecord->getSha1();
		} catch ( RevisionAccessException $e ) {
			$sha1 = null;
		}
		$this->setOutputFlag( ParserOutputFlags::VARY_REVISION_SHA1, 'Self transclusion' );
		$this->getOutput()->setRevisionUsedSha1Base36( $sha1 );
	}

	return [ $text, $stuff['finalTitle'] ?? $title ];
}
3641 * Static function to get a template
3642 * Can be overridden via ParserOptions::setTemplateCallback().
3644 * @param LinkTarget $page
3645 * @param Parser|false $parser
3647 * @return array
3648 * @since 1.12
// Review overview: resolves the wikitext for a template title, following
// redirects. The returned array has the keys
//   'revision-record' => the last RevisionRecord looked at, or false if none
//   'text'            => the wikitext to transclude, or false if none was obtained
//   'finalTitle'      => the title reached after redirect handling
//   'deps'            => list of [ 'title', 'page_id', 'rev_id' ] entries for the
//                        titles consulted; external titles are left out (T362221)
3650 public static function statelessFetchTemplate( $page, $parser = false ) {
3651 $title = Title::castFromLinkTarget( $page ); // for compatibility with return type
3652 $text = $skip = false;
3653 $finalTitle = $title;
3654 $deps = [];
3655 $revRecord = null;
3656 $contextTitle = $parser ? $parser->getTitle() : null;
3658 # Loop to fetch the article, with up to 2 redirects
3660 # Note that $title (including redirect targets) could be
3661 # external; we do allow hooks a chance to redirect the
3662 # external title to a local one (which might be useful), but
3663 # are careful not to add external titles to the dependency
3664 # list. (T362221)
3666 $services = MediaWikiServices::getInstance();
3667 $revLookup = $services->getRevisionLookup();
3668 $hookRunner = new HookRunner( $services->getHookContainer() );
// The loop also ends as soon as $title stops being an object, which happens
// when the fetched content has no redirect target.
3669 for ( $i = 0; $i < 3 && is_object( $title ); $i++ ) {
3670 # Give extensions a chance to select the revision instead
3671 $revRecord = null; # Assume no hook
3672 $origTitle = $title;
3673 $titleChanged = false;
3674 $hookRunner->onBeforeParserFetchTemplateRevisionRecord(
3675 # The $title is not a PageIdentity, as it may
3676 # contain fragments or even represent an attempt to transclude
3677 # a broken or otherwise-missing Title, which the hook may
3678 # fix up. Similarly, the $contextTitle may represent a special
3679 # page or other page which "exists" as a parsing context but
3680 # is not in the DB.
3681 $contextTitle, $title,
3682 $skip, $revRecord
// A hook that sets $skip ends the lookup with no text; the title is still
// recorded as a dependency unless it is external.
3685 if ( $skip ) {
3686 $text = false;
3687 if ( !$title->isExternal() ) {
3688 $deps[] = [
3689 'title' => $title,
3690 'page_id' => $title->getArticleID(),
3691 'rev_id' => null
3694 break;
3696 # Get the revision
3697 if ( !$revRecord ) {
3698 if ( $parser ) {
3699 $revRecord = $parser->fetchCurrentRevisionRecordOfTitle( $title );
3700 } else {
3701 $revRecord = $revLookup->getRevisionByTitle( $title );
3704 if ( $revRecord ) {
3705 # Update title, as $revRecord may have been changed by hook
3706 $title = Title::newFromLinkTarget(
3707 $revRecord->getPageAsLinkTarget()
3709 // Assuming title is not external if we've got a $revRecord
3710 $deps[] = [
3711 'title' => $title,
3712 'page_id' => $revRecord->getPageId(),
3713 'rev_id' => $revRecord->getId(),
3715 } elseif ( !$title->isExternal() ) {
3716 $deps[] = [
3717 'title' => $title,
3718 'page_id' => $title->getArticleID(),
3719 'rev_id' => null,
3722 if ( !$title->equals( $origTitle ) ) {
3723 # If we fetched a rev from a different title, register
3724 # the original title too...
3725 if ( !$origTitle->isExternal() ) {
3726 $deps[] = [
3727 'title' => $origTitle,
3728 'page_id' => $origTitle->getArticleID(),
3729 'rev_id' => null,
3732 $titleChanged = true;
3734 # If there is no current revision, there is no page
3735 if ( $revRecord === null || $revRecord->getId() === null ) {
3736 $linkCache = $services->getLinkCache();
3737 $linkCache->addBadLinkObj( $title );
3739 if ( $revRecord ) {
3740 if ( $titleChanged && !$revRecord->hasSlot( SlotRecord::MAIN ) ) {
3741 // We've added this (missing) title to the dependencies;
3742 // give the hook another chance to redirect it to an
3743 // actual page.
3744 $text = false;
3745 $finalTitle = $title;
3746 continue;
3748 if ( $revRecord->hasSlot( SlotRecord::MAIN ) ) { // T276476
3749 $content = $revRecord->getContent( SlotRecord::MAIN );
3750 $text = $content ? $content->getWikitextForTransclusion() : null;
3751 } else {
3752 $text = false;
// Both "no main slot" (false) and "no text" (null) end the lookup with
// $text normalised to false.
3755 if ( $text === false || $text === null ) {
3756 $text = false;
3757 break;
3759 } elseif ( $title->getNamespace() === NS_MEDIAWIKI ) {
// No revision, but the title is in the MediaWiki namespace: use the
// interface message of that name, if one exists.
3760 $message = wfMessage( $services->getContentLanguage()->
3761 lcfirst( $title->getText() ) )->inContentLanguage();
3762 if ( !$message->exists() ) {
3763 $text = false;
3764 break;
3766 $text = $message->plain();
3767 break;
3768 } else {
3769 break;
3771 // @phan-suppress-next-line PhanPossiblyUndeclaredVariable Only reached when content is set
3772 if ( !$content ) {
3773 break;
3775 # Redirect?
3776 $finalTitle = $title;
3777 $title = $content->getRedirectTarget();
3780 $retValues = [
3781 // previously, when this also returned a Revision object, we set
3782 // 'revision-record' to false instead of null if it was unavailable,
3783 // so that callers could use isset and then rely on the revision-record
3784 // key instead of the revision key, even if there was no corresponding
3785 // object - we continue to set to false here for backwards compatibility
3786 'revision-record' => $revRecord ?: false,
3787 'text' => $text,
3788 'finalTitle' => $finalTitle,
3789 'deps' => $deps
3791 return $retValues;
/**
 * Fetch a file and its title and register a reference to it.
 * If 'broken' is a key in $options then the file will appear as a broken thumbnail.
 *
 * The requested link is always registered as an image dependency. When the
 * file that was found carries a different title, that title is registered
 * as well and is the one returned.
 *
 * @param LinkTarget $link
 * @param array $options Array of options to RepoGroup::findFile
 * @return array ( File or false, Title of file )
 * @since 1.18
 */
public function fetchFileAndTitle( LinkTarget $link, array $options = [] ) {
	$file = $this->fetchFileNoRegister( $link, $options );

	if ( $file ) {
		$time = $file->getTimestamp();
		$sha1 = $file->getSha1();
	} else {
		$time = false;
		$sha1 = false;
	}

	# Register the file as a dependency...
	$this->mOutput->addImage( $link, $time, $sha1 );

	if ( $file ) {
		$resolved = $file->getTitle();
		if ( !$link->isSameLinkAs( $resolved ) ) {
			# Update fetched file title after resolving redirects, etc.
			$link = $resolved;
			$this->mOutput->addImage( $link, $time, $sha1 );
		}
	}

	// A Title is returned for return type compatibility
	return [ $file, Title::newFromLinkTarget( $link ) ];
}
/**
 * Helper function for fetchFileAndTitle.
 *
 * Also useful if you need to fetch a file but not use it yet,
 * for example to get the file's handler.
 *
 * @param LinkTarget $link
 * @param array $options Array of options to RepoGroup::findFile
 * @return File|false
 */
protected function fetchFileNoRegister( LinkTarget $link, array $options = [] ) {
	if ( isset( $options['broken'] ) ) {
		// broken thumbnail forced by hook
		return false;
	}

	$repoGroup = MediaWikiServices::getInstance()->getRepoGroup();

	return isset( $options['sha1'] )
		// get by (sha1,timestamp)
		? $repoGroup->findFileFromKey( $options['sha1'], $options )
		// get by (name,timestamp)
		: $repoGroup->findFile( $link, $options );
}
/**
 * Transclude an interwiki link.
 *
 * Fetches the page over HTTP with the given action and caches the response.
 * Returns a content-language error message instead when scary transcluding
 * is disabled, when the URL is longer than 1024 bytes, or when the fetch fails.
 *
 * @param LinkTarget $link
 * @param string $action Usually one of (raw, render)
 *
 * @return string
 * @internal
 */
public function interwikiTransclude( LinkTarget $link, $action ) {
	$scaryEnabled = $this->svcOptions->get( MainConfigNames::EnableScaryTranscluding );
	if ( !$scaryEnabled ) {
		return wfMessage( 'scarytranscludedisabled' )->inContentLanguage()->text();
	}

	// TODO: extract relevant functionality from Title
	$title = Title::newFromLinkTarget( $link );

	$url = $title->getFullURL( [ 'action' => $action ] );
	if ( strlen( $url ) > 1024 ) {
		return wfMessage( 'scarytranscludetoolong' )->inContentLanguage()->text();
	}

	$wikiId = $title->getTransWikiID(); // remote wiki ID or false
	$hasWikiId = $wikiId !== false;

	// Captured here because __METHOD__ inside the closure would name the closure
	$fname = __METHOD__;
	$cache = $this->wanCache;

	$cacheKey = $cache->makeGlobalKey(
		'interwiki-transclude',
		$hasWikiId ? $wikiId : 'external',
		sha1( $url )
	);
	$expiry = $this->svcOptions->get( MainConfigNames::TranscludeCacheExpiry );

	$fetchRemote = function ( $oldValue, &$ttl ) use ( $url, $fname, $cache ) {
		$req = $this->httpRequestFactory->create( $url, [], $fname );

		$status = $req->execute(); // Status object
		$ok = $status->isOK();
		if ( !$ok ) {
			// Failed fetches are not cached
			$ttl = $cache::TTL_UNCACHEABLE;
		} elseif ( $req->getResponseHeader( 'X-Database-Lagged' ) !== null ) {
			// The remote side reported lag: shorten the cache lifetime
			$ttl = min( $cache::TTL_LAGGED, $ttl );
		}

		return [
			'text' => $ok ? $req->getContent() : null,
			'code' => $req->getStatus()
		];
	};

	$cacheOptions = [
		'checkKeys' => $hasWikiId
			? [ $cache->makeGlobalKey( 'interwiki-page', $wikiId, $title->getDBkey() ) ]
			: [],
		'pcGroup' => 'interwiki-transclude:5',
		'pcTTL' => $cache::TTL_PROC_LONG
	];

	$data = $cache->getWithSetCallback( $cacheKey, $expiry, $fetchRemote, $cacheOptions );

	if ( is_string( $data['text'] ) ) {
		return $data['text'];
	}

	if ( $data['code'] != 200 ) {
		// Though we failed to fetch the content, this status is useless.
		return wfMessage( 'scarytranscludefailed-httpstatus' )
			->params( $url, $data['code'] )->inContentLanguage()->text();
	}

	return wfMessage( 'scarytranscludefailed', $url )->inContentLanguage()->text();
}
/**
 * Triple brace replacement -- used for template arguments
 *
 * @param array $piece Reads $piece['title'] (the argument name node) and
 *   $piece['parts'] (the parts after the name; the first one is the default)
 * @param PPFrame $frame
 *
 * @return array [ 'object' => ... ] when the supplied default or the
 *   rebuilt triple-brace source is returned, otherwise [ 'text' => ... ]
 * @internal
 */
public function argSubstitution( array $piece, PPFrame $frame ) {
	$parts = $piece['parts'];
	$nameWithSpaces = $frame->expand( $piece['title'] );
	$text = $frame->getArgument( trim( $nameWithSpaces ) );
	$object = false;

	if ( $text === false && $parts->getLength() > 0 ) {
		$defaultApplies = $this->ot['html']
			|| $this->ot['pre']
			|| ( $this->ot['wiki'] && $frame->isTemplate() );
		if ( $defaultApplies ) {
			# No match in frame, use the supplied default
			$object = $parts->item( 0 )->getChildren();
		}
	}

	# $text may still be false at this point; it is passed to strlen() as-is
	$withinLimit = $this->incrementIncludeSize( 'arg', strlen( $text ) );
	if ( !$withinLimit ) {
		$this->limitationWarn( 'post-expand-template-argument' );
	}

	if ( $text === false && $object === false ) {
		# No match anywhere
		$object = $frame->virtualBracketedImplode( '{{{', '|', '}}}', $nameWithSpaces, $parts );
	}

	if ( !$withinLimit ) {
		$text .= '<!-- WARNING: argument omitted, expansion size too large -->';
	}

	return $object !== false
		? [ 'object' => $object ]
		: [ 'text' => $text ];
}
/**
 * Forward the question to the Parsoid site configuration's method of the
 * same name.
 *
 * @param string $lowerTagName
 * @return bool
 */
public function tagNeedsNowikiStrippedInTagPF( string $lowerTagName ): bool {
	return MediaWikiServices::getInstance()
		->getParsoidSiteConfig()
		->tagNeedsNowikiStrippedInTagPF( $lowerTagName );
}
/**
 * Return the text to be used for a given extension tag.
 * This is the ghost of strip().
 *
 * For the HTML output type (or for nowiki when $processNowiki is set) the
 * registered tag hook is run and its output is stored in the strip state
 * behind a freshly generated marker. Otherwise the tag is serialized back
 * to its XML-like wikitext form.
 *
 * @param array $params Associative array of parameters:
 *     name       PPNode for the tag name
 *     attr       PPNode for unparsed text where tag attributes are thought to be
 *     attributes Optional associative array of parsed attributes
 *     inner      Contents of extension element
 *     noClose    Original text did not have a close tag
 * @param PPFrame $frame
 * @param bool $processNowiki Process nowiki tags by running the nowiki tag handler
 *     Normally, nowikis are only processed for the HTML output type. With this
 *     arg set to true, they are processed (and converted to a nowiki strip marker)
 *     for all output types.
 * @return string
 * @internal
 * @since 1.12
 */
public function extensionSubstitution( array $params, PPFrame $frame, bool $processNowiki = false ) {
	$errorPrefix = '<span class="error">';

	$name = $frame->expand( $params['name'] );
	if ( str_starts_with( $name, $errorPrefix ) ) {
		// Probably expansion depth or node count exceeded. Just punt the
		// error up.
		return $name;
	}

	// Parse attributes from XML-like wikitext syntax
	$attrText = isset( $params['attr'] ) ? $frame->expand( $params['attr'] ) : '';
	if ( str_starts_with( $attrText, $errorPrefix ) ) {
		// Same reasoning as for the tag name
		return $attrText;
	}

	// We can't safely check if the expansion for $content resulted in an
	// error, because the content could happen to be the error string
	// (T149622).
	$content = isset( $params['inner'] ) ? $frame->expand( $params['inner'] ) : null;

	$serial = sprintf( '%08X', $this->mMarkerIndex++ );
	$marker = self::MARKER_PREFIX . "-$name-" . $serial . self::MARKER_SUFFIX;

	$normalizedName = strtolower( $name );
	$isNowiki = ( $normalizedName === 'nowiki' );
	$markerType = $isNowiki ? 'nowiki' : 'general';

	$runTagHook = $this->ot['html'] || ( $processNowiki && $isNowiki );
	if ( $runTagHook ) {
		$attributes = Sanitizer::decodeTagAttributes( $attrText );
		// Merge in attributes passed via {{#tag:}} parser function
		if ( isset( $params['attributes'] ) ) {
			$attributes += $params['attributes'];
		}

		if ( !isset( $this->mTagHooks[$normalizedName] ) ) {
			$output = '<span class="error">Invalid tag extension name: ' .
				htmlspecialchars( $normalizedName ) . '</span>';
		} else {
			// Note that $content may be null here, for example if the
			// tag is self-closed.
			$output = call_user_func_array(
				$this->mTagHooks[$normalizedName],
				[ $content, $attributes, $this, $frame ]
			);
		}

		if ( is_array( $output ) ) {
			// The hook returned [ 0 => text, flag => value, ... ]; extract flags
			$flags = $output;
			$output = $flags[0];
			if ( isset( $flags['markerType'] ) ) {
				$markerType = $flags['markerType'];
			}
		}
	} else {
		// We're substituting a {{subst:#tag:}} parser function.
		// Convert the attributes it passed into the XML-like string.
		if ( isset( $params['attributes'] ) ) {
			foreach ( $params['attributes'] as $attrName => $attrValue ) {
				$plainValue = $this->getStripState()->unstripBoth( $attrValue );
				$attrText .= ' ' . htmlspecialchars( $attrName ) . '="' .
					htmlspecialchars( $plainValue, ENT_COMPAT ) . '"';
			}
		}

		if ( $content === null ) {
			$output = "<$name$attrText/>";
		} else {
			$close = $params['close'] === null ? '' : $frame->expand( $params['close'] );
			if ( str_starts_with( $close, $errorPrefix ) ) {
				// Same reasoning as for the tag name
				return $close;
			}
			$output = "<$name$attrText>$content$close";
		}

		if ( !$this->mStripExtTags ) {
			$fragmentSupport = $this->svcOptions->get( MainConfigNames::ParsoidFragmentSupport );
			$markerType = ( $fragmentSupport === 'v2' ) ? 'exttag' : 'none';
		}
	}

	if ( $markerType === 'none' ) {
		// No strip marker wanted: hand back the raw output
		return $output;
	}

	if ( $markerType === 'nowiki' ) {
		$this->mStripState->addNoWiki( $marker, $output );
	} elseif ( $markerType === 'general' ) {
		$this->mStripState->addGeneral( $marker, $output );
	} elseif ( $markerType === 'exttag' ) {
		$this->mStripState->addExtTag( $marker, $output );
	} else {
		throw new UnexpectedValueException( __METHOD__ . ': invalid marker type' );
	}

	return $marker;
}
/**
 * Increment an include size counter
 *
 * The counter is left untouched when adding $size would push it past
 * the maximum include size from the parser options.
 *
 * @param string $type The type of expansion
 * @param int $size The size of the text
 * @return bool False if this inclusion would take it over the maximum, true otherwise
 */
private function incrementIncludeSize( $type, $size ) {
	$newTotal = $this->mIncludeSizes[$type] + $size;
	if ( $newTotal > $this->mOptions->getMaxIncludeSize() ) {
		return false;
	}

	$this->mIncludeSizes[$type] = $newTotal;
	return true;
}
/**
 * Count one more expensive parser function call and compare the new
 * total against the limit from the parser options.
 *
 * @return bool False if the limit has been exceeded
 * @since 1.13
 */
public function incrementExpensiveFunctionCount() {
	// The counter is bumped even when the limit is already exceeded
	return ++$this->mExpensiveFunctionCount
		<= $this->mOptions->getExpensiveParserFunctionLimit();
}
/**
 * Strip double-underscore items like __NOGALLERY__ and __NOTOC__
 * Fills $this->mDoubleUnderscores, returns the modified text
 *
 * __TOC__ is handled first because its position matters: the first
 * occurrence becomes the TOC placeholder and any others are dropped.
 *
 * @param string $text
 * @return string
 */
private function handleDoubleUnderscore( $text ) {
	# The position of __TOC__ needs to be recorded
	$tocWord = $this->magicWordFactory->get( 'toc' );
	if ( $tocWord->match( $text ) ) {
		$this->mShowToc = true;
		$this->mForceTocPosition = true;

		# Set a placeholder for the first occurrence (at the end we'll fill
		# it in with the TOC), then blank out every remaining occurrence.
		$text = $tocWord->replace(
			'',
			$tocWord->replace( self::TOC_PLACEHOLDER, $text, 1 )
		);

		# For consistency with all other double-underscores
		# (see the page property loop at the end)
		$this->mOutput->setUnsortedPageProperty( 'toc' );
	}

	# Now match and remove the rest of them
	$this->mDoubleUnderscores = $this->magicWordFactory
		->getDoubleUnderscoreArray()
		->matchAndRemove( $text );

	if ( isset( $this->mDoubleUnderscores['nogallery'] ) ) {
		$this->mOutput->setNoGallery( true );
	}

	# An explicit __TOC__ wins over __NOTOC__
	if ( !$this->mForceTocPosition && isset( $this->mDoubleUnderscores['notoc'] ) ) {
		$this->mShowToc = false;
	}

	if (
		isset( $this->mDoubleUnderscores['hiddencat'] )
		&& $this->getTitle()->getNamespace() === NS_CATEGORY
	) {
		$this->addTrackingCategory( 'hidden-category-category' );
	}

	# (T10068) Allow control over whether robots index a page.
	# __INDEX__ always overrides __NOINDEX__, see T16899; that is why the
	# 'index' check comes second and overwrites the policy.
	if ( isset( $this->mDoubleUnderscores['noindex'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'noindex' );
		$this->addTrackingCategory( 'noindex-category' );
	}
	if ( isset( $this->mDoubleUnderscores['index'] ) && $this->getTitle()->canUseNoindex() ) {
		$this->mOutput->setIndexPolicy( 'index' );
		$this->addTrackingCategory( 'index-category' );
	}

	# Cache all double underscores in the database
	foreach ( array_keys( $this->mDoubleUnderscores ) as $propertyName ) {
		$this->mOutput->setUnsortedPageProperty( $propertyName );
	}

	return $text;
}
/**
 * Add a tracking category to the current parser output, for the page
 * currently being parsed.
 *
 * @see TrackingCategories::addTrackingCategory()
 * @param string $msg Message key
 * @return bool Whether the addition was successful
 * @since 1.19 method is public
 */
public function addTrackingCategory( $msg ) {
	$page = $this->getPage();

	return $this->trackingCategories->addTrackingCategory( $this->mOutput, $msg, $page );
}
/**
 * Helper function to correctly set the target language and title of
 * a message based on the parser context. Most uses of system messages
 * inside extensions or parser functions should use this method (instead
 * of directly using `wfMessage`) to ensure that the cache is not
 * polluted.
 *
 * @param string $msg The localization message key
 * @phpcs:ignore Generic.Files.LineLength
 * @param MessageParam|MessageSpecifier|string|int|float|list<MessageParam|MessageSpecifier|string|int|float> ...$params
 *   See Message::params()
 * @return Message
 * @since 1.40
 * @see https://phabricator.wikimedia.org/T202481
 */
public function msg( string $msg, ...$params ): Message {
	$message = wfMessage( $msg, ...$params );
	// Use the parser's target language ...
	$message = $message->inLanguage( $this->getTargetLanguage() );
	// ... and the page being parsed as the message context.
	return $message->page( $this->getPage() );
}
/**
 * Reduce the children of a heading DOM node to what may appear in a
 * table-of-contents line. The container is modified in place.
 *
 * - <style> and <script> elements are removed together with their content.
 * - Elements on the allow-list are kept but lose their attributes (except
 *   dir="rtl"/"ltr" on <span>); their children are cleaned recursively.
 * - Any other element is unwrapped: its children take its place and are
 *   then cleaned by the same loop.
 * - Comments are removed.
 *
 * @param Node $container Element or DocumentFragment to clean
 */
private function cleanUpTocLine( Node $container ) {
	'@phan-var Element|DocumentFragment $container'; // @var Element|DocumentFragment $container
	# Strip out HTML
	# Allowed tags are:
	# * <sup> and <sub> (T10393)
	# * <i> (T28375)
	# * <b> (r105284)
	# * <bdi> (T74884)
	# * <span dir="rtl"> and <span dir="ltr"> (T37167)
	# * <s> and <strike> (T35715)
	# * <q> (T251672)
	# We strip any parameter from accepted tags, except dir="rtl|ltr" from <span>,
	# to allow setting directionality in toc items.
	$allowedTags = [ 'span', 'sup', 'sub', 'bdi', 'i', 'b', 's', 'strike', 'q' ];
	$node = $container->firstChild;
	while ( $node !== null ) {
		// Remember the successor now; $node may be detached below.
		$next = $node->nextSibling;
		if ( $node instanceof Element ) {
			$nodeName = DOMCompat::nodeName( $node );
			if ( in_array( $nodeName, [ 'style', 'script' ], true ) ) {
				# Remove any <style> or <script> tags (T198618)
				DOMCompat::remove( $node );
			} elseif ( in_array( $nodeName, $allowedTags, true ) ) {
				// Keep tag, remove attributes.
				// Collect first, remove afterwards, so that the attribute
				// list is not modified while it is being iterated.
				$removeAttrs = [];
				foreach ( $node->attributes as $attr ) {
					if (
						$nodeName === 'span' && $attr->name === 'dir'
						&& ( $attr->value === 'rtl' || $attr->value === 'ltr' )
					) {
						// Keep <span dir="rtl"> and <span dir="ltr">
						continue;
					}
					$removeAttrs[] = $attr;
				}
				foreach ( $removeAttrs as $attr ) {
					$node->removeAttributeNode( $attr );
				}
				$this->cleanUpTocLine( $node );
				# Strip '<span></span>', which is the result from the above if
				# <span id="foo"></span> is used to produce an additional anchor
				# for a section.
				if ( $nodeName === 'span' && !$node->hasChildNodes() ) {
					DOMCompat::remove( $node );
				}
			} else {
				// Strip tag: hoist the children in front of the element and
				// continue with the first hoisted child so that they get
				// cleaned too. An element without children (e.g. <br>) has
				// nothing to hoist; in that case keep the saved next sibling,
				// otherwise the loop would stop and leave the remaining
				// siblings uncleaned.
				$next = $node->firstChild ?? $next;
				// phpcs:ignore Generic.CodeAnalysis.AssignmentInCondition.FoundInWhileCondition
				while ( $childNode = $node->firstChild ) {
					$node->parentNode->insertBefore( $childNode, $node );
				}
				DOMCompat::remove( $node );
			}
		} elseif ( $node instanceof Comment ) {
			// Extensions may add comments to headings;
			// these shouldn't appear in the ToC either.
			DOMCompat::remove( $node );
		}
		$node = $next;
	}
}
/**
 * This function accomplishes several tasks:
 * 1) Auto-number headings if that option is enabled
 * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page
 * 3) Add a Table of contents on the top for users who have enabled the option
 * 4) Auto-anchor headings
 *
 * It loops through all headlines, collects the necessary data, then splits up the
 * string and re-inserts the newly formatted headlines.
 *
 * Anchors are made unique case-insensitively by appending "_2", "_3", ...
 * to repeated ones. Section metadata is recorded on the parser output
 * only when $isMain is true and the "suppress TOC" option is off.
 *
 * @param string $text
 * @param string $origText Original, untouched wikitext
 * @param bool $isMain
 * @return string
 */
private function finalizeHeadings( $text, $origText, $isMain = true ) {
	# Inhibit editsection links if requested in the page
	if ( isset( $this->mDoubleUnderscores['noeditsection'] ) ) {
		$maybeShowEditLink = false;
	} else {
		$maybeShowEditLink = true; /* Actual presence will depend on post-cache transforms */
	}

	# Get all headlines for numbering them and adding funky stuff like [edit]
	# links - this is for later, but we need the number of headlines right now
	# NOTE: white space in headings have been trimmed in handleHeadings. They shouldn't
	# be trimmed here since whitespace in HTML headings is significant.
	$matches = [];
	$numMatches = preg_match_all(
		'/<H(?P<level>[1-6])(?P<attrib>.*?>)(?P<header>[\s\S]*?)<\/H[1-6] *>/i',
		$text,
		$matches
	);

	# if there are fewer than 4 headlines in the article, do not show TOC
	# unless it's been explicitly enabled.
	$enoughToc = $this->mShowToc &&
		( ( $numMatches >= 4 ) || $this->mForceTocPosition );

	# Allow user to stipulate that a page should have a "new section"
	# link added via __NEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['newsectionlink'] ) ) {
		$this->mOutput->setNewSection( true );
	}

	# Allow user to remove the "new section"
	# link via __NONEWSECTIONLINK__
	if ( isset( $this->mDoubleUnderscores['nonewsectionlink'] ) ) {
		$this->mOutput->setHideNewSection( true );
	}

	# if the string __FORCETOC__ (not case-sensitive) occurs in the HTML,
	# override above conditions and always show TOC above first header
	if ( isset( $this->mDoubleUnderscores['forcetoc'] ) ) {
		$this->mShowToc = true;
		$enoughToc = true;
	}

	# headline counter
	$headlineCount = 0;
	$haveTocEntries = false;

	# Ugh .. the TOC should have neat indentation levels which can be
	# passed to the skin functions. These are determined here
	$head = [];
	$level = 0;
	$tocData = new TOCData();
	$markerRegex = self::MARKER_PREFIX . "-h-(\d+)-" . self::MARKER_SUFFIX;
	$baseTitleText = $this->getTitle()->getPrefixedDBkey();
	# The original wikitext is preprocessed with the wiki output type so
	# that codepoint offsets of the sections can be computed below; the
	# previous output type is restored after the loop.
	$oldType = $this->mOutputType;
	$this->setOutputType( self::OT_WIKI );
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $origText );
	$node = $root->getFirstChild();
	$cpOffset = 0;
	# Lower-cased anchors that are already in use
	$refers = [];

	$headlines = $numMatches !== false ? $matches[3] : [];

	$maxTocLevel = $this->svcOptions->get( MainConfigNames::MaxTocLevel );
	$domDocument = DOMUtils::parseHTML( '' );
	foreach ( $headlines as $headline ) {
		$isTemplate = false;
		$titleText = false;
		$sectionIndex = false;
		$markerMatches = [];
		if ( preg_match( "/^$markerRegex/", $headline, $markerMatches ) ) {
			$serial = (int)$markerMatches[1];
			[ $titleText, $sectionIndex ] = $this->mHeadings[$serial];
			$isTemplate = ( $titleText != $baseTitleText );
			$headline = preg_replace( "/^$markerRegex\\s*/", "", $headline );
		}

		$sectionMetadata = SectionMetadata::fromLegacy( [
			"fromtitle" => $titleText ?: null,
			"index" => $sectionIndex === false
				? '' : ( ( $isTemplate ? 'T-' : '' ) . $sectionIndex )
		] );
		$tocData->addSection( $sectionMetadata );

		$oldLevel = $level;
		$level = (int)$matches[1][$headlineCount];
		$tocData->processHeading( $oldLevel, $level, $sectionMetadata );

		if ( $tocData->getCurrentTOCLevel() < $maxTocLevel ) {
			$haveTocEntries = true;
		}

		# The safe header is a version of the header text safe to use for links

		# Remove link placeholders by the link text.
		#     <!--LINK number-->
		# turns into
		#     link text with suffix
		# Do this before unstrip since link text can contain strip markers
		$safeHeadline = $this->replaceLinkHoldersText( $headline );

		# Avoid insertion of weird stuff like <math> by expanding the relevant sections
		$safeHeadline = $this->mStripState->unstripBoth( $safeHeadline );

		// Run Tidy to convert wikitext entities to HTML entities (T355386),
		// conveniently also giving us a way to handle French spaces (T324763)
		$safeHeadline = $this->tidy->tidy( $safeHeadline, [ Sanitizer::class, 'armorFrenchSpaces' ] );

		// Wrap the safe headline to parse the heading attributes
		// Literal HTML tags should be sanitized at this point
		// cleanUpTocLine will strip the headline tag
		$wrappedHeadline = "<h$level" . $matches['attrib'][$headlineCount] . $safeHeadline . "</h$level>";

		// Parse the heading contents as HTML. This makes it easier to strip out some HTML tags,
		// and ensures that we generate balanced HTML at the end (T218330).
		$headlineDom = DOMUtils::parseHTMLToFragment( $domDocument, $wrappedHeadline );

		// Extract a user defined id on the heading
		// A heading is expected as the first child and could be asserted
		$h = $headlineDom->firstChild;
		$headingId = ( $h instanceof Element && DOMUtils::isHeading( $h ) ) ?
			DOMCompat::getAttribute( $h, 'id' ) : null;

		$this->cleanUpTocLine( $headlineDom );

		// Serialize back to HTML
		$tocline = trim( DOMUtils::getFragmentInnerHTML( $headlineDom ) );

		# For the anchor, strip out HTML-y stuff period
		$safeHeadline = trim( $headlineDom->textContent );

		# Save headline for section edit hint before it's normalized for the link
		$headlineHint = htmlspecialchars( $safeHeadline );

		$safeHeadline = Sanitizer::normalizeSectionNameWhitespace( $safeHeadline );
		$safeHeadline = self::normalizeSectionName( $safeHeadline );

		if ( $headingId !== null && $headingId !== '' ) {
			$safeHeadline = $headingId;
		}

		$fallbackHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_FALLBACK );
		$linkAnchor = Sanitizer::escapeIdForLink( $safeHeadline );
		$safeHeadline = Sanitizer::escapeIdForAttribute( $safeHeadline, Sanitizer::ID_PRIMARY );
		if ( $fallbackHeadline === $safeHeadline ) {
			# No reason to have both (in fact, we can't)
			$fallbackHeadline = false;
		}

		# HTML IDs must be case-insensitively unique for IE compatibility (T12721).
		$arrayKey = strtolower( $safeHeadline );
		if ( $fallbackHeadline === false ) {
			$fallbackArrayKey = false;
		} else {
			$fallbackArrayKey = strtolower( $fallbackHeadline );
		}

		# Create the anchor for linking from the TOC to the section
		$anchor = $safeHeadline;
		$fallbackAnchor = $fallbackHeadline;
		if ( isset( $refers[$arrayKey] ) ) {
			for ( $i = 2; isset( $refers["{$arrayKey}_$i"] ); ++$i );
			$anchor .= "_$i";
			$linkAnchor .= "_$i";
			$refers["{$arrayKey}_$i"] = true;
		} else {
			$refers[$arrayKey] = true;
		}
		# Only register the fallback anchor when there is one. Using the
		# boolean false as an array key would store the entry under key 0,
		# which is the same key as the string "0", and would make a later
		# heading whose id is "0" look like a duplicate.
		if ( $fallbackHeadline !== false ) {
			if ( isset( $refers[$fallbackArrayKey] ) ) {
				for ( $i = 2; isset( $refers["{$fallbackArrayKey}_$i"] ); ++$i );
				$fallbackAnchor .= "_$i";
				$refers["{$fallbackArrayKey}_$i"] = true;
			} else {
				$refers[$fallbackArrayKey] = true;
			}
		}

		# Add the section to the section tree
		# Find the DOM node for this header
		$noOffset = ( $isTemplate || $sectionIndex === false );
		while ( $node && !$noOffset ) {
			if ( $node->getName() === 'h' ) {
				$bits = $node->splitHeading();
				if ( $bits['i'] == $sectionIndex ) {
					break;
				}
			}
			# Not the heading we are looking for: add the length (in
			# codepoints) of the skipped node's original text to the offset
			$cpOffset += mb_strlen(
				$this->mStripState->unstripBoth(
					$frame->expand( $node, PPFrame::RECOVER_ORIG )
				)
			);
			$node = $node->getNextSibling();
		}
		$sectionMetadata->line = $tocline;
		$sectionMetadata->codepointOffset = ( $noOffset ? null : $cpOffset );
		$sectionMetadata->anchor = $anchor;
		$sectionMetadata->linkAnchor = $linkAnchor;

		if ( $maybeShowEditLink && $sectionIndex !== false ) {
			// Output edit section links as markers with styles that can be customized by skins
			if ( $isTemplate ) {
				# Put a T flag in the section identifier, to indicate to extractSections()
				# that sections inside <includeonly> should be counted.
				$editsectionPage = $titleText;
				$editsectionSection = "T-$sectionIndex";
			} else {
				$editsectionPage = $this->getTitle()->getPrefixedText();
				$editsectionSection = $sectionIndex;
			}
			// Construct a pseudo-HTML tag as a placeholder for the section edit link. It is replaced in
			// MediaWiki\OutputTransform\Stages\HandleSectionLinks with the real link.
			//
			// Any HTML markup in the input has already been escaped,
			// so we don't have to worry about a user trying to input one of these markers directly.
			//
			// We put the page and section in attributes to stop the language converter from
			// converting them, but put the headline hint in tag content
			// because it is supposed to be able to convert that.
			$editlink = '<mw:editsection page="' . htmlspecialchars( $editsectionPage, ENT_COMPAT );
			$editlink .= '" section="' . htmlspecialchars( $editsectionSection, ENT_COMPAT ) . '"';
			$editlink .= '>' . $headlineHint . '</mw:editsection>';
		} else {
			$editlink = '';
		}
		// Reconstruct the original <h#> tag with added attributes. It is replaced in
		// MediaWiki\OutputTransform\Stages\HandleSectionLinks to add anchors and stuff.
		//
		// data-mw-... attributes are forbidden in Sanitizer::isReservedDataAttribute(),
		// so we don't have to worry about a user trying to input one of these markers directly.
		//
		// We put the anchors in attributes to stop the language converter from converting them.
		$head[$headlineCount] = "<h$level" . Html::expandAttributes( [
			'data-mw-anchor' => $anchor,
			'data-mw-fallback-anchor' => $fallbackAnchor,
		] ) . $matches['attrib'][$headlineCount] . $headline . $editlink . "</h$level>";

		$headlineCount++;
	}

	$this->setOutputType( $oldType );

	# Never ever show TOC if no headers (or suppressed)
	$suppressToc = $this->mOptions->getSuppressTOC();
	if ( !$haveTocEntries ) {
		$enoughToc = false;
	}
	$addTOCPlaceholder = false;

	if ( $isMain && !$suppressToc ) {
		// We generally output the section information via the API
		// even if there isn't "enough" of a ToC to merit showing
		// it -- but the "suppress TOC" parser option is set when
		// any sections that might be found aren't "really there"
		// (ie, JavaScript content that might have spurious === or
		// <h2>: T307691) so we will *not* set section information
		// in that case.
		$this->mOutput->setTOCData( $tocData );

		// T294950: Record a suggestion that the TOC should be shown.
		// Skins are free to ignore this suggestion and implement their
		// own criteria for showing/suppressing TOC (T318186).
		if ( $enoughToc ) {
			$this->mOutput->setOutputFlag( ParserOutputFlags::SHOW_TOC );
			if ( !$this->mForceTocPosition ) {
				$addTOCPlaceholder = true;
			}
		}

		// If __NOTOC__ is used on the page (and not overridden by
		// __TOC__ or __FORCETOC__) set the NO_TOC flag to tell
		// the skin that although the section information is
		// valid, it should perhaps not be presented as a Table Of
		// Contents.
		if ( !$this->mShowToc ) {
			$this->mOutput->setOutputFlag( ParserOutputFlags::NO_TOC );
		}
	}

	# split up and insert constructed headlines
	$blocks = preg_split( '/<h[1-6]\b[^>]*>.*?<\/h[1-6]>/is', $text );
	$i = 0;

	// build an array of document sections
	$sections = [];
	foreach ( $blocks as $block ) {
		// $head is zero-based, sections aren't.
		if ( empty( $head[$i - 1] ) ) {
			$sections[$i] = $block;
		} else {
			$sections[$i] = $head[$i - 1] . $block;
		}

		$i++;
	}

	if ( $addTOCPlaceholder ) {
		// append the TOC at the beginning
		// Top anchor now in skin
		// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset At least one element when enoughToc is true
		$sections[0] .= self::TOC_PLACEHOLDER . "\n";
	}

	return implode( '', $sections );
}
/**
 * Localize the TOC into the given target language; this includes
 * invoking the language converter on the headings.
 *
 * Every section's number is reformatted component by component (the
 * components are separated by '.') with the target language's number
 * formatting. The sections are updated in place.
 *
 * @param ?TOCData $tocData The Table of Contents
 * @param Language $lang The target language
 * @param ?ILanguageConverter $converter The target language converter, or
 *   null if language conversion is to be suppressed.
 * @internal
 */
private static function localizeTOC(
	?TOCData $tocData, Language $lang, ?ILanguageConverter $converter
) {
	if ( $tocData === null ) {
		// Nothing to do
		return;
	}

	$separator = '.';
	$formatPiece = static fn ( $piece ) => $lang->formatNum( $piece );

	foreach ( $tocData->getSections() as $section ) {
		// Localize heading
		if ( $converter ) {
			// T331316: conversion must not reset the language converter
			// state, hence convertTo() is called with false as its
			// last argument.
			$variant = $converter->getPreferredVariant();
			$section->line = $converter->convertTo( $section->line, $variant, false );
		}

		// Localize numbering
		$section->number = implode(
			$separator,
			array_map( $formatPiece, explode( $separator, $section->number ) )
		);
	}
}
/**
 * Transform wiki markup when saving a page by doing "\r\n" -> "\n"
 * conversion, substituting signatures, {{subst:}} templates, etc.
 *
 * The ParserPreSaveTransformComplete hook is run on the result, and the
 * parser's user is reset to null before returning.
 *
 * @param string $text The text to transform
 * @param PageReference $page the current article
 * @param UserIdentity $user the current user
 * @param ParserOptions $options Parsing options
 * @param bool $clearState Whether to clear the parser state first
 * @return string The altered wiki markup
 * @since 1.3
 */
public function preSaveTransform(
	$text,
	PageReference $page,
	UserIdentity $user,
	ParserOptions $options,
	$clearState = true
) {
	if ( $clearState ) {
		// NOTE(review): presumably the lock is released when this variable
		// goes out of scope, so it must stay assigned until we return.
		$magicScopeVariable = $this->lock();
	}
	$this->startParse( $page, $options, self::OT_WIKI, $clearState );
	$this->setUser( $user );

	// Strip U+0000 NULL (T159174), then normalize line endings.
	// We still normalize line endings (including trimming trailing whitespace) for
	// backwards-compatibility with other code that just calls PST, but this should already
	// be handled in TextContent subclasses
	$text = TextContent::normalizeLineEndings( str_replace( "\000", '', $text ) );

	if ( $options->getPreSaveTransform() ) {
		$text = $this->pstPass2( $text, $user );
	}

	// Unstrip, then trim trailing whitespace again, because the previous
	// steps can introduce it.
	$text = rtrim( $this->mStripState->unstripBoth( $text ) );

	$this->hookRunner->onParserPreSaveTransformComplete( $this, $text );

	# Reset
	$this->setUser( null );

	return $text;
}
/**
 * Pre-save transform helper function
 *
 * Performs, in this order: {{subst:}} expansion, signature (~~~, ~~~~,
 * ~~~~~) replacement, and the "pipe trick" link expansions.
 *
 * @param string $text
 * @param UserIdentity $user
 *
 * @return string
 */
private function pstPass2( $text, UserIdentity $user ) {
	# Note: This is the timestamp saved as hardcoded wikitext to the database, we use
	# $this->contLang here in order to give everyone the same signature and use the default one
	# rather than the one selected in each user's preferences. (see also T14815)
	$ts = $this->mOptions->getTimestamp();
	$timestamp = MWTimestamp::getLocalInstance( $ts );
	$ts = $timestamp->format( 'YmdHis' );
	$tzMsg = $timestamp->getTimezoneMessage()->inContentLanguage()->text();

	$d = $this->contLang->timeanddate( $ts, false, false ) . " ($tzMsg)";

	# Variable replacement
	# Because mOutputType is OT_WIKI, this will only process {{subst:xxx}} type tags
	$text = $this->replaceVariables( $text );

	# This works almost by chance, as the replaceVariables are done before the getUserSig(),
	# which may corrupt this parser instance via its wfMessage()->text() call-

	# Signatures
	if ( str_contains( $text, '~~~' ) ) {
		$sigText = $this->getUserSig( $user );
		# strtr() tries the longest key first, so five tildes are not
		# mistaken for a shorter form
		$text = strtr( $text, [
			'~~~~~' => $d,
			'~~~~' => "$sigText $d",
			'~~~' => $sigText
		] );
		# The main two signature forms used above are time-sensitive
		$this->setOutputFlag( ParserOutputFlags::USER_SIGNATURE, 'User signature detected' );
	}

	# Context links ("pipe tricks"): [[|name]] and [[name (context)|]]
	$tc = '[' . Title::legalChars() . ']';
	$nc = '[ _0-9A-Za-z\x80-\xff-]'; # Namespaces can use non-ascii!

	# The patterns below run without the /u modifier; the double-width
	# characters are therefore matched as their literal UTF-8 byte sequences.

	// [[ns:page (context)|]]
	$p1 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\))\\|]]/";
	// [[ns:page（context）|]] (double-width brackets, added in r40257)
	$p4 = "/\[\[(:?$nc+:|:|)($tc+?)( ?（$tc+）)\\|]]/";
	// [[ns:page (context), context|]] (using single, double-width or Arabic comma)
	$p3 = "/\[\[(:?$nc+:|:|)($tc+?)( ?\\($tc+\\)|)((?:, |，|، )$tc+|)\\|]]/";
	// [[|page]] (reverse pipe trick: add context from page title)
	$p2 = "/\[\[\\|($tc+)]]/";

	# try $p1 first, to turn "[[A, B (C)|]]" into "[[A, B (C)|A, B]]"
	$text = preg_replace( $p1, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p4, '[[\\1\\2\\3|\\2]]', $text );
	$text = preg_replace( $p3, '[[\\1\\2\\3\\4|\\2]]', $text );

	# Reverse pipe trick: borrow the namespace and the "(context)" or
	# ", context" part from the title of the page being saved
	$t = $this->getTitle()->getText();
	$m = [];
	if ( preg_match( "/^($nc+:|)$tc+?( \\($tc+\\))$/", $t, $m ) ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} elseif ( preg_match( "/^($nc+:|)$tc+?(, $tc+|)$/", $t, $m ) && "$m[1]$m[2]" != '' ) {
		$text = preg_replace( $p2, "[[$m[1]\\1$m[2]|\\1]]", $text );
	} else {
		# if there's no context, don't bother duplicating the title
		$text = preg_replace( $p2, '[[\\1]]', $text );
	}

	return $text;
}
/**
 * Fetch the user's signature text, if any, and normalize to
 * validated, ready-to-insert wikitext.
 * If you have pre-fetched the nickname or the fancySig option, you can
 * specify them here to save a database query.
 * Do not reuse this parser instance after calling getUserSig(),
 * as it may have changed.
 *
 * An empty, overlong or invalid custom signature falls back to the
 * default one, which links the user name via a 'signature*' message.
 *
 * @param UserIdentity $user
 * @param string|false $nickname Nickname to use or false to use user's default nickname
 * @param bool|null $fancySig whether the nickname is the complete signature
 *   or null to use default value
 * @return string
 * @since 1.6
 */
public function getUserSig( UserIdentity $user, $nickname = false, $fancySig = null ) {
	$username = $user->getName();

	# If not given, retrieve from the user object.
	if ( $nickname === false ) {
		$nickname = $this->userOptionsLookup->getOption( $user, 'nickname' );
	}
	if ( $fancySig === null ) {
		$fancySig = $this->userOptionsLookup->getBoolOption( $user, 'fancysig' );
	}

	if ( $nickname === null || $nickname === '' ) {
		// Empty value results in the default signature (even when fancysig is enabled)
		$nickname = $username;
	} elseif ( mb_strlen( $nickname ) > $this->svcOptions->get( MainConfigNames::MaxSigChars ) ) {
		$this->logger->debug( __METHOD__ . ": $username has overlong signature." );
		$nickname = $username;
	} elseif ( $fancySig !== false ) {
		# Sig. might contain markup; validate this
		$sigIsValid = $this->validateSig( $nickname ) !== false;

		# New validator, only consulted in 'disallow' mode
		$validationMode = $this->svcOptions->get( MainConfigNames::SignatureValidation );
		if ( $sigIsValid && $validationMode === 'disallow' ) {
			$validatorOptions = new ParserOptions(
				$this->mOptions->getUserIdentity(),
				$this->contLang
			);
			$sigValidator = $this->signatureValidatorFactory
				->newSignatureValidator( $user, null, $validatorOptions );
			$sigIsValid = !$sigValidator->validateSignature( $nickname );
		}

		if ( $sigIsValid ) {
			# Validated; clean up (if needed) and return it
			return $this->cleanSig( $nickname, true );
		}

		# Failed to validate; fall back to the default
		$this->logger->debug( __METHOD__ . ": $username has invalid signature." );
		$nickname = $username;
	}

	# Make sure nickname doesnt get a sig in a sig
	$nickname = self::cleanSigInSig( $nickname );

	# If we're still here, make it a link to the user page
	$escapedUsername = wfEscapeWikiText( $username );
	$escapedNickname = wfEscapeWikiText( $nickname );
	$messageKey = $this->userNameUtils->isTemp( $username )
		? 'signature-temp'
		: ( $user->isRegistered() ? 'signature' : 'signature-anon' );

	return wfMessage( $messageKey, $escapedUsername, $escapedNickname )
		->inContentLanguage()
		->page( $this->getPage() )
		->text();
}
/**
 * Check that the user's signature contains no bad XML
 *
 * The text is handed back unchanged when it is a well-formed XML fragment.
 *
 * @param string $text
 * @return string|false An expanded string, or false if invalid.
 * @since 1.6
 */
public function validateSig( $text ) {
	if ( !Xml::isWellFormedXmlFragment( $text ) ) {
		return false;
	}

	return $text;
}
/**
 * Clean up signature text
 *
 * 1) Strip 3, 4 or 5 tildes out of signatures @see cleanSigInSig
 * 2) Substitute all transclusions
 *
 * When the "clean signatures" parser option is off, the text is returned
 * untouched.
 *
 * @param string $text
 * @param bool $parsing Whether we're cleaning (preferences save) or parsing
 * @return string Signature text
 * @since 1.6
 */
public function cleanSig( $text, $parsing = false ) {
	$standalone = !$parsing;

	if ( $standalone ) {
		// Not called from within a parse: set up a parse of our own.
		// NOTE(review): presumably the lock is released when this variable
		// goes out of scope, so it must stay assigned until we return.
		$magicScopeVariable = $this->lock();
		$requestUserOptions = ParserOptions::newFromUser( RequestContext::getMain()->getUser() );
		$this->startParse( $this->mTitle, $requestUserOptions, self::OT_PREPROCESS, true );
	}

	# Option to disable this feature
	if ( !$this->mOptions->getCleanSignatures() ) {
		return $text;
	}

	# @todo FIXME: Regex doesn't respect extension tags or nowiki
	#  => Move this logic to braceSubstitution()
	# Prefix every "{{" that is not already followed by the subst magic
	# word with that word's first synonym.
	$substWord = $this->magicWordFactory->get( 'subst' );
	$notYetSubstRegex = '/\{\{(?!(?:' . $substWord->getBaseRegex() . '))/x' . $substWord->getRegexCase();
	$substOpening = '{{' . $substWord->getSynonym( 0 );
	$text = preg_replace( $notYetSubstRegex, $substOpening, $text );

	$text = self::cleanSigInSig( $text );

	# Expand the result through the preprocessor
	$dom = $this->preprocessToDom( $text );
	$text = $this->getPreprocessor()->newFrame()->expand( $dom );

	if ( $standalone ) {
		$text = $this->mStripState->unstripBoth( $text );
	}

	return $text;
}
/**
 * Remove every run of 3, 4 or 5 tildes from a signature, so that a
 * signature cannot itself contain signature markup.
 *
 * @param string $text
 * @return string Signature text with /~{3,5}/ removed
 * @since 1.7
 */
public static function cleanSigInSig( $text ) {
	return preg_replace( '/~{3,5}/', '', $text );
}
/**
 * Replace table of contents marker in parsed HTML.
 *
 * Used to remove or replace the marker. Callers should use this method
 * rather than touching Parser::TOC_PLACEHOLDER directly, since the
 * placeholder may gain additional attributes in the future which must be
 * ignored when the replacement is made.
 *
 * @since 1.38
 * @stable
 *
 * @param string $text Parsed HTML
 * @param string $toc HTML table of contents string, or else an empty
 *   string to remove the marker.
 * @return string Result HTML
 */
public static function replaceTableOfContentsMarker( $text, $toc ) {
	// Only the first marker receives $toc; every further marker is dropped.
	// Dropping the extras is not strictly necessary, but it keeps the pass
	// idempotent when it is run more than once on the same content (as long
	// as $toc itself inserts no markers). Because everything happens in one
	// callback-driven pass, any markers that $toc might contain (which, as
	// of 2024-05, it shouldn't be able to) are left intact instead of being
	// swept up by a second "remove leftovers" pass.
	$pending = true;
	$substitute = static function ( array $matches ) use ( &$pending, $toc ): string {
		if ( $pending ) {
			$pending = false;
			return $toc;
		}
		return '';
	};

	return preg_replace_callback( self::TOC_PLACEHOLDER_REGEX, $substitute, $text );
}
/**
 * Initialise the state that parse() normally sets up, so that external
 * code can call other members of this class with confidence.
 *
 * @param ?PageReference $page
 * @param ParserOptions $options
 * @param int $outputType One of the Parser::OT_… constants
 * @param bool $clearState
 * @param int|null $revId Revision ID to record; left untouched when null
 * @since 1.3
 */
public function startExternalParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true, $revId = null
) {
	$this->startParse( $page, $options, $outputType, $clearState );

	if ( $revId === null ) {
		return;
	}
	$this->mRevisionId = $revId;
}
/**
 * Store the page, options and output type for a parse, optionally
 * clearing the parser state afterwards.
 *
 * @param ?PageReference $page
 * @param ParserOptions $options
 * @param int $outputType
 * @param bool $clearState Whether to call clearState() once the above are set
 */
private function startParse( ?PageReference $page, ParserOptions $options,
	$outputType, $clearState = true
) {
	$this->setPage( $page );
	$this->mOptions = $options;
	$this->setOutputType( $outputType );

	if ( !$clearState ) {
		return;
	}
	$this->clearState();
}
/**
 * Wrapper for preprocess()
 *
 * A static flag guards against infinite recursion: while one call is in
 * progress, any nested call returns its $text untouched. The flag is
 * always released again, including when preprocess() throws.
 *
 * @param string $text The text to preprocess
 * @param ParserOptions $options
 * @param ?PageReference $page The context page; falls back to the
 *   parser's current title when null
 * @return string
 * @since 1.3
 */
public function transformMsg( $text, ParserOptions $options, ?PageReference $page = null ) {
	static $executing = false;

	# Guard against infinite recursion
	if ( $executing ) {
		return $text;
	}
	$executing = true;

	try {
		return $this->preprocess( $text, $page ?? $this->mTitle, $options );
	} finally {
		// Reset even on an exception; otherwise the guard would stay set for
		// the rest of the process and every later call would silently return
		// its input unprocessed.
		$executing = false;
	}
}
/**
 * Create an HTML-style tag, e.g. "<yourtag>special text</yourtag>"
 * The callback should have the following form:
 *    function myParserHook( $text, array $params, Parser $parser, PPFrame $frame ) { ... }
 *
 * Transform and return $text. Use $parser for any required context, e.g. use
 * $parser->getTitle() and $parser->getOptions() not $wgTitle or $wgOut->mParserOptions
 *
 * Hooks may return extended information by returning an array, of which the
 * first numbered element (index 0) must be the return string. The following other
 * keys are used:
 *  - 'markerType': used by some core tag hooks to override which strip
 *    array their results are placed in, 'general' or 'nowiki'.
 *
 * The tag name is lower-cased before it is registered.
 *
 * @param string $tag The tag to use, e.g. 'hook' for "<hook>"
 * @param callable $callback The callback to use for the tag
 * @return callable|null The old value of the mTagHooks array associated with the hook
 * @throws InvalidArgumentException If the tag name contains '<', '>', CR or LF
 * @since 1.3
 */
public function setHook( $tag, callable $callback ) {
	$tag = strtolower( $tag );
	if ( preg_match( '/[<>\r\n]/', $tag, $m ) ) {
		throw new InvalidArgumentException( "Invalid character {$m[0]} in setHook('$tag', ...) call" );
	}
	$oldVal = $this->mTagHooks[$tag] ?? null;
	$this->mTagHooks[$tag] = $callback;
	// Strict comparison: a loose in_array() treats numeric-looking names such
	// as '1e1' and '10' as equal, which would keep a new tag off the strip list.
	if ( !in_array( $tag, $this->mStripList, true ) ) {
		$this->mStripList[] = $tag;
	}

	return $oldVal;
}
/**
 * Forget every registered tag hook and empty the strip list.
 * @since 1.12
 */
public function clearTagHooks() {
	$this->mStripList = [];
	$this->mTagHooks = [];
}
/**
 * Create a function, e.g. {{sum:1|2|3}}
 * The callback function should have the form:
 *    function myParserFunction( &$parser, $arg1, $arg2, $arg3 ) { ... }
 *
 * Or with Parser::SFH_OBJECT_ARGS:
 *    function myParserFunction( $parser, $frame, $args ) { ... }
 *
 * The callback may either return the text result of the function, or an array with the text
 * in element 0, and a number of flags in the other elements. The names of the flags are
 * specified in the keys. Valid flags are:
 *   found     The text returned is valid, stop processing the template. This
 *             is on by default.
 *   nowiki    Wiki markup in the return value should be escaped
 *   isHTML    The returned text is HTML, armour it against wikitext transformation
 *
 * Every synonym of the magic word is registered: lower-cased through the
 * content language when the word is case-insensitive, prefixed with '#'
 * unless SFH_NO_HASH is given, and with one trailing colon removed.
 *
 * @param string $id The magic word ID
 * @param callable $callback The callback function (and object) to use
 * @param int $flags A combination of the following flags:
 *     Parser::SFH_NO_HASH      No leading hash, i.e. {{plural:...}} instead of {{#if:...}}
 *
 *     Parser::SFH_OBJECT_ARGS  Pass the template arguments as PPNode objects instead of text.
 *     This allows for conditional expansion of the parse tree, allowing you to eliminate dead
 *     branches and thus speed up parsing. It is also possible to analyse the parse tree of
 *     the arguments, and to control the way they are expanded.
 *
 *     The $frame parameter is a PPFrame. This can be used to produce expanded text from the
 *     arguments, for instance:
 *         $text = isset( $args[0] ) ? $frame->expand( $args[0] ) : '';
 *
 *     For technical reasons, $args[0] is pre-expanded and will be a string. This may change in
 *     future versions. Please call $frame->expand() on it anyway so that your code keeps
 *     working if/when this is changed.
 *
 *     If you want whitespace to be trimmed from $args, you need to do it yourself, post-
 *     expansion.
 *
 *     Please read the documentation in includes/parser/Preprocessor.php for more information
 *     about the methods available in PPFrame and PPNode.
 *
 * @return string|callable|null The old callback function for this name, if any
 * @since 1.6
 */
public function setFunctionHook( $id, callable $callback, $flags = 0 ) {
	$previous = $this->mFunctionHooks[$id][0] ?? null;
	$this->mFunctionHooks[$id] = [ $callback, $flags ];

	# Add to function cache
	$magicWord = $this->magicWordFactory->get( $id );
	$names = $magicWord->getSynonyms();
	// 0 = case-insensitive bucket, 1 = case-sensitive bucket
	$caseBucket = (int)$magicWord->isCaseSensitive();
	$wantsHash = !( $flags & self::SFH_NO_HASH );

	foreach ( $names as $name ) {
		if ( $caseBucket === 0 ) {
			$name = $this->contLang->lc( $name );
		}
		if ( $wantsHash ) {
			$name = '#' . $name;
		}
		# Remove trailing colon
		if ( substr( $name, -1, 1 ) === ':' ) {
			$name = substr( $name, 0, -1 );
		}
		$this->mFunctionSynonyms[$caseBucket][$name] = $id;
	}

	return $previous;
}
/**
 * List the identifiers of all registered function hooks.
 *
 * @return array Keys of the function hook registry
 * @since 1.8
 */
public function getFunctionHooks() {
	$hookIds = array_keys( $this->mFunctionHooks );

	return $hookIds;
}
/**
 * Replace "<!--LINK-->" link placeholders with actual links, in the buffer.
 * Placeholders created in Linker::link()
 *
 * Public shim that only forwards to replaceLinkHoldersPrivate().
 *
 * @param string &$text Buffer, modified in place
 * @deprecated since 1.34; should not be used outside parser class.
 */
public function replaceLinkHolders( &$text ) {
	$this->replaceLinkHoldersPrivate( $text );
}
/**
 * Replace "<!--LINK-->" link placeholders with actual links, in the buffer.
 * Placeholders created in Linker::link()
 *
 * The work is delegated to the parser's link holder array.
 *
 * @param string &$text Buffer, modified in place
 */
private function replaceLinkHoldersPrivate( &$text ) {
	$this->mLinkHolders->replace( $text );
}
/**
 * Replace "<!--LINK-->" link placeholders with the plain text of the
 * links (not HTML-formatted), using the parser's link holder array.
 *
 * @param string $text
 * @return string
 */
private function replaceLinkHoldersText( $text ) {
	$plain = $this->mLinkHolders->replaceText( $text );

	return $plain;
}
/**
 * Build the HTML of an image gallery from text with one image per line.
 *
 * Text labels may be given by using |-style alternative text. E.g.
 *   Image:one.jpg|The number "1"
 *   Image:tree.jpg|A tree
 * given as text will return the HTML of a gallery with two images,
 * labeled 'The number "1"' and 'A tree'.
 *
 * @param string $text Gallery body, one image per line
 * @param array $params Gallery attributes. 'mode', 'showfilename',
 *   'caption', 'perrow', 'widths' and 'heights' are read here; the whole
 *   array is also handed on to the gallery object.
 * @return string HTML
 * @internal
 */
public function renderImageGallery( $text, array $params ) {
	$mode = $params['mode'] ?? false;

	try {
		$ig = ImageGalleryBase::factory( $mode );
	} catch ( ImageGalleryClassNotFoundException $e ) {
		// If invalid type set, fallback to default.
		$ig = ImageGalleryBase::factory( false );
	}

	$ig->setContextTitle( $this->getTitle() );
	$ig->setShowBytes( false );
	$ig->setShowDimensions( false );
	$ig->setShowFilename( false );
	$ig->setParser( $this );
	$ig->setHideBadImages();
	$ig->setAttributes( Sanitizer::validateTagAttributes( $params, 'ul' ) );

	$ig->setShowFilename( isset( $params['showfilename'] ) );
	if ( isset( $params['caption'] ) ) {
		// NOTE: We aren't passing a frame here or below.  Frame info
		// is currently opaque to Parsoid, which acts on OT_PREPROCESS.
		// See T107332#4030581
		$ig->setCaptionHtml( $this->recursiveTagParse( $params['caption'] ) );
	}
	if ( isset( $params['perrow'] ) ) {
		$ig->setPerRow( $params['perrow'] );
	}
	if ( isset( $params['widths'] ) ) {
		$ig->setWidths( $params['widths'] );
	}
	if ( isset( $params['heights'] ) ) {
		$ig->setHeights( $params['heights'] );
	}
	$ig->setAdditionalOptions( $params );

	$enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );

	foreach ( StringUtils::explode( "\n", $text ) as $line ) {
		# match lines like these:
		# Image:someimage.jpg|This is some image
		$matches = [];
		preg_match( "/^([^|]+)(\\|(.*))?$/", $line, $matches );
		if ( $matches === [] ) {
			# Skip empty lines
			continue;
		}

		if ( strpos( $matches[0], '%' ) !== false ) {
			$matches[1] = rawurldecode( $matches[1] );
		}
		$title = Title::newFromText( $matches[1], NS_FILE );
		if ( $title === null ) {
			# Bogus title. Ignore these so we don't bomb out later.
			continue;
		}

		# We need to get what handler the file uses, to figure out parameters.
		# Note, a hook can override the file name, and chose an entirely different
		# file (which potentially could be of a different type and have different handler).
		$options = [];
		$descQuery = false;
		$this->hookRunner->onBeforeParserFetchFileAndTitle(
			// @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
			$this, $title, $options, $descQuery
		);
		# Don't register it now, as TraditionalImageGallery does that later.
		$file = $this->fetchFileNoRegister( $title, $options );
		$handler = $file ? $file->getHandler() : false;

		// Magic word ID => parameter name. Handler parameters are only
		// present when a handler exists.
		$paramMap = [
			'img_alt' => 'gallery-internal-alt',
			'img_link' => 'gallery-internal-link',
		];
		if ( $handler ) {
			$paramMap += $handler->getParamMap();
			// We don't want people to specify per-image widths.
			// Additionally the width parameter would need special casing anyhow.
			unset( $paramMap['img_width'] );
		}

		$mwArray = $this->magicWordFactory->newArray( array_keys( $paramMap ) );

		$label = '';
		$alt = null;
		$handlerOptions = [];
		$imageOptions = [];
		$hasAlt = false;

		if ( isset( $matches[3] ) ) {
			// look for an |alt= definition while trying not to break existing
			// captions with multiple pipes (|) in it, until a more sensible grammar
			// is defined for images in galleries

			// FIXME: Doing recursiveTagParse at this stage is a bit odd,
			// and different from makeImage.
			$matches[3] = $this->recursiveTagParse( $matches[3] );
			// Protect LanguageConverter markup
			$pieces = StringUtils::delimiterExplode(
				'-{', '}-',
				'|',
				$matches[3],
				true /* nested */
			);

			foreach ( $pieces as $piece ) {
				[ $magicName, $match ] = $mwArray->matchVariableStartToEnd( trim( $piece ) );
				if ( !$magicName ) {
					// Last pipe wins.
					$label = $piece;
					continue;
				}

				$paramName = $paramMap[$magicName];
				switch ( $paramName ) {
					case 'gallery-internal-alt':
						$hasAlt = true;
						$alt = $this->stripAltText( $match, false );
						break;
					case 'gallery-internal-link':
						$linkValue = $this->stripAltText( $match, false );
						if ( preg_match( '/^-{R\|(.*)}-$/', $linkValue ) ) {
							// Result of LanguageConverter::markNoConversion
							// invoked on an external link.
							$linkValue = substr( $linkValue, 4, -2 );
						}
						[ $type, $target ] = $this->parseLinkParameter( $linkValue );
						if ( $type ) {
							$imageOptions[$type] = ( $type === 'no-link' ) ? true : $target;
						}
						break;
					default:
						// Must be a handler specific parameter.
						if ( $handler->validateParam( $paramName, $match ) ) {
							$handlerOptions[$paramName] = $match;
						} else {
							// Guess not, consider it as caption.
							$this->logger->debug(
								"$piece failed parameter validation" );
							$label = $piece;
						}
				}
			}
		}

		// Match makeImage when !$hasVisibleCaption
		if ( !$hasAlt ) {
			if ( $label !== '' ) {
				$alt = $this->stripAltText( $label, false );
			} elseif ( $enableLegacyMediaDOM ) {
				$alt = $title->getText();
			}
		}
		$imageOptions['title'] = $this->stripAltText( $label, false );

		// Match makeImage which sets this unconditionally
		$handlerOptions['targetlang'] = $this->getTargetLanguage()->getCode();

		$ig->add(
			$title, $label, $alt, '', $handlerOptions,
			ImageGalleryBase::LOADING_DEFAULT, $imageOptions
		);
	}

	$html = $ig->toHTML();
	$this->hookRunner->onAfterParserFetchFileAndTitle( $this, $ig, $html );
	return $html;
}
/**
 * Get the image parameter map and matching magic word array for a media
 * handler, building and caching them per handler class on first use.
 *
 * @param MediaHandler|false $handler Handler of the file, or false when
 *   there is none (cached under the empty-string key)
 * @return array Two elements: the map of magic word ID => [ type, name ],
 *   and the magic word array built from that map's keys
 */
private function getImageParams( $handler ) {
	$cacheKey = $handler ? get_class( $handler ) : '';

	if ( !isset( $this->mImageParams[$cacheKey] ) ) {
		# Initialise static lists
		static $internalParamNames = [
			'horizAlign' => [ 'left', 'right', 'center', 'none' ],
			'vertAlign' => [ 'baseline', 'sub', 'super', 'top', 'text-top', 'middle',
				'bottom', 'text-bottom' ],
			'frame' => [ 'thumbnail', 'framed', 'frameless', 'border',
				// These parameters take arguments, so to ensure literals
				// have precedence, keep them listed last (T372935):
				'manualthumb', 'upright', 'link', 'alt', 'class' ],
		];
		static $internalParamMap;
		if ( !$internalParamMap ) {
			$internalParamMap = [];
			foreach ( $internalParamNames as $group => $members ) {
				foreach ( $members as $member ) {
					// For grep: img_left, img_right, img_center, img_none,
					// img_baseline, img_sub, img_super, img_top, img_text_top, img_middle,
					// img_bottom, img_text_bottom,
					// img_thumbnail, img_manualthumb, img_framed, img_frameless, img_upright,
					// img_border, img_link, img_alt, img_class
					$magicId = str_replace( '-', '_', "img_$member" );
					$internalParamMap[$magicId] = [ $group, $member ];
				}
			}
		}

		# Add handler params
		# Since img_width is one of these, it is important it is listed
		# *after* the literal parameter names above (T372935).
		$combined = $internalParamMap;
		if ( $handler ) {
			foreach ( $handler->getParamMap() as $magic => $paramName ) {
				$combined[$magic] = [ 'handler', $paramName ];
			}
		} else {
			// Parse the size for non-existent files.  See T273013
			$combined['img_width'] = [ 'handler', 'width' ];
		}

		$this->mImageParams[$cacheKey] = $combined;
		$this->mImageParamsMagicArray[$cacheKey] =
			$this->magicWordFactory->newArray( array_keys( $combined ) );
	}

	return [ $this->mImageParams[$cacheKey], $this->mImageParamsMagicArray[$cacheKey] ];
}
/**
 * Parse image options text and use it to make an image
 *
 * The options text is split on "|". Each part that matches (and validates
 * as) a known image parameter is recorded; the last part that does not
 * becomes the caption.
 *
 * @param LinkTarget $link Target of the file link
 * @param string $options Raw, pipe-separated options text
 * @param LinkHolderArray|false $holders Link holders used when stripping
 *   alt/link text; false falls back to the parser's own holders
 * @return string HTML
 * @since 1.5
 */
public function makeImage( LinkTarget $link, $options, $holders = false ) {
	# Check if the options text is of the form "options|alt text"
	# Options are:
	#  * thumbnail  make a thumbnail with enlarge-icon and caption, alignment depends on lang
	#  * left       no resizing, just left align. label is used for alt= only
	#  * right      same, but right aligned
	#  * none       same, but not aligned
	#  * ___px      scale to ___ pixels width, no aligning. e.g. use in taxobox
	#  * center     center the image
	#  * framed     Keep original image size, no magnify-button.
	#  * frameless  like 'thumb' but without a frame. Keeps user preferences for width
	#  * upright    reduce width for upright images, rounded to full __0 px
	#  * border     draw a 1px border around the image
	#  * alt        Text for HTML alt attribute (defaults to empty)
	#  * class      Set a class for img node
	#  * link       Set the target of the image link. Can be external, interwiki, or local
	# vertical-align values (no % or length right now):
	#  * baseline
	#  * sub
	#  * super
	#  * top
	#  * text-top
	#  * middle
	#  * bottom
	#  * text-bottom

	# Protect LanguageConverter markup when splitting into parts
	$parts = StringUtils::delimiterExplode(
		'-{', '}-', '|', $options, true /* allow nesting */
	);

	# Give extensions a chance to select the file revision for us
	$options = [];
	$descQuery = false;
	$title = Title::castFromLinkTarget( $link ); // hook signature compat
	$this->hookRunner->onBeforeParserFetchFileAndTitle(
		// @phan-suppress-next-line PhanTypeMismatchArgument Type mismatch on pass-by-ref args
		$this, $title, $options, $descQuery
	);
	# Fetch and register the file (file title may be different via hooks)
	[ $file, $link ] = $this->fetchFileAndTitle( $link, $options );

	# Get parameter map
	$handler = $file ? $file->getHandler() : false;

	[ $paramMap, $mwArray ] = $this->getImageParams( $handler );

	if ( !$file ) {
		$this->addTrackingCategory( 'broken-file-category' );
	}

	// Validator for the combined width/height parameter.
	// Parsoid applies data-(width|height) attributes to broken
	// media spans, for client use.  See T273013
	$validateDimension = static function ( $name, $value ) use ( $handler ) {
		return $handler
			? $handler->validateParam( $name, $value )
			: $value > 0;
	};

	# Process the input parameters
	$caption = '';
	$params = [ 'frame' => [], 'handler' => [],
		'horizAlign' => [], 'vertAlign' => [] ];
	$seenformat = false;
	foreach ( $parts as $part ) {
		[ $magicName, $value ] = $mwArray->matchVariableStartToEnd( trim( $part ) );
		$validated = false;
		if ( isset( $paramMap[$magicName] ) ) {
			[ $type, $paramName ] = $paramMap[$magicName];

			if ( $type === 'handler' && $paramName === 'width' ) {
				# Special case; width and height come in one variable together
				// The 'px' suffix has already been localized by img_width
				$dimensions = $this->parseWidthParam( $value, true, true );
				foreach ( [ 'width', 'height' ] as $dimension ) {
					if ( !isset( $dimensions[$dimension] ) ) {
						continue;
					}
					if ( $validateDimension( $dimension, $dimensions[$dimension] ) ) {
						$params[$type][$dimension] = $dimensions[$dimension];
						$validated = true;
					}
				}
				# else no validation -- T15436
			} else {
				if ( $type === 'handler' ) {
					# Validate handler parameter
					$validated = $handler->validateParam( $paramName, $value );
				} else {
					# Validate internal parameters
					switch ( $paramName ) {
						case 'alt':
						case 'class':
							$validated = true;
							$value = $this->stripAltText( $value, $holders );
							break;
						case 'link':
							// $paramName becomes the link type returned by
							// parseLinkParameter(), or null when invalid.
							[ $paramName, $value ] =
								$this->parseLinkParameter(
									$this->stripAltText( $value, $holders )
								);
							if ( $paramName ) {
								$validated = true;
								if ( $paramName === 'no-link' ) {
									$value = true;
								}
							}
							break;
						case 'manualthumb':
							# @todo FIXME: Possibly check validity here for
							# manualthumb? downstream behavior seems odd with
							# missing manual thumbs.
							$value = $this->stripAltText( $value, $holders );
							// fall through
						case 'frameless':
						case 'framed':
						case 'thumbnail':
							// use first appearing option, discard others.
							$validated = !$seenformat;
							$seenformat = true;
							break;
						default:
							# Most other things appear to be empty or numeric...
							$validated = ( $value === false || is_numeric( trim( $value ) ) );
					}
				}

				if ( $validated ) {
					$params[$type][$paramName] = $value;
				}
			}
		}
		if ( !$validated ) {
			// Last unrecognised or invalid part wins as the caption
			$caption = $part;
		}
	}

	# Process alignment parameters
	if ( $params['horizAlign'] !== [] ) {
		$params['frame']['align'] = array_key_first( $params['horizAlign'] );
	}
	if ( $params['vertAlign'] !== [] ) {
		$params['frame']['valign'] = array_key_first( $params['vertAlign'] );
	}

	$params['frame']['caption'] = $caption;

	$enableLegacyMediaDOM = $this->svcOptions->get( MainConfigNames::ParserEnableLegacyMediaDOM );

	# Will the image be presented in a frame, with the caption below?
	// @phan-suppress-next-line PhanImpossibleCondition
	$hasVisibleCaption = isset( $params['frame']['framed'] )
		// @phan-suppress-next-line PhanImpossibleCondition
		|| isset( $params['frame']['thumbnail'] )
		// @phan-suppress-next-line PhanImpossibleCondition
		|| isset( $params['frame']['manualthumb'] );

	# In the old days, [[Image:Foo|text...]] would set alt text.  Later it
	# came to also set the caption, ordinary text after the image -- which
	# makes no sense, because that just repeats the text multiple times in
	# screen readers.  It *also* came to set the title attribute.
	# Now that we have an alt attribute, we should not set the alt text to
	# equal the caption: that's worse than useless, it just repeats the
	# text.  This is the framed/thumbnail case.  If there's no caption, we
	# use the unnamed parameter for alt text as well, just for the time be-
	# ing, if the unnamed param is set and the alt param is not.
	# For the future, we need to figure out if we want to tweak this more,
	# e.g., introducing a title= parameter for the title; ignoring the un-
	# named parameter entirely for images without a caption; adding an ex-
	# plicit caption= parameter and preserving the old magic unnamed para-
	# meter for BC; ...
	if ( $hasVisibleCaption ) {
		if (
			// @phan-suppress-next-line PhanImpossibleCondition
			$caption === '' && !isset( $params['frame']['alt'] ) &&
			$enableLegacyMediaDOM
		) {
			# No caption or alt text, add the filename as the alt text so
			# that screen readers at least get some description of the image
			$params['frame']['alt'] = $link->getText();
		}
		# Do not set $params['frame']['title'] because tooltips are unnecessary
		# for framed images, the caption is visible
	} else {
		// @phan-suppress-next-line PhanImpossibleCondition
		if ( !isset( $params['frame']['alt'] ) ) {
			# No alt text, use the "caption" for the alt text
			if ( $caption !== '' ) {
				$params['frame']['alt'] = $this->stripAltText( $caption, $holders );
			} elseif ( $enableLegacyMediaDOM ) {
				# No caption, fall back to using the filename for the
				# alt text
				$params['frame']['alt'] = $link->getText();
			}
		}
		# Use the "caption" for the tooltip text
		$params['frame']['title'] = $this->stripAltText( $caption, $holders );
	}
	$params['handler']['targetlang'] = $this->getTargetLanguage()->getCode();

	// hook signature compat again, $link may have changed
	$title = Title::castFromLinkTarget( $link );
	$this->hookRunner->onParserMakeImageParams( $title, $file, $params, $this );

	# Linker does the rest
	$time = $options['time'] ?? false;
	// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset
	$ret = Linker::makeImageLink( $this, $link, $file, $params['frame'], $params['handler'],
		$time, $descQuery, $this->mOptions->getThumbSize() );

	# Give the handler a chance to modify the parser object
	if ( $handler ) {
		$handler->parserTransformHook( $this, $file );
	}
	if ( $file ) {
		$this->modifyImageHtml( $file, $params, $ret );
	}

	return $ret;
}
/**
 * Parse the value of 'link' parameter in image syntax (`[[File:Foo.jpg|link=<value>]]`).
 *
 * Adds an entry to appropriate link tables.
 *
 * @since 1.32
 * @param string $value
 * @return array of `[ type, target ]`, where:
 *   - `type` is one of:
 *     - `null`: Given value is not a valid link target, use default
 *     - `'no-link'`: Given value is empty, do not generate a link
 *     - `'link-url'`: Given value is a valid external link
 *     - `'link-title'`: Given value is a valid internal link
 *   - `target` is:
 *     - When `type` is `null` or `'no-link'`: `false`
 *     - When `type` is `'link-url'`: URL string corresponding to given value
 *     - When `type` is `'link-title'`: Title object corresponding to given value
 */
private function parseLinkParameter( $value ) {
	$chars = self::EXT_LINK_URL_CLASS;
	$addr = self::EXT_LINK_ADDR;
	$prots = $this->urlUtils->validProtocols();

	if ( $value === '' ) {
		return [ 'no-link', false ];
	}

	if ( preg_match( "/^((?i)$prots)/", $value ) ) {
		// Starts with a known protocol: it is either a fully valid
		// external URL or not a usable target at all.
		if ( !preg_match( "/^((?i)$prots)$addr$chars*$/u", $value ) ) {
			return [ null, false ];
		}
		$this->mOutput->addExternalLink( $value );
		return [ 'link-url', $value ];
	}

	// Percent-decode link arguments for consistency with wikilink
	// handling (T216003#7836261).
	//
	// There's slight concern here though.  The |link= option supports
	// two formats, link=Test%22test vs link=[[Test%22test]], both of
	// which are about to be decoded.
	//
	// In the former case, the decoding here is straightforward and
	// desirable.
	//
	// In the latter case, there's a potential for double decoding,
	// because the wikilink syntax has a higher precedence and has
	// already been parsed as a link before we get here.  $value
	// has had stripAltText() called on it, which in turn calls
	// replaceLinkHoldersText() on the link.  So, the text we're
	// getting at this point has already been percent decoded.
	//
	// The problematic case is if %25 is in the title, since that
	// decodes to %, which could combine with trailing characters.
	// However, % is not a valid link title character, so it would
	// not parse as a link and the string we received here would
	// still contain the encoded %25.
	//
	// Hence, double decoded is not an issue.  See the test,
	// "Should not double decode the link option"
	if ( strpos( $value, '%' ) !== false ) {
		$value = rawurldecode( $value );
	}

	$linkTitle = Title::newFromText( $value );
	if ( !$linkTitle ) {
		return [ null, false ];
	}
	$this->mOutput->addLink( $linkTitle );
	return [ 'link-title', $linkTitle ];
}
/**
 * Let hooks modify image thumbnail HTML by running the
 * ParserModifyImageHTML hook.
 *
 * @param File $file
 * @param array $params
 * @param string &$html Thumbnail HTML, passed on to the hook by reference
 */
public function modifyImageHtml( File $file, array $params, string &$html ) {
	$this->hookRunner->onParserModifyImageHTML( $this, $file, $params, $html );
}
/**
 * Reduce caption wikitext output to plain text suitable for an
 * attribute such as alt or title: link placeholders are replaced by
 * their text, strip markers are expanded, "&" is encoded in front of
 * legacy semicolon-less entities, and all tags are stripped.
 *
 * @param string $caption
 * @param LinkHolderArray|false $holders Link holders to resolve
 *   placeholders with; false uses the parser's own link holders
 * @return string
 */
private function stripAltText( $caption, $holders ) {
	# Strip bad stuff out of the title (tooltip).  We can't just use
	# replaceLinkHoldersText() here, because if this function is called
	# from handleInternalLinks2(), mLinkHolders won't be up-to-date.
	$tooltip = $holders
		? $holders->replaceText( $caption )
		: $this->replaceLinkHoldersText( $caption );

	# make sure there are no placeholders in thumbnail attributes
	# that are later expanded to html- so expand them now and
	# remove the tags
	$tooltip = $this->mStripState->unstripBoth( $tooltip );
	# Compatibility hack!  In HTML certain entity references not terminated
	# by a semicolon are decoded (but not if we're in an attribute; that's
	# how link URLs get away without properly escaping & in queries).
	# But wikitext has always required semicolon-termination of entities,
	# so encode & where needed to avoid decode of semicolon-less entities.
	# See T209236 and
	# https://www.w3.org/TR/html5/syntax.html#named-character-references
	# T210437 discusses moving this workaround to Sanitizer::stripAllTags.
	$tooltip = preg_replace( "/
		&                  # 1. entity prefix
		(?=                # 2. followed by:
		(?:                # a. one of the legacy semicolon-less named entities
			A(?:Elig|MP|acute|circ|grave|ring|tilde|uml)|
			C(?:OPY|cedil)|E(?:TH|acute|circ|grave|uml)|
			GT|I(?:acute|circ|grave|uml)|LT|Ntilde|
			O(?:acute|circ|grave|slash|tilde|uml)|QUOT|REG|THORN|
			U(?:acute|circ|grave|uml)|Yacute|
			a(?:acute|c(?:irc|ute)|elig|grave|mp|ring|tilde|uml)|brvbar|
			c(?:cedil|edil|urren)|cent(?!erdot;)|copy(?!sr;)|deg|
			divide(?!ontimes;)|e(?:acute|circ|grave|th|uml)|
			frac(?:1(?:2|4)|34)|
			gt(?!c(?:c|ir)|dot|lPar|quest|r(?:a(?:pprox|rr)|dot|eq(?:less|qless)|less|sim);)|
			i(?:acute|circ|excl|grave|quest|uml)|laquo|
			lt(?!c(?:c|ir)|dot|hree|imes|larr|quest|r(?:Par|i(?:e|f|));)|
			m(?:acr|i(?:cro|ddot))|n(?:bsp|tilde)|
			not(?!in(?:E|dot|v(?:a|b|c)|)|ni(?:v(?:a|b|c)|);)|
			o(?:acute|circ|grave|rd(?:f|m)|slash|tilde|uml)|
			p(?:lusmn|ound)|para(?!llel;)|quot|r(?:aquo|eg)|
			s(?:ect|hy|up(?:1|2|3)|zlig)|thorn|times(?!b(?:ar|)|d;)|
			u(?:acute|circ|grave|ml|uml)|y(?:acute|en|uml)
		)
		(?:[^;]|$))    # b. and not followed by a semicolon
		# S = study, for efficiency
		/Sx", '&amp;', $tooltip );

	return Sanitizer::stripAllTags( $tooltip );
}
/**
 * Callback from the Sanitizer for expanding items found in HTML attribute
 * values, so they can be safely tested and escaped.
 *
 * Emits a deprecation notice, then replaces variables in $text and
 * expands its strip markers. $text is updated in place and also returned.
 *
 * @param string &$text
 * @param PPFrame|false $frame
 * @return string
 * @deprecated since 1.35, internal callback should not have been public
 */
public function attributeStripCallback( &$text, $frame = false ) {
	wfDeprecated( __METHOD__, '1.35' );

	// Two separate assignments so the by-reference argument reflects each
	// stage as it completes.
	$text = $this->replaceVariables( $text, $frame );
	$text = $this->mStripState->unstripBoth( $text );

	return $text;
}
/**
 * Accessor for the keys of the tag hook table ($this->mTagHooks).
 *
 * @return array List of array keys of $this->mTagHooks
 * @since 1.6
 */
public function getTags(): array {
	$tagNames = array_keys( $this->mTagHooks );
	return $tagNames;
}
/**
 * Accessor for the parser function synonym table ($this->mFunctionSynonyms).
 *
 * @since 1.32
 * @return array{0:array<string,string>,1:array<string,string>}
 */
public function getFunctionSynonyms() {
	$synonyms = $this->mFunctionSynonyms;
	return $synonyms;
}
/**
 * Return whatever $this->urlUtils->validProtocols() reports.
 *
 * @since 1.32
 * @return string
 */
public function getUrlProtocols() {
	$protocols = $this->urlUtils->validProtocols();
	return $protocols;
}
/**
 * Split wikitext into sections and either extract one section or replace it.
 *
 * External callers should use getSection() and replaceSection() instead.
 *
 * @param string $text Page wikitext
 * @param string|int $sectionId A section identifier string of the form:
 *   "<flag1> - <flag2> - ... - <section number>"
 *
 *   The only recognised flag is "T": the section number was derived during a
 *   template inclusion parse, i.e. this is a template section edit link.
 *   Without flags it is an ordinary section edit link. The flag is needed to
 *   avoid a section numbering mismatch when a section is enclosed by
 *   "<includeonly>" (T8563).
 *
 *   Section number 0 selects the text before the first heading; any other
 *   number selects that section together with its lower-level subsections.
 *   If the section is not found, $mode=get returns $newText and
 *   $mode=replace returns $text.
 *
 *   Section 0 is always considered to exist, even if it only contains the
 *   empty string. If $text is the empty string and section 0 is replaced,
 *   $newText is returned.
 *
 * @param string $mode One of "get" or "replace"
 * @param string|false $newText Replacement text for the section ("replace"),
 *   or the value returned when the section is missing ("get")
 * @param PageReference|null $page
 * @return string For "get", the extracted section text;
 *   for "replace", the whole page with the section replaced.
 */
private function extractSections( $text, $sectionId, $mode, $newText, ?PageReference $page = null ) {
	$magicScopeVariable = $this->lock();
	$this->startParse(
		$page,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$frame = $this->getPreprocessor()->newFrame();
	$outText = '';

	# Decode the section identifier.
	# It is either a magic string such as 'new' (which should be treated as 0),
	# or a numbered section ID in the format of "T-<section index>".
	# The last component is explicitly coerced into a number. (T323373)
	$idParts = explode( '-', $sectionId );
	$sectionIndex = (int)array_pop( $idParts );
	# Whatever precedes the number is a list of flags; only "T" is recognised
	$flags = in_array( 'T', $idParts, true ) ? Preprocessor::DOM_FOR_INCLUSION : 0;

	# Empty input: only sections 0 and T-0 exist in an empty document
	if ( strval( $text ) === '' ) {
		if ( $mode === 'get' ) {
			return $sectionIndex === 0 ? '' : $newText;
		}

		return $sectionIndex === 0 ? $newText : $text;
	}

	# Preprocess the text
	$root = $this->preprocessToDom( $text, $flags );

	# <h> nodes indicate section breaks.
	# They can only occur at the top level, so iterating the root's children finds them all.
	$node = $root->getFirstChild();

	# Locate the first node of the target section
	if ( $sectionIndex === 0 ) {
		# Section zero doesn't nest, level=big
		$targetLevel = 1000;
	} else {
		for ( ; $node; $node = $node->getNextSibling() ) {
			if ( $node->getName() === 'h' ) {
				$heading = $node->splitHeading();
				if ( $heading['i'] == $sectionIndex ) {
					$targetLevel = $heading['level'];
					break;
				}
			}
			# In replace mode everything before the target is copied through unchanged
			if ( $mode === 'replace' ) {
				$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
			}
		}
	}

	if ( !$node ) {
		# Target section not found
		return $mode === 'get' ? $newText : $text;
	}

	# Walk to the end of the section, including nested sections
	do {
		if ( $node->getName() === 'h' ) {
			$heading = $node->splitHeading();
			$curLevel = $heading['level'];
			// @phan-suppress-next-line PhanPossiblyUndeclaredVariable False positive
			if ( $heading['i'] != $sectionIndex && $curLevel <= $targetLevel ) {
				break;
			}
		}
		# In get mode the section's own nodes make up the result
		if ( $mode === 'get' ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
		$node = $node->getNextSibling();
	} while ( $node );

	# Replace mode only: emit the replacement, then the rest of the page
	if ( $mode === 'replace' ) {
		# Add two newlines on -- trailing whitespace in $newText is conventionally
		# stripped by the editor, so both newlines are needed to restore the
		# paragraph gap. Nothing is added when there is no replacement text.
		if ( $newText != "" ) {
			$outText .= $newText . "\n\n";
		}

		for ( ; $node; $node = $node->getNextSibling() ) {
			$outText .= $frame->expand( $node, PPFrame::RECOVER_ORIG );
		}
	}

	# Re-insert stripped tags and drop trailing whitespace
	return rtrim( $this->mStripState->unstripBoth( $outText ) );
}
/**
 * Return the text of the section identified by $sectionId.
 *
 * A section is text under a heading like == Heading == or \<h1\>Heading\</h1\>, or
 * the text before any such heading (section 0).
 *
 * If a section contains subsections, these are also returned.
 *
 * @param string $text Text to look in
 * @param string|int $sectionId Section identifier as a number or string
 *   (e.g. 0, 1 or 'T-1').
 * @param string|false $defaultText Value to return if the section is not found
 *
 * @return string Text of the requested section
 * @since 1.7
 */
public function getSection( $text, $sectionId, $defaultText = '' ) {
	$sectionText = $this->extractSections( $text, $sectionId, 'get', $defaultText );
	return $sectionText;
}
/**
 * Return $oldText with the content of the section identified by $sectionId
 * replaced by $newText. If the target section does not exist, $oldText is
 * returned unchanged.
 *
 * @param string $oldText Former text of the article
 * @param string|int $sectionId Section identifier as a number or string
 *   (e.g. 0, 1 or 'T-1').
 * @param string|false $newText Replacing text
 *
 * @return string Modified text
 * @since 1.7
 */
public function replaceSection( $oldText, $sectionId, $newText ) {
	$updatedText = $this->extractSections( $oldText, $sectionId, 'replace', $newText );
	return $updatedText;
}
/**
 * Get an array of preprocessor section information.
 *
 * Preprocessor sections are those identified by wikitext-style syntax, not
 * HTML-style syntax. Templates are not expanded, so sections created by
 * templates or parser functions are not included. This is the same
 * definition of a section as used by section editing, but not the same as
 * TOC generation.
 *
 * These sections are typically smaller than those acted on by getSection()
 * and replaceSection() since they are not nested. Section nesting could be
 * reconstructed from the heading levels.
 *
 * The return value is a list of associative arrays, one per section, each
 * with the following keys:
 *
 *  - index: An integer identifying the section.
 *  - level: The heading level, e.g. 1 for <h1>. For the section before the
 *    first heading, this will be 0.
 *  - offset: The byte offset within the wikitext at which the section starts
 *  - heading: The wikitext for the header which introduces the section,
 *    including equals signs. For the section before the first heading, this
 *    will be an empty string.
 *  - text: The complete text of the section.
 *
 * @param string $text
 * @return array[]
 * @internal
 */
public function getFlatSectionInfo( $text ) {
	$magicScopeVariable = $this->lock();
	$this->startParse(
		null,
		ParserOptions::newFromUser( RequestContext::getMain()->getUser() ),
		self::OT_PLAIN,
		true
	);
	$frame = $this->getPreprocessor()->newFrame();
	$root = $this->preprocessToDom( $text, 0 );

	// The list always starts with the (possibly empty) section before the first heading.
	$sections = [
		[
			'index' => 0,
			'level' => 0,
			'offset' => 0,
			'heading' => '',
			'text' => ''
		]
	];
	$last = 0;
	$byteOffset = 0;

	for ( $node = $root->getFirstChild(); $node; $node = $node->getNextSibling() ) {
		$wikitext = $frame->expand( $node, PPFrame::RECOVER_ORIG );
		if ( $node->getName() !== 'h' ) {
			// Ordinary content belongs to the section currently being built.
			$sections[$last]['text'] .= $wikitext;
		} else {
			// A heading node opens a new section; its text starts with the heading itself.
			$heading = $node->splitHeading();
			$sections[] = [
				'index' => $heading['i'],
				'level' => $heading['level'],
				'offset' => $byteOffset,
				'heading' => $wikitext,
				'text' => $wikitext
			];
			$last++;
		}
		$byteOffset += strlen( $wikitext );
	}

	return $sections;
}
/**
 * Get the ID of the revision being parsed ($this->mRevisionId).
 *
 * The return value is one of:
 *  - a) Positive: a specific revision ID (current or old)
 *  - b) Zero: the revision ID is specified by getCurrentRevisionRecordCallback()
 *  - c) Null: the parse is for preview mode and there is no revision
 *
 * @return int|null
 * @since 1.13
 */
public function getRevisionId() {
	$revisionId = $this->mRevisionId;
	return $revisionId;
}
/**
 * Get the revision record object for $this->mRevisionId.
 *
 * The result is cached in $this->mRevisionRecordObject once it has been
 * resolved to a non-null value.
 *
 * @return RevisionRecord|null Either a RevisionRecord object or null
 * @since 1.35
 */
public function getRevisionRecordObject() {
	if ( $this->mRevisionRecordObject ) {
		return $this->mRevisionRecordObject;
	}

	// NOTE: the callback is consulted even if mRevisionId is null. This is
	// useful when parsing a revision that has not yet been saved. However, a
	// saved revision returned while in preview mode has to be ignored, see below.
	// NOTE: the callback may be used to inject an OLD revision that was
	// already loaded, so "current" is a bit of a misnomer. It can't simply
	// be skipped when mRevisionId is set.
	$rev = call_user_func(
		$this->mOptions->getCurrentRevisionRecordCallback(),
		$this->getTitle(),
		$this
	);

	// Two cases yield null:
	//  1. The callback returned a falsy value. It returns `false` (not null)
	//     to indicate that the revision is missing (see for example
	//     Parser::statelessFetchRevisionRecord(), the default callback),
	//     whereas this API expects `null`. (T251952)
	//  2. Preview mode (mRevisionId is null) and the callback returned an
	//     existing revision. That is probably the page's current revision,
	//     which is not wanted here. The callback still has to be called so
	//     that an unsaved revision can be injected, e.g. for
	//     self-transclusion previews.
	if ( !$rev || ( $this->mRevisionId === null && $rev->getId() ) ) {
		return null;
	}

	// If the parse is for a new revision, the callback should already have
	// been set to force the object, and it should match mRevisionId.
	// Otherwise fall back to fetching by mRevisionId.
	if ( $this->mRevisionId && $rev->getId() != $this->mRevisionId ) {
		$lookup = MediaWikiServices::getInstance()->getRevisionLookup();
		$rev = $lookup->getRevisionById( $this->mRevisionId );
	}

	$this->mRevisionRecordObject = $rev;

	return $this->mRevisionRecordObject;
}
/**
 * Get the timestamp associated with the current revision, adjusted for
 * the default server-local timestamp.
 *
 * The adjusted value is cached in $this->mRevisionTimestamp; the unadjusted
 * value is reported to the ParserOutput via setRevisionTimestampUsed().
 *
 * @return string TS_MW timestamp
 * @since 1.9
 */
public function getRevisionTimestamp() {
	if ( $this->mRevisionTimestamp === null ) {
		# Prefer the revision's own timestamp, else the one from the parser options
		$revision = $this->getRevisionRecordObject();
		if ( $revision && $revision->getTimestamp() ) {
			$rawTimestamp = $revision->getTimestamp();
		} else {
			$rawTimestamp = $this->mOptions->getTimestamp();
		}
		$this->mOutput->setRevisionTimestampUsed( $rawTimestamp ); // unadjusted time zone

		# The cryptic '' timezone parameter selects the site-default timezone
		# offset instead of the user settings. The value is saved into the
		# parser cache, served to other users, and potentially even used
		# inside links and such, so it must be consistent for all visitors.
		$this->mRevisionTimestamp = $this->contLang->userAdjust( $rawTimestamp, '' );
	}

	return $this->mRevisionTimestamp;
}
/**
 * Get the name of the user that edited the last revision.
 *
 * The name is cached in $this->mRevisionUser. It stays null when there is
 * no revision user and the parse is neither in the 'wiki' output type nor a
 * preview.
 *
 * @return string|null User name
 * @since 1.15
 */
public function getRevisionUser(): ?string {
	if ( $this->mRevisionUser !== null ) {
		return $this->mRevisionUser;
	}

	$revision = $this->getRevisionRecordObject();

	# If this template is subst: the revision id will be blank,
	# so just use the current user's name.
	if ( $revision && $revision->getUser() ) {
		$this->mRevisionUser = $revision->getUser()->getName();
	} elseif ( $this->ot['wiki'] || $this->mOptions->getIsPreview() ) {
		$this->mRevisionUser = $this->getUserIdentity()->getName();
	}
	# In every other case $this->mRevisionUser is deliberately left null.

	return $this->mRevisionUser;
}
/**
 * Get the size of the revision.
 *
 * The value is cached in $this->mRevisionSize.
 *
 * @return int|null Revision size
 * @since 1.22
 */
public function getRevisionSize() {
	if ( $this->mRevisionSize === null ) {
		$revision = $this->getRevisionRecordObject();

		# If this variable is subst: the revision id will be blank, so use
		# the parser input size, because the substitution itself will change
		# the size.
		$this->mRevisionSize = $revision ? $revision->getSize() : $this->mInputSize;
	}

	return $this->mRevisionSize;
}
/**
 * Accessor for the 'defaultsort' page property.
 * Returns the empty string if none is set.
 *
 * This value is treated as a prefix, so the empty string is equivalent to
 * sorting by page name.
 *
 * @return string
 * @since 1.9
 * @deprecated since 1.38, use
 * $parser->getOutput()->getPageProperty('defaultsort') ?? ''
 */
public function getDefaultSort() {
	wfDeprecated( __METHOD__, '1.38' );
	$sortKey = $this->mOutput->getPageProperty( 'defaultsort' );
	return $sortKey ?? '';
}
/**
 * Turn already-stripped heading text into a section name.
 *
 * Applies, in order: Sanitizer::normalizeSectionNameWhitespace(),
 * Sanitizer::decodeCharReferences() and self::normalizeSectionName().
 *
 * @param string $text
 * @return string
 */
private static function getSectionNameFromStrippedText( $text ) {
	return self::normalizeSectionName(
		Sanitizer::decodeCharReferences(
			Sanitizer::normalizeSectionNameWhitespace( $text )
		)
	);
}
/**
 * Build a '#'-prefixed anchor from a section name, escaped with
 * Sanitizer::escapeIdForLink().
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private static function makeAnchor( $sectionName ) {
	$id = Sanitizer::escapeIdForLink( $sectionName );
	return '#' . $id;
}
/**
 * Build a '#'-prefixed anchor from a section name, using the fallback ID
 * escaping when the second entry of the FragmentMode setting is 'legacy',
 * and the regular link escaping otherwise.
 *
 * @param string $sectionName
 * @return string Anchor (starting with '#')
 */
private function makeLegacyAnchor( $sectionName ) {
	$fragmentMode = $this->svcOptions->get( MainConfigNames::FragmentMode );
	$legacyFallback = ( $fragmentMode[1] ?? null ) === 'legacy';

	// ForAttribute() and ForLink() are the same for legacy encoding
	$id = $legacyFallback
		? Sanitizer::escapeIdForAttribute( $sectionName, Sanitizer::ID_FALLBACK )
		: Sanitizer::escapeIdForLink( $sectionName );

	return "#$id";
}
/**
 * Try to guess the section anchor name based on a wikitext fragment
 * presumably extracted from a heading, for example "Header" from
 * "== Header ==".
 *
 * @param string $text
 * @return string Anchor (starting with '#')
 * @since 1.12
 */
public function guessSectionNameFromWikiText( $text ) {
	# Wikitext links break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );
	return self::makeAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
/**
 * Same as guessSectionNameFromWikiText(), but produces legacy anchors
 * instead, if possible. For use in redirects, since various versions
 * of Microsoft browsers interpret Location: headers as something other
 * than UTF-8, resulting in breakage.
 *
 * @param string $text The section name
 * @return string Anchor (starting with '#')
 * @since 1.17
 */
public function guessLegacySectionNameFromWikiText( $text ) {
	# Wikitext links break the anchor, so strip the markup first
	$stripped = $this->stripSectionName( $text );
	return $this->makeLegacyAnchor( self::getSectionNameFromStrippedText( $stripped ) );
}
/**
 * Like guessSectionNameFromWikiText(), but takes already-stripped text as input.
 *
 * @param string $text Section name (plain text)
 * @return string Anchor (starting with '#')
 * @since 1.31
 */
public static function guessSectionNameFromStrippedText( $text ) {
	return self::makeAnchor( self::getSectionNameFromStrippedText( $text ) );
}
/**
 * Apply the same normalization as code making links to this section would.
 *
 * The text is parsed as the fragment of a title string ("#$text"); if the
 * title parser rejects it as malformed, the text is returned unchanged.
 *
 * @param string $text
 * @return string
 */
private static function normalizeSectionName( $text ) {
	# T90902: ensure the same normalization is applied for IDs as to links
	/** @var MediaWikiTitleCodec $codec */
	$codec = MediaWikiServices::getInstance()->getTitleParser();
	'@phan-var MediaWikiTitleCodec $codec';

	try {
		return $codec->splitTitleString( "#$text" )['fragment'];
	} catch ( MalformedTitleException $ex ) {
		return $text;
	}
}
/**
 * Strip wikitext from a string for use in a section anchor.
 *
 * Removes wikitext markup and leaves only the resulting text, e.g.
 * [[User:WikiSysop|Sysop]] becomes "Sysop" and [[User:WikiSysop]] becomes
 * "User:WikiSysop". This is intended to create valid section anchors by
 * mimicking the output of the parser when headings are parsed.
 *
 * @param string $text Text string to be stripped of wikitext
 *   for use in a Section anchor
 * @return string Filtered text string
 * @since 1.12
 */
public function stripSectionName( $text ) {
	# Internal links: piped links keep their label, plain links keep their target
	$text = preg_replace( '/\[\[:?([^[|]+)\|([^[]+)\]\]/', '$2', $text );
	$text = preg_replace( '/\[\[:?([^[]+)\|?\]\]/', '$1', $text );

	# External links keep their label.
	# @todo FIXME: Not tolerant to blank link text
	# I.E. [https://www.mediawiki.org] will render as [1] or something depending
	# on how many empty links there are on the page - need to figure that out.
	$protocols = $this->urlUtils->validProtocols();
	$externalLinkPattern = '/\[(?i:' . $protocols . ')([^ ]+?) ([^[]+)\]/';
	$text = preg_replace( $externalLinkPattern, '$2', $text );

	# Parse wikitext quotes (italics & bold)
	$text = $this->doQuotes( $text );

	# Strip HTML tags
	return StringUtils::delimiterReplace( '<', '>', '', $text );
}
/**
 * Call a callback function on all regions of the given text that are not
 * inside strip markers, and replace those regions with the return value
 * of the callback. For example, with input:
 *
 *  aaa<MARKER>bbb
 *
 * the callback is called twice, with 'aaa' and 'bbb', and each of those two
 * strings is replaced with the value the callback returned for it. Markers
 * themselves are copied through unchanged. If a marker prefix has no
 * matching suffix, everything from that prefix onwards is copied unchanged.
 *
 * @param string $s
 * @param callable $callback
 *
 * @return string
 * @internal
 * @since 1.12
 */
public function markerSkipCallback( $s, callable $callback ) {
	$out = '';
	$length = strlen( $s );
	$suffixLength = strlen( self::MARKER_SUFFIX );
	$i = 0;

	while ( $i < $length ) {
		$markerStart = strpos( $s, self::MARKER_PREFIX, $i );
		if ( $markerStart === false ) {
			// No further markers: the whole remainder goes through the callback.
			$out .= call_user_func( $callback, substr( $s, $i ) );
			break;
		}

		// Text between the current position and the marker goes through the callback.
		$out .= call_user_func( $callback, substr( $s, $i, $markerStart - $i ) );

		$markerEnd = strpos( $s, self::MARKER_SUFFIX, $markerStart );
		if ( $markerEnd === false ) {
			// Unterminated marker: copy the rest verbatim.
			$out .= substr( $s, $markerStart );
			break;
		}

		// Copy the complete marker verbatim and continue right after it.
		$markerEnd += $suffixLength;
		$out .= substr( $s, $markerStart, $markerEnd - $markerStart );
		$i = $markerEnd;
	}

	return $out;
}
/**
 * Remove any strip markers found in the given text.
 *
 * Delegates to killMarkers() on $this->mStripState.
 *
 * @param string $text
 * @return string
 * @since 1.19
 */
public function killMarkers( $text ) {
	$withoutMarkers = $this->mStripState->killMarkers( $text );
	return $withoutMarkers;
}
/**
 * Parse a width param of imagelink like 300px or 200x300px.
 *
 * Adds the 'double-px-category' tracking category when a literal "px"
 * suffix is still present after the optional localized-suffix stripping.
 *
 * @param string $value
 * @param bool $parseHeight Whether the "<width>x<height>" form is accepted
 * @param bool $localized Defaults to false; set to true if the $value
 *   has already been matched against `img_width` to localize the `px`
 *   suffix.
 *
 * @return array Empty if $value is empty or matches neither form; otherwise
 *   has a 'width' key and, for the "<width>x<height>" form, a 'height' key
 * @since 1.20
 * @internal
 */
public function parseWidthParam( $value, $parseHeight = true, bool $localized = false ) {
	if ( $value === '' ) {
		return [];
	}

	if ( !$localized ) {
		// Strip a localized 'px' suffix (T374311)
		$mwArray = $this->magicWordFactory->newArray( [ 'img_width' ] );
		[ $magicWord, $newValue ] = $mwArray->matchVariableStartToEnd( $value );
		if ( $magicWord ) {
			$value = $newValue;
		}
	}

	$dimensions = [];
	$hasPxSuffix = false;
	$m = [];

	# (T15500) In both cases (width/height and width only),
	# permit trailing "px" for backward compatibility.
	if ( $parseHeight && preg_match( '/^([0-9]*)x([0-9]*)\s*(px)?\s*$/', $value, $m ) ) {
		$dimensions['width'] = intval( $m[1] );
		$dimensions['height'] = intval( $m[2] );
		$hasPxSuffix = (bool)( $m[3] ?? false );
	} elseif ( preg_match( '/^([0-9]*)\s*(px)?\s*$/', $value, $m ) ) {
		$dimensions['width'] = intval( $m[1] );
		$hasPxSuffix = (bool)( $m[2] ?? false );
	}

	if ( $hasPxSuffix ) {
		$this->addTrackingCategory( 'double-px-category' );
	}

	return $dimensions;
}
/**
 * Lock the current instance of the parser.
 *
 * This is meant to stop someone from calling the parser
 * recursively and messing up all the strip state.
 *
 * @return ScopedCallback The lock will be released once the return value goes out of scope.
 */
protected function lock() {
	if ( $this->mInParse ) {
		throw new LogicException( "Parser state cleared while parsing. "
			. "Did you call Parser::parse recursively? Lock is held by: " . $this->mInParse );
	}

	// Remember the backtrace of the lock owner, so that a later conflicting
	// lock attempt can report it for easier debugging
	$this->mInParse = ( new RuntimeException )->getTraceAsString();

	// Running the returned callback clears the lock again
	return new ScopedCallback( function () {
		$this->mInParse = false;
	} );
}
/**
 * Will entry points such as parse() throw an exception due to the parser
 * already being active?
 *
 * @since 1.39
 * @return bool True while $this->mInParse holds a truthy value
 */
public function isLocked() {
	return $this->mInParse ? true : false;
}
/**
 * Strip outer <p></p> tag from the HTML source of a single paragraph.
 *
 * Returns original HTML if the <p/> tag has any attributes, if there's no wrapping <p/> tag,
 * or if there is more than one <p/> tag in the input HTML.
 *
 * @param string $html
 * @return string
 * @since 1.24
 */
public static function stripOuterParagraph( $html ) {
	$match = [];
	if ( !preg_match( '/^<p>(.*)\n?<\/p>\n?$/sU', $html, $match ) ) {
		// Not wrapped in a bare <p>...</p>
		return $html;
	}

	// A closing tag inside the captured content means there is more than one paragraph
	$inner = $match[1];
	return strpos( $inner, '</p>' ) === false ? $inner : $html;
}
/**
 * Add HTML tags marking the parts of a page title, to be displayed in the first heading of the page.
 *
 * The namespace and separator spans are emitted only when $nsText is not
 * the empty string; the main-text span is always emitted.
 *
 * @internal
 * @since 1.39
 * @param string|HtmlArmor $nsText
 * @param string|HtmlArmor $nsSeparator
 * @param string|HtmlArmor $mainText
 * @return string HTML
 */
public static function formatPageTitle( $nsText, $nsSeparator, $mainText ): string {
	if ( $nsText === '' ) {
		$prefix = '';
	} else {
		$prefix = '<span class="mw-page-title-namespace">' . HtmlArmor::getHtml( $nsText ) . '</span>'
			. '<span class="mw-page-title-separator">' . HtmlArmor::getHtml( $nsSeparator ) . '</span>';
	}

	return $prefix . '<span class="mw-page-title-main">' . HtmlArmor::getHtml( $mainText ) . '</span>';
}
/**
 * Strip everything but the <body> from the provided string.
 *
 * Removes everything up to and including the first opening <body ...> tag,
 * and a trailing "</body></html>" (with optional whitespace after each tag).
 * Text without those tags is returned unchanged.
 *
 * @param string $text
 * @return string
 * @unstable
 */
public static function extractBody( string $text ): string {
	$withoutPrologue = preg_replace( '!^.*?<body[^>]*>!s', '', $text, 1 );
	return preg_replace( '!</body>\s*</html>\s*$!', '', $withoutPrologue, 1 );
}
/**
 * Sets up the PHP implementation of OOUI for use in this request
 * and instructs OutputPage to enable OOUI for itself.
 *
 * @since 1.26
 * @deprecated since 1.35, use $parser->getOutput()->setEnableOOUI() instead.
 */
public function enableOOUI() {
	wfDeprecated( __METHOD__, '1.35' );

	OutputPage::setupOOUI();
	$this->mOutput->setEnableOOUI( true );
}
/**
 * Set a flag on the parser output and write a debug log entry naming the
 * flag, the page title and the reason.
 * Note that there is a copy of this method in CoreMagicVariables as well.
 *
 * @param string $flag Flag passed to ParserOutput::setOutputFlag()
 * @param string $reason Free-form explanation, used for the log entry only
 */
private function setOutputFlag( string $flag, string $reason ): void {
	$this->mOutput->setOutputFlag( $flag );

	$name = $this->getTitle()->getPrefixedText();
	$this->logger->debug( __METHOD__ . ": set $flag flag on '$name'; $reason" );
}
// Registers the un-namespaced name 'Parser' as an alias of this file's Parser class.
6556 /** @deprecated class alias since 1.42 */
6557 class_alias( Parser::class, 'Parser' );