[JsonCodec] Hide TYPE_ANNOTATION from the unserialization methods
[mediawiki.git] / includes / CommentFormatter / CommentParser.php
blob20b5034d5d9bdbc021bcec73062c7bb2ac3c5a6f
1 <?php
3 namespace MediaWiki\CommentFormatter;
5 use File;
6 use HtmlArmor;
7 use Language;
8 use MediaWiki\Cache\LinkBatch;
9 use MediaWiki\Cache\LinkBatchFactory;
10 use MediaWiki\Cache\LinkCache;
11 use MediaWiki\HookContainer\HookContainer;
12 use MediaWiki\HookContainer\HookRunner;
13 use MediaWiki\Linker\Linker;
14 use MediaWiki\Linker\LinkRenderer;
15 use MediaWiki\Linker\LinkTarget;
16 use MediaWiki\Parser\Parser;
17 use MediaWiki\Parser\Sanitizer;
18 use MediaWiki\Title\MalformedTitleException;
19 use MediaWiki\Title\NamespaceInfo;
20 use MediaWiki\Title\Title;
21 use MediaWiki\Title\TitleParser;
22 use MediaWiki\Title\TitleValue;
23 use MediaWiki\WikiMap\WikiMap;
24 use RepoGroup;
25 use StringUtils;
27 /**
28 * The text processing backend for CommentFormatter.
30 * CommentParser objects should be discarded after the comment batch is
31 * complete, in order to reduce memory usage.
33 * @internal
35 class CommentParser {
/** @var LinkRenderer */
private $linkRenderer;
/** @var LinkBatchFactory */
private $linkBatchFactory;
/** @var RepoGroup */
private $repoGroup;
/** @var Language */
private $userLang;
/** @var Language */
private $contLang;
/** @var TitleParser */
private $titleParser;
/** @var NamespaceInfo */
private $namespaceInfo;
/** @var HookRunner */
private $hookRunner;
/** @var LinkCache */
private $linkCache;

/** @var callable[] Deferred link callbacks; the array index is the marker ID */
private $links = [];
/** @var LinkBatch|null Pending page link batch, or null when nothing is queued */
private $linkBatch;

/** @var array[] Input to RepoGroup::findFiles() */
private $fileBatch = [];
/** @var File[] Resolved File objects indexed by DB key */
private $files = [];

/** @var int The maximum number of digits in a marker ID */
private const MAX_ID_SIZE = 7;
/** @var string Prefix for marker. ' and " included to break attributes (T355538) */
private const MARKER_PREFIX = "\x1B\"'";
/**
 * Store the injected services. A HookRunner is built from the supplied
 * HookContainer; everything else is kept as passed in.
 *
 * @param LinkRenderer $linkRenderer
 * @param LinkBatchFactory $linkBatchFactory
 * @param LinkCache $linkCache
 * @param RepoGroup $repoGroup
 * @param Language $userLang
 * @param Language $contLang
 * @param TitleParser $titleParser
 * @param NamespaceInfo $namespaceInfo
 * @param HookContainer $hookContainer
 */
public function __construct(
	LinkRenderer $linkRenderer,
	LinkBatchFactory $linkBatchFactory,
	LinkCache $linkCache,
	RepoGroup $repoGroup,
	Language $userLang,
	Language $contLang,
	TitleParser $titleParser,
	NamespaceInfo $namespaceInfo,
	HookContainer $hookContainer
) {
	// Link rendering and existence lookups
	$this->linkRenderer = $linkRenderer;
	$this->linkCache = $linkCache;
	$this->linkBatchFactory = $linkBatchFactory;

	// File lookups
	$this->repoGroup = $repoGroup;

	// Languages
	$this->userLang = $userLang;
	$this->contLang = $contLang;

	// Title handling
	$this->titleParser = $titleParser;
	$this->namespaceInfo = $namespaceInfo;

	// Hooks
	$this->hookRunner = new HookRunner( $hookContainer );
}
/**
 * Convert a comment to HTML, but replace links with markers which are
 * resolved later.
 *
 * The comment is passed to preprocessInternal() with $unsafe = false, so it
 * is HTML-escaped (entities allowed) before any link processing.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks
 * @return string
 */
public function preprocess( string $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, false, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
/**
 * Convert a comment in pseudo-HTML format to HTML, replacing links with markers.
 *
 * The comment is passed to preprocessInternal() with $unsafe = true, so it
 * is NOT HTML-escaped here; the caller is responsible for its safety.
 *
 * @param string $comment
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks
 * @return string
 */
public function preprocessUnsafe( $comment, ?LinkTarget $selfLinkTarget = null,
	$samePage = false, $wikiId = false, $enableSectionLinks = true
) {
	return $this->preprocessInternal( $comment, true, $selfLinkTarget,
		$samePage, $wikiId, $enableSectionLinks );
}
/**
 * Execute pending batch queries and replace markers in the specified
 * string(s) with actual links.
 *
 * A marker whose ID has no registered callback is replaced with the HTML
 * comment "<!-- MISSING -->".
 *
 * @param string|string[] $comments
 * @return string|string[]
 */
public function finalize( $comments ) {
	$this->flushLinkBatches();

	// A marker is the fixed prefix followed by exactly MAX_ID_SIZE digits
	$markerPattern = '/' . self::MARKER_PREFIX . '([0-9]{' . self::MAX_ID_SIZE . '})/';

	$resolveMarker = function ( $m ) {
		$callback = $this->links[(int)$m[1]] ?? null;
		return $callback ? $callback() : '<!-- MISSING -->';
	};

	return preg_replace_callback( $markerPattern, $resolveMarker, $comments );
}
/**
 * Shared implementation of preprocess() and preprocessUnsafe().
 *
 * Replaces newlines and escape characters with spaces, optionally
 * HTML-escapes the text, then converts section links (if enabled) and
 * wiki links.
 *
 * @param string $comment
 * @param bool $unsafe If true, skip Sanitizer::escapeHtmlAllowEntities()
 * @param LinkTarget|null $selfLinkTarget
 * @param bool $samePage
 * @param string|false|null $wikiId
 * @param bool $enableSectionLinks
 * @return string
 */
private function preprocessInternal( $comment, $unsafe, $selfLinkTarget, $samePage, $wikiId,
	$enableSectionLinks
) {
	// Sanitize text a bit
	// \x1b needs to be stripped because it is used for link markers.
	// The array form of strtr() is used on purpose: with the two-string
	// form, a "to" string shorter than "from" silently drops the extra
	// "from" characters, which would leave \x1b in place.
	$comment = strtr( $comment, [ "\n" => ' ', "\x1b" => ' ' ] );
	// Allow HTML entities (for T15815)
	if ( !$unsafe ) {
		$comment = Sanitizer::escapeHtmlAllowEntities( $comment );
	}
	if ( $enableSectionLinks ) {
		$comment = $this->doSectionLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
	}
	return $this->doWikiLinks( $comment, $selfLinkTarget, $samePage, $wikiId );
}
/**
 * Converts C-style comments in edit summaries into section links.
 *
 * Too many things are called "comments", so these are mostly now called
 * section links rather than autocomments.
 *
 * We look for all comments, match any text before and after the comment,
 * add a separator where needed and format the comment itself with CSS.
 *
 * The FormatAutocomments hook is called for each match; if it sets a
 * non-null result, that result replaces the match verbatim.
 *
 * @param string $comment Comment text
 * @param LinkTarget|null $selfLinkTarget An optional LinkTarget object used to links to sections
 * @param bool $samePage Whether section links should refer to local page
 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
 *  as used by WikiMap.
 * @return string Preprocessed comment
 */
private function doSectionLinks(
	$comment,
	$selfLinkTarget = null,
	$samePage = false,
	$wikiId = false
) {
	// @todo $append here is something of a hack to preserve the status
	// quo. Someone who knows more about bidi and such should decide
	// (1) what sensible rendering even *is* for an LTR edit summary on an RTL
	// wiki, both when autocomments exist and when they don't, and
	// (2) what markup will make that actually happen.
	$append = '';
	$comment = preg_replace_callback(
		// To detect the presence of content before or after the
		// auto-comment, we use capturing groups inside optional zero-width
		// assertions. But older versions of PCRE can't directly make
		// zero-width assertions optional, so wrap them in a non-capturing
		// group.
		'!(?:(?<=(.)))?/\*\s*(.*?)\s*\*/(?:(?=(.)))?!',
		function ( $match ) use ( &$append, $selfLinkTarget, $samePage, $wikiId ) {
			// Ensure all match positions are defined
			$match += [ '', '', '', '' ];

			$pre = $match[1] !== '';
			$auto = $match[2];
			$post = $match[3] !== '';
			$comment = null;

			$this->hookRunner->onFormatAutocomments(
				$comment, $pre, $auto, $post,
				Title::castFromLinkTarget( $selfLinkTarget ),
				$samePage,
				$wikiId );
			if ( $comment !== null ) {
				return $comment;
			}

			if ( $selfLinkTarget ) {
				# Remove links that a user may have manually put in the autosummary
				# This could be improved by copying as much of Parser::stripSectionName as desired.
				$section = str_replace( [
					'[[:',
					'[[',
					']]'
				], '', $auto );

				// We don't want any links in the auto text to be linked, but we still
				// want to show any [[ ]]
				$sectionText = str_replace( '[[', '&#91;[', $auto );

				// Drop the first character of the guessed section name
				$section = substr( Parser::guessSectionNameFromStrippedText( $section ), 1 );
				if ( $section !== '' ) {
					if ( $samePage ) {
						$sectionTitle = new TitleValue( NS_MAIN, '', $section );
					} else {
						$sectionTitle = $selfLinkTarget->createFragmentTarget( $section );
					}
					$auto = $this->makeSectionLink(
						$sectionTitle,
						$this->userLang->getArrow() . $this->userLang->getDirMark() . $sectionText,
						$wikiId
					);
				}
			}

			$prefix = '';
			if ( $pre ) {
				# written summary $presep autocomment (summary /* section */)
				$prefix = wfMessage( 'autocomment-prefix' )->inContentLanguage()->escaped();
			}
			if ( $post ) {
				# autocomment $postsep written summary (/* section */ summary)
				$auto .= wfMessage( 'colon-separator' )->inContentLanguage()->escaped();
			}
			// Compare against '' explicitly: a truthiness test would treat
			// the autocomment text "0" as empty and skip the wrapper.
			if ( $auto !== '' ) {
				// The outer dir="auto" span is left open here and closed
				// via $append at the very end of the comment.
				$auto = '<span dir="auto"><span class="autocomment">' . $auto . '</span>';
				$append .= '</span>';
			}
			return $prefix . $auto;
		},
		$comment
	);
	return $comment . $append;
}
/**
 * Make a section link. These don't need to go into the LinkBatch, since
 * the link class does not depend on whether the link is known.
 *
 * @param LinkTarget $target
 * @param string $text
 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
 *  as used by WikiMap.
 *
 * @return string HTML link
 */
private function makeSectionLink(
	LinkTarget $target, $text, $wikiId
) {
	$isForeign = $wikiId !== null && $wikiId !== false && !$target->isExternal();
	if ( !$isForeign ) {
		return $this->linkRenderer->makePreloadedLink( $target, new HtmlArmor( $text ), '' );
	}

	// Main-namespace pages use the bare DB key; all others get the
	// canonical namespace name as a prefix.
	if ( $target->getNamespace() === 0 ) {
		$pageName = $target->getDBkey();
	} else {
		$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
			':' . $target->getDBkey();
	}
	$url = WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() );

	return Linker::makeExternalLink(
		$url,
		$text,
		/* escape = */ false // Already escaped
	);
}
/**
 * Formats wiki links and media links in text; all other wiki formatting
 * is ignored
 *
 * @todo FIXME: Doesn't handle sub-links as in image thumb texts like the main parser
 *
 * @param string $comment Text to format links in. WARNING! Since the output of this
 *  function is html, $comment must be sanitized for use as html. You probably want
 *  to pass $comment through Sanitizer::escapeHtmlAllowEntities() before calling
 *  this function.
 * @param LinkTarget|null $selfLinkTarget An optional LinkTarget object used to links to sections
 * @param bool $samePage Whether section links should refer to local page
 * @param string|false|null $wikiId Id of the wiki to link to (if not the local wiki),
 *  as used by WikiMap.
 *
 * @return string HTML
 */
private function doWikiLinks( $comment, $selfLinkTarget = null, $samePage = false, $wikiId = false ) {
	return preg_replace_callback(
		'/
			\[\[
			\s*+ # ignore leading whitespace, the *+ quantifier disallows backtracking
			:? # ignore optional leading colon
			([^[\]|]+) # 1. link target; page names cannot include [, ] or |
			(?:\|
				# 2. link text
				# Stop matching at ]] without relying on backtracking.
				((?:]?[^\]])*+)
			)?
			\]\]
			([^[]*) # 3. link trail (the text up until the next link)
		/x',
		function ( $match ) use ( $selfLinkTarget, $samePage, $wikiId ) {
			// Alternation of the canonical and content-language names of
			// the Media namespace. Both names are quoted with the same
			// delimiter as the regex they are embedded in below.
			$medians = '(?:';
			$medians .= preg_quote(
				$this->namespaceInfo->getCanonicalName( NS_MEDIA ), '/' );
			$medians .= '|';
			$medians .= preg_quote(
				$this->contLang->getNsText( NS_MEDIA ),
				'/'
			) . '):';

			$comment = $match[0];

			// Fix up urlencoded title texts (copied from Parser::replaceInternalLinks)
			if ( strpos( $match[1], '%' ) !== false ) {
				$match[1] = strtr(
					rawurldecode( $match[1] ),
					[ '<' => '&lt;', '>' => '&gt;' ]
				);
			}

			// Handle link renaming [[foo|text]] will show link as "text"
			if ( $match[2] != "" ) {
				$text = $match[2];
			} else {
				$text = $match[1];
			}
			$submatch = [];
			$linkMarker = null;
			if ( preg_match( '/^' . $medians . '(.*)$/i', $match[1], $submatch ) ) {
				// Media link; trail not supported.
				$linkRegexp = '/\[\[(.*?)\]\]/';
				$linkTarget = $this->titleParser->makeTitleValueSafe( NS_FILE, $submatch[1] );
				if ( $linkTarget ) {
					$linkMarker = $this->addFileLink( $linkTarget, $text );
				}
			} else {
				// Other kind of link
				// Make sure its target is non-empty
				if ( isset( $match[1][0] ) && $match[1][0] == ':' ) {
					$match[1] = substr( $match[1], 1 );
				}
				// @phan-suppress-next-line PhanTypePossiblyInvalidDimOffset False positive
				if ( $match[1] !== false && $match[1] !== null && $match[1] !== '' ) {
					if ( preg_match(
						$this->contLang->linkTrail(),
						$match[3],
						$submatch
					) ) {
						$trail = $submatch[1];
					} else {
						$trail = "";
					}
					// Built from the full trail, before splitTrail() narrows it
					$linkRegexp = '/\[\[(.*?)\]\]' . preg_quote( $trail, '/' ) . '/';
					[ $inside, $trail ] = Linker::splitTrail( $trail );

					$linkText = $text;
					$linkTarget = Linker::normalizeSubpageLink( $selfLinkTarget, $match[1], $linkText );

					try {
						$target = $this->titleParser->parseTitle( $linkTarget );

						// Fragment-only link: point it at the self link target
						if ( $target->getText() == '' && !$target->isExternal()
							&& !$samePage && $selfLinkTarget
						) {
							$target = $selfLinkTarget->createFragmentTarget( $target->getFragment() );
						}

						$linkMarker = $this->addPageLink( $target, $linkText . $inside, $wikiId );
						$linkMarker .= $trail;
					} catch ( MalformedTitleException $e ) {
						// Fall through: the original text is returned unchanged
					}
				}
			}
			if ( $linkMarker ) {
				// If the link is still valid, go ahead and replace it in!
				$comment = preg_replace(
					// @phan-suppress-next-next-line PhanPossiblyUndeclaredVariable linkRegexp set when used
					// @phan-suppress-next-line PhanTypeMismatchArgumentNullableInternal linkRegexp set when used
					$linkRegexp,
					StringUtils::escapeRegexReplacement( $linkMarker ),
					$comment,
					// Only the link itself is replaced, never anything later in the trail
					1
				);
			}

			return $comment;
		},
		$comment
	);
}
/**
 * Add a deferred link to the list and return its marker.
 *
 * The marker is MARKER_PREFIX followed by the callback's list index,
 * zero-padded to MAX_ID_SIZE digits.
 *
 * @param callable $callback
 * @return string
 * @throws \RuntimeException If the next index needs more than MAX_ID_SIZE digits
 */
private function addLinkMarker( $callback ) {
	$markerId = count( $this->links );
	$digits = strlen( (string)$markerId );
	if ( $digits > self::MAX_ID_SIZE ) {
		throw new \RuntimeException( 'Too many links in comment batch' );
	}

	$this->links[] = $callback;

	$format = self::MARKER_PREFIX . "%0" . self::MAX_ID_SIZE . 'd';
	return sprintf( $format, $markerId );
}
/**
 * Link to a LinkTarget. Return either HTML or a marker depending on whether
 * existence checks are deferred.
 *
 * HTML is returned immediately for foreign-wiki links and for targets the
 * LinkCache already knows as good or bad (or that are always known).
 * Otherwise the target is queued in the link batch and a marker is returned.
 *
 * @param LinkTarget $target
 * @param string $text
 * @param string|false|null $wikiId
 * @return string
 */
private function addPageLink( LinkTarget $target, $text, $wikiId ) {
	if ( $wikiId !== null && $wikiId !== false && !$target->isExternal() ) {
		// Handle links from a foreign wiki ID
		if ( $target->getNamespace() === 0 ) {
			$pageName = $target->getDBkey();
		} else {
			$pageName = $this->namespaceInfo->getCanonicalName( $target->getNamespace() ) .
				':' . $target->getDBkey();
		}
		return Linker::makeExternalLink(
			WikiMap::getForeignURL( $wikiId, $pageName, $target->getFragment() ),
			$text,
			/* escape = */ false // Already escaped
		);
	}

	$alreadyKnown = $this->linkCache->getGoodLinkID( $target ) ||
		Title::newFromLinkTarget( $target )->isAlwaysKnown();
	if ( $alreadyKnown ) {
		// Already known
		return $this->linkRenderer->makeKnownLink( $target, new HtmlArmor( $text ) );
	}

	if ( $this->linkCache->isBadLink( $target ) ) {
		// Already cached as unknown
		return $this->linkRenderer->makeBrokenLink( $target, new HtmlArmor( $text ) );
	}

	// Defer page link: create the batch on first use
	if ( !$this->linkBatch ) {
		$this->linkBatch = $this->linkBatchFactory->newLinkBatch();
		$this->linkBatch->setCaller( __METHOD__ );
	}
	$this->linkBatch->addObj( $target );

	$renderLater = function () use ( $target, $text ) {
		return $this->linkRenderer->makeLink( $target, new HtmlArmor( $text ) );
	};
	return $this->addLinkMarker( $renderLater );
}
/**
 * Link to a file, returning a marker.
 *
 * The target is queued in the file batch. When the marker is resolved, the
 * File is looked up in the resolved files by DB key, with false passed to
 * Linker::makeMediaLinkFile() if no entry exists.
 *
 * @param LinkTarget $target The name of the file.
 * @param string $html The inner HTML of the link
 * @return string
 */
private function addFileLink( LinkTarget $target, $html ) {
	$this->fileBatch[] = [ 'title' => $target ];

	$renderLater = function () use ( $target, $html ) {
		$file = $this->files[$target->getDBkey()] ?? false;
		return Linker::makeMediaLinkFile( $target, $file, $html );
	};
	return $this->addLinkMarker( $renderLater );
}
524 * Execute any pending link batch or file batch
526 private function flushLinkBatches() {
527 if ( $this->linkBatch ) {
528 $this->linkBatch->execute();
529 $this->linkBatch = null;
531 if ( $this->fileBatch ) {
532 $this->files += $this->repoGroup->findFiles( $this->fileBatch );
533 $this->fileBatch = [];