Merge "Set namespaces for dtp"
[mediawiki.git] / includes / content / WikitextContentHandler.php
blob18879ea09af5e0d11a0e3c8a5c9a0cfa02e4b36d
1 <?php
2 /**
3 * Content handler for wiki text pages.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @since 1.21
22 * @file
23 * @ingroup Content
26 namespace MediaWiki\Content;
28 use MediaWiki\Content\Renderer\ContentParseParams;
29 use MediaWiki\Content\Transform\PreloadTransformParams;
30 use MediaWiki\Content\Transform\PreSaveTransformParams;
31 use MediaWiki\Languages\LanguageNameUtils;
32 use MediaWiki\Linker\LinkRenderer;
33 use MediaWiki\Logger\LoggerFactory;
34 use MediaWiki\Parser\MagicWordFactory;
35 use MediaWiki\Parser\ParserFactory;
36 use MediaWiki\Parser\ParserOutput;
37 use MediaWiki\Parser\ParserOutputFlags;
38 use MediaWiki\Parser\Parsoid\ParsoidParserFactory;
39 use MediaWiki\Revision\RevisionRecord;
40 use MediaWiki\Title\Title;
41 use MediaWiki\Title\TitleFactory;
42 use SearchEngine;
43 use SearchIndexField;
44 use Wikimedia\UUID\GlobalIdGenerator;
45 use WikiPage;
47 /**
48 * Content handler for wiki text pages.
50 * @ingroup Content
52 class WikitextContentHandler extends TextContentHandler {
/** @var TitleFactory Builds Title objects for redirect targets and for the page being parsed. */
private TitleFactory $titleFactory;

/** @var ParserFactory Source of the legacy parser instance. */
private ParserFactory $parserFactory;

/** @var GlobalIdGenerator Not used directly here; handed on to the file content handler. */
private GlobalIdGenerator $globalIdGenerator;

/** @var LanguageNameUtils Used to recognise interwiki prefixes that are language codes. */
private LanguageNameUtils $languageNameUtils;

/** @var LinkRenderer Renders the redirect header shown on redirect pages. */
private LinkRenderer $linkRenderer;

/** @var MagicWordFactory Provides the 'redirect' magic word. */
private MagicWordFactory $magicWordFactory;

/** @var ParsoidParserFactory Source of the Parsoid-backed parser. */
private ParsoidParserFactory $parsoidParserFactory;

/**
 * @param string $modelId Content model ID; expected to always be CONTENT_MODEL_WIKITEXT
 * @param TitleFactory $titleFactory
 * @param ParserFactory $parserFactory
 * @param GlobalIdGenerator $globalIdGenerator
 * @param LanguageNameUtils $languageNameUtils
 * @param LinkRenderer $linkRenderer
 * @param MagicWordFactory $magicWordFactory
 * @param ParsoidParserFactory $parsoidParserFactory
 */
public function __construct(
	string $modelId,
	TitleFactory $titleFactory,
	ParserFactory $parserFactory,
	GlobalIdGenerator $globalIdGenerator,
	LanguageNameUtils $languageNameUtils,
	LinkRenderer $linkRenderer,
	MagicWordFactory $magicWordFactory,
	ParsoidParserFactory $parsoidParserFactory
) {
	// Wikitext is the only serialization format this handler accepts.
	parent::__construct( $modelId, [ CONTENT_FORMAT_WIKITEXT ] );

	// Parsing services
	$this->parserFactory = $parserFactory;
	$this->parsoidParserFactory = $parsoidParserFactory;
	$this->magicWordFactory = $magicWordFactory;

	// Title, language and link services
	$this->titleFactory = $titleFactory;
	$this->languageNameUtils = $languageNameUtils;
	$this->linkRenderer = $linkRenderer;

	// Only forwarded to the file content handler
	$this->globalIdGenerator = $globalIdGenerator;
}
/**
 * Names the Content class this handler instantiates when it builds new content.
 *
 * @return class-string<WikitextContent>
 */
protected function getContentClass() {
	return WikitextContent::class;
}
/**
 * Builds the wikitext of a redirect to the given page and wraps it in a
 * content object of this handler's content class.
 *
 * @param Title $destination The page to redirect to.
 * @param string $text Extra text to place on the line after the redirect, if any.
 *
 * @return Content
 *
 * @see ContentHandler::makeRedirectContent
 */
public function makeRedirectContent( Title $destination, $text = '' ) {
	// Category targets, and targets whose interwiki prefix is a defined
	// language code, get a leading colon inside the link brackets.
	if ( $destination->getNamespace() === NS_CATEGORY ) {
		$needsColon = true;
	} else {
		$interwiki = $destination->getInterwiki();
		$needsColon = $interwiki && $this->languageNameUtils->getLanguageName(
			$interwiki,
			LanguageNameUtils::AUTONYMS,
			LanguageNameUtils::DEFINED
		);
	}
	$linkPrefix = $needsColon ? ':' : '';

	// The first synonym of the 'redirect' magic word is used as the keyword.
	$keyword = $this->magicWordFactory->get( 'redirect' )->getSynonym( 0 );
	$wikitext = $keyword . ' [[' . $linkPrefix . $destination->getFullText() . ']]';

	if ( $text != '' ) {
		$wikitext .= "\n" . $text;
	}

	$contentClass = $this->getContentClass();

	return new $contentClass( $wikitext );
}
/**
 * Wikitext pages can be redirects, so this is unconditionally true.
 *
 * @return bool Always true.
 *
 * @see ContentHandler::supportsRedirects
 */
public function supportsRedirects() {
	return true;
}
/**
 * Wikitext pages can be divided into sections, so this is unconditionally true.
 *
 * @return bool Always true.
 *
 * @see ContentHandler::supportsSections
 */
public function supportsSections() {
	return true;
}
/**
 * Rendered wikitext can be cached through the ParserCache mechanism, so
 * this is unconditionally true.
 *
 * @since 1.21
 *
 * @return bool Always true.
 *
 * @see ContentHandler::isParserCacheSupported
 */
public function isParserCacheSupported() {
	return true;
}
/**
 * Reports that wikitext content supports preload content; see
 * preloadTransform() in this class.
 *
 * @inheritDoc
 */
public function supportsPreloadContent(): bool {
	return true;
}
/**
 * Creates a file content handler configured with this handler's model ID
 * and services. A new instance is built on every call.
 *
 * @return FileContentHandler
 */
protected function getFileHandler() {
	// Same arguments, in the same order, as this class's own constructor.
	$constructorArgs = [
		$this->getModelID(),
		$this->titleFactory,
		$this->parserFactory,
		$this->globalIdGenerator,
		$this->languageNameUtils,
		$this->linkRenderer,
		$this->magicWordFactory,
		$this->parsoidParserFactory,
	];

	return new FileContentHandler( ...$constructorArgs );
}
/**
 * Declares the search index fields for wikitext pages: the parent's
 * fields, plus 'heading', 'auxiliary_text' and 'opening_text', plus
 * whatever the file content handler declares.
 *
 * @param SearchEngine $engine Engine used to create the field mappings
 * @return array Field mappings keyed by field name
 */
public function getFieldsForSearchIndex( SearchEngine $engine ) {
	$fields = parent::getFieldsForSearchIndex( $engine );

	$heading = $engine->makeSearchFieldMapping( 'heading', SearchIndexField::INDEX_TYPE_TEXT );
	$heading->setFlag( SearchIndexField::FLAG_SCORING );
	$fields['heading'] = $heading;

	$fields['auxiliary_text'] =
		$engine->makeSearchFieldMapping( 'auxiliary_text', SearchIndexField::INDEX_TYPE_TEXT );

	$openingText = $engine->makeSearchFieldMapping( 'opening_text', SearchIndexField::INDEX_TYPE_TEXT );
	$fields['opening_text'] = $openingText;
	$openingText->setFlag(
		SearchIndexField::FLAG_SCORING | SearchIndexField::FLAG_NO_HIGHLIGHT
	);

	// Until we have the full first-class content handler for files, we invoke it explicitly here
	$fileFields = $this->getFileHandler()->getFieldsForSearchIndex( $engine );

	return array_merge( $fields, $fileFields );
}
/**
 * Collects the search index data for a wikitext page: the parent's data,
 * overlaid with values taken from the parsed page structure and, for
 * pages in the file namespace, with the file content handler's data.
 *
 * @param WikiPage $page Page being indexed
 * @param ParserOutput $parserOutput Parse result the text fields are derived from
 * @param SearchEngine $engine
 * @param RevisionRecord|null $revision
 * @return array Field values keyed by field name
 */
public function getDataForSearchIndex(
	WikiPage $page,
	ParserOutput $parserOutput,
	SearchEngine $engine,
	?RevisionRecord $revision = null
) {
	$fields = parent::getDataForSearchIndex( $page, $parserOutput, $engine, $revision );

	$structure = new WikiTextStructure( $parserOutput );
	$wikitextFields = [
		'heading' => $structure->headings(),
		// text fields
		'opening_text' => $structure->getOpeningText(),
		// overwrites one from ContentHandler
		'text' => $structure->getMainText(),
		'auxiliary_text' => $structure->getAuxiliaryText(),
		'defaultsort' => $structure->getDefaultSort(),
		// Stays null unless the file handler supplies a value below.
		'file_text' => null,
	];
	foreach ( $wikitextFields as $name => $value ) {
		$fields[$name] = $value;
	}

	if ( $page->getTitle()->getNamespace() !== NS_FILE ) {
		return $fields;
	}

	// Until we have the full first-class content handler for files, we invoke it explicitly here
	return array_merge(
		$fields,
		$this->getFileHandler()->getDataForSearchIndex( $page, $parserOutput, $engine, $revision )
	);
}
/**
 * Serializes the content by delegating to the parent implementation,
 * after first validating the requested format with checkFormat().
 *
 * @param Content $content
 * @param string|null $format The serialization format to check
 *
 * @return mixed
 */
public function serializeContent( Content $content, $format = null ) {
	// Validate the format before any serialization work is done.
	$this->checkFormat( $format );

	return parent::serializeContent( $content, $format );
}
/**
 * Runs the legacy parser's pre-save transform over the content's text.
 *
 * @param Content $content
 * @param PreSaveTransformParams $pstParams Supplies the page, user and parser options
 *
 * @return Content The given object when the transform leaves the text
 *  unchanged; otherwise a new content object holding the transformed
 *  text and the flags reported by the parser's output.
 */
public function preSaveTransform(
	Content $content,
	PreSaveTransformParams $pstParams
): Content {
	'@phan-var WikitextContent $content';
	$originalText = $content->getText();

	$parser = $this->parserFactory->getInstance();
	$transformedText = $parser->preSaveTransform(
		$originalText,
		$pstParams->getPage(),
		$pstParams->getUser(),
		$pstParams->getParserOptions()
	);

	if ( $transformedText !== $originalText ) {
		$contentClass = $this->getContentClass();
		$transformed = new $contentClass( $transformedText );
		$transformed->setPreSaveTransformFlags( $parser->getOutput()->getAllFlags() );

		return $transformed;
	}

	return $content;
}
/**
 * Applies the legacy parser's preload transformations to the content's
 * text and returns the result as a new content object. A new object is
 * created even when the text comes back unchanged.
 *
 * @param Content $content
 * @param PreloadTransformParams $pltParams Supplies the page, parser options and preload parameters
 *
 * @return Content
 */
public function preloadTransform(
	Content $content,
	PreloadTransformParams $pltParams
): Content {
	'@phan-var WikitextContent $content';
	$sourceText = $content->getText();

	$parser = $this->parserFactory->getInstance();
	$preloadText = $parser->getPreloadText(
		$sourceText,
		$pltParams->getPage(),
		$pltParams->getParserOptions(),
		$pltParams->getParams()
	);

	$contentClass = $this->getContentClass();

	return new $contentClass( $preloadText );
}
/**
 * Splits page content into its redirect target and the text following
 * the redirect link.
 *
 * When the text does not start with the redirect magic word, no link
 * follows it, or the link is not a valid redirect target, the result is
 * [ null, $content ] with the original content object untouched.
 *
 * @since 1.41 (used to be a method on WikitextContent since 1.23)
 *
 * @param WikitextContent $content
 * @return array List of two elements: LinkTarget|null and WikitextContent object.
 */
public function extractRedirectTargetAndText( WikitextContent $content ): array {
	$notARedirect = [ null, $content ];

	$redirectWord = $this->magicWordFactory->get( 'redirect' );
	$text = ltrim( $content->getText() );

	// On success the magic word is removed from the start of $text.
	if ( !$redirectWord->matchStartAndRemove( $text ) ) {
		return $notARedirect;
	}

	// Extract the first link and see if it's usable
	// Ensure that it really does come directly after #REDIRECT
	// Some older redirects included a colon, so don't freak about that!
	$matches = [];
	if ( !preg_match( '!^\s*:?\s*\[{2}(.*?)(?:\|.*?)?\]{2}\s*!', $text, $matches ) ) {
		return $notARedirect;
	}

	$target = $matches[1];
	// Only targets containing a percent sign are rewritten: the preceding
	// colon used to "escape" categories, etc. is stripped and the link is
	// URL-decoded, matching the behavior of inline link parsing.
	if ( strpos( $target, '%' ) !== false ) {
		$target = rawurldecode( ltrim( $target, ':' ) );
	}

	// TODO: Move isValidRedirectTarget() out Title, so we can use a TitleValue here.
	$title = $this->titleFactory->newFromText( $target );

	// If the title is a redirect to bad special pages or is invalid, return null
	if ( !( $title instanceof Title && $title->isValidRedirectTarget() ) ) {
		return $notARedirect;
	}

	// Everything after the matched link (and its trailing whitespace) is the remaining text.
	$remainingText = substr( $text, strlen( $matches[0] ) );

	return [ $title, new WikitextContent( $remainingText ) ];
}
/**
 * Parses the content's text and stores the result in $parserOutput.
 *
 * Parsoid is used when the parser options request it; otherwise the
 * legacy parser is used on the text with the redirect magic word and
 * link removed. Parses taking more than three seconds are logged. For
 * redirects, the target is added to the output's links and either a
 * redirect header is set (when HTML is generated) or the raw text is
 * cleared.
 *
 * @since 1.38
 *
 * @param Content $content
 * @param ContentParseParams $cpoParams Supplies the page, revision ID, parser
 *  options, previous output and whether HTML should be generated
 * @param ParserOutput &$parserOutput The output object to fill (reference).
 *  It is replaced by the object the parser returns.
 */
protected function fillParserOutput(
	Content $content,
	ContentParseParams $cpoParams,
	ParserOutput &$parserOutput
) {
	'@phan-var WikitextContent $content';
	$title = $this->titleFactory->newFromPageReference( $cpoParams->getPage() );
	$parserOptions = $cpoParams->getParserOptions();
	$revId = $cpoParams->getRevId();

	[ $redir, $contentWithoutRedirect ] = $this->extractRedirectTargetAndText( $content );
	if ( $parserOptions->getUseParsoid() ) {
		$parser = $this->parsoidParserFactory->create();
		// Parsoid renders the #REDIRECT magic word as an invisible
		// <link> tag and doesn't require it to be stripped.
		// T349087: ...and in fact, RESTBase relies on getting
		// redirect information from this <link> tag, so it needs
		// to be present.
		// Further, Parsoid can accept a Content in place of a string.
		$text = $content;
		$extraArgs = [ $cpoParams->getPreviousOutput() ];
	} else {
		// The legacy parser requires the #REDIRECT magic word to
		// be stripped from the content before parsing.
		$parser = $this->parserFactory->getInstance();
		$text = $contentWithoutRedirect->getText();
		$extraArgs = [];
	}

	// Measure only the parse call itself.
	$startTime = microtime( true );
	$parserOutput = $parser
		->parse( $text, $title, $parserOptions, true, true, $revId, ...$extraArgs );
	$time = microtime( true ) - $startTime;

	// Timing hack
	if ( $time > 3 ) {
		// TODO: Use Parser's logger (once it has one)
		$channel = $parserOptions->getUseParsoid() ? 'slow-parsoid' : 'slow-parse';
		$logger = LoggerFactory::getInstance( $channel );
		$logger->info( 'Parsing {title} was slow, took {time} seconds', [
			'time' => number_format( $time, 2 ),
			'title' => (string)$title,
			'trigger' => $parserOptions->getRenderReason(),
		] );
	}

	// T330667: Record the fact that we used the value of
	// 'useParsoid' to influence this parse. Note that
	// ::getUseParsoid() has a side-effect on $parserOutput here
	// which didn't occur when we called ::getUseParsoid() earlier
	// because $parserOutput didn't exist at that time.
	$parserOptions->getUseParsoid();

	// Add redirect indicator at the top
	if ( $redir ) {
		// Make sure to include the redirect link in pagelinks
		$parserOutput->addLink( $redir );
		if ( $cpoParams->getGenerateHtml() ) {
			$parserOutput->setRedirectHeader(
				$this->linkRenderer->makeRedirectHeader(
					$title->getPageLanguage(), $redir, false
				)
			);
			$parserOutput->addModuleStyles( [ 'mediawiki.action.view.redirectPage' ] );
		} else {
			$parserOutput->setRawText( null );
		}
	}

	// Pass along user-signature flag. The comparison is strict so that a
	// non-string flag value (for example boolean true, which loosely
	// equals any non-empty string) is never mistaken for the flag.
	if ( in_array( 'user-signature', $content->getPreSaveTransformFlags(), true ) ) {
		$parserOutput->setOutputFlag( ParserOutputFlags::USER_SIGNATURE );
	}
}
/**
 * Keeps the un-namespaced global class name usable for code that still
 * refers to it.
 *
 * @deprecated class alias since 1.43
 */
class_alias( WikitextContentHandler::class, 'WikitextContentHandler' );