3 * Created on Dec 01, 2007
5 * Copyright © 2007 Yuri Astrakhan "<Firstname><Lastname>@gmail.com"
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
28 class ApiParse
extends ApiBase
{
30 /** @var String $section */
31 private $section = null;
33 /** @var Content $content */
34 private $content = null;
36 /** @var Content $pstContent */
37 private $pstContent = null;
/**
 * Entry point of the module.
 *
 * Parses either an existing revision/page (oldid, pageid or page) or the
 * text supplied in the request, then adds the requested pieces of the
 * parser output (selected through 'prop') to the API result under the
 * module name. With 'onlypst' only the pre-save-transformed text is added.
 */
39 public function execute() {
40 // The data is hot but user-dependent, like page views, so we set vary cookies
41 $this->getMain()->setCacheMode( 'anon-public-user-private' );
44 $params = $this->extractRequestParams();
45 $text = $params['text'];
46 $title = $params['title'];
47 $page = $params['page'];
48 $pageid = $params['pageid'];
49 $oldid = $params['oldid'];
51 $model = $params['contentmodel'];
52 $format = $params['contentformat'];
// 'page' is rejected when combined with 'text' or with a 'title' other than 'API'
54 if ( !is_null( $page ) && ( !is_null( $text ) ||
$title != 'API' ) ) {
55 $this->dieUsage( 'The page parameter cannot be used together with the text and title parameters', 'params' );
// Flipped so that a requested property can be tested with isset( $prop[...] )
58 $prop = array_flip( $params['prop'] );
// $this->section is false when no section was requested; later code tests it with !== false
60 if ( isset( $params['section'] ) ) {
61 $this->section
= $params['section'];
63 $this->section
= false;
66 // The parser needs $wgTitle to be set, apparently the
67 // $title parameter in Parser::parse isn't enough *sigh*
68 // TODO: Does this still need $wgTitle?
69 global $wgParser, $wgTitle;
// NOTE(review): the initial values of $oldLang and $redirValues are not visible in this
// excerpt; confirm both are initialised to null before the is_null() checks further down.
71 // Currently unnecessary, code to act as a safeguard against any change in current behavior of uselang
73 if ( isset( $params['uselang'] ) && $params['uselang'] != $this->getContext()->getLanguage()->getCode() ) {
74 $oldLang = $this->getContext()->getLanguage(); // Backup language
75 $this->getContext()->setLanguage( Language
::factory( $params['uselang'] ) );
81 $result = $this->getResult();
// Branch 1: parse stored content, identified by oldid, pageid or page
83 if ( !is_null( $oldid ) ||
!is_null( $pageid ) ||
!is_null( $page ) ) {
84 if ( !is_null( $oldid ) ) {
85 // Don't use the parser cache
86 $rev = Revision
::newFromID( $oldid );
88 $this->dieUsage( "There is no revision ID $oldid", 'missingrev' );
90 if ( !$rev->userCan( Revision
::DELETED_TEXT
, $this->getUser() ) ) {
91 $this->dieUsage( "You don't have permission to view deleted revisions", 'permissiondenied' );
94 $titleObj = $rev->getTitle();
96 $pageObj = WikiPage
::factory( $titleObj );
97 $popts = $this->makeParserOptions( $pageObj, $params );
99 // If for some reason the "oldid" is actually the current revision, it may be cached
100 if ( $rev->isCurrent() ) {
101 // May get from/save to parser cache
102 $p_result = $this->getParsedContent( $pageObj, $popts,
103 $pageid, isset( $prop['wikitext'] ) );
104 } else { // This is an old revision, so get the text differently
105 $this->content
= $rev->getContent( Revision
::FOR_THIS_USER
, $this->getUser() );
107 if ( $this->section
!== false ) {
108 $this->content
= $this->getSectionContent( $this->content
, 'r' . $rev->getId() );
111 // Should we save old revision parses to the parser cache?
112 $p_result = $this->content
->getParserOutput( $titleObj, $rev->getId(), $popts );
114 } else { // Not $oldid, but $pageid or $page
// With 'redirects', an internal ApiMain request is built from $reqParams and its
// 'query' => 'redirects' result data is read to find the page to parse
115 if ( $params['redirects'] ) {
120 if ( !is_null ( $pageid ) ) {
121 $reqParams['pageids'] = $pageid;
123 $reqParams['titles'] = $page;
125 $req = new FauxRequest( $reqParams );
126 $main = new ApiMain( $req );
128 $data = $main->getResultData();
129 $redirValues = isset( $data['query']['redirects'] )
130 ?
$data['query']['redirects']
133 foreach ( (array)$redirValues as $r ) {
136 $pageParams = array( 'title' => $to );
137 } elseif ( !is_null( $pageid ) ) {
138 $pageParams = array( 'pageid' => $pageid );
140 $pageParams = array( 'title' => $page );
143 $pageObj = $this->getTitleOrPageId( $pageParams, 'fromdb' );
144 $titleObj = $pageObj->getTitle();
145 if ( !$titleObj ||
!$titleObj->exists() ) {
146 $this->dieUsage( "The page you specified doesn't exist", 'missingtitle' );
148 $wgTitle = $titleObj;
// Setting $oldid here makes the 'revid' field appear in the result further down
150 if ( isset( $prop['revid'] ) ) {
151 $oldid = $pageObj->getLatest();
154 $popts = $this->makeParserOptions( $pageObj, $params );
156 // Potentially cached
157 $p_result = $this->getParsedContent( $pageObj, $popts, $pageid,
158 isset( $prop['wikitext'] ) );
160 } else { // Not $oldid, $pageid, $page. Hence based on $text
161 $titleObj = Title
::newFromText( $title );
162 if ( !$titleObj ||
$titleObj->isExternal() ) {
163 $this->dieUsageMsg( array( 'invalidtitle', $title ) );
165 if ( !$titleObj->canExist() ) {
166 $this->dieUsage( "Namespace doesn't allow actual pages", 'pagecannotexist' );
168 $wgTitle = $titleObj;
169 $pageObj = WikiPage
::factory( $titleObj );
171 $popts = $this->makeParserOptions( $pageObj, $params );
173 if ( is_null( $text ) ) {
174 if ( $title !== 'API' && ( $prop ||
$params['generatexml'] ) ) {
176 "'title' used without 'text', and parsed page properties were requested " .
177 "(did you mean to use 'page' instead of 'title'?)"
180 // Prevent warning from ContentHandler::makeContent()
185 $this->content
= ContentHandler
::makeContent( $text, $titleObj, $model, $format );
186 } catch ( MWContentSerializationException
$ex ) {
187 $this->dieUsage( $ex->getMessage(), 'parseerror' );
190 if ( $this->section
!== false ) {
191 $this->content
= $this->getSectionContent( $this->content
, $titleObj->getText() );
194 if ( $params['pst'] ||
$params['onlypst'] ) {
195 $this->pstContent
= $this->content
->preSaveTransform( $titleObj, $this->getUser(), $popts );
197 if ( $params['onlypst'] ) {
198 // Build a result and bail out
199 $result_array = array();
200 $result_array['text'] = array();
201 ApiResult
::setContent( $result_array['text'], $this->pstContent
->serialize( $format ) );
202 if ( isset( $prop['wikitext'] ) ) {
203 $result_array['wikitext'] = array();
204 ApiResult
::setContent( $result_array['wikitext'], $this->content
->serialize( $format ) );
206 $result->addValue( null, $this->getModuleName(), $result_array );
210 // Not cached (save or load)
211 if ( $params['pst'] ) {
212 $p_result = $this->pstContent
->getParserOutput( $titleObj, null, $popts );
214 $p_result = $this->content
->getParserOutput( $titleObj, null, $popts );
// From here on $p_result holds the parser output; one result field is filled per requested prop
218 $result_array = array();
220 $result_array['title'] = $titleObj->getPrefixedText();
222 if ( !is_null( $oldid ) ) {
223 $result_array['revid'] = intval( $oldid );
226 if ( $params['redirects'] && !is_null( $redirValues ) ) {
227 $result_array['redirects'] = $redirValues;
230 if ( isset( $prop['text'] ) ) {
231 $result_array['text'] = array();
232 ApiResult
::setContent( $result_array['text'], $p_result->getText() );
235 if ( !is_null( $params['summary'] ) ) {
236 $result_array['parsedsummary'] = array();
237 ApiResult
::setContent( $result_array['parsedsummary'], Linker
::formatComment( $params['summary'], $titleObj ) );
240 if ( isset( $prop['langlinks'] ) ||
isset( $prop['languageshtml'] ) ) {
241 $langlinks = $p_result->getLanguageLinks();
243 if ( $params['effectivelanglinks'] ) {
244 // Link flags are ignored for now, but may in the future be
245 // included in the result.
246 $linkFlags = array();
247 wfRunHooks( 'LanguageLinks', array( $titleObj, &$langlinks, &$linkFlags ) );
253 if ( isset( $prop['langlinks'] ) ) {
254 $result_array['langlinks'] = $this->formatLangLinks( $langlinks );
256 if ( isset( $prop['languageshtml'] ) ) {
257 $languagesHtml = $this->languagesHtml( $langlinks );
259 $result_array['languageshtml'] = array();
260 ApiResult
::setContent( $result_array['languageshtml'], $languagesHtml );
262 if ( isset( $prop['categories'] ) ) {
263 $result_array['categories'] = $this->formatCategoryLinks( $p_result->getCategories() );
265 if ( isset( $prop['categorieshtml'] ) ) {
266 $categoriesHtml = $this->categoriesHtml( $p_result->getCategories() );
267 $result_array['categorieshtml'] = array();
268 ApiResult
::setContent( $result_array['categorieshtml'], $categoriesHtml );
270 if ( isset( $prop['links'] ) ) {
271 $result_array['links'] = $this->formatLinks( $p_result->getLinks() );
273 if ( isset( $prop['templates'] ) ) {
274 $result_array['templates'] = $this->formatLinks( $p_result->getTemplates() );
276 if ( isset( $prop['images'] ) ) {
277 $result_array['images'] = array_keys( $p_result->getImages() );
279 if ( isset( $prop['externallinks'] ) ) {
280 $result_array['externallinks'] = array_keys( $p_result->getExternalLinks() );
282 if ( isset( $prop['sections'] ) ) {
283 $result_array['sections'] = $p_result->getSections();
// Falls back to the prefixed page title when the parser output has no display title
286 if ( isset( $prop['displaytitle'] ) ) {
287 $result_array['displaytitle'] = $p_result->getDisplayTitle() ?
288 $p_result->getDisplayTitle() :
289 $titleObj->getPrefixedText();
// Both head props read from the context's OutputPage, so the parser output is added to it first
292 if ( isset( $prop['headitems'] ) ||
isset( $prop['headhtml'] ) ) {
293 $context = $this->getContext();
294 $context->setTitle( $titleObj );
295 $context->getOutput()->addParserOutputNoText( $p_result );
297 if ( isset( $prop['headitems'] ) ) {
298 $headItems = $this->formatHeadItems( $p_result->getHeadItems() );
300 $css = $this->formatCss( $context->getOutput()->buildCssLinksArray() );
302 $scripts = array( $context->getOutput()->getHeadScripts() );
304 $result_array['headitems'] = array_merge( $headItems, $css, $scripts );
307 if ( isset( $prop['headhtml'] ) ) {
308 $result_array['headhtml'] = array();
309 ApiResult
::setContent( $result_array['headhtml'], $context->getOutput()->headElement( $context->getSkin() ) );
313 if ( isset( $prop['iwlinks'] ) ) {
314 $result_array['iwlinks'] = $this->formatIWLinks( $p_result->getInterwikiLinks() );
317 if ( isset( $prop['wikitext'] ) ) {
318 $result_array['wikitext'] = array();
319 ApiResult
::setContent( $result_array['wikitext'], $this->content
->serialize( $format ) );
320 if ( !is_null( $this->pstContent
) ) {
321 $result_array['psttext'] = array();
322 ApiResult
::setContent( $result_array['psttext'], $this->pstContent
->serialize( $format ) );
325 if ( isset( $prop['properties'] ) ) {
326 $result_array['properties'] = $this->formatProperties( $p_result->getProperties() );
// The parse tree is produced by preprocessing $this->content directly, so it is wikitext-only
329 if ( $params['generatexml'] ) {
330 if ( $this->content
->getModel() != CONTENT_MODEL_WIKITEXT
) {
331 $this->dieUsage( "generatexml is only supported for wikitext content", "notwikitext" );
334 $wgParser->startExternalParse( $titleObj, $popts, OT_PREPROCESS
);
335 $dom = $wgParser->preprocessToDom( $this->content
->getNativeData() );
336 if ( is_callable( array( $dom, 'saveXML' ) ) ) {
337 $xml = $dom->saveXML();
339 $xml = $dom->__toString();
341 $result_array['parsetree'] = array();
342 ApiResult
::setContent( $result_array['parsetree'], $xml );
// Maps result keys to the indexed tag names applied by setIndexedTagNames()
345 $result_mapping = array(
348 'categories' => 'cl',
352 'externallinks' => 'el',
356 'properties' => 'pp',
358 $this->setIndexedTagNames( $result_array, $result_mapping );
359 $result->addValue( null, $this->getModuleName(), $result_array );
361 if ( !is_null( $oldLang ) ) {
362 $this->getContext()->setLanguage( $oldLang ); // Reset language to $oldLang
/**
 * Builds the ParserOptions used for this request.
 *
 * Starts from the page's own parser options for the current context and
 * then applies the request flags: 'disablepp' switches the limit report
 * off, 'preview' or 'sectionpreview' turn preview mode on, and
 * 'sectionpreview' additionally marks the parse as a section preview.
 *
 * @param WikiPage $pageObj Page that supplies the base parser options
 * @param array $params Extracted request parameters
 *
 * @return ParserOptions
 */
protected function makeParserOptions( WikiPage $pageObj, array $params ) {
	wfProfileIn( __METHOD__ );

	$options = $pageObj->makeParserOptions( $this->getContext() );

	$withLimitReport = !$params['disablepp'];
	$options->enableLimitReport( $withLimitReport );

	$isPreview = $params['preview'] || $params['sectionpreview'];
	$options->setIsPreview( $isPreview );
	$options->setIsSectionPreview( $params['sectionpreview'] );

	wfProfileOut( __METHOD__ );

	return $options;
}
/**
 * Produces the parser output for the current revision of a page and
 * stores the page content in $this->content.
 *
 * When a section was requested and the page has content, only that section
 * is parsed and the parser cache is not involved. Otherwise the page-level
 * parser output is used, which may come from, or be saved to, the parser
 * cache.
 *
 * @param $page WikiPage
 * @param $popts ParserOptions
 * @param $pageId Int Only used to describe the page in the section error message
 * @param $getWikitext Bool Whether to load the raw content again after a whole-page parse
 * @return ParserOutput
 */
private function getParsedContent( WikiPage $page, $popts, $pageId = null, $getWikitext = false ) {
	$this->content = $page->getContent( Revision::RAW ); //XXX: really raw?

	$parseSectionOnly = $this->section !== false && $this->content !== null;

	if ( !$parseSectionOnly ) {
		// Try the parser cache first
		// getParserOutput will save to Parser cache if able
		$output = $page->getParserOutput( $popts );
		if ( !$output ) {
			$this->dieUsage( "There is no revision ID {$page->getLatest()}", 'missingrev' );
		}
		if ( $getWikitext ) {
			$this->content = $page->getContent( Revision::RAW );
		}
		return $output;
	}

	// Label used in the error message if the section cannot be extracted
	$what = is_null( $pageId )
		? $page->getTitle()->getText()
		: 'page id ' . $pageId;
	$this->content = $this->getSectionContent( $this->content, $what );

	// Not cached (save or load)
	return $this->content->getParserOutput( $page->getTitle(), null, $popts );
}
/**
 * Extracts the section selected by $this->section from the given content.
 *
 * Dies with the 'nosuchsection' error code both when the section does not
 * exist (getSection() returned false) and when the content does not
 * support sections (getSection() returned null).
 *
 * @param Content $content Content to take the section from
 * @param string $what Description of the source, used in the error message
 * @return Content The extracted section
 */
private function getSectionContent( Content $content, $what ) {
	// Not cached (save or load)
	$extracted = $content->getSection( $this->section );

	if ( $extracted === null ) {
		$this->dieUsage( "Sections are not supported by " . $what, 'nosuchsection' );
	} elseif ( $extracted === false ) {
		$this->dieUsage( "There is no section {$this->section} in " . $what, 'nosuchsection' );
	}

	return $extracted;
}
/**
 * Converts language link strings ("prefix:title") into API result entries.
 *
 * Each entry carries the part before the first colon in 'lang', the
 * expanded URL in 'url' when the link forms a valid title, and the part
 * after the colon as the entry content.
 *
 * @param array $links Language links in "prefix:title" form
 * @return array List of result entries, one per link
 */
private function formatLangLinks( $links ) {
	$result = array();
	foreach ( $links as $link ) {
		$entry = array();
		$bits = explode( ':', $link, 2 );
		$title = Title::newFromText( $link );

		$entry['lang'] = $bits[0];
		// Title::newFromText() yields null for unparsable text; omit the URL then
		// instead of calling a method on null
		if ( $title ) {
			$entry['url'] = wfExpandUrl( $title->getFullURL(), PROTO_CURRENT );
		}
		// A link without a colon has no title part; avoid the undefined offset
		ApiResult::setContent( $entry, isset( $bits[1] ) ? $bits[1] : '' );
		$result[] = $entry;
	}
	return $result;
}
/**
 * Converts a category => sortkey map into API result entries.
 *
 * @param array $links Category name => sort key
 * @return array List of entries with 'sortkey' set and the category name as content
 */
private function formatCategoryLinks( $links ) {
	$formatted = array();
	foreach ( $links as $category => $sortkey ) {
		$item = array( 'sortkey' => $sortkey );
		ApiResult::setContent( $item, $category );
		$formatted[] = $item;
	}
	return $formatted;
}
/**
 * Renders the given categories through the context's skin.
 *
 * The categories are first added to the context's OutputPage, then the
 * skin is asked for its category output.
 *
 * @param array $categories Categories as returned by ParserOutput::getCategories()
 * @return string Value of Skin::getCategories()
 */
private function categoriesHtml( $categories ) {
	$ctx = $this->getContext();

	$output = $ctx->getOutput();
	$output->addCategoryLinks( $categories );

	$skin = $ctx->getSkin();
	return $skin->getCategories();
}
/**
 * Renders the language links as an HTML fragment: a heading followed by
 * one external-style anchor per link, joined with the pipe separator.
 *
 * @deprecated since 1.18 No modern skin generates language links this way, please use language links
 *   data to generate your own HTML.
 * @param $languages array Language links in "prefix:title" form
 * @return string HTML, or an empty string when interlanguage links are hidden or there are none
 */
private function languagesHtml( $languages ) {
	wfDeprecated( __METHOD__, '1.18' );

	global $wgContLang, $wgHideInterlanguageLinks;

	if ( $wgHideInterlanguageLinks || count( $languages ) == 0 ) {
		return '';
	}

	$s = htmlspecialchars( wfMessage( 'otherlanguages' )->text() . wfMessage( 'colon-separator' )->text() );

	$langs = array();
	foreach ( $languages as $l ) {
		$nt = Title::newFromText( $l );
		// Title::newFromText() returns null for text that is not a valid title;
		// skip such links rather than calling methods on null
		if ( !$nt ) {
			continue;
		}
		$text = Language::fetchLanguageName( $nt->getInterwiki() );

		// Fall back to the raw link text when no language name is known for the prefix
		$langs[] = Html::element( 'a',
			array( 'href' => $nt->getFullURL(), 'title' => $nt->getText(), 'class' => 'external' ),
			$text == '' ? $l : $text );
	}

	$s .= implode( wfMessage( 'pipe-separator' )->escaped(), $langs );

	// In a right-to-left content language the list is wrapped in an explicit LTR span
	if ( $wgContLang->isRTL() ) {
		$s = Html::rawElement( 'span', array( 'dir' => 'LTR' ), $s );
	}

	return $s;
}
/**
 * Flattens a namespace => ( title => page id ) map into API result entries.
 *
 * Every entry has the namespace in 'ns' and the full title text as
 * content; 'exists' is added (as an empty string) when the page id is
 * non-zero.
 *
 * @param array $links Links grouped by namespace
 * @return array List of result entries
 */
private function formatLinks( $links ) {
	$formatted = array();
	foreach ( $links as $ns => $pagesInNs ) {
		foreach ( $pagesInNs as $dbKey => $pageId ) {
			$item = array( 'ns' => $ns );
			$fullText = Title::makeTitle( $ns, $dbKey )->getFullText();
			ApiResult::setContent( $item, $fullText );
			if ( $pageId != 0 ) {
				$item['exists'] = '';
			}
			$formatted[] = $item;
		}
	}
	return $formatted;
}
/**
 * Converts a prefix => ( title => ... ) map of interwiki links into API
 * result entries.
 *
 * Every entry has the interwiki prefix in 'prefix'. When "prefix:title"
 * forms a valid title, 'url' holds its expanded URL and the content is
 * the title's full text; otherwise the raw "prefix:title" string is used
 * as content and no URL is given.
 *
 * @param array $iw Interwiki links grouped by prefix; only the keys of each group are used
 * @return array List of result entries
 */
private function formatIWLinks( $iw ) {
	$result = array();
	foreach ( $iw as $prefix => $titles ) {
		foreach ( array_keys( $titles ) as $titleText ) {
			$entry = array();
			$entry['prefix'] = $prefix;

			$fullText = "{$prefix}:{$titleText}";
			$title = Title::newFromText( $fullText );
			// Title::newFromText() returns null for invalid text, so both the URL and
			// the normalised text are only taken from the Title when one was created
			if ( $title ) {
				$entry['url'] = wfExpandUrl( $title->getFullURL(), PROTO_CURRENT );
				$fullText = $title->getFullText();
			}

			ApiResult::setContent( $entry, $fullText );
			$result[] = $entry;
		}
	}
	return $result;
}
/**
 * Converts a tag => content map of head items into API result entries.
 *
 * @param array $headItems Head items keyed by tag
 * @return array List of entries with 'tag' set and the item as content
 */
private function formatHeadItems( $headItems ) {
	$formatted = array();
	foreach ( $headItems as $tag => $markup ) {
		$item = array( 'tag' => $tag );
		ApiResult::setContent( $item, $markup );
		$formatted[] = $item;
	}
	return $formatted;
}
/**
 * Converts a name => value map of page properties into API result entries.
 *
 * @param array $properties Properties keyed by name
 * @return array List of entries with 'name' set and the value as content
 */
private function formatProperties( $properties ) {
	$formatted = array();
	foreach ( $properties as $propName => $propValue ) {
		$item = array( 'name' => $propName );
		ApiResult::setContent( $item, $propValue );
		$formatted[] = $item;
	}
	return $formatted;
}
/**
 * Converts a file => link map of CSS links into API result entries.
 *
 * @param array $css CSS links keyed by file
 * @return array List of entries with 'file' set and the link as content
 */
private function formatCss( $css ) {
	$formatted = array();
	foreach ( $css as $file => $linkMarkup ) {
		$item = array( 'file' => $file );
		ApiResult::setContent( $item, $linkMarkup );
		$formatted[] = $item;
	}
	return $formatted;
}
/**
 * Applies indexed tag names to the sub-arrays of a result array.
 *
 * For every key in $mapping that is set in $array, the corresponding
 * sub-array is passed to ApiResult::setIndexedTagName() with the mapped
 * tag name; keys that are absent are ignored.
 *
 * @param array &$array Result array, modified in place
 * @param array $mapping Result key => tag name
 */
private function setIndexedTagNames( &$array, $mapping ) {
	foreach ( $mapping as $resultKey => $tagName ) {
		if ( !isset( $array[$resultKey] ) ) {
			continue;
		}
		$this->getResult()->setIndexedTagName( $array[$resultKey], $tagName );
	}
}
/**
 * Declares the parameters accepted by this module.
 *
 * Each entry maps a parameter name either to its default value or to a
 * settings array keyed by ApiBase::PARAM_* constants (default, type,
 * multi-value flag). The allowed values of 'contentformat' and
 * 'contentmodel' are taken from ContentHandler.
 *
 * @return array
 */
576 public function getAllowedParams() {
579 ApiBase
::PARAM_DFLT
=> 'API',
585 ApiBase
::PARAM_TYPE
=> 'integer',
587 'redirects' => false,
589 ApiBase
::PARAM_TYPE
=> 'integer',
592 ApiBase
::PARAM_DFLT
=> 'text|langlinks|categories|links|templates|images|externallinks|sections|revid|displaytitle|iwlinks|properties',
593 ApiBase
::PARAM_ISMULTI
=> true,
594 ApiBase
::PARAM_TYPE
=> array(
616 'effectivelanglinks' => false,
619 'disablepp' => false,
620 'generatexml' => false,
622 'sectionpreview' => false,
623 'contentformat' => array(
624 ApiBase
::PARAM_TYPE
=> ContentHandler
::getAllContentFormats(),
626 'contentmodel' => array(
627 ApiBase
::PARAM_TYPE
=> ContentHandler
::getContentModels(),
/**
 * Returns the help text for the module's parameters.
 *
 * Entries are either a single string or a list of lines. $p is the module
 * prefix, interpolated wherever a description refers to another parameter.
 *
 * @return array Parameter name => description
 */
632 public function getParamDescription() {
633 $p = $this->getModulePrefix();
635 'text' => 'Wikitext to parse',
636 'summary' => 'Summary to parse',
637 'redirects' => "If the {$p}page or the {$p}pageid parameter is set to a redirect, resolve it",
638 'title' => 'Title of page the text belongs to',
639 'page' => "Parse the content of this page. Cannot be used together with {$p}text and {$p}title",
640 'pageid' => "Parse the content of this page. Overrides {$p}page",
641 'oldid' => "Parse the content of this revision. Overrides {$p}page and {$p}pageid",
643 'Which pieces of information to get',
644 ' text - Gives the parsed text of the wikitext',
645 ' langlinks - Gives the language links in the parsed wikitext',
646 ' categories - Gives the categories in the parsed wikitext',
647 ' categorieshtml - Gives the HTML version of the categories',
648 ' languageshtml - Gives the HTML version of the language links',
649 ' links - Gives the internal links in the parsed wikitext',
650 ' templates - Gives the templates in the parsed wikitext',
651 ' images - Gives the images in the parsed wikitext',
652 ' externallinks - Gives the external links in the parsed wikitext',
653 ' sections - Gives the sections in the parsed wikitext',
654 ' revid - Adds the revision ID of the parsed page',
655 ' displaytitle - Adds the title of the parsed wikitext',
656 ' headitems - Gives items to put in the <head> of the page',
657 ' headhtml - Gives parsed <head> of the page',
658 ' iwlinks - Gives interwiki links in the parsed wikitext',
659 ' wikitext - Gives the original wikitext that was parsed',
660 ' properties - Gives various properties defined in the parsed wikitext',
662 'effectivelanglinks' => array(
663 'Includes language links supplied by extensions',
664 '(for use with prop=langlinks|languageshtml)',
667 'Do a pre-save transform on the input before parsing it',
668 'Ignored if page, pageid or oldid is used'
671 'Do a pre-save transform (PST) on the input, but don\'t parse it',
672 'Returns the same wikitext, after a PST has been applied. Ignored if page, pageid or oldid is used'
674 'uselang' => 'Which language to parse the request in',
675 'section' => 'Only retrieve the content of this section number',
676 'disablepp' => 'Disable the PP Report from the parser output',
677 'generatexml' => 'Generate XML parse tree (requires prop=wikitext)',
678 'preview' => 'Parse in preview mode',
679 'sectionpreview' => 'Parse in section preview mode (enables preview mode too)',
680 'contentformat' => 'Content serialization format used for the input text',
681 'contentmodel' => 'Content model of the new content',
/**
 * Returns the module description shown in the API help, one array element
 * per line.
 *
 * @return array
 */
public function getDescription() {
	$lines = array();
	$lines[] = 'Parses wikitext and returns parser output';
	$lines[] = 'See the various prop-Modules of action=query to get information from the current version of a page';

	return $lines;
}
/**
 * Lists the errors this module can raise, appended to those declared by
 * the parent class.
 *
 * @return array
 */
public function getPossibleErrors() {
	$inherited = parent::getPossibleErrors();

	$own = array(
		array( 'code' => 'params', 'info' => 'The page parameter cannot be used together with the text and title parameters' ),
		array( 'code' => 'missingrev', 'info' => 'There is no revision ID oldid' ),
		array( 'code' => 'permissiondenied', 'info' => 'You don\'t have permission to view deleted revisions' ),
		array( 'code' => 'missingtitle', 'info' => 'The page you specified doesn\'t exist' ),
		array( 'code' => 'nosuchsection', 'info' => 'There is no section sectionnumber in page' ),
		array( 'nosuchpageid' ),
		array( 'invalidtitle', 'title' ),
		array( 'code' => 'parseerror', 'info' => 'Failed to parse the given text.' ),
		array( 'code' => 'notwikitext', 'info' => 'The requested operation is only supported on wikitext content.' ),
		array( 'code' => 'pagecannotexist', 'info' => "Namespace doesn't allow actual pages" ),
	);

	return array_merge( $inherited, $own );
}
/**
 * Returns example requests for the API help.
 *
 * @return array Example URL => description
 */
public function getExamples() {
	$examples = array();
	$examples['api.php?action=parse&page=Project:Sandbox'] = 'Parse a page';
	$examples['api.php?action=parse&text={{Project:Sandbox}}'] = 'Parse wikitext';
	$examples['api.php?action=parse&text={{PAGENAME}}&title=Test'] = 'Parse wikitext, specifying the page title';
	$examples['api.php?action=parse&summary=Some+[[link]]&prop='] = 'Parse a summary';

	return $examples;
}
/**
 * Returns the URL of the on-wiki documentation for this module.
 *
 * @return string
 */
public function getHelpUrls() {
	$helpUrl = 'https://www.mediawiki.org/wiki/API:Parsing_wikitext#parse';

	return $helpUrl;
}